-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_single_susp.m~
executable file
·79 lines (69 loc) · 2.33 KB
/
run_single_susp.m~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
function [per_sample_susp,per_sample_susp_in_gene_set] = run_single_susp(brew,susp,susp_cutoff,cell_name)
param = {'mkfid'};
dflts = {true}'
%% Check IDs
common_ids = intersect(brew.cid,susp.cid);
brew = ds_slice(brew,'cid',common_ids);
susp = ds_slice(susp,'cid',common_ids);
assert(isequal(brew.rid,susp.rid))
assert(isequal(brew.cid,susp.cid))
%% Overall CDF
figure
[~,~,x,f] = kde(susp.mat(:),2^10,0,50);
plot(x,f);
hold on
title([cell_name ' - Suspiciousness CDF'])
xlabel('Suspiciousness')
grid on
namefig(strcat(cell_name,'_susp_cdf'))
xlim([3 10])
%% Susp per sample
figure
per_sample_susp = sum(susp.mat > susp_cutoff);
med = median(per_sample_susp);
histogram(per_sample_susp)%,'normalization','cdf')
hold on
ax = gca;
plot([med med],[0 ax.YLim(2)],'r-')
title_str = sprintf('%s - Suspect genes per signature\n cutoff = %d \n num_sig = %d\n median = %d',cell_name,susp_cutoff,length(brew.cid),med);
title(title_str,'Interpreter','none')
xlabel('Number of Suspect Genes')
ylabel('Count')
xlim([0 200])
namefig(strcat(cell_name,'_susp_per_sig'))
%% Number of suspects in top/bottom N genes
N = 50;
M = length(brew.rid);
% [~,sort_idx] = sort(brew.mat);
% sig_idx = sort_idx([1:N (M-N):M],:)
% susp.mat(sig_idx,:)
% size(susp.mat(sig_idx,:))
% per_sample_susp_in_gene_set = sum(susp.mat(sig_idx,:) > susp_cutoff)
per_sample_susp_in_gene_set = zeros(1,length(brew.cid));
for ii = 1:length(brew.cid)
[~,sort_idx] = sort(brew.mat(:,ii));
temp_idx = sort_idx([1:N (M-N):M]);
per_sample_susp_in_gene_set(ii) = sum(susp.mat(temp_idx,ii) > susp_cutoff);
end
figure
histogram(per_sample_susp_in_gene_set)%,'normalization','cdf')
med = median(per_sample_susp_in_gene_set);
hold on
ax = gca;
plot([med med],[0 ax.YLim(2)],'r-')
title_str = sprintf('%s - Suspect entries in gene set per signature\n cutoff = %d \n num_sig = %d \n median = %d',cell_name,susp_cutoff,length(brew.cid),med);
title(title_str,'Interpreter','none')
xlabel('Number of Suspect Genes in top/bottom 50')
ylabel('Count')
namefig(strcat(cell_name,'_susp_per_sig_gene_set'))
%Scatter
figure
scatter(per_sample_susp,per_sample_susp_in_gene_set)
xlabel('Number of suspects per sample')
ylabel('Number of suspects in gene set per sample')
title(cell_name)
ax = gca;
temp_lim = min(horzcat(ax.YLim(2),ax.XLim(2)));
hold on
plot([0 temp_lim],[0 temp_lim],'r:')
namefig(strcat(cell_name,'_susp_scatter'))