@@ -12,6 +12,13 @@ def get_ref_dict(reffilename):
12
12
ref = {chrom : np .array (list (fasta .fetch (reference = chrom )), dtype = np .unicode ) for chrom in fasta .references }
13
13
return ref
14
14
15
+ def get_var_sites (vcf ):
16
+ vcf = pysam .VariantFile (vcf )
17
+ d = dict ()
18
+ for record in vcf :
19
+ d .setdefault (record .chrom , list ()).append (int (record .pos )- 1 )
20
+ return d
21
+
15
22
def get_full_skips (refdict , var_sites ):
16
23
skips = {chrom : np .zeros (len (refdict [chrom ]), dtype = np .bool ) for chrom in refdict .keys ()}
17
24
for chrom in skips .keys ():
@@ -71,7 +78,7 @@ def calculate_q(errors, quals):
71
78
actual_q [nonzero ] = q
72
79
return actual_q , numtotal
73
80
74
- def benchmark_fastq (fqfile , bamfile , fafile , varfile ):
81
+ def benchmark_fastq (fqfile , bamfile , ref , varfile ):
75
82
var_sites = compare_reads .load_positions (varfile )
76
83
ref = get_ref_dict (fafile )
77
84
fullskips = get_full_skips (ref , var_sites )
@@ -108,7 +115,7 @@ def print_benchmark(actual_q, label, nbases):
108
115
for pq , aq , nb in zip (predicted_q , actual_q , nbases ):
109
116
print (pq , aq , label , nb , sep = "\t " )
110
117
111
- def benchmark (bamfile , fafile , varfile , fastqfile = None , label = None ):
118
+ def benchmark (bamfile , fafile , vcffile , fastq = None , label = None ):
112
119
"""
113
120
Perform the benchmark and print the results to stdout.
114
121
@@ -119,10 +126,12 @@ def benchmark(bamfile, fafile, varfile, fastqfile = None, label = None):
119
126
reads.
120
127
"""
121
128
bam = pysam .AlignmentFile (bamfile , 'r' )
129
+ ref = get_ref_dict (fafile )
130
+ var_sites = get_var_sites (vcffile )
122
131
if fastqfile is not None :
123
- actual_q , nbases = benchmark_fastq (fastqfile , bamfile , fafile , varfile )
132
+ actual_q , nbases = benchmark_fastq (fastqfile , bam , ref , var_sites )
124
133
label = (fastqfile if label is None else label )
125
134
else :
126
- actual_q , nbases = benchmark_bam (bamfile , fafile , varfile )
135
+ actual_q , nbases = benchmark_bam (bam , ref , var_sites )
127
136
label = (bamfile if label is None else label )
128
137
print_benchmark (actual_q , label , nbases )
0 commit comments