Skip to content

Commit a7a8d13

Browse files
committed
update interface, work on #7
1 parent bdac19d commit a7a8d13

File tree

1 file changed

+13
-4
lines changed

1 file changed

+13
-4
lines changed

kbbq/benchmark.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ def get_ref_dict(reffilename):
1212
ref = {chrom : np.array(list(fasta.fetch(reference = chrom)), dtype = np.unicode) for chrom in fasta.references}
1313
return ref
1414

15+
def get_var_sites(vcf):
    """
    Collect the position of every record in a variant file, grouped by
    chromosome.

    :param vcf: value handed to ``pysam.VariantFile`` to open the variant
        file (presumably a path to a VCF/BCF file -- confirm with callers).
    :return: dict mapping each chromosome that has at least one record to a
        list of ``record.pos - 1`` values, in the order the records are read.
        Chromosomes with no records do not appear as keys.
    """
    sites = dict()
    # Open the file in a context manager so the underlying handle is released
    # once iteration finishes or if an exception is raised part-way through.
    with pysam.VariantFile(vcf) as variants:
        for record in variants:
            # pysam documents record.pos as 1-based; subtract 1 so the stored
            # value can index directly into 0-based per-chromosome arrays.
            sites.setdefault(record.chrom, list()).append(int(record.pos) - 1)
    return sites
21+
1522
def get_full_skips(refdict, var_sites):
1623
skips = {chrom: np.zeros(len(refdict[chrom]), dtype = np.bool) for chrom in refdict.keys()}
1724
for chrom in skips.keys():
@@ -71,7 +78,7 @@ def calculate_q(errors, quals):
7178
actual_q[nonzero] = q
7279
return actual_q, numtotal
7380

74-
def benchmark_fastq(fqfile, bamfile, fafile, varfile):
81+
def benchmark_fastq(fqfile, bamfile, ref, varfile):
7582
var_sites = compare_reads.load_positions(varfile)
7683
ref = get_ref_dict(fafile)
7784
fullskips = get_full_skips(ref, var_sites)
@@ -108,7 +115,7 @@ def print_benchmark(actual_q, label, nbases):
108115
for pq, aq, nb in zip(predicted_q, actual_q, nbases):
109116
print(pq, aq, label, nb, sep = "\t")
110117

111-
def benchmark(bamfile, fafile, varfile, fastqfile = None, label = None):
118+
def benchmark(bamfile, fafile, vcffile, fastq = None, label = None):
112119
"""
113120
Perform the benchmark and print the results to stdout.
114121
@@ -119,10 +126,12 @@ def benchmark(bamfile, fafile, varfile, fastqfile = None, label = None):
119126
reads.
120127
"""
121128
bam = pysam.AlignmentFile(bamfile, 'r')
129+
ref = get_ref_dict(fafile)
130+
var_sites = get_var_sites(vcffile)
122131
if fastqfile is not None:
123-
actual_q, nbases = benchmark_fastq(fastqfile, bamfile, fafile, varfile)
132+
actual_q, nbases = benchmark_fastq(fastqfile, bam, ref, var_sites)
124133
label = (fastqfile if label is None else label)
125134
else:
126-
actual_q, nbases = benchmark_bam(bamfile, fafile, varfile)
135+
actual_q, nbases = benchmark_bam(bam, ref, var_sites)
127136
label = (bamfile if label is None else label)
128137
print_benchmark(actual_q, label, nbases)

0 commit comments

Comments
 (0)