Skip to content

Commit 6ae65a9

Browse files
committed
refactor rcorrected -> corrected
1 parent b7c9ece commit 6ae65a9

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

kbbq/compare_reads.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ def load_positions(posfile):
3434
d.setdefault(chrom, list()).append(int(pos)-1)
3535
return d
3636

37-
def find_rcorrected_sites(uncorrfile, corrfile):
38-
print(tstamp(), "Finding Rcorrected sites . . .", file=sys.stderr)
37+
def find_corrected_sites(uncorrfile, corrfile):
38+
print(tstamp(), "Finding corrected sites . . .", file=sys.stderr)
3939
uncorr_reads = list(pysam.FastxFile(uncorrfile))
4040
corr_reads = list(pysam.FastxFile(corrfile))
4141
#verify the sequences are the same and can be accessed by index
@@ -50,7 +50,7 @@ def find_rcorrected_sites(uncorrfile, corrfile):
5050
names = dict()
5151
seqlen = len(uncorr_reads[0].get_quality_array())
5252
rawquals = np.zeros([len(uncorr_reads), seqlen], dtype = np.int)
53-
rcorrected = np.zeros([len(uncorr_reads),seqlen], dtype = np.bool)
53+
corrected = np.zeros([len(uncorr_reads),seqlen], dtype = np.bool)
5454
seqs = np.zeros(len(uncorr_reads), dtype = 'U' + str(seqlen))
5555
rgs = np.zeros(len(uncorr_reads), dtype = 'U7')
5656
for i in range(len(uncorr_reads)):
@@ -61,13 +61,13 @@ def find_rcorrected_sites(uncorrfile, corrfile):
6161
seqs[i] = uncorr_reads[i].sequence
6262
uncorr_s = np.array(list(uncorr_reads[i].sequence), dtype = np.unicode)
6363
corr_s = np.array(list(corr_reads[i].sequence), dtype = np.unicode)
64-
rcorrected[i] = (uncorr_s == corr_s)
65-
return names, rawquals.copy(), rcorrected.copy(), seqs.copy(), rgs.copy(), seqlen
64+
corrected[i] = (uncorr_s == corr_s)
65+
return names, rawquals.copy(), corrected.copy(), seqs.copy(), rgs.copy(), seqlen
6666

67-
def train_regression(rawquals, rcorrected, tol = 1e-4):
67+
def train_regression(rawquals, corrected, tol = 1e-4):
6868
print(tstamp(), "Doing Logit Regression", file=sys.stderr)
6969
lr = LR(tol = tol)
70-
lr = lr.fit(rawquals.flatten().reshape(-1,1), rcorrected.flatten())
70+
lr = lr.fit(rawquals.flatten().reshape(-1,1), corrected.flatten())
7171
return lr
7272

7373
def recalibrate(lr, q):
@@ -806,10 +806,10 @@ def main():
806806
corrfile = "nospace.lighter.fq"
807807
bamfilename = "only_confident.sorted.recal.bam"
808808
fastafilename = "../chr1.renamed.fa"
809-
names, rawquals, rcorrected, seqs, rgs, seqlen = find_rcorrected_sites(uncorrfile, corrfile)
809+
names, rawquals, corrected, seqs, rgs, seqlen = find_corrected_sites(uncorrfile, corrfile)
810810

811-
#rcorrected is true when the bases match the original, false otherwise
812-
#hence rcorrected == false is where the errors are
811+
#corrected is true when the bases match the original, false otherwise
812+
#hence corrected == false is where the errors are
813813

814814
bad_positions = load_positions("variable_sites.txt")
815815
cachefile = 'cached_recal_errs.npz'
@@ -824,7 +824,7 @@ def main():
824824
gatkcalibratedquals, erroneous, skips = find_errors(bamfilename, fastafilename, bad_positions, names, seqlen)
825825
np.savez_compressed(cachefile, gatkcalibratedquals = gatkcalibratedquals, erroneous = erroneous, skips = skips)
826826

827-
#important arrays: names, rawquals, rcorrected, calibquals, gatkcalibratedquals, erroneous, hmmquals
827+
#important arrays: names, rawquals, corrected, calibquals, gatkcalibratedquals, erroneous, hmmquals
828828

829829
dinucleotide = get_dinucleotide(seqs, rawquals)
830830
unique_rgs = np.unique(rgs)
@@ -840,7 +840,7 @@ def main():
840840
reversecycle = np.zeros(len(names), dtype = np.bool)
841841
reversecycle[np.array(list(names.values()), dtype = np.int)] = np.char.endswith(np.array(list(names.keys()), dtype = np.unicode),'/2')
842842

843-
dq_calibrated = delta_q_recalibrate(rawquals, rgs, dinucleotide, np.logical_not(rcorrected), reversecycle)
843+
dq_calibrated = delta_q_recalibrate(rawquals, rgs, dinucleotide, np.logical_not(corrected), reversecycle)
844844
#custom_gatk_calibrated = delta_q_recalibrate(rawquals, rgs, dinucleotide, erroneous, reversecycle)
845845
#from_table = table_recalibrate(rawquals, tablefile, unique_pus, dinuc_order, seqlen, reversecycle, rgs, dinucleotide)
846846
#assert np.array_equal(from_table, gatkcalibratedquals)

0 commit comments

Comments
 (0)