Skip to content

Commit

Permalink
Fixed a bug where all sequences were mapped to the chr1 for some data…
Browse files Browse the repository at this point in the history
…sets
  • Loading branch information
cegonse committed Jul 20, 2017
1 parent 69b9e62 commit ffaa672
Show file tree
Hide file tree
Showing 20 changed files with 474 additions and 1,000 deletions.
Empty file added .Rhistory
Empty file.
2 changes: 1 addition & 1 deletion src/batch_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "commons/system_utils.h"

#include "containers/list.h"
#include "containers/cprops/hashtable.h"
//#include "containers/cprops/hashtable.h"

#include "bioformats/fastq/fastq_file.h"
#include "bioformats/fastq/fastq_batch.h"
Expand Down
2 changes: 0 additions & 2 deletions src/breakpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ cigar_code_t *cigar_code_new() {
cigar_code_t *cigar_code_new_by_string(char *cigar_str) {
cigar_code_t *p = cigar_code_new();
int cigar_len = strlen(cigar_str);
int cigar_op_counter = 0;
int c = 0;
char op;
char op_value[1024];
Expand Down Expand Up @@ -275,7 +274,6 @@ cigar_code_t *generate_cigar_code(char *query_map, char *ref_map, unsigned int m
unsigned char status;
unsigned char transition;
char operation;
char operation_number[map_len * 2];


if (query_start > 0) {
Expand Down
21 changes: 15 additions & 6 deletions src/bs/bs_aligner.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ void run_bs_aligner(genome_t *genome2, genome_t *genome1, genome_t *genome,
// Preparing input FastQ file
fastq_batch_reader_input_t reader_input;
fastq_batch_reader_input_init(options->in_filename, options->in_filename2,
options->pair_mode, options->batch_size,
NULL, &reader_input);
options->pair_mode, options->batch_size, NULL,
0, &reader_input);

if (options->pair_mode == SINGLE_END_MODE) {
reader_input.fq_file1 = fastq_fopen(options->in_filename);
Expand Down Expand Up @@ -102,7 +102,6 @@ void run_bs_aligner(genome_t *genome2, genome_t *genome1, genome_t *genome,
int gen_loads = load_encode_context(options->bwt_dirname, sw_input.valuesCT, sw_input.valuesGA);

// Workflow management
struct timeval start, end;

// Added option write_mcontext, true//false in order to write metilation context files
batch_t *batch = batch_new(&bwt_input, &region_input, &cal_input,
Expand All @@ -123,6 +122,15 @@ void run_bs_aligner(genome_t *genome2, genome_t *genome1, genome_t *genome,
bs_status_stage
};

workflow_stage_workspace_cleanup_function_t cleanup_functions[] = {
clean_bwt_stage_bs_workspace,
clean_apply_caling_bs_stage_workspace,
clean_apply_pair_stage_workspace,
clean_apply_sw_stage_workspace,
clean_prepare_alignments_bs_stage_workspace,
clean_methylation_stage_workspace
};

char *stage_labels[] = {
"BWT",
"CAL",
Expand All @@ -132,11 +140,12 @@ void run_bs_aligner(genome_t *genome2, genome_t *genome1, genome_t *genome,
"BS STATUS"
};

workflow_set_stages(6, &stage_functions, stage_labels, wf);
workflow_set_stages(6, (workflow_stage_function_t*)&stage_functions, (char**)stage_labels, wf,
(workflow_stage_workspace_cleanup_function_t*)&cleanup_functions);

// Optional producer and consumer functions
workflow_set_producer(fastq_reader, "FastQ reader", wf);
workflow_set_consumer(bs_writer, "BAM BS writer", wf);
workflow_set_producer((workflow_producer_function_t)fastq_reader, "FastQ reader", wf);
workflow_set_consumer((workflow_consumer_function_t)bs_writer, "BAM BS writer", wf);

extern double time_alig;
extern struct timeval time_start_alig, time_end_alig;
Expand Down
16 changes: 4 additions & 12 deletions src/bs/bs_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ int bs_writer(void *data) {

batch_t *batch = (batch_t *) data;
fastq_read_t *fq_read;
array_list_t *array_list;
size_t num_items;

mapping_batch_t *mapping_batch = (mapping_batch_t *) batch->mapping_batch;
Expand All @@ -25,26 +24,17 @@ int bs_writer(void *data) {

batch_writer_input_t *writer_input = batch->writer_input;
bam_file_t *bam_file = writer_input->bam_file;
linked_list_t *linked_list = writer_input->list_p;
size_t num_reads = array_list_size(mapping_batch->fq_batch);
size_t num_mapped_reads = 0;
size_t total_mappings = 0;
unsigned char found_p1 = 0;
unsigned char found_p2 = 0;
int i = 0;

extern size_t bwt_correct;
extern size_t bwt_error;
extern pthread_mutex_t bwt_mutex;

writer_input->total_batches++;

array_list_t **mapping_lists;
int *found = (int *) calloc(num_reads, sizeof(int));
metil_file_t *metil_file = writer_input->metil_file;

array_list_t *bs_stat = mapping_batch->bs_status;
char *bs_seq;
size_t num_chromosomes = batch->bwt_input->genome->num_chromosomes;

// Process mapping_lists and mapping_lists2
Expand Down Expand Up @@ -86,7 +76,7 @@ int bs_writer(void *data) {

if (basic_st->total_reads >= writer_input->limit_print) {
LOG_DEBUG_F("TOTAL READS PROCESS: %lu\n", basic_st->total_reads);
LOG_DEBUG_F("\tTotal Reads Mapped: %lu(%.2f%)\n",
LOG_DEBUG_F("\tTotal Reads Mapped: %lu(%.2f%%)\n",
basic_st->num_mapped_reads,
(float) (basic_st->num_mapped_reads*100)/(float)(basic_st->total_reads));
writer_input->limit_print += 1000000;
Expand All @@ -110,6 +100,8 @@ int bs_writer(void *data) {
stop_timer(start, end, time);
timing_add(time, BAM_WRITER, timing);
}

return 0;
}

//--------------------------------------------------------------------
Expand Down Expand Up @@ -148,7 +140,7 @@ void write_unmapped_read(fastq_read_t *fq_read, bam_file_t *bam_file) {
bam1_t *bam1;

// Calculating cigar
sprintf(aux, "%luX", fq_read->length);
sprintf(aux, "%iX", fq_read->length);

alig = alignment_new();
header_len = strlen(fq_read->id);
Expand Down
60 changes: 41 additions & 19 deletions src/bs/methylation.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,29 @@ void remove_duplicates(size_t reads, array_list_t **list, array_list_t **list2)

//------------------------------------------------------------------------------------

int methylation_status_report(sw_server_input_t* input, batch_t *batch) {
void clean_methylation_stage_workspace(void *workspace) {
if (workspace) {
methylation_stage_workspace_t* wf = workspace;

if (wf->add_status_seq) {
free(wf->add_status_seq);
}

if (wf->add_status_gen) {
free(wf->add_status_gen);
}

if (wf->add_status_seq_dup) {
free(wf->add_status_seq_dup);
}

free(wf);
}
}

//------------------------------------------------------------------------------------

int methylation_status_report(sw_server_input_t* input, batch_t *batch, methylation_stage_workspace_t *workspace) {

LOG_DEBUG("========= METHYLATION STATUS REPORT START =========\n");

Expand All @@ -350,7 +372,6 @@ int methylation_status_report(sw_server_input_t* input, batch_t *batch) {
size_t num_items;
size_t num_reads = array_list_size(mapping_batch->fq_batch);
genome_t *genome = input->genome_p;
fastq_read_t *orig;

struct timeval methylation_time_start, methylation_time_end;
float methylation_time = 0.0f;
Expand All @@ -373,7 +394,7 @@ int methylation_status_report(sw_server_input_t* input, batch_t *batch) {

// Mapped or not mapped ?
if (num_items != 0) {
add_metilation_status(mapping_lists[i], bs_context, genome, mapping_batch->fq_batch, i, k);
add_metilation_status(mapping_lists[i], bs_context, genome, mapping_batch->fq_batch, i, k, workspace);
}
}
}
Expand All @@ -388,7 +409,9 @@ int methylation_status_report(sw_server_input_t* input, batch_t *batch) {

//------------------------------------------------------------------------------------

void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context, genome_t * genome, array_list_t * orig_seq, size_t index, int conversion) {
void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context,
genome_t * genome, array_list_t * orig_seq, size_t index, int conversion,
methylation_stage_workspace_t *workspace) {
size_t num_items = array_list_size(array_list);
size_t len, end, start;

Expand All @@ -397,10 +420,8 @@ void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context, g

alignment_t *alig;
fastq_read_t *orig;
metil_data_t *metil_data;

char *seq, *gen;
char *new_stage;

int new_strand;
int write_file = 1;
Expand All @@ -414,14 +435,24 @@ void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context, g
seq = obtain_seq(alig, orig);

if (alig->seq_strand == 1) {
char *seq_dup = strdup(seq);
if (workspace->add_status_seq_dup == NULL) {
workspace->add_status_seq_dup = strdup(seq);
} else {
strcpy(workspace->add_status_seq_dup, seq);
}

char *seq_dup = workspace->add_status_seq_dup;
rev_comp(seq_dup, seq, orig->length);
free(seq_dup);
}

// Increase the counter number of bases
len = orig->length;
gen = (char *)calloc(len + 6, sizeof(char));

if (workspace->add_status_gen == NULL) {
workspace->add_status_gen = calloc(len + 6, sizeof(char));
}

gen = workspace->add_status_gen;

start = alig->position + 1;
end = start + len + 4;
Expand Down Expand Up @@ -665,14 +696,6 @@ void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context, g
}
}

if (seq) {
free(seq);
}

if (gen) {
free(gen);
}

// Set the number of methylated C's to the ZM tag
bam_tag_set_scalar(tag_zm, &num_methyl);

Expand All @@ -699,8 +722,7 @@ void write_bs_context(metil_file_t *metil_file, bs_context_t *bs_context, size_t
array_list_t *context_CHH = bs_context->context_CHH;
array_list_t *context_MUT = bs_context->context_MUT;

size_t num_items, num_reads;
char *bs_seq;
size_t num_items;
int file_error;
metil_data_t *metil_data;

Expand Down
25 changes: 22 additions & 3 deletions src/bs/methylation.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include "pair_server.h"

#include "hash_table.h"
#include "array_list_bs.h"

#include "bioformats/bam/bam_tags.h"
#include "bwt_server.h"
Expand All @@ -37,6 +36,17 @@
#define AT 3


typedef struct metil_data {
char* query_name;
char status;
int chromosome;
size_t start;
char context;
int strand;
int zone;
} metil_data_t;


//====================================================================================

typedef struct metil_file {
Expand Down Expand Up @@ -76,6 +86,12 @@ typedef struct metil_file {

//====================================================================================

typedef struct methylation_stage_workspace {
char* add_status_seq;
char* add_status_gen;
char* add_status_seq_dup;
} methylation_stage_workspace_t;

/**
* @brief Initialization of the metilation structure.
* @param metil_file metilation structure to use in the final analysis.
Expand Down Expand Up @@ -195,7 +211,8 @@ void revert_mappings_seqs(array_list_t **src1, array_list_t **src2, array_list_t
*
*
*/
int methylation_status_report(sw_server_input_t* input, batch_t *batch);
int methylation_status_report(sw_server_input_t* input, batch_t *batch, methylation_stage_workspace_t *workspace);
void clean_methylation_stage_workspace(void *workspace);

//====================================================================================

Expand All @@ -207,7 +224,9 @@ int methylation_status_report(sw_server_input_t* input, batch_t *batch);
*
*/
//void add_metilation_status(array_list_t *array_list, array_list_t *list, bs_context_t *bs_context, genome_t * genome, array_list_t * orig_seq, size_t index, int conversion);
void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context, genome_t * genome, array_list_t * orig_seq, size_t index, int conversion);
void add_metilation_status(array_list_t *array_list, bs_context_t *bs_context,
genome_t * genome, array_list_t * orig_seq, size_t index, int conversion,
methylation_stage_workspace_t *workspace);

//====================================================================================

Expand Down
3 changes: 2 additions & 1 deletion src/bwt_server.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ void bwt_server_input_init(list_t* read_list_p, unsigned int batch_size, bwt_opt
// apply_bwt
//====================================================================================

int apply_bwt_bs(bwt_server_input_t* input, batch_t *batch_p);
int apply_bwt_bs(bwt_server_input_t* input, batch_t *batch_p, bwt_stage_bs_workspace_t *workspace);
void clean_bwt_stage_bs_workspace(void *workspace);

#endif // BW_SERVER_H
Loading

0 comments on commit ffaa672

Please sign in to comment.