From fa66e5dc78c4a350aa2dd086a466c91fd714eaf0 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 11:11:39 -0500 Subject: [PATCH 001/123] added assemble_denovo_bulk with one task inside scatter --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 pipes/WDL/workflows/assemble_denovo_bulk.wdl diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl new file mode 100644 index 000000000..a064e369b --- /dev/null +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -0,0 +1,14 @@ +import "tasks_taxon_filter.wdl" as taxon_filter +import "tasks_assembly.wdl" as assembly + +workflow assemble_denovo_bulk { + + Array[File] reads_unmapped_bam_files + + scatter(reads_unmapped_bam in reads_unmapped_bam_files) { + call taxon_filter.filter_to_taxon { + input: + reads_unmapped_bam = reads_unmapped_bam + } + } +} \ No newline at end of file From 77c7c4d38e17608b5f3a9e05308ed8dccc57793c Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 11:35:14 -0500 Subject: [PATCH 002/123] added second task as-is to workflow --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index a064e369b..8af4fde71 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -10,5 +10,10 @@ workflow assemble_denovo_bulk { input: reads_unmapped_bam = reads_unmapped_bam } + + call assembly.assemble { + input: + reads_unmapped_bam = filter_to_taxon.taxfilt_bam + } } } \ No newline at end of file From 4bd3f226d3dfe28a2c0dbeac83fb71117fa8a0e7 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 11:37:03 -0500 Subject: [PATCH 003/123] gave the tasks aliases --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 6 +++--- 
1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 8af4fde71..ad86d4313 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -6,14 +6,14 @@ workflow assemble_denovo_bulk { Array[File] reads_unmapped_bam_files scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call taxon_filter.filter_to_taxon { + call taxon_filter.filter_to_taxon as filterfilter { input: reads_unmapped_bam = reads_unmapped_bam } - call assembly.assemble { + call assembly.assemble as assembleassemble { input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam + reads_unmapped_bam = filterfilter.taxfilt_bam } } } \ No newline at end of file From 2ec6e0f3d54b366009e432897195e4a807903a07 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 11:50:32 -0500 Subject: [PATCH 004/123] one task in the workflow, with aliasing --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index ad86d4313..7839f73a5 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -10,10 +10,5 @@ workflow assemble_denovo_bulk { input: reads_unmapped_bam = reads_unmapped_bam } - - call assembly.assemble as assembleassemble { - input: - reads_unmapped_bam = filterfilter.taxfilt_bam - } } } \ No newline at end of file From 92d119d9d619bd8aadaf1d70f8db85e0f839fa23 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 12:32:13 -0500 Subject: [PATCH 005/123] added dx-defaults-assemble_denovo_bulk file --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 5 +++++ pipes/dnax/dx-defaults-assemble_denovo_bulk.json | 4 ++++ 2 files changed, 9 insertions(+) create mode 100644
pipes/dnax/dx-defaults-assemble_denovo_bulk.json diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 7839f73a5..ad86d4313 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -10,5 +10,10 @@ workflow assemble_denovo_bulk { input: reads_unmapped_bam = reads_unmapped_bam } + + call assembly.assemble as assembleassemble { + input: + reads_unmapped_bam = filterfilter.taxfilt_bam + } } } \ No newline at end of file diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json new file mode 100644 index 000000000..cb76edc0b --- /dev/null +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -0,0 +1,4 @@ +{ + "assemble_denovo_bulk.assemble.trim_clip_db": + "dx://file-BXF0vYQ0QyBF509G9J12g927" +} From 1333e6d01288acab3f7d763aeaad6a87ea2cdf67 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 12:36:31 -0500 Subject: [PATCH 006/123] made lastal_db_fasta from first task a workflow variable outside the scatter, so it actually shows up as an input on dnanexus --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index ad86d4313..b9a343518 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -4,11 +4,13 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { Array[File] reads_unmapped_bam_files + File lastal_db_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon as filterfilter { input: reads_unmapped_bam = reads_unmapped_bam + lastal_db_fasta = lastal_db_fasta } call assembly.assemble as assembleassemble { From 7327f7715c6c9944fad340e7a6267abfaacbdc0a Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: 
Tue, 12 Nov 2019 12:41:46 -0500 Subject: [PATCH 007/123] added all tasks without aliasing with input variables as global variables --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 26 +++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index b9a343518..07c742575 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -3,19 +3,33 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { - Array[File] reads_unmapped_bam_files - File lastal_db_fasta + Array[File] reads_unmapped_bam_files + File lastal_db_fasta + Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call taxon_filter.filter_to_taxon as filterfilter { + call taxon_filter.filter_to_taxon { input: reads_unmapped_bam = reads_unmapped_bam lastal_db_fasta = lastal_db_fasta } - - call assembly.assemble as assembleassemble { + + call assembly.assemble { + input: + reads_unmapped_bam = filter_to_taxon.taxfilt_bam + } + + call assembly.scaffold { input: - reads_unmapped_bam = filterfilter.taxfilt_bam + contigs_fasta = assemble.contigs_fasta, + reads_bam = filter_to_taxon.taxfilt_bam + reference_genome_fasta = reference_genome_fasta + } + + call assembly.refine_2x_and_plot { + input: + assembly_fasta = scaffold.scaffold_fasta, + reads_unmapped_bam = reads_unmapped_bam } } } \ No newline at end of file From c14ab66be4c4673884ce1c8a40768cbcf162e79d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 12:45:21 -0500 Subject: [PATCH 008/123] added missing commas, oops --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 07c742575..fe1857c6c 100644 --- 
a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -3,14 +3,14 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { - Array[File] reads_unmapped_bam_files + Array[File]+ reads_unmapped_bam_files File lastal_db_fasta Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon { input: - reads_unmapped_bam = reads_unmapped_bam + reads_unmapped_bam = reads_unmapped_bam, lastal_db_fasta = lastal_db_fasta } @@ -22,7 +22,7 @@ workflow assemble_denovo_bulk { call assembly.scaffold { input: contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam + reads_bam = filter_to_taxon.taxfilt_bam, reference_genome_fasta = reference_genome_fasta } From ab5e06cc488301c47728f033753d595d5f976fd4 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 13:00:16 -0500 Subject: [PATCH 009/123] limited to two tasks in workflow --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index fe1857c6c..3b13bd0b9 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -5,7 +5,6 @@ workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta - Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon { @@ -18,18 +17,5 @@ workflow assemble_denovo_bulk { input: reads_unmapped_bam = filter_to_taxon.taxfilt_bam } - - call assembly.scaffold { - input: - contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta - } - - call assembly.refine_2x_and_plot { - input: - assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = 
reads_unmapped_bam - } } } \ No newline at end of file From cda9c49bae7f0c864ab1f29ed6773b9e39061482 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 13:03:28 -0500 Subject: [PATCH 010/123] three tasks in workflow --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 3b13bd0b9..3400fef6b 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -5,6 +5,7 @@ workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta + Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon { @@ -17,5 +18,12 @@ workflow assemble_denovo_bulk { input: reads_unmapped_bam = filter_to_taxon.taxfilt_bam } + + call assembly.scaffold { + input: + contigs_fasta = assemble.contigs_fasta, + reads_bam = filter_to_taxon.taxfilt_bam, + reference_genome_fasta = reference_genome_fasta + } } } \ No newline at end of file From 5f56e1e874853840a8224388aa090de2a36d79c2 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 13:07:33 -0500 Subject: [PATCH 011/123] deleted extra whitespace to see if that makes any difference --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 3400fef6b..f0d7bd253 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -4,8 +4,7 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files - File lastal_db_fasta - Array[File]+ reference_genome_fasta + File lastal_db_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call 
taxon_filter.filter_to_taxon { @@ -18,12 +17,5 @@ workflow assemble_denovo_bulk { input: reads_unmapped_bam = filter_to_taxon.taxfilt_bam } - - call assembly.scaffold { - input: - contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta - } } } \ No newline at end of file From 309b84a9b3676db2dc048228178671c7f5282269 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 13:18:44 -0500 Subject: [PATCH 012/123] added trim_clip_db as a workflow-level variable --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index f0d7bd253..3a5f165fa 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -5,6 +5,7 @@ workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta + File trim_clip_db scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon { @@ -15,7 +16,8 @@ workflow assemble_denovo_bulk { call assembly.assemble { input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam + reads_unmapped_bam = filter_to_taxon.taxfilt_bam, + trim_clip_db = trim_clip_db } } } \ No newline at end of file From 2103912486467394be3747f3c88c8603408f9578 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 13:21:35 -0500 Subject: [PATCH 013/123] added remaining 2 tasks to workflow with workflow-level variables --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 3a5f165fa..4c01de9d3 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -6,6 +6,7 @@ workflow 
assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta File trim_clip_db + Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon { @@ -19,5 +20,18 @@ workflow assemble_denovo_bulk { reads_unmapped_bam = filter_to_taxon.taxfilt_bam, trim_clip_db = trim_clip_db } + + call assembly.scaffold { + input: + contigs_fasta = assemble.contigs_fasta, + reads_bam = filter_to_taxon.taxfilt_bam, + reference_genome_fasta = reference_genome_fasta + } + + call assembly.refine_2x_and_plot { + input: + assembly_fasta = scaffold.scaffold_fasta, + reads_unmapped_bam = reads_unmapped_bam + } } } \ No newline at end of file From 49e36d7565466a4628de0f4b053dcb61c0465ee6 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 13:43:53 -0500 Subject: [PATCH 014/123] moved default value of trim_clip_db to workflow-level variable in json file --- pipes/dnax/dx-defaults-assemble_denovo_bulk.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json index cb76edc0b..2539f4974 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -1,4 +1,4 @@ { - "assemble_denovo_bulk.assemble.trim_clip_db": + "assemble_denovo_bulk.trim_clip_db": "dx://file-BXF0vYQ0QyBF509G9J12g927" } From 6899f3497bd394a92a0a2770dbea3f14ae3610bc Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 14:41:46 -0500 Subject: [PATCH 015/123] turned interior of scatter in assemble_denovo_bulk into subworkflow call to assemble_denovo --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 31 ++----------------- .../dx-defaults-assemble_denovo_bulk.json | 2 -- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl 
b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 4c01de9d3..60525565e 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -1,37 +1,12 @@ -import "tasks_taxon_filter.wdl" as taxon_filter -import "tasks_assembly.wdl" as assembly +import "assemble_denovo.wdl" as assembly workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files - File lastal_db_fasta - File trim_clip_db - Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call taxon_filter.filter_to_taxon { - input: - reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta - } - - call assembly.assemble { - input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam, - trim_clip_db = trim_clip_db - } - - call assembly.scaffold { - input: - contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta - } - - call assembly.refine_2x_and_plot { - input: - assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = reads_unmapped_bam + call sub.assemble_denovo { + input: reads_unmapped_bam = reads_unmapped_bam } } } \ No newline at end of file diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json index 2539f4974..2c63c0851 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -1,4 +1,2 @@ { - "assemble_denovo_bulk.trim_clip_db": - "dx://file-BXF0vYQ0QyBF509G9J12g927" } From 0eb1b283d98717f9ecd4d76a91878223bffd559f Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 14:43:52 -0500 Subject: [PATCH 016/123] actually named the subworkflow oops --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 
60525565e..160b3d6c5 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -1,11 +1,11 @@ -import "assemble_denovo.wdl" as assembly +import "assemble_denovo.wdl" as denovo_assembly workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call sub.assemble_denovo { + call denovo_assembly.assemble_denovo { input: reads_unmapped_bam = reads_unmapped_bam } } From 2b475f981fd81c0bf8513072818c9727af4d44bf Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 14:51:15 -0500 Subject: [PATCH 017/123] added additional arguments --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 160b3d6c5..0dd3d4fb9 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -3,10 +3,14 @@ import "assemble_denovo.wdl" as denovo_assembly workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files + File lastal_db_fasta + Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call denovo_assembly.assemble_denovo { - input: reads_unmapped_bam = reads_unmapped_bam + call sub.assemble_denovo { + input: reads_unmapped_bam = reads_unmapped_bam, + lastal_db_fasta = lastal_db_fasta, + reference_genome_fasta = reference_genome_fasta } } } \ No newline at end of file From 6ddae5bacb58f30d34e002947d88836c05f68b8c Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 14:51:50 -0500 Subject: [PATCH 018/123] actually named the subworkflow again gah --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl 
index 0dd3d4fb9..53ef25087 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -7,7 +7,7 @@ workflow assemble_denovo_bulk { Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call sub.assemble_denovo { + call denovo_assembly.assemble_denovo { input: reads_unmapped_bam = reads_unmapped_bam, lastal_db_fasta = lastal_db_fasta, reference_genome_fasta = reference_genome_fasta From f905e27ad6c4c9486645c90b4534c294f80b5bb2 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:09:03 -0500 Subject: [PATCH 019/123] added lastal_db_fasta and reference_genome_fasta as workflow-level variables in original assemble_denovo so that assemble_denovo_bulk can provide them when calling assemble_denovo as subworkflow --- pipes/WDL/workflows/assemble_denovo.wdl | 8 ++++++-- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 7 ++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index cb08d4f28..73418ff33 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -4,10 +4,13 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo { File reads_unmapped_bam + File lastal_db_fasta + Array[File]+ reference_genome_fasta call taxon_filter.filter_to_taxon { input: - reads_unmapped_bam = reads_unmapped_bam + reads_unmapped_bam = reads_unmapped_bam, + lastal_db_fasta = lastal_db_fasta } call assembly.assemble { @@ -18,7 +21,8 @@ workflow assemble_denovo { call assembly.scaffold { input: contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam + reads_bam = filter_to_taxon.taxfilt_bam, + reference_genome_fasta = reference_genome_fasta } call assembly.refine_2x_and_plot { diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 
53ef25087..6d3920863 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -8,9 +8,10 @@ workflow assemble_denovo_bulk { scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call denovo_assembly.assemble_denovo { - input: reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta, - reference_genome_fasta = reference_genome_fasta + input: + reads_unmapped_bam = reads_unmapped_bam, + lastal_db_fasta = lastal_db_fasta, + reference_genome_fasta = reference_genome_fasta } } } \ No newline at end of file From 2c69f08fb06d55dd689b180d2669db007819cbd5 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:19:47 -0500 Subject: [PATCH 020/123] moved trim_clip_db out to workflow-level variable and added it to the defaults for assemble_denovo_bulk too --- pipes/WDL/workflows/assemble_denovo.wdl | 4 +++- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 4 +++- pipes/dnax/dx-defaults-assemble_denovo.json | 2 +- pipes/dnax/dx-defaults-assemble_denovo_bulk.json | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index 73418ff33..10732d07a 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -6,6 +6,7 @@ workflow assemble_denovo { File reads_unmapped_bam File lastal_db_fasta Array[File]+ reference_genome_fasta + trim_clip_db call taxon_filter.filter_to_taxon { input: @@ -15,7 +16,8 @@ workflow assemble_denovo { call assembly.assemble { input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam + reads_unmapped_bam = filter_to_taxon.taxfilt_bam, + trim_clip_db = trim_clip_db } call assembly.scaffold { diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 6d3920863..230a1b798 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ 
b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -5,13 +5,15 @@ workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta Array[File]+ reference_genome_fasta + trim_clip_db scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call denovo_assembly.assemble_denovo { input: reads_unmapped_bam = reads_unmapped_bam, lastal_db_fasta = lastal_db_fasta, - reference_genome_fasta = reference_genome_fasta + reference_genome_fasta = reference_genome_fasta, + trim_clip_db = trim_clip_db } } } \ No newline at end of file diff --git a/pipes/dnax/dx-defaults-assemble_denovo.json b/pipes/dnax/dx-defaults-assemble_denovo.json index 2bee672c8..ebe1ccb45 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo.json +++ b/pipes/dnax/dx-defaults-assemble_denovo.json @@ -1,4 +1,4 @@ { - "assemble_denovo.assemble.trim_clip_db": + "assemble_denovo.trim_clip_db": "dx://file-BXF0vYQ0QyBF509G9J12g927" } diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json index 2c63c0851..ebe1ccb45 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -1,2 +1,4 @@ { + "assemble_denovo.trim_clip_db": + "dx://file-BXF0vYQ0QyBF509G9J12g927" } From 1af46b3991c3b1040070542c93fd98e34bbc39bb Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:24:45 -0500 Subject: [PATCH 021/123] fixed dumb syntax error --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 230a1b798..a8f2d3a35 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -5,7 +5,7 @@ workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta Array[File]+ reference_genome_fasta - trim_clip_db + File trim_clip_db 
scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call denovo_assembly.assemble_denovo { From 011403a81ec4735030a0b37c16eb1695b2373e24 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:28:53 -0500 Subject: [PATCH 022/123] actually fixed dumb syntax error --- pipes/WDL/workflows/assemble_denovo.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index 10732d07a..874405bbf 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -6,7 +6,7 @@ workflow assemble_denovo { File reads_unmapped_bam File lastal_db_fasta Array[File]+ reference_genome_fasta - trim_clip_db + File trim_clip_db call taxon_filter.filter_to_taxon { input: From 4d79c3b6c3b3624a0404378dcc4115bc32a2f13a Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:38:58 -0500 Subject: [PATCH 023/123] reverted back to before subworkflow --- pipes/WDL/workflows/assemble_denovo.wdl | 12 ++------ pipes/WDL/workflows/assemble_denovo_bulk.wdl | 30 +++++++++++++++---- pipes/dnax/dx-defaults-assemble_denovo.json | 2 +- .../dx-defaults-assemble_denovo_bulk.json | 2 -- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index 874405bbf..cb08d4f28 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -4,27 +4,21 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo { File reads_unmapped_bam - File lastal_db_fasta - Array[File]+ reference_genome_fasta - File trim_clip_db call taxon_filter.filter_to_taxon { input: - reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta + reads_unmapped_bam = reads_unmapped_bam } call assembly.assemble { input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam, - trim_clip_db = trim_clip_db + 
reads_unmapped_bam = filter_to_taxon.taxfilt_bam } call assembly.scaffold { input: contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta + reads_bam = filter_to_taxon.taxfilt_bam } call assembly.refine_2x_and_plot { diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index a8f2d3a35..95106b3a2 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -1,19 +1,39 @@ -import "assemble_denovo.wdl" as denovo_assembly +import "tasks_taxon_filter.wdl" as taxon_filter +import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files File lastal_db_fasta - Array[File]+ reference_genome_fasta File trim_clip_db + Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call denovo_assembly.assemble_denovo { + call sub. + + call taxon_filter.filter_to_taxon { input: reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta, - reference_genome_fasta = reference_genome_fasta, + lastal_db_fasta = lastal_db_fasta + } + + call assembly.assemble { + input: + reads_unmapped_bam = filter_to_taxon.taxfilt_bam, trim_clip_db = trim_clip_db } + + call assembly.scaffold { + input: + contigs_fasta = assemble.contigs_fasta, + reads_bam = filter_to_taxon.taxfilt_bam, + reference_genome_fasta = reference_genome_fasta + } + + call assembly.refine_2x_and_plot { + input: + assembly_fasta = scaffold.scaffold_fasta, + reads_unmapped_bam = reads_unmapped_bam + } } } \ No newline at end of file diff --git a/pipes/dnax/dx-defaults-assemble_denovo.json b/pipes/dnax/dx-defaults-assemble_denovo.json index ebe1ccb45..2bee672c8 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo.json +++ b/pipes/dnax/dx-defaults-assemble_denovo.json @@ -1,4 +1,4 @@ { - "assemble_denovo.trim_clip_db": + 
"assemble_denovo.assemble.trim_clip_db": "dx://file-BXF0vYQ0QyBF509G9J12g927" } diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json index ebe1ccb45..2c63c0851 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -1,4 +1,2 @@ { - "assemble_denovo.trim_clip_db": - "dx://file-BXF0vYQ0QyBF509G9J12g927" } From fbea2de74ea6ef07825d065c7a38e1de41602559 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:39:37 -0500 Subject: [PATCH 024/123] ...now without dumb syntax error --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 2 -- 1 file changed, 2 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 95106b3a2..4c01de9d3 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -9,8 +9,6 @@ workflow assemble_denovo_bulk { Array[File]+ reference_genome_fasta scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call sub. - call taxon_filter.filter_to_taxon { input: reads_unmapped_bam = reads_unmapped_bam, From 589a18fa385d3361c38c088702c68faa290792cd Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:42:59 -0500 Subject: [PATCH 025/123] added option to add novocraft license --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 4c01de9d3..0b8e56df1 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -7,6 +7,7 @@ workflow assemble_denovo_bulk { File lastal_db_fasta File trim_clip_db Array[File]+ reference_genome_fasta + File? 
novocraft_license scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call taxon_filter.filter_to_taxon { @@ -31,7 +32,8 @@ workflow assemble_denovo_bulk { call assembly.refine_2x_and_plot { input: assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = reads_unmapped_bam + reads_unmapped_bam = reads_unmapped_bam, + novocraft_license = novocraft_license } } } \ No newline at end of file From ad3861ec9fff556a23766fbc029cdb9ebf1cf9fc Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:47:16 -0500 Subject: [PATCH 026/123] removed empty defaults file for assemble_denovo_bulk --- pipes/dnax/dx-defaults-assemble_denovo_bulk.json | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 pipes/dnax/dx-defaults-assemble_denovo_bulk.json diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json deleted file mode 100644 index 2c63c0851..000000000 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ /dev/null @@ -1,2 +0,0 @@ -{ -} From fb2644f688edc582493503dfba2be3202163423d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:55:07 -0500 Subject: [PATCH 027/123] trying subworkflows again... 
--- pipes/WDL/workflows/assemble_denovo.wdl | 24 ++++++++++------ pipes/WDL/workflows/assemble_denovo_bulk.wdl | 28 ++++--------------- pipes/dnax/dx-defaults-assemble_denovo.json | 2 +- .../dx-defaults-assemble_denovo_bulk.json | 4 +++ 4 files changed, 26 insertions(+), 32 deletions(-) create mode 100644 pipes/dnax/dx-defaults-assemble_denovo_bulk.json diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index cb08d4f28..5e96126db 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -3,27 +3,35 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo { - File reads_unmapped_bam - + Array reads_unmapped_bam + File lastal_db_fasta + File trim_clip_db + Array[File]+ reference_genome_fasta + File? novocraft_license + call taxon_filter.filter_to_taxon { input: - reads_unmapped_bam = reads_unmapped_bam + reads_unmapped_bam = reads_unmapped_bam, + lastal_db_fasta = lastal_db_fasta } call assembly.assemble { input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam + reads_unmapped_bam = filter_to_taxon.taxfilt_bam, + trim_clip_db = trim_clip_db } - + call assembly.scaffold { input: contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam + reads_bam = filter_to_taxon.taxfilt_bam, + reference_genome_fasta = reference_genome_fasta } - + call assembly.refine_2x_and_plot { input: assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = reads_unmapped_bam + reads_unmapped_bam = reads_unmapped_bam, + novocraft_license = novocraft_license } } \ No newline at end of file diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 0b8e56df1..6abe7ec97 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -1,5 +1,4 @@ -import "tasks_taxon_filter.wdl" as taxon_filter -import "tasks_assembly.wdl" as assembly +import "assemble_denovo.wdl" as 
assemble_denovo workflow assemble_denovo_bulk { @@ -10,29 +9,12 @@ workflow assemble_denovo_bulk { File? novocraft_license scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call taxon_filter.filter_to_taxon { + call assemble_denovo_bulk { input: reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta - } - - call assembly.assemble { - input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam, - trim_clip_db = trim_clip_db - } - - call assembly.scaffold { - input: - contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta - } - - call assembly.refine_2x_and_plot { - input: - assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = reads_unmapped_bam, + lastal_db_fasta = lastal_db_fasta, + trim_clip_db = trim_clip_db, + reference_genome_fasta = reference_genome_fasta, novocraft_license = novocraft_license } } diff --git a/pipes/dnax/dx-defaults-assemble_denovo.json b/pipes/dnax/dx-defaults-assemble_denovo.json index 2bee672c8..ebe1ccb45 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo.json +++ b/pipes/dnax/dx-defaults-assemble_denovo.json @@ -1,4 +1,4 @@ { - "assemble_denovo.assemble.trim_clip_db": + "assemble_denovo.trim_clip_db": "dx://file-BXF0vYQ0QyBF509G9J12g927" } diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json new file mode 100644 index 000000000..2539f4974 --- /dev/null +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -0,0 +1,4 @@ +{ + "assemble_denovo_bulk.trim_clip_db": + "dx://file-BXF0vYQ0QyBF509G9J12g927" +} From 135c0f17bfb565d38880b07a3410527501e395ab Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:56:03 -0500 Subject: [PATCH 028/123] without syntax error --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl 
b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 6abe7ec97..1166c1c70 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -9,7 +9,7 @@ workflow assemble_denovo_bulk { File? novocraft_license scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call assemble_denovo_bulk { + call assemble_denovo_bulk.assemble_denovo { input: reads_unmapped_bam = reads_unmapped_bam, lastal_db_fasta = lastal_db_fasta, From 6ca90edb7f934bd6827b859756bf9bb11b9fdf61 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 15:59:59 -0500 Subject: [PATCH 029/123] renamed subworkflow to avoid confusion --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 1166c1c70..8314092b6 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -1,4 +1,4 @@ -import "assemble_denovo.wdl" as assemble_denovo +import "assemble_denovo.wdl" as sub workflow assemble_denovo_bulk { @@ -9,7 +9,7 @@ workflow assemble_denovo_bulk { File? 
novocraft_license scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call assemble_denovo_bulk.assemble_denovo { + call sub.assemble_denovo { input: reads_unmapped_bam = reads_unmapped_bam, lastal_db_fasta = lastal_db_fasta, From 1280d6b080322bf21a84d7eb8035e4aed6b8aeca Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 16:00:35 -0500 Subject: [PATCH 030/123] fixed typo --- pipes/WDL/workflows/assemble_denovo.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index 5e96126db..19689488f 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -3,7 +3,7 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo { - Array reads_unmapped_bam + File reads_unmapped_bam File lastal_db_fasta File trim_clip_db Array[File]+ reference_genome_fasta From 0023bd66cdef927717db2a39934a766fccceaf1d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 16:09:34 -0500 Subject: [PATCH 031/123] gave up on resolving 'Found Errors in generated WDL source' bug; reverted to no subworkflow --- pipes/WDL/workflows/assemble_denovo.wdl | 22 +++++---------- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 28 +++++++++++++++---- pipes/dnax/dx-defaults-assemble_denovo.json | 2 +- .../dx-defaults-assemble_denovo_bulk.json | 4 --- 4 files changed, 31 insertions(+), 25 deletions(-) delete mode 100644 pipes/dnax/dx-defaults-assemble_denovo_bulk.json diff --git a/pipes/WDL/workflows/assemble_denovo.wdl b/pipes/WDL/workflows/assemble_denovo.wdl index 19689488f..cb08d4f28 100644 --- a/pipes/WDL/workflows/assemble_denovo.wdl +++ b/pipes/WDL/workflows/assemble_denovo.wdl @@ -4,34 +4,26 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo { File reads_unmapped_bam - File lastal_db_fasta - File trim_clip_db - Array[File]+ reference_genome_fasta - File? 
novocraft_license - + call taxon_filter.filter_to_taxon { input: - reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta + reads_unmapped_bam = reads_unmapped_bam } call assembly.assemble { input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam, - trim_clip_db = trim_clip_db + reads_unmapped_bam = filter_to_taxon.taxfilt_bam } - + call assembly.scaffold { input: contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta + reads_bam = filter_to_taxon.taxfilt_bam } - + call assembly.refine_2x_and_plot { input: assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = reads_unmapped_bam, - novocraft_license = novocraft_license + reads_unmapped_bam = reads_unmapped_bam } } \ No newline at end of file diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 8314092b6..0b8e56df1 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -1,4 +1,5 @@ -import "assemble_denovo.wdl" as sub +import "tasks_taxon_filter.wdl" as taxon_filter +import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { @@ -9,12 +10,29 @@ workflow assemble_denovo_bulk { File? 
novocraft_license scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call sub.assemble_denovo { + call taxon_filter.filter_to_taxon { input: reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta, - trim_clip_db = trim_clip_db, - reference_genome_fasta = reference_genome_fasta, + lastal_db_fasta = lastal_db_fasta + } + + call assembly.assemble { + input: + reads_unmapped_bam = filter_to_taxon.taxfilt_bam, + trim_clip_db = trim_clip_db + } + + call assembly.scaffold { + input: + contigs_fasta = assemble.contigs_fasta, + reads_bam = filter_to_taxon.taxfilt_bam, + reference_genome_fasta = reference_genome_fasta + } + + call assembly.refine_2x_and_plot { + input: + assembly_fasta = scaffold.scaffold_fasta, + reads_unmapped_bam = reads_unmapped_bam, novocraft_license = novocraft_license } } diff --git a/pipes/dnax/dx-defaults-assemble_denovo.json b/pipes/dnax/dx-defaults-assemble_denovo.json index ebe1ccb45..2bee672c8 100644 --- a/pipes/dnax/dx-defaults-assemble_denovo.json +++ b/pipes/dnax/dx-defaults-assemble_denovo.json @@ -1,4 +1,4 @@ { - "assemble_denovo.trim_clip_db": + "assemble_denovo.assemble.trim_clip_db": "dx://file-BXF0vYQ0QyBF509G9J12g927" } diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json deleted file mode 100644 index 2539f4974..000000000 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "assemble_denovo_bulk.trim_clip_db": - "dx://file-BXF0vYQ0QyBF509G9J12g927" -} From 237836bc7f1e0770687222adfb10918730219038 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 16:19:38 -0500 Subject: [PATCH 032/123] added trim_clip_db default value --- pipes/dnax/dx-defaults-assemble_denovo_bulk.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 pipes/dnax/dx-defaults-assemble_denovo_bulk.json diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json 
b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json new file mode 100644 index 000000000..2539f4974 --- /dev/null +++ b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json @@ -0,0 +1,4 @@ +{ + "assemble_denovo_bulk.trim_clip_db": + "dx://file-BXF0vYQ0QyBF509G9J12g927" +} From cb651d7ed3837d077a0e6c9b77bf02d8944144b4 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 16:25:04 -0500 Subject: [PATCH 033/123] reordered workflow-level input variables --- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl index 0b8e56df1..89a67dabe 100644 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ b/pipes/WDL/workflows/assemble_denovo_bulk.wdl @@ -4,9 +4,9 @@ import "tasks_assembly.wdl" as assembly workflow assemble_denovo_bulk { Array[File]+ reads_unmapped_bam_files + Array[File]+ reference_genome_fasta File lastal_db_fasta File trim_clip_db - Array[File]+ reference_genome_fasta File? novocraft_license scatter(reads_unmapped_bam in reads_unmapped_bam_files) { From b60a71c7dc6c816fc74371e6cd3bc25535fc0494 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 17:14:44 -0500 Subject: [PATCH 034/123] pulled align_and_plot file variables out to workflow-level variables --- pipes/WDL/workflows/align_and_plot_bulk.wdl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 pipes/WDL/workflows/align_and_plot_bulk.wdl diff --git a/pipes/WDL/workflows/align_and_plot_bulk.wdl b/pipes/WDL/workflows/align_and_plot_bulk.wdl new file mode 100644 index 000000000..e8c241a99 --- /dev/null +++ b/pipes/WDL/workflows/align_and_plot_bulk.wdl @@ -0,0 +1,16 @@ +import "tasks_reports.wdl" as reports + +workflow align_and_plot_bulk { + + File assembly_fasta + File reads_unmapped_bam + File? 
novocraft_license + + call reports.plot_coverage { + input: + aligner_options = "-r Random -l 30 -g 40 -x 20 -t 502", + assembly_fasta = assembly_fasta, + reads_unmapped_bam = reads_unmapped_bam, + novocraft_license = novocraft_license + } +} From 3f17184d8f1b76b7f9c8951c36aaf1be76e64eaa Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 17:17:12 -0500 Subject: [PATCH 035/123] added scatter on bam files --- pipes/WDL/workflows/align_and_plot_bulk.wdl | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/align_and_plot_bulk.wdl b/pipes/WDL/workflows/align_and_plot_bulk.wdl index e8c241a99..9aa9e96e6 100644 --- a/pipes/WDL/workflows/align_and_plot_bulk.wdl +++ b/pipes/WDL/workflows/align_and_plot_bulk.wdl @@ -2,15 +2,17 @@ import "tasks_reports.wdl" as reports workflow align_and_plot_bulk { + Array[File]+ reads_unmapped_bam_files File assembly_fasta - File reads_unmapped_bam File? novocraft_license - call reports.plot_coverage { - input: - aligner_options = "-r Random -l 30 -g 40 -x 20 -t 502", - assembly_fasta = assembly_fasta, - reads_unmapped_bam = reads_unmapped_bam, - novocraft_license = novocraft_license + scatter(reads_unmapped_bam in reads_unmapped_bam_files) { + call reports.plot_coverage { + input: + aligner_options = "-r Random -l 30 -g 40 -x 20 -t 502", + assembly_fasta = assembly_fasta, + reads_unmapped_bam = reads_unmapped_bam, + novocraft_license = novocraft_license + } } } From d5b3f39a09c5de7e24e1f4d46e0c59a235cdc808 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 17:30:16 -0500 Subject: [PATCH 036/123] added alignment options to workflow-level variables so they'll show up on dnanexus --- pipes/WDL/workflows/align_and_plot_bulk.wdl | 23 +++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/align_and_plot_bulk.wdl b/pipes/WDL/workflows/align_and_plot_bulk.wdl index 9aa9e96e6..9548b5696 
100644 --- a/pipes/WDL/workflows/align_and_plot_bulk.wdl +++ b/pipes/WDL/workflows/align_and_plot_bulk.wdl @@ -6,13 +6,32 @@ workflow align_and_plot_bulk { File assembly_fasta File? novocraft_license + String? aligner="novoalign" # novoalign or bwa + String? aligner_options="-r Random -l 30 -g 40 -x 20 -t 502" + + Boolean? skip_mark_dupes=false + Boolean? plot_only_non_duplicates=false + Boolean? bin_large_plots=false + String? binning_summary_statistic="max" # max or min + + String? docker="quay.io/broadinstitute/viral-core" + + scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call reports.plot_coverage { input: - aligner_options = "-r Random -l 30 -g 40 -x 20 -t 502", assembly_fasta = assembly_fasta, reads_unmapped_bam = reads_unmapped_bam, - novocraft_license = novocraft_license + novocraft_license = novocraft_license, + + aligner = aligner, + aligner_options = aligner_options, + skip_mark_dupes = skip_mark_dupes, + plot_only_non_duplicates = plot_only_non_duplicates, + bin_large_plots = bin_large_plots, + binning_summary_statistic = binning_summary_statistic, + + docker = docker } } } From bc785a42044b6ef6fd513b4a3c237d8fadc01159 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 17:42:14 -0500 Subject: [PATCH 037/123] moved binary and string variable values into task call --- pipes/WDL/workflows/align_and_plot_bulk.wdl | 25 ++++++--------------- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/pipes/WDL/workflows/align_and_plot_bulk.wdl b/pipes/WDL/workflows/align_and_plot_bulk.wdl index 9548b5696..0c591aa6e 100644 --- a/pipes/WDL/workflows/align_and_plot_bulk.wdl +++ b/pipes/WDL/workflows/align_and_plot_bulk.wdl @@ -6,17 +6,6 @@ workflow align_and_plot_bulk { File assembly_fasta File? novocraft_license - String? aligner="novoalign" # novoalign or bwa - String? aligner_options="-r Random -l 30 -g 40 -x 20 -t 502" - - Boolean? skip_mark_dupes=false - Boolean? plot_only_non_duplicates=false - Boolean? 
bin_large_plots=false - String? binning_summary_statistic="max" # max or min - - String? docker="quay.io/broadinstitute/viral-core" - - scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call reports.plot_coverage { input: @@ -24,14 +13,14 @@ workflow align_and_plot_bulk { reads_unmapped_bam = reads_unmapped_bam, novocraft_license = novocraft_license, - aligner = aligner, - aligner_options = aligner_options, - skip_mark_dupes = skip_mark_dupes, - plot_only_non_duplicates = plot_only_non_duplicates, - bin_large_plots = bin_large_plots, - binning_summary_statistic = binning_summary_statistic, + aligner = "novoalign", + aligner_options = "-r Random -l 30 -g 40 -x 20 -t 502", + skip_mark_dupes = false, + plot_only_non_duplicates = false, + bin_large_plots = false, + binning_summary_statistic = "max", - docker = docker + docker = "quay.io/broadinstitute/viral-core" } } } From 6a08adffe4e9faa27f0a674cd5dfe10b4de4b540 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 17:48:05 -0500 Subject: [PATCH 038/123] moved non-file variables back to 'common' --- pipes/WDL/workflows/align_and_plot_bulk.wdl | 25 +++++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/align_and_plot_bulk.wdl b/pipes/WDL/workflows/align_and_plot_bulk.wdl index 0c591aa6e..9548b5696 100644 --- a/pipes/WDL/workflows/align_and_plot_bulk.wdl +++ b/pipes/WDL/workflows/align_and_plot_bulk.wdl @@ -6,6 +6,17 @@ workflow align_and_plot_bulk { File assembly_fasta File? novocraft_license + String? aligner="novoalign" # novoalign or bwa + String? aligner_options="-r Random -l 30 -g 40 -x 20 -t 502" + + Boolean? skip_mark_dupes=false + Boolean? plot_only_non_duplicates=false + Boolean? bin_large_plots=false + String? binning_summary_statistic="max" # max or min + + String? 
docker="quay.io/broadinstitute/viral-core" + + scatter(reads_unmapped_bam in reads_unmapped_bam_files) { call reports.plot_coverage { input: @@ -13,14 +24,14 @@ workflow align_and_plot_bulk { reads_unmapped_bam = reads_unmapped_bam, novocraft_license = novocraft_license, - aligner = "novoalign", - aligner_options = "-r Random -l 30 -g 40 -x 20 -t 502", - skip_mark_dupes = false, - plot_only_non_duplicates = false, - bin_large_plots = false, - binning_summary_statistic = "max", + aligner = aligner, + aligner_options = aligner_options, + skip_mark_dupes = skip_mark_dupes, + plot_only_non_duplicates = plot_only_non_duplicates, + bin_large_plots = bin_large_plots, + binning_summary_statistic = binning_summary_statistic, - docker = "quay.io/broadinstitute/viral-core" + docker = docker } } } From c8e9c226e04225c633e59a8cc167049681ee8925 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 17:51:14 -0500 Subject: [PATCH 039/123] pulled variables out to workflow-level for merge_bams_bulk --- pipes/WDL/workflows/merge_bams_bulk.wdl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 pipes/WDL/workflows/merge_bams_bulk.wdl diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl new file mode 100644 index 000000000..543c3892c --- /dev/null +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -0,0 +1,16 @@ +import "tasks_demux.wdl" as demux + +workflow merge_bams_bulk { + Array[File]+ in_bams + File? reheader_table # tsv with 3 cols: field, old value, new value + String out_basename + String? 
docker="quay.io/broadinstitute/viral-core" + + call demux.merge_and_reheader_bams { + input: + in_bams = in_bams, + reheader_table = reheader_table, + out_basename = out_basename, + docker = docker + } +} From f57a1f0c09b820d9d46774d4ce13e34358378fc8 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 18:03:48 -0500 Subject: [PATCH 040/123] added input table and started working on reading input table --- pipes/WDL/workflows/merge_bams_bulk.wdl | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 543c3892c..542243fbb 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -1,16 +1,19 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { - Array[File]+ in_bams - File? reheader_table # tsv with 3 cols: field, old value, new value - String out_basename - String? docker="quay.io/broadinstitute/viral-core" + File out_basename_in_bams_table # first column is out_basename, remaining columns are in_bams for that basename + Array[Array[String]] input_values = read_tsv(out_basename_in_bams_table) - call demux.merge_and_reheader_bams { - input: - in_bams = in_bams, - reheader_table = reheader_table, - out_basename = out_basename, - docker = docker +# Array[File]+ in_bams +# String out_basename + String? 
docker="quay.io/broadinstitute/viral-core" + + scatter (input_value in input_values) { + call demux.merge_and_reheader_bams { + input: + out_basename = input_value[0], + in_bams = input_value[1], + docker = docker + } } } From 13d98da651f8c368ed512922f71ec775e397ad33 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 12 Nov 2019 18:43:48 -0500 Subject: [PATCH 041/123] made in_bams a two-element array --- pipes/WDL/workflows/merge_bams_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 542243fbb..f5f88b981 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -12,7 +12,7 @@ workflow merge_bams_bulk { call demux.merge_and_reheader_bams { input: out_basename = input_value[0], - in_bams = input_value[1], + in_bams = [input_value[1], input_value[2]], docker = docker } } From 2270e2a7937c73eb615b26dc6d8c6b3f456cfdc1 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 15:06:36 -0500 Subject: [PATCH 042/123] automatically maps input files to the output basename; temporarily marking all input files as matching each output basement to see if it works so far --- pipes/WDL/workflows/merge_bams_bulk.wdl | 160 ++++++++++++++++++++++-- 1 file changed, 152 insertions(+), 8 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index f5f88b981..68c88f7fe 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -1,19 +1,163 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { - File out_basename_in_bams_table # first column is out_basename, remaining columns are in_bams for that basename - Array[Array[String]] input_values = read_tsv(out_basename_in_bams_table) -# Array[File]+ in_bams -# String out_basename + Array[File]+ in_bams + File out_basenames_file # one per 
line, same order as in_bams_tsv String? docker="quay.io/broadinstitute/viral-core" + + Array[String] out_basenames = read_lines(out_basenames_file) - scatter (input_value in input_values) { - call demux.merge_and_reheader_bams { + scatter (basename_index in range(length(out_basenames))) { + # retrieves output file basename and list of input filenames for this row + String out_basename = out_basenames[basename_index] + + # identifies the indices of the input bam files containing this output basename + scatter (in_bams_index in range(length(in_bams))) { + in_bam = in_bams[in_bams_index] + in_bam_name = basename(in_bam, ".bam") + + if(true) { + relevant_in_bam_index = in_bams_index + } + } + Array[Int] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter + + # retrieves the input bam files corresponding to the filenames + scatter (relevant_in_bam_index in relevant_in_bam_indices) { + relevant_in_bam = in_bams[relevant_in_bam_index] + } + Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter + + # merges the bam files to produce this output file + call demux.merge_and_reheader_bams { input: - out_basename = input_value[0], - in_bams = [input_value[1], input_value[2]], + out_basename = out_basename, + in_bams = relevant_in_bams, docker = docker } } } + +# File in_bams_tsv # filenames to merge, tab-separated, each line one output file +# Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) + +# Array[Array[String]] in_bams_filenames_table = read_tsv(in_bams_tsv) +# Array[String] relevant_in_bams_filenames = in_bams_filenames_table[basename_index] + +# goes through an array of files and identifies the indices of those that contain a pattern +# task subset_files_list { +# Array[File]+ files +# String pattern +# +# scatter (index in range(length(out_basenames))) { +# } +# +# output { +# } +# } + + # retrieves the input bam files corresponding to the filenames +# scatter(relevant_in_bams_filename in 
relevant_in_bams_filenames) { +# call subset_files_list { +# input: +# input_files = in_bams, +# exact_pattern = relevant_in_bams_filename +# } +# relevant_in_bam = subset_files_list.matching_files +# } +# Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter + +# task subset_files_list { +# Array[File]+ input_files +# String pattern +# +# output { +# Array[File] all_files = input_files +# Array[File] matching_files = glob(pattern) +# } +# } + + +# scatter(in_bam in in_bams) { +# in_bam_filename = basename(in_bam, ".bam") +# +# # input bam files matching the input table +# # TODO +# +# # input bam files containing this output basename +# # TODO +# +# relevant_in_bam = in_bam +# } +# Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter + + + +# File out_basename_in_bams_table # first column is out_basename, remaining columns are in_bams for that basename +# Array[Array[String]] input_values = read_tsv(out_basename_in_bams_table) +# +# scatter (input_value in input_values) { +# String out_basename = input_value[0] +# Array[String] in_bam_filenames = [input_value[1], input_value[2]] +# call demux.merge_and_reheader_bams { +# input: +# out_basename = out_basename, +# in_bams = in_bams, +# docker = docker +# } +# } +# +# +# +# File out_basenames_file +# Array[String] out_basenames = read_lines(out_basenames_file) +# +# Array[File]+ in_bams +# String? 
docker="quay.io/broadinstitute/viral-core" +# +# scatter (out_basename in out_basenames) { +# these_in_bams = in_bams +# +# if() +# } + +# String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") + +# } + +# task subset_files_list_subset { +# Array[File]+ files +# Array[String]+ included_filenames +# +# command { +# set -ex -o pipefail +# +# read_utils.py --version | tee VERSION +# +# if [ ${length(in_bams)} -gt 1 ]; then +# read_utils.py merge_bams ${sep=' ' in_bams} merged.bam --loglevel DEBUG +# else +# echo "Skipping merge, only one input file" +# ln -s ${select_first(in_bams)} merged.bam +# fi +# +# if [[ -f "${reheader_table}" ]]; then +# read_utils.py reheader_bam merged.bam ${reheader_table} ${out_basename}.bam --loglevel DEBUG +# else +# echo "Skipping reheader, no mapping table specified" +# ln -s merged.bam ${out_basename}.bam +# fi +# } +# +# output { +# File out_bam = "${out_basename}.bam" +# } +# +# runtime { +# docker: "${docker}" +# memory: "2000 MB" +# cpu: 2 +# dx_instance_type: "mem1_ssd2_v2_x4" +# } +# } \ No newline at end of file From e2601d6e4b3dd7c7a76cf674456332a7bc7a4bad Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 15:13:38 -0500 Subject: [PATCH 043/123] removed first embedded scatter --- pipes/WDL/workflows/merge_bams_bulk.wdl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 68c88f7fe..a44bf4f8b 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -13,15 +13,16 @@ workflow merge_bams_bulk { String out_basename = out_basenames[basename_index] # identifies the indices of the input bam files containing this output basename - scatter (in_bams_index in range(length(in_bams))) { - in_bam = in_bams[in_bams_index] - in_bam_name = basename(in_bam, ".bam") - - if(true) { - relevant_in_bam_index = 
in_bams_index - } - } - Array[Int] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter +# scatter (in_bams_index in range(length(in_bams))) { +# in_bam = in_bams[in_bams_index] +# in_bam_name = basename(in_bam, ".bam") +# +# if(true) { +# relevant_in_bam_index = in_bams_index +# } +# } +# Array[Int] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter + Array[Int] relevant_in_bam_indices = range(length(in_bams)) # retrieves the input bam files corresponding to the filenames scatter (relevant_in_bam_index in relevant_in_bam_indices) { From 41463b02d3b7528eda18ab691b2972ca8c11460d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 15:22:00 -0500 Subject: [PATCH 044/123] removed all nested scatters, since apparently those don't exist --- pipes/WDL/workflows/merge_bams_bulk.wdl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index a44bf4f8b..557739cd1 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -22,13 +22,16 @@ workflow merge_bams_bulk { # } # } # Array[Int] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter + Array[Int] relevant_in_bam_indices = range(length(in_bams)) # retrieves the input bam files corresponding to the filenames - scatter (relevant_in_bam_index in relevant_in_bam_indices) { - relevant_in_bam = in_bams[relevant_in_bam_index] - } - Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter +# scatter (relevant_in_bam_index in relevant_in_bam_indices) { +# relevant_in_bam = in_bams[relevant_in_bam_index] +# } +# Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter + + Array[File] relevant_in_bams = in_bams # merges the bam files to produce this output file call demux.merge_and_reheader_bams { From 
d69cff6cd310402c1b845633b178a7f18e2758a0 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:14:07 -0500 Subject: [PATCH 045/123] moved contents of outer scatter to a task --- pipes/WDL/workflows/merge_bams_bulk.wdl | 44 +++++++++++++++++-------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 557739cd1..72e3b5167 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -12,7 +12,34 @@ workflow merge_bams_bulk { # retrieves output file basename and list of input filenames for this row String out_basename = out_basenames[basename_index] - # identifies the indices of the input bam files containing this output basename + call merge_bams_for_basename { + input: + out_basename = out_basename, + in_bams = in_bams, + docker = docker + } + } +} + +task merge_bams_for_basename { + String out_basename + Array[File]+ in_bams + String? 
docker + + Array[File] relevant_in_bams = in_bams + + # merges the bam files to produce this output file + call demux.merge_and_reheader_bams { + input: + out_basename = out_basename, + in_bams = relevant_in_bams, + docker = docker + } +} + +# Array[Int] relevant_in_bam_indices = range(length(in_bams)) + + # identifies the indices of the input bam files containing this output basename # scatter (in_bams_index in range(length(in_bams))) { # in_bam = in_bams[in_bams_index] # in_bam_name = basename(in_bam, ".bam") @@ -23,25 +50,14 @@ workflow merge_bams_bulk { # } # Array[Int] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter - Array[Int] relevant_in_bam_indices = range(length(in_bams)) + # retrieves the input bam files corresponding to the filenames # scatter (relevant_in_bam_index in relevant_in_bam_indices) { # relevant_in_bam = in_bams[relevant_in_bam_index] # } # Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter - - Array[File] relevant_in_bams = in_bams - - # merges the bam files to produce this output file - call demux.merge_and_reheader_bams { - input: - out_basename = out_basename, - in_bams = relevant_in_bams, - docker = docker - } - } -} + # File in_bams_tsv # filenames to merge, tab-separated, each line one output file # Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) From b5aa092f6d1c93f278ab79d07f0c6c6586e6e74b Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:20:27 -0500 Subject: [PATCH 046/123] removed text right at start of scatter? 
--- pipes/WDL/workflows/merge_bams_bulk.wdl | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 72e3b5167..1554c2254 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -9,12 +9,9 @@ workflow merge_bams_bulk { Array[String] out_basenames = read_lines(out_basenames_file) scatter (basename_index in range(length(out_basenames))) { - # retrieves output file basename and list of input filenames for this row - String out_basename = out_basenames[basename_index] - call merge_bams_for_basename { input: - out_basename = out_basename, + out_basename = out_basenames[basename_index], in_bams = in_bams, docker = docker } From 47165fc6aa66ee0fcbda7851d118bb60eadb0089 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:25:03 -0500 Subject: [PATCH 047/123] removed inside of scatter --- pipes/WDL/workflows/merge_bams_bulk.wdl | 44 +++++++++++++------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 1554c2254..2861dbaff 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -9,30 +9,32 @@ workflow merge_bams_bulk { Array[String] out_basenames = read_lines(out_basenames_file) scatter (basename_index in range(length(out_basenames))) { - call merge_bams_for_basename { - input: - out_basename = out_basenames[basename_index], - in_bams = in_bams, - docker = docker - } - } -} -task merge_bams_for_basename { - String out_basename - Array[File]+ in_bams - String? 
docker - - Array[File] relevant_in_bams = in_bams - - # merges the bam files to produce this output file - call demux.merge_and_reheader_bams { - input: - out_basename = out_basename, - in_bams = relevant_in_bams, - docker = docker } } + +# call merge_bams_for_basename { +# input: +# out_basename = out_basenames[basename_index], +# in_bams = in_bams, +# docker = docker +# } +# +# task merge_bams_for_basename { +# String out_basename +# Array[File]+ in_bams +# String? docker +# +# Array[File] relevant_in_bams = in_bams +# +# # merges the bam files to produce this output file +# call demux.merge_and_reheader_bams { +# input: +# out_basename = out_basename, +# in_bams = relevant_in_bams, +# docker = docker +# } +# } # Array[Int] relevant_in_bam_indices = range(length(in_bams)) From 3d14888105a3e387cd33fe11ea2f912dd8f94294 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:30:43 -0500 Subject: [PATCH 048/123] added call to a task --- pipes/WDL/workflows/merge_bams_bulk.wdl | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 2861dbaff..ce4f048b8 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -5,11 +5,16 @@ workflow merge_bams_bulk { Array[File]+ in_bams File out_basenames_file # one per line, same order as in_bams_tsv String? 
docker="quay.io/broadinstitute/viral-core" - + Array[String] out_basenames = read_lines(out_basenames_file) scatter (basename_index in range(length(out_basenames))) { - + call demux.merge_and_reheader_bams { + input: + out_basename = out_basename, + in_bams = relevant_in_bams, + docker = docker + } } } @@ -59,9 +64,9 @@ workflow merge_bams_bulk { # File in_bams_tsv # filenames to merge, tab-separated, each line one output file -# Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) +# Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) -# Array[Array[String]] in_bams_filenames_table = read_tsv(in_bams_tsv) +# Array[Array[String]] in_bams_filenames_table = read_tsv(in_bams_tsv) # Array[String] relevant_in_bams_filenames = in_bams_filenames_table[basename_index] # goes through an array of files and identifies the indices of those that contain a pattern @@ -114,14 +119,14 @@ workflow merge_bams_bulk { # File out_basename_in_bams_table # first column is out_basename, remaining columns are in_bams for that basename -# Array[Array[String]] input_values = read_tsv(out_basename_in_bams_table) +# Array[Array[String]] input_values = read_tsv(out_basename_in_bams_table) # # scatter (input_value in input_values) { # String out_basename = input_value[0] # Array[String] in_bam_filenames = [input_value[1], input_value[2]] -# call demux.merge_and_reheader_bams { +# call demux.merge_and_reheader_bams { # input: -# out_basename = out_basename, +# out_basename = out_basename, # in_bams = in_bams, # docker = docker # } From b05995409e1e09be87340f70f086d51f89c6bb49 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:37:07 -0500 Subject: [PATCH 049/123] added some code inside the scatter --- pipes/WDL/workflows/merge_bams_bulk.wdl | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index ce4f048b8..eb8a587a4 100644 --- 
a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -9,21 +9,18 @@ workflow merge_bams_bulk { Array[String] out_basenames = read_lines(out_basenames_file) scatter (basename_index in range(length(out_basenames))) { - call demux.merge_and_reheader_bams { + out_basename = out_basenames[basename_index] + + call merge_bams_for_basename { input: out_basename = out_basename, - in_bams = relevant_in_bams, + in_bams = in_bams, docker = docker } } } -# call merge_bams_for_basename { -# input: -# out_basename = out_basenames[basename_index], -# in_bams = in_bams, -# docker = docker -# } + # # task merge_bams_for_basename { # String out_basename From 074eeb272228070c527a5574c4cb690fa375cfc7 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:39:47 -0500 Subject: [PATCH 050/123] just one really easy line of code in the scatter --- pipes/WDL/workflows/merge_bams_bulk.wdl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index eb8a587a4..1f435f6dc 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -10,16 +10,15 @@ workflow merge_bams_bulk { scatter (basename_index in range(length(out_basenames))) { out_basename = out_basenames[basename_index] - - call merge_bams_for_basename { - input: - out_basename = out_basename, - in_bams = in_bams, - docker = docker - } } } +# call merge_bams_for_basename { +# input: +# out_basename = out_basename, +# in_bams = in_bams, +# docker = docker +# } # # task merge_bams_for_basename { From 7b5133ddba44dd027cd232b96ad90e07af13d468 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 21:40:59 -0500 Subject: [PATCH 051/123] and a scatter in the scatter --- pipes/WDL/workflows/merge_bams_bulk.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl 
b/pipes/WDL/workflows/merge_bams_bulk.wdl index 1f435f6dc..3070f425a 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -10,6 +10,9 @@ workflow merge_bams_bulk { scatter (basename_index in range(length(out_basenames))) { out_basename = out_basenames[basename_index] + scatter (in_bams_index in range(length(in_bams))) { + in_bam = in_bams[in_bams_index] + } } } From 9cba71a8456f1eb40f2f3a1987b660a048921112 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 22:02:08 -0500 Subject: [PATCH 052/123] fixed stupid stupid syntax errors and brought back the nested scatters, yaaaay --- pipes/WDL/workflows/merge_bams_bulk.wdl | 50 +++++++++++++------------ 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 3070f425a..c195082a5 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -9,13 +9,36 @@ workflow merge_bams_bulk { Array[String] out_basenames = read_lines(out_basenames_file) scatter (basename_index in range(length(out_basenames))) { - out_basename = out_basenames[basename_index] + String out_basename = out_basenames[basename_index] + + # identifies the indices of the input bam files containing this output basename scatter (in_bams_index in range(length(in_bams))) { - in_bam = in_bams[in_bams_index] + File in_bam = in_bams[in_bams_index] + String in_bam_name = basename(in_bam, ".bam") + + if(true) { + Int relevant_in_bam_index = in_bams_index + } + } + Array[Int?] 
relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter + + # retrieves the input bam files corresponding to the filenames + scatter (relevant_in_bam_index in relevant_in_bam_indices) { + File relevant_in_bam = in_bams[relevant_in_bam_index] + } + Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter + + # merges the bam files to produce this output file + call demux.merge_and_reheader_bams { + input: + out_basename = out_basename, + in_bams = relevant_in_bams, + docker = docker } } } +# # call merge_bams_for_basename { # input: # out_basename = out_basename, @@ -31,35 +54,14 @@ workflow merge_bams_bulk { # # Array[File] relevant_in_bams = in_bams # -# # merges the bam files to produce this output file -# call demux.merge_and_reheader_bams { -# input: -# out_basename = out_basename, -# in_bams = relevant_in_bams, -# docker = docker -# } + # } # Array[Int] relevant_in_bam_indices = range(length(in_bams)) - # identifies the indices of the input bam files containing this output basename -# scatter (in_bams_index in range(length(in_bams))) { -# in_bam = in_bams[in_bams_index] -# in_bam_name = basename(in_bam, ".bam") -# -# if(true) { -# relevant_in_bam_index = in_bams_index -# } -# } -# Array[Int] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter - # retrieves the input bam files corresponding to the filenames -# scatter (relevant_in_bam_index in relevant_in_bam_indices) { -# relevant_in_bam = in_bams[relevant_in_bam_index] -# } -# Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter # File in_bams_tsv # filenames to merge, tab-separated, each line one output file From 622b060f4fb2f21846f7fa33003605183b9d4a14 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 22:05:52 -0500 Subject: [PATCH 053/123] removed unnecessary index step --- pipes/WDL/workflows/merge_bams_bulk.wdl | 10 ++-------- 1 file changed, 2 insertions(+), 8 
deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index c195082a5..cfb0c2de8 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -17,16 +17,10 @@ workflow merge_bams_bulk { String in_bam_name = basename(in_bam, ".bam") if(true) { - Int relevant_in_bam_index = in_bams_index + File relevant_in_bam = in_bam } } - Array[Int?] relevant_in_bam_indices = relevant_in_bam_index # gathers results from the scatter - - # retrieves the input bam files corresponding to the filenames - scatter (relevant_in_bam_index in relevant_in_bam_indices) { - File relevant_in_bam = in_bams[relevant_in_bam_index] - } - Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter + Array[File?] relevant_in_bams = relevant_in_bam # gathers results from the scatter # merges the bam files to produce this output file call demux.merge_and_reheader_bams { From 2daf2f74136c87819723eb85bff171dbcc10f53b Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 22:10:16 -0500 Subject: [PATCH 054/123] renamed workflow so it gets validated first --- .../workflows/{merge_bams_bulk.wdl => aamerge_bams_bulk.wdl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename pipes/WDL/workflows/{merge_bams_bulk.wdl => aamerge_bams_bulk.wdl} (99%) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl similarity index 99% rename from pipes/WDL/workflows/merge_bams_bulk.wdl rename to pipes/WDL/workflows/aamerge_bams_bulk.wdl index cfb0c2de8..b46f7f01d 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -1,6 +1,6 @@ import "tasks_demux.wdl" as demux -workflow merge_bams_bulk { +workflow aamerge_bams_bulk { Array[File]+ in_bams File out_basenames_file # one per line, same order as in_bams_tsv From 0a2ca2750e05cc769a2702715e8ef5687f5ea7d7 Mon Sep 17 00:00:00 2001 From: 
Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 22:12:44 -0500 Subject: [PATCH 055/123] deleted no longer useful commented-out code at the end --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 133 +--------------------- 1 file changed, 1 insertion(+), 132 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index b46f7f01d..7247a20d8 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -30,33 +30,7 @@ workflow aamerge_bams_bulk { docker = docker } } -} - -# -# call merge_bams_for_basename { -# input: -# out_basename = out_basename, -# in_bams = in_bams, -# docker = docker -# } - -# -# task merge_bams_for_basename { -# String out_basename -# Array[File]+ in_bams -# String? docker -# -# Array[File] relevant_in_bams = in_bams -# - -# } - -# Array[Int] relevant_in_bam_indices = range(length(in_bams)) - - - - - +} # File in_bams_tsv # filenames to merge, tab-separated, each line one output file # Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) @@ -64,29 +38,6 @@ workflow aamerge_bams_bulk { # Array[Array[String]] in_bams_filenames_table = read_tsv(in_bams_tsv) # Array[String] relevant_in_bams_filenames = in_bams_filenames_table[basename_index] -# goes through an array of files and identifies the indices of those that contain a pattern -# task subset_files_list { -# Array[File]+ files -# String pattern -# -# scatter (index in range(length(out_basenames))) { -# } -# -# output { -# } -# } - - # retrieves the input bam files corresponding to the filenames -# scatter(relevant_in_bams_filename in relevant_in_bams_filenames) { -# call subset_files_list { -# input: -# input_files = in_bams, -# exact_pattern = relevant_in_bams_filename -# } -# relevant_in_bam = subset_files_list.matching_files -# } -# Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter - # task subset_files_list { # Array[File]+ input_files # String pattern @@ 
-97,86 +48,4 @@ workflow aamerge_bams_bulk { # } # } - -# scatter(in_bam in in_bams) { -# in_bam_filename = basename(in_bam, ".bam") -# -# # input bam files matching the input table -# # TODO -# -# # input bam files containing this output basename -# # TODO -# -# relevant_in_bam = in_bam -# } -# Array[File] relevant_in_bams = relevant_in_bam # gathers results from the scatter - - - -# File out_basename_in_bams_table # first column is out_basename, remaining columns are in_bams for that basename -# Array[Array[String]] input_values = read_tsv(out_basename_in_bams_table) -# -# scatter (input_value in input_values) { -# String out_basename = input_value[0] -# Array[String] in_bam_filenames = [input_value[1], input_value[2]] -# call demux.merge_and_reheader_bams { -# input: -# out_basename = out_basename, -# in_bams = in_bams, -# docker = docker -# } -# } -# -# -# -# File out_basenames_file -# Array[String] out_basenames = read_lines(out_basenames_file) -# -# Array[File]+ in_bams -# String? 
docker="quay.io/broadinstitute/viral-core" -# -# scatter (out_basename in out_basenames) { -# these_in_bams = in_bams -# -# if() -# } - # String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") - -# } - -# task subset_files_list_subset { -# Array[File]+ files -# Array[String]+ included_filenames -# -# command { -# set -ex -o pipefail -# -# read_utils.py --version | tee VERSION -# -# if [ ${length(in_bams)} -gt 1 ]; then -# read_utils.py merge_bams ${sep=' ' in_bams} merged.bam --loglevel DEBUG -# else -# echo "Skipping merge, only one input file" -# ln -s ${select_first(in_bams)} merged.bam -# fi -# -# if [[ -f "${reheader_table}" ]]; then -# read_utils.py reheader_bam merged.bam ${reheader_table} ${out_basename}.bam --loglevel DEBUG -# else -# echo "Skipping reheader, no mapping table specified" -# ln -s merged.bam ${out_basename}.bam -# fi -# } -# -# output { -# File out_bam = "${out_basename}.bam" -# } -# -# runtime { -# docker: "${docker}" -# memory: "2000 MB" -# cpu: 2 -# dx_instance_type: "mem1_ssd2_v2_x4" -# } -# } \ No newline at end of file From 8f87d81680d85744bb27e40f2ca2a2db34ed3637 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 23:18:07 -0500 Subject: [PATCH 056/123] renamed out_basenames_file --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 7247a20d8..438b46564 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -3,13 +3,13 @@ import "tasks_demux.wdl" as demux workflow aamerge_bams_bulk { Array[File]+ in_bams - File out_basenames_file # one per line, same order as in_bams_tsv + File out_basenames # one per line, same order as in_bams_tsv String? 
docker="quay.io/broadinstitute/viral-core" - Array[String] out_basenames = read_lines(out_basenames_file) + Array[String] out_basenames_list = read_lines(out_basenames) - scatter (basename_index in range(length(out_basenames))) { - String out_basename = out_basenames[basename_index] + scatter (basename_index in range(length(out_basenames_list))) { + String out_basename = out_basenames_list[basename_index] # identifies the indices of the input bam files containing this output basename scatter (in_bams_index in range(length(in_bams))) { From 59763a0eebbf3c00f14cf6ae2cbef44c4de29f56 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 23:31:00 -0500 Subject: [PATCH 057/123] replaced inner scanner with placeholder line --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 438b46564..9af717215 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -12,15 +12,17 @@ workflow aamerge_bams_bulk { String out_basename = out_basenames_list[basename_index] # identifies the indices of the input bam files containing this output basename - scatter (in_bams_index in range(length(in_bams))) { - File in_bam = in_bams[in_bams_index] - String in_bam_name = basename(in_bam, ".bam") - - if(true) { - File relevant_in_bam = in_bam - } - } - Array[File?] relevant_in_bams = relevant_in_bam # gathers results from the scatter +# scatter (in_bams_index in range(length(in_bams))) { +# File in_bam = in_bams[in_bams_index] +# String in_bam_name = basename(in_bam, ".bam") +# +# if(true) { +# File relevant_in_bam = in_bam +# } +# } +# Array[File?] relevant_in_bams = relevant_in_bam # gathers results from the scatter + + Array[File?] 
relevant_in_bams = in_bams # merges the bam files to produce this output file call demux.merge_and_reheader_bams { From c0fda0cc23f4de96c6f0c7a2ee23a8b5ff4a0f51 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 23:35:14 -0500 Subject: [PATCH 058/123] moved scatter indices array out of the scatter --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 9af717215..220572705 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -7,8 +7,8 @@ workflow aamerge_bams_bulk { String? docker="quay.io/broadinstitute/viral-core" Array[String] out_basenames_list = read_lines(out_basenames) - - scatter (basename_index in range(length(out_basenames_list))) { + Array[Int] basename_scatter_range = range(length(out_basenames_list)) + scatter (basename_index in basename_scatter_range) { String out_basename = out_basenames_list[basename_index] # identifies the indices of the input bam files containing this output basename From e2c676e766de66f08c37d71d374eb94bbbc7c42d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Thu, 14 Nov 2019 23:49:15 -0500 Subject: [PATCH 059/123] removed basename list traversal by index --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 220572705..de75caf13 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -3,13 +3,14 @@ import "tasks_demux.wdl" as demux workflow aamerge_bams_bulk { Array[File]+ in_bams - File out_basenames # one per line, same order as in_bams_tsv + File out_basenames # one per line String? 
docker="quay.io/broadinstitute/viral-core" Array[String] out_basenames_list = read_lines(out_basenames) - Array[Int] basename_scatter_range = range(length(out_basenames_list)) - scatter (basename_index in basename_scatter_range) { - String out_basename = out_basenames_list[basename_index] +# Array[Int] basename_scatter_range = range(length(out_basenames_list)) +# scatter (basename_index in basename_scatter_range) { +# String out_basename = out_basenames_list[basename_index] + scatter (out_basename in out_basenames_list) { # identifies the indices of the input bam files containing this output basename # scatter (in_bams_index in range(length(in_bams))) { From c5bd7188e6cf70cd5052deb858e312e8d3bdca85 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Fri, 15 Nov 2019 00:10:42 -0500 Subject: [PATCH 060/123] used select_all to get rid of optional files --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 30 +++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index de75caf13..ed0d8a456 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -7,14 +7,10 @@ workflow aamerge_bams_bulk { String? docker="quay.io/broadinstitute/viral-core" Array[String] out_basenames_list = read_lines(out_basenames) -# Array[Int] basename_scatter_range = range(length(out_basenames_list)) -# scatter (basename_index in basename_scatter_range) { -# String out_basename = out_basenames_list[basename_index] scatter (out_basename in out_basenames_list) { # identifies the indices of the input bam files containing this output basename -# scatter (in_bams_index in range(length(in_bams))) { -# File in_bam = in_bams[in_bams_index] +# scatter (in_bam in in_bams) { # String in_bam_name = basename(in_bam, ".bam") # # if(true) { @@ -23,7 +19,8 @@ workflow aamerge_bams_bulk { # } # Array[File?] 
relevant_in_bams = relevant_in_bam # gathers results from the scatter - Array[File?] relevant_in_bams = in_bams + Array[File?] relevant_in_bams_optional = in_bams + Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file call demux.merge_and_reheader_bams { @@ -33,7 +30,26 @@ workflow aamerge_bams_bulk { docker = docker } } -} +} + +task does_in_bam_match_out_basename { + File in_bam + String out_basename + + String in_bam_name = basename(in_bam, ".bam") + + command { + + } + + output { + + } +} + +# Array[Int] basename_scatter_range = range(length(out_basenames_list)) +# scatter (basename_index in basename_scatter_range) { +# String out_basename = out_basenames_list[basename_index] # File in_bams_tsv # filenames to merge, tab-separated, each line one output file # Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) From 306652a6b391df96a4df933c6d537d237cd0c32d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Fri, 15 Nov 2019 00:32:20 -0500 Subject: [PATCH 061/123] added back inside scatter and coded matching task, without task call --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 26 +++++++++++++---------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index ed0d8a456..641129c89 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -10,16 +10,16 @@ workflow aamerge_bams_bulk { scatter (out_basename in out_basenames_list) { # identifies the indices of the input bam files containing this output basename -# scatter (in_bam in in_bams) { -# String in_bam_name = basename(in_bam, ".bam") -# -# if(true) { -# File relevant_in_bam = in_bam -# } -# } -# Array[File?] 
relevant_in_bams = relevant_in_bam # gathers results from the scatter + scatter (in_bam in in_bams) { + String in_bam_name = basename(in_bam, ".bam") + + if(true) { + File relevant_in_bam = in_bam + } + } + Array[File?] relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter - Array[File?] relevant_in_bams_optional = in_bams +# Array[File?] relevant_in_bams_optional = in_bams Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file @@ -39,11 +39,15 @@ task does_in_bam_match_out_basename { String in_bam_name = basename(in_bam, ".bam") command { - + if [[ ${in_bam_name} =~ ^${out_basename}$ ]] || [[ ${in_bam_name} =~ [._-]${out_basename}$ ]] || [[ ${in_bam_name} =~ ^${out_basename}[._-] ]] || [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then + echo true | tee match + else + echo false | tee match + fi } output { - + Boolean match = read_boolean("match") } } From 56d6a93b50ecc300e3e0db5aa10cb4cc3503d9da Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Fri, 15 Nov 2019 00:36:53 -0500 Subject: [PATCH 062/123] added call to does_in_bam_match_out_basename --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 641129c89..35edaabdc 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -11,15 +11,17 @@ workflow aamerge_bams_bulk { # identifies the indices of the input bam files containing this output basename scatter (in_bam in in_bams) { - String in_bam_name = basename(in_bam, ".bam") + call does_in_bam_match_out_basename { + input: + out_basename = out_basename, + in_bam = in_bam + } - if(true) { + if(does_in_bam_match_out_basename.match) { File relevant_in_bam = in_bam } } - Array[File?] 
relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter - -# Array[File?] relevant_in_bams_optional = in_bams + Array[File?] relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file From 3f240cacc331759df741a03db6d3e8dc01ee93c3 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 13:12:37 -0500 Subject: [PATCH 063/123] added reheader table --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 35edaabdc..71d521b81 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -4,6 +4,7 @@ workflow aamerge_bams_bulk { Array[File]+ in_bams File out_basenames # one per line + File? reheader_table String? docker="quay.io/broadinstitute/viral-core" Array[String] out_basenames_list = read_lines(out_basenames) @@ -29,6 +30,7 @@ workflow aamerge_bams_bulk { input: out_basename = out_basename, in_bams = relevant_in_bams, + reheader_table = reheader_table, docker = docker } } From d75a7d12dedf6b450702de06bfdf7055a313627a Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 13:31:18 -0500 Subject: [PATCH 064/123] added task for exact matching input file name; added optional in_bam_basenames input field and conditional to handle it --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 47 ++++++++++++++++++----- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 71d521b81..0c0357fd4 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -4,25 +4,33 @@ workflow aamerge_bams_bulk { Array[File]+ in_bams File out_basenames # one per line 
+ File? in_bam_basenames # tab-separated; one line per output file listed in out_basenames File? reheader_table String? docker="quay.io/broadinstitute/viral-core" Array[String] out_basenames_list = read_lines(out_basenames) scatter (out_basename in out_basenames_list) { - # identifies the indices of the input bam files containing this output basename - scatter (in_bam in in_bams) { - call does_in_bam_match_out_basename { - input: - out_basename = out_basename, - in_bam = in_bam - } + if(defined(in_bam_basenames)) + { + Array[File?] relevant_in_bams_optional = in_bams + } + else + { + # identifies the indices of the input bam files containing this output basename + scatter (in_bam in in_bams) { + call does_in_bam_match_out_basename { + input: + out_basename = out_basename, + in_bam = in_bam + } - if(does_in_bam_match_out_basename.match) { - File relevant_in_bam = in_bam + if(does_in_bam_match_out_basename.match) { + File relevant_in_bam = in_bam + } } + Array[File?] relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter } - Array[File?] 
relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file @@ -55,6 +63,25 @@ task does_in_bam_match_out_basename { } } +task does_in_bam_match_expected_in_bam { + File in_bam + String expected_in_bam_name + + String in_bam_name = basename(in_bam, ".bam") + + command { + if [[ ${in_bam_name} =~ ^${expected_in_bam_name}$ ]]; then + echo true | tee match + else + echo false | tee match + fi + } + + output { + Boolean match = read_boolean("match") + } +} + # Array[Int] basename_scatter_range = range(length(out_basenames_list)) # scatter (basename_index in basename_scatter_range) { # String out_basename = out_basenames_list[basename_index] From 26445d2f4594d6c69880d83fe209abafbe82dca6 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 13:38:43 -0500 Subject: [PATCH 065/123] removed else statement, since those don't exist here, and added some comments --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 0c0357fd4..8de70642d 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -11,12 +11,17 @@ workflow aamerge_bams_bulk { Array[String] out_basenames_list = read_lines(out_basenames) scatter (out_basename in out_basenames_list) { - if(defined(in_bam_basenames)) - { - Array[File?] 
relevant_in_bams_optional = in_bams + if(defined(in_bam_basenames) == true) { + # we have an input file listing the input bams for each output bam, + # so we will use it + Array[File] relevant_in_bams_optional = in_bams } - else - { + + if(defined(in_bam_basenames) == false) { + # there is no input file listing the input bams for each output bam, + # so we will merge all potential input bams matching the output bam + # basename at start or end or surrounded by non-character strings (._-) + # identifies the indices of the input bam files containing this output basename scatter (in_bam in in_bams) { call does_in_bam_match_out_basename { From b405037b279c479185bd8bb3868fdd3fba02c850 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 13:42:19 -0500 Subject: [PATCH 066/123] added comments to tasks and separated out regex if statement to be less looooong --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 8de70642d..042e4361f 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -20,7 +20,7 @@ workflow aamerge_bams_bulk { if(defined(in_bam_basenames) == false) { # there is no input file listing the input bams for each output bam, # so we will merge all potential input bams matching the output bam - # basename at start or end or surrounded by non-character strings (._-) + # basename at start or end or surrounded by any of [._-] # identifies the indices of the input bam files containing this output basename scatter (in_bam in in_bams) { @@ -49,6 +49,8 @@ workflow aamerge_bams_bulk { } } +# returns true if the basename of in_bam contains out_basename, +# either at the start or end of the string or surrounded by any of [._-] task does_in_bam_match_out_basename { File in_bam String out_basename @@ -56,7 +58,13 @@ task 
does_in_bam_match_out_basename { String in_bam_name = basename(in_bam, ".bam") command { - if [[ ${in_bam_name} =~ ^${out_basename}$ ]] || [[ ${in_bam_name} =~ [._-]${out_basename}$ ]] || [[ ${in_bam_name} =~ ^${out_basename}[._-] ]] || [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then + if [[ ${in_bam_name} =~ ^${out_basename}$ ]]; then + echo true | tee match + elsif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then + echo true | tee match + elsif [[ ${in_bam_name} =~ ^${out_basename}[._-] ]]; then + echo true | tee match + elsif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then echo true | tee match else echo false | tee match @@ -68,6 +76,7 @@ task does_in_bam_match_out_basename { } } +# returns true if the basename of in_bam exactly matches expected_in_bam_name task does_in_bam_match_expected_in_bam { File in_bam String expected_in_bam_name From 01284c981622fff0f0e5c91b154eaf4abe58f098 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 13:43:28 -0500 Subject: [PATCH 067/123] changed elsifs to elifs --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 042e4361f..6538a3a34 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -60,11 +60,11 @@ task does_in_bam_match_out_basename { command { if [[ ${in_bam_name} =~ ^${out_basename}$ ]]; then echo true | tee match - elsif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then + elif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then echo true | tee match - elsif [[ ${in_bam_name} =~ ^${out_basename}[._-] ]]; then + elif [[ ${in_bam_name} =~ ^${out_basename}[._-] ]]; then echo true | tee match - elsif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then + elif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then echo true | tee match else echo false | tee match 
From af63b200014003d58cad60e1c237c548a14e9b09 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 13:44:54 -0500 Subject: [PATCH 068/123] moved declaration of relevant_in_bams_optional to outside the if statements --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 6538a3a34..1769ed207 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -11,10 +11,11 @@ workflow aamerge_bams_bulk { Array[String] out_basenames_list = read_lines(out_basenames) scatter (out_basename in out_basenames_list) { + Array[File?] relevant_in_bams_optional if(defined(in_bam_basenames) == true) { # we have an input file listing the input bams for each output bam, # so we will use it - Array[File] relevant_in_bams_optional = in_bams + relevant_in_bams_optional = in_bams } if(defined(in_bam_basenames) == false) { @@ -34,7 +35,7 @@ workflow aamerge_bams_bulk { File relevant_in_bam = in_bam } } - Array[File?] 
relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter + relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter } Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) From cdde6724c0067e82f7efe37d14377bd4df4dde16 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 14:11:44 -0500 Subject: [PATCH 069/123] resolved syntax errors --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 1769ed207..27d8d2d2e 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -11,14 +11,13 @@ workflow aamerge_bams_bulk { Array[String] out_basenames_list = read_lines(out_basenames) scatter (out_basename in out_basenames_list) { - Array[File?] relevant_in_bams_optional - if(defined(in_bam_basenames) == true) { + if(defined(in_bam_basenames)) { # we have an input file listing the input bams for each output bam, # so we will use it - relevant_in_bams_optional = in_bams + } - if(defined(in_bam_basenames) == false) { + if(!defined(in_bam_basenames) == false) { # there is no input file listing the input bams for each output bam, # so we will merge all potential input bams matching the output bam # basename at start or end or surrounded by any of [._-] @@ -35,7 +34,7 @@ workflow aamerge_bams_bulk { File relevant_in_bam = in_bam } } - relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter + Array[File?] 
relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter } Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) From 8d09e93931619e96d07bd90c21ac94f4e4832244 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 14:16:11 -0500 Subject: [PATCH 070/123] added declaration of relevant_in_bams inside if statements --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 27d8d2d2e..b758bcdb2 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -14,7 +14,7 @@ workflow aamerge_bams_bulk { if(defined(in_bam_basenames)) { # we have an input file listing the input bams for each output bam, # so we will use it - + Array[File] relevant_in_bams = in_bams } if(!defined(in_bam_basenames) == false) { @@ -35,8 +35,8 @@ workflow aamerge_bams_bulk { } } Array[File?] 
relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter + Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) } - Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file call demux.merge_and_reheader_bams { From b6baf7293d34c8888c984e81bc54f471c3ee35d5 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 14:21:59 -0500 Subject: [PATCH 071/123] switched back to what we had, with no input table option --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 78 ++++------------------- 1 file changed, 11 insertions(+), 67 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index b758bcdb2..4b555bda4 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -4,39 +4,25 @@ workflow aamerge_bams_bulk { Array[File]+ in_bams File out_basenames # one per line - File? in_bam_basenames # tab-separated; one line per output file listed in out_basenames File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" Array[String] out_basenames_list = read_lines(out_basenames) scatter (out_basename in out_basenames_list) { - if(defined(in_bam_basenames)) { - # we have an input file listing the input bams for each output bam, - # so we will use it - Array[File] relevant_in_bams = in_bams - } - - if(!defined(in_bam_basenames) == false) { - # there is no input file listing the input bams for each output bam, - # so we will merge all potential input bams matching the output bam - # basename at start or end or surrounded by any of [._-] - - # identifies the indices of the input bam files containing this output basename - scatter (in_bam in in_bams) { - call does_in_bam_match_out_basename { - input: - out_basename = out_basename, - in_bam = in_bam - } + # identifies the indices of the input bam files containing this output basename + scatter (in_bam in in_bams) { + call does_in_bam_match_out_basename { + input: + out_basename = out_basename, + in_bam = in_bam + } - if(does_in_bam_match_out_basename.match) { - File relevant_in_bam = in_bam - } + if(does_in_bam_match_out_basename.match) { + File relevant_in_bam = in_bam } - Array[File?] relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter - Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) } + Array[File?] 
relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file call demux.merge_and_reheader_bams { @@ -74,46 +60,4 @@ task does_in_bam_match_out_basename { output { Boolean match = read_boolean("match") } -} - -# returns true if the basename of in_bam exactly matches expected_in_bam_name -task does_in_bam_match_expected_in_bam { - File in_bam - String expected_in_bam_name - - String in_bam_name = basename(in_bam, ".bam") - - command { - if [[ ${in_bam_name} =~ ^${expected_in_bam_name}$ ]]; then - echo true | tee match - else - echo false | tee match - fi - } - - output { - Boolean match = read_boolean("match") - } -} - -# Array[Int] basename_scatter_range = range(length(out_basenames_list)) -# scatter (basename_index in basename_scatter_range) { -# String out_basename = out_basenames_list[basename_index] - -# File in_bams_tsv # filenames to merge, tab-separated, each line one output file -# Array[Array[File]] in_bams_table = read_tsv(in_bams_tsv) - -# Array[Array[String]] in_bams_filenames_table = read_tsv(in_bams_tsv) -# Array[String] relevant_in_bams_filenames = in_bams_filenames_table[basename_index] - -# task subset_files_list { -# Array[File]+ input_files -# String pattern -# -# output { -# Array[File] all_files = input_files -# Array[File] matching_files = glob(pattern) -# } -# } - -# String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") +} \ No newline at end of file From db53b41c35d1326541ca6e00665f870bfd9908b8 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 15:44:03 -0500 Subject: [PATCH 072/123] readded accidentally deleted select_allline, oops --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index 
4b555bda4..c8671f537 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -23,6 +23,7 @@ workflow aamerge_bams_bulk { } } Array[File?] relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) + Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) # merges the bam files to produce this output file call demux.merge_and_reheader_bams { From f2f5ee245ec4904a05a66f7d7febe0623e1f4f64 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 17:37:20 -0500 Subject: [PATCH 073/123] added comments to regex task --- pipes/WDL/workflows/aamerge_bams_bulk.wdl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/aamerge_bams_bulk.wdl index c8671f537..bb3b90c34 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aamerge_bams_bulk.wdl @@ -39,25 +39,29 @@ workflow aamerge_bams_bulk { # returns true if the basename of in_bam contains out_basename, # either at the start or end of the string or surrounded by any of [._-] task does_in_bam_match_out_basename { - File in_bam String out_basename + File in_bam String in_bam_name = basename(in_bam, ".bam") - + command { + # basename (exact match) if [[ ${in_bam_name} =~ ^${out_basename}$ ]]; then echo true | tee match + # something[._-]basename elif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then echo true | tee match + # basename[._-]something elif [[ ${in_bam_name} =~ ^${out_basename}[._-] ]]; then echo true | tee match + # something[._-]basename[._-]something elif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then echo true | tee match else echo false | tee match fi } - + output { Boolean match = read_boolean("match") } From f78f56c09ee70919e5985973233da113de91cb7e Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: 
Sat, 16 Nov 2019 17:38:04 -0500 Subject: [PATCH 074/123] renamed merge_bams_bulk back to merge_bams_bulk --- .../workflows/{aamerge_bams_bulk.wdl => merge_bams_bulk.wdl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename pipes/WDL/workflows/{aamerge_bams_bulk.wdl => merge_bams_bulk.wdl} (98%) diff --git a/pipes/WDL/workflows/aamerge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl similarity index 98% rename from pipes/WDL/workflows/aamerge_bams_bulk.wdl rename to pipes/WDL/workflows/merge_bams_bulk.wdl index bb3b90c34..f79ea5b77 100644 --- a/pipes/WDL/workflows/aamerge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -1,6 +1,6 @@ import "tasks_demux.wdl" as demux -workflow aamerge_bams_bulk { +workflow merge_bams_bulk { Array[File]+ in_bams File out_basenames # one per line From 8fff2bb699dfe8ae2a848725766417c38e844667 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 16 Nov 2019 20:01:14 -0500 Subject: [PATCH 075/123] condensed some wordy code into single line --- pipes/WDL/workflows/merge_bams_bulk.wdl | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index f79ea5b77..ba992491d 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -7,10 +7,10 @@ workflow merge_bams_bulk { File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" - Array[String] out_basenames_list = read_lines(out_basenames) - scatter (out_basename in out_basenames_list) { + scatter (out_basename in read_lines(out_basenames)) { - # identifies the indices of the input bam files containing this output basename + # identifies the input bam files containing this output basename + # (surrounded by start or end of string or any of [._-]) scatter (in_bam in in_bams) { call does_in_bam_match_out_basename { input: @@ -22,10 +22,9 @@ workflow merge_bams_bulk { File relevant_in_bam = in_bam } } - Array[File?] relevant_in_bams_optional = relevant_in_bam # gathers results from the scatter Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) - Array[File] relevant_in_bams = select_all(relevant_in_bams_optional) + Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers results from the scatter - # merges the bam files to produce this output file + # merges the relevant input bam files to produce this output file call demux.merge_and_reheader_bams { input: out_basename = out_basename, @@ -37,7 +36,7 @@ workflow merge_bams_bulk { } # returns true if the basename of in_bam contains out_basename, -# either at the start or end of the string or surrounded by any of [._-] +# separated from other characters by start or end of string or any of [._-] task does_in_bam_match_out_basename { String out_basename File in_bam @@ -65,4 +64,4 @@ task does_in_bam_match_out_basename { output { Boolean match = read_boolean("match") } -} \ No newline at end of file +} From 19d7625015e8791c93e34c8267697d8d2216ef0f Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Mon, 18 Nov 2019 17:05:31 -0500 Subject: [PATCH 076/123] added input variables of different interesting filetypes to see how they look in the DNAnexus UI --- pipes/WDL/workflows/merge_bams_bulk.wdl | 200 +++++++++++++++++------- 1 file changed, 140 insertions(+), 60 deletions(-) diff --git 
a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index ba992491d..1be329a96 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -1,67 +1,147 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { - - Array[File]+ in_bams - File out_basenames # one per line + Map[String, Array[File]] map_of_strings_to_arrays_of_files + Array[Pair] array_of_pairs + Pair[String, File] string_and_file_pair + Array[Pair[File, String]] array_of_pairs_of_files_and_strings + Map[File, String] map_of_files_to_strings + Array[Array[File]] array_of_arrays_of_files + Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files + Array[Array[File]] array_of_arrays_of_files + Array[String] array_of_strings + File? reheader_table String? docker="quay.io/broadinstitute/viral-core" - - scatter (out_basename in read_lines(out_basenames)) { - - # identifies the input bam files containing this output basename - # (surrounded by start or end of string or any of [._-]) - scatter (in_bam in in_bams) { - call does_in_bam_match_out_basename { - input: - out_basename = out_basename, - in_bam = in_bam - } - - if(does_in_bam_match_out_basename.match) { - File relevant_in_bam = in_bam - } - } - Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers results from the scatter - - # merges the relevant input bam files to produce this output file - call demux.merge_and_reheader_bams { - input: - out_basename = out_basename, - in_bams = relevant_in_bams, - reheader_table = reheader_table, - docker = docker - } - } } -# returns true if the basename of in_bam contains out_basename, -# separated from other characters by start or end of string or any of [._-] -task does_in_bam_match_out_basename { - String out_basename - File in_bam - - String in_bam_name = basename(in_bam, ".bam") - - command { - # basename (exact match) - if [[ ${in_bam_name} =~ ^${out_basename}$ ]]; then - echo true | tee 
match - # something[._-]basename - elif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then - echo true | tee match - # basename[._-]something - elif [[ ${in_bam_name} =~ ^${out_basename}[._-] ]]; then - echo true | tee match - # something[._-]basename[._-]something - elif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then - echo true | tee match - else - echo false | tee match - fi - } - - output { - Boolean match = read_boolean("match") - } -} +# Array[File]+ in_bams +# File out_basenames # one per line +# File? reheader_table +# String? docker="quay.io/broadinstitute/viral-core" +# +# # identifies out_basename for each in_bam file +# Array[String] out_basenames_list = read_lines(out_basenames) +# # Map[String, Int] test = {"a": 1, "b": 2} +# # Int fun = test["b"] +# # Map[File, String] in_bam_to_out_basename = {} +# # scatter (in_bam in in_bams) { +# # call get_out_basename { +# # input: +# # in_bam = in_bam, +# # out_basenames = out_basenames_list +# # } +# # String out_basename = get_out_basename.out_basename +# # in_bam_to_out_basename[in_bam] = out_basename +# # } +# +# # generates an output file for each out_basename +# scatter (out_basename in out_basenames_list) { +# +# # identifies the input bam files containing this output basename +# # (surrounded by start or end of string or any of [._-]) +# scatter (in_bam in in_bams) { +# if(in_bam_to_out_basename[in_bam] == out_basename) { +# File relevant_in_bam = in_bam +# } +# } +# Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers results from the scatter +# +# # merges the relevant input bam files to produce this output file +# call demux.merge_and_reheader_bams { +# input: +# out_basename = out_basename, +# in_bams = relevant_in_bams, +# reheader_table = reheader_table, +# docker = docker +# } +# } +# } +# +# task get_out_basename { +# File in_bam +# Array[String] out_basenames +# +# String in_bam_basename = basename(in_bam, ".bam") +# +# command { +# for out_basename in ${sep=' ' 
out_basenames}; do +# # basename (exact match) +# if [[ ${in_bam_name} =~ ^$out_basename$ ]]; then +# echo true | tee out_basename +# # something[._-]basename +# elif [[ ${in_bam_name} =~ [._-]$out_basename$ ]]; then +# echo true | tee out_basename +# # basename[._-]something +# elif [[ ${in_bam_name} =~ ^$out_basename[._-] ]]; then +# echo true | tee out_basename +# # something[._-]basename[._-]something +# elif [[ ${in_bam_name} =~ [._-]$out_basename[._-] ]]; then +# echo true | tee out_basename +# else +# echo false | tee out_basename +# fi +# done +# } +# output { +# String out_basename = read_string("out_basename") +# } +# } + +# scatter (out_basename in read_lines(out_basenames)) { +# +# # identifies the input bam files containing this output basename +# # (surrounded by start or end of string or any of [._-]) +# scatter (in_bam in in_bams) { +# call does_in_bam_match_out_basename { +# input: +# out_basename = out_basename, +# in_bam = in_bam +# } +# +# if(does_in_bam_match_out_basename.match) { +# File relevant_in_bam = in_bam +# } +# } +# Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers results from the scatter +# +# # merges the relevant input bam files to produce this output file +# call demux.merge_and_reheader_bams { +# input: +# out_basename = out_basename, +# in_bams = relevant_in_bams, +# reheader_table = reheader_table, +# docker = docker +# } +# } +# +# # returns true if the basename of in_bam contains out_basename, +# # separated from other characters by start or end of string or any of [._-] +# task does_in_bam_match_out_basename { +# String out_basename +# File in_bam +# +# String in_bam_name = basename(in_bam, ".bam") +# +# command { +# # basename (exact match) +# if [[ ${in_bam_name} =~ ^${out_basename}$ ]]; then +# echo true | tee match +# # something[._-]basename +# elif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then +# echo true | tee match +# # basename[._-]something +# elif [[ ${in_bam_name} =~ 
^${out_basename}[._-] ]]; then +# echo true | tee match +# # something[._-]basename[._-]something +# elif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then +# echo true | tee match +# else +# echo false | tee match +# fi +# } +# +# output { +# Boolean match = read_boolean("match") +# } +# } From d2d03dc748a47eedd5d4efd51eb70b6c14353182 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Mon, 18 Nov 2019 17:10:41 -0500 Subject: [PATCH 077/123] removed duplicate variable with same name, oops --- pipes/WDL/workflows/merge_bams_bulk.wdl | 1 - 1 file changed, 1 deletion(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 1be329a96..4c06c4e77 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -6,7 +6,6 @@ workflow merge_bams_bulk { Pair[String, File] string_and_file_pair Array[Pair[File, String]] array_of_pairs_of_files_and_strings Map[File, String] map_of_files_to_strings - Array[Array[File]] array_of_arrays_of_files Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files Array[Array[File]] array_of_arrays_of_files Array[String] array_of_strings From 81ccd39bc803e0c2466761f227070c897c6edde5 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Mon, 18 Nov 2019 17:29:23 -0500 Subject: [PATCH 078/123] commented out array_of_pairs --- pipes/WDL/workflows/merge_bams_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 4c06c4e77..74e76d12d 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -2,7 +2,7 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { Map[String, Array[File]] map_of_strings_to_arrays_of_files - Array[Pair] array_of_pairs +# Array[Pair] array_of_pairs Pair[String, File] string_and_file_pair Array[Pair[File, String]] 
array_of_pairs_of_files_and_strings Map[File, String] map_of_files_to_strings From 8ad0efcdafc755d8d0c4c9dea76feb9c4f538d04 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Mon, 18 Nov 2019 17:30:12 -0500 Subject: [PATCH 079/123] commented out everything with Pairs --- pipes/WDL/workflows/merge_bams_bulk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 74e76d12d..09eaaeed9 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -3,10 +3,10 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { Map[String, Array[File]] map_of_strings_to_arrays_of_files # Array[Pair] array_of_pairs - Pair[String, File] string_and_file_pair - Array[Pair[File, String]] array_of_pairs_of_files_and_strings +# Pair[String, File] string_and_file_pair +# Array[Pair[File, String]] array_of_pairs_of_files_and_strings Map[File, String] map_of_files_to_strings - Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files +# Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files Array[Array[File]] array_of_arrays_of_files Array[String] array_of_strings From 2003e73b4ada08202d04192ff0867c6e07ba93be Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Mon, 18 Nov 2019 17:43:50 -0500 Subject: [PATCH 080/123] deteleted Array[Pair] --- pipes/WDL/workflows/merge_bams_bulk.wdl | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 09eaaeed9..d8fa5763e 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -2,11 +2,10 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { Map[String, Array[File]] map_of_strings_to_arrays_of_files -# Array[Pair] array_of_pairs -# Pair[String, File] string_and_file_pair -# 
Array[Pair[File, String]] array_of_pairs_of_files_and_strings + Pair[String, File] string_and_file_pair + Array[Pair[File, String]] array_of_pairs_of_files_and_strings Map[File, String] map_of_files_to_strings -# Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files + Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files Array[Array[File]] array_of_arrays_of_files Array[String] array_of_strings From 3909690544d3b47262cc35450ebd908821844744 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 01:28:56 -0500 Subject: [PATCH 081/123] new version that takes a map of input bam file name to output bam file name --- pipes/WDL/workflows/merge_bams_bulk.wdl | 48 ++++++++++++++++++++----- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index d8fa5763e..3c7ebeca8 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -1,18 +1,49 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { - Map[String, Array[File]] map_of_strings_to_arrays_of_files - Pair[String, File] string_and_file_pair - Array[Pair[File, String]] array_of_pairs_of_files_and_strings - Map[File, String] map_of_files_to_strings - Array[Pair[String, Array[File]]] array_of_pairs_of_strings_and_arrays_of_files - Array[Array[File]] array_of_arrays_of_files - Array[String] array_of_strings - + Array[File]+ in_bams # any order + File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" + + + # generates map with key: input bam file name -> value: output bam file basename + Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) + + # retrieves unique output bam file basenames (no repeats) + # TODO + File out_bam_basenames # one per line + Array[String] out_bams = read_lines(out_bam_basenames) + + scatter (out_bam in out_bams) { + String out_bam_basename_short = basename(out_bam, ".bam") + String out_bam_basename_long = basename(out_bam) + scatter (in_bam in in_bams) { + String in_bam_basename_short = basename(in_bam, ".bam") + String in_bam_basename_long = basename(in_bam) + + if(in_bam_to_out_bam[in_bam_basename_short] == out_bam_basename_short + || in_bam_to_out_bam[in_bam_basename_long] == out_bam_basename_long + || in_bam_to_out_bam[in_bam_basename_short] == out_bam_basename_long + || in_bam_to_out_bam[in_bam_basename_long] == out_bam_basename_short) { + File relevant_in_bam = in_bam + } + } + Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers input bam files from the scatter + + # merges the relevant input bam files to produce this output file + call demux.merge_and_reheader_bams { + input: + out_basename = in_bam_basename_short, + in_bams = relevant_in_bams, + reheader_table = reheader_table, + docker = docker + } + } } +# workflow merge_bams_bulk { +# # Array[File]+ in_bams # File out_basenames # one per line # File? 
reheader_table @@ -20,6 +51,7 @@ workflow merge_bams_bulk { # # # identifies out_basename for each in_bam file # Array[String] out_basenames_list = read_lines(out_basenames) +# String test = out_basenames_list[0] # # Map[String, Int] test = {"a": 1, "b": 2} # # Int fun = test["b"] # # Map[File, String] in_bam_to_out_basename = {} From 5a55b42d9d923f244ff6768e2f25d4335d688ed1 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 02:03:05 -0500 Subject: [PATCH 082/123] shortened really long if statement and fixed in_bam out_bam confusion in merge_and_reheader_bams call --- pipes/WDL/workflows/merge_bams_bulk.wdl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 3c7ebeca8..f2bb381a2 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -16,16 +16,12 @@ workflow merge_bams_bulk { Array[String] out_bams = read_lines(out_bam_basenames) scatter (out_bam in out_bams) { - String out_bam_basename_short = basename(out_bam, ".bam") - String out_bam_basename_long = basename(out_bam) scatter (in_bam in in_bams) { - String in_bam_basename_short = basename(in_bam, ".bam") String in_bam_basename_long = basename(in_bam) - - if(in_bam_to_out_bam[in_bam_basename_short] == out_bam_basename_short - || in_bam_to_out_bam[in_bam_basename_long] == out_bam_basename_long - || in_bam_to_out_bam[in_bam_basename_short] == out_bam_basename_long - || in_bam_to_out_bam[in_bam_basename_long] == out_bam_basename_short) { + String in_bam_basename_short = basename(in_bam, ".bam") + if(in_bam_to_out_bam[in_bam_basename_long] == out_bam + || in_bam_to_out_bam[in_bam_basename_short] == out_bam) { + File relevant_in_bam = in_bam } } @@ -34,7 +30,7 @@ workflow merge_bams_bulk { # merges the relevant input bam files to produce this output file call demux.merge_and_reheader_bams { input: - out_basename = 
in_bam_basename_short, + out_basename = basename(out_bam), in_bams = relevant_in_bams, reheader_table = reheader_table, docker = docker From 0d48fdb2fc36745a9c15ed0c45a50e5069c20712 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 02:23:18 -0500 Subject: [PATCH 083/123] added task to retrieve out_bam names from in_bam_out_bam_table --- pipes/WDL/workflows/merge_bams_bulk.wdl | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index f2bb381a2..3b1154824 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -11,17 +11,20 @@ workflow merge_bams_bulk { Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) - # TODO - File out_bam_basenames # one per line - Array[String] out_bams = read_lines(out_bam_basenames) + call unique_values_in_second_column { + input: table = in_bam_to_out_bam + } + Array[String] out_bams = read_lines(unique_values_in_second_column.unique_values) + # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { + # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { String in_bam_basename_long = basename(in_bam) String in_bam_basename_short = basename(in_bam, ".bam") if(in_bam_to_out_bam[in_bam_basename_long] == out_bam || in_bam_to_out_bam[in_bam_basename_short] == out_bam) { - + File relevant_in_bam = in_bam } } @@ -38,6 +41,18 @@ workflow merge_bams_bulk { } } +task unique_values_in_second_column { + File table + + command { + cut -f2 ${table} | sort | uniq | tee unique_values + } + + output { + String unique_values = read_string("unique_values") + } +} + # workflow merge_bams_bulk { # # Array[File]+ in_bams From 3e6a3149d63a5deea612d3969e2413a8f3280c7a Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna 
Krasilnikova Date: Tue, 19 Nov 2019 02:24:28 -0500 Subject: [PATCH 084/123] added check to verify that key is defined in a map --- pipes/WDL/workflows/merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 3b1154824..e02c88898 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -22,8 +22,8 @@ workflow merge_bams_bulk { scatter (in_bam in in_bams) { String in_bam_basename_long = basename(in_bam) String in_bam_basename_short = basename(in_bam, ".bam") - if(in_bam_to_out_bam[in_bam_basename_long] == out_bam - || in_bam_to_out_bam[in_bam_basename_short] == out_bam) { + if(defined(in_bam_to_out_bam[in_bam_basename_long]) && in_bam_to_out_bam[in_bam_basename_long] == out_bam + || defined(in_bam_to_out_bam[in_bam_basename_short]) && in_bam_to_out_bam[in_bam_basename_short] == out_bam) { File relevant_in_bam = in_bam } From e7403bef08da58672cfd03992500408eb80d388e Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 02:40:49 -0500 Subject: [PATCH 085/123] fixed map-file confusion --- pipes/WDL/workflows/merge_bams_bulk.wdl | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index e02c88898..765fdc306 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -6,13 +6,12 @@ workflow merge_bams_bulk { File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" - # generates map with key: input bam file name -> value: output bam file basename Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) call unique_values_in_second_column { - input: table = in_bam_to_out_bam + input: table = in_bam_out_bam_table } Array[String] out_bams = read_lines(unique_values_in_second_column.unique_values) @@ -23,8 +22,7 @@ workflow merge_bams_bulk { String in_bam_basename_long = basename(in_bam) String in_bam_basename_short = basename(in_bam, ".bam") if(defined(in_bam_to_out_bam[in_bam_basename_long]) && in_bam_to_out_bam[in_bam_basename_long] == out_bam - || defined(in_bam_to_out_bam[in_bam_basename_short]) && in_bam_to_out_bam[in_bam_basename_short] == out_bam) { - + || defined(in_bam_to_out_bam[in_bam_basename_short]) && in_bam_to_out_bam[in_bam_basename_short] == out_bam) { File relevant_in_bam = in_bam } } @@ -33,7 +31,7 @@ workflow merge_bams_bulk { # merges the relevant input bam files to produce this output file call demux.merge_and_reheader_bams { input: - out_basename = basename(out_bam), + out_basename = basename(out_bam, ".bam"), in_bams = relevant_in_bams, reheader_table = reheader_table, docker = docker From c6f83c0f2f6f86474fc7b9fd15bf566f17173e9b Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 11:07:03 -0500 Subject: [PATCH 086/123] replaced read_string with read_lines and made unique_values an array --- pipes/WDL/workflows/merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 765fdc306..8b552a85c 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -13,7 +13,7 @@ workflow merge_bams_bulk { call unique_values_in_second_column { input: table = in_bam_out_bam_table } - Array[String] out_bams = 
read_lines(unique_values_in_second_column.unique_values) + Array[String] out_bams = unique_values_in_second_column.unique_values # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { @@ -47,7 +47,7 @@ task unique_values_in_second_column { } output { - String unique_values = read_string("unique_values") + Array[String] unique_values = read_lines("unique_values") } } From 34ec57dfc8627563bca4117537021b68362c174e Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 16:01:47 -0500 Subject: [PATCH 087/123] moved out_bams list to within a file rather than automatically retrieved --- pipes/WDL/workflows/merge_bams_bulk.wdl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 8b552a85c..51ea0c540 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -3,6 +3,7 @@ import "tasks_demux.wdl" as demux workflow merge_bams_bulk { Array[File]+ in_bams # any order File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename + File out_bams_file File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" @@ -10,10 +11,11 @@ workflow merge_bams_bulk { Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) - call unique_values_in_second_column { - input: table = in_bam_out_bam_table - } - Array[String] out_bams = unique_values_in_second_column.unique_values +# call unique_values_in_second_column { +# input: table = in_bam_out_bam_table +# } +# Array[String] out_bams = unique_values_in_second_column.unique_values + Array[String] out_bams = read_lines(out_bams_file) # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { From ea81aefd1e17b6642500c6397bd2393d3070b5a4 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 16:10:25 -0500 Subject: [PATCH 088/123] renamed merge_bams_bulk so it gets put together earlier --- .../workflows/{merge_bams_bulk.wdl => aa_merge_bams_bulk.wdl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename pipes/WDL/workflows/{merge_bams_bulk.wdl => aa_merge_bams_bulk.wdl} (99%) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl similarity index 99% rename from pipes/WDL/workflows/merge_bams_bulk.wdl rename to pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 51ea0c540..53c2c10d2 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -1,6 +1,6 @@ import "tasks_demux.wdl" as demux -workflow merge_bams_bulk { +workflow aa_merge_bams_bulk { Array[File]+ in_bams # any order File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename File out_bams_file From 86b71451fc48e98c1ea8f723b7f4cc7ec4f95f49 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 16:27:47 -0500 Subject: [PATCH 089/123] assuming that filenames in the table don't end in .bam --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 
6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 53c2c10d2..2f73f8f11 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -21,10 +21,8 @@ workflow aa_merge_bams_bulk { scatter (out_bam in out_bams) { # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { - String in_bam_basename_long = basename(in_bam) - String in_bam_basename_short = basename(in_bam, ".bam") - if(defined(in_bam_to_out_bam[in_bam_basename_long]) && in_bam_to_out_bam[in_bam_basename_long] == out_bam - || defined(in_bam_to_out_bam[in_bam_basename_short]) && in_bam_to_out_bam[in_bam_basename_short] == out_bam) { + String in_bam_basename = basename(in_bam, ".bam") + if(in_bam_to_out_bam[in_bam_basename] == out_bam) { File relevant_in_bam = in_bam } } From d23ae27e6c7ca97b797d1cc547ad388bfb225644 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 17:19:21 -0500 Subject: [PATCH 090/123] moved in_bam_to_out_bam to hardcoded literal instead of in file --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 2f73f8f11..ad351f98a 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -2,13 +2,14 @@ import "tasks_demux.wdl" as demux workflow aa_merge_bams_bulk { Array[File]+ in_bams # any order - File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename +# File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename File out_bams_file File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) +# Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) + Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { From 2cd8b3453d6bb32c90dbc0bf17cb59780cb225cc Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 17:21:21 -0500 Subject: [PATCH 091/123] made out_bams a hardcoded literal too --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index ad351f98a..de18ff793 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -3,7 +3,7 @@ import "tasks_demux.wdl" as demux workflow aa_merge_bams_bulk { Array[File]+ in_bams # any order # File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename - File out_bams_file +# File out_bams_file File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" @@ -16,7 +16,8 @@ workflow aa_merge_bams_bulk { # input: table = in_bam_out_bam_table # } # Array[String] out_bams = unique_values_in_second_column.unique_values - Array[String] out_bams = read_lines(out_bams_file) +# Array[String] out_bams = read_lines(out_bams_file) + Array[String] out_bams = ["Hep_WGS19_067", "Hep_WGS19_068", "Hep_WGS19_069"] # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { From e6bef52e5881dbbba332a25e4150690e56ebbb4a Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 17:54:15 -0500 Subject: [PATCH 092/123] made in_bam_to_out_bam read in from a table again --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index de18ff793..b7f11f296 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -2,14 +2,14 @@ import "tasks_demux.wdl" as demux workflow aa_merge_bams_bulk { Array[File]+ in_bams # any order -# File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename + File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename # File out_bams_file File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename -# Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) - Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} + Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) +# Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { From 1898c7f28f2e41ba91588b1511f1b1273066d1b7 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 18:20:06 -0500 Subject: [PATCH 093/123] made read_map go through bash script instead of reading directly from file --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index b7f11f296..3f5d6e1cc 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -8,7 +8,11 @@ workflow aa_merge_bams_bulk { String? 
docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) + call read_map_through_bash { + input: table = in_bam_out_bam_table + } + Map[String, String] in_bam_to_out_bam = read_map_through_bash.map +# Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} # retrieves unique output bam file basenames (no repeats) @@ -41,6 +45,18 @@ workflow aa_merge_bams_bulk { } } +task read_map_through_bash { + File table + + command { + cat ${table} | tee map_table + } + + output { + Map[String, String] map = read_map("map_table") + } +} + task unique_values_in_second_column { File table From 70c1794852bbf5d373e3ef3993d92425c132f12c Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 18:50:27 -0500 Subject: [PATCH 094/123] tried piping map file through stdout --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 3f5d6e1cc..8209a01ef 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -49,11 +49,11 @@ task read_map_through_bash { File table command { - cat ${table} | tee map_table + cat ${table} } output { - Map[String, String] map = read_map("map_table") + Map[String, String] map = read_map(stdout()) } } From efc4cb0217ac21a8acc3d57eedb81fda3d68e366 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 
2019 18:54:59 -0500 Subject: [PATCH 095/123] added hardcoded map to task outputs so it will hopefully be displayed --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 8209a01ef..d09fcb12a 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -8,12 +8,14 @@ workflow aa_merge_bams_bulk { String? docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename + Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} call read_map_through_bash { - input: table = in_bam_out_bam_table + input: + table = in_bam_out_bam_table, + hardcoded_map = in_bam_to_out_bam_hardcoded } Map[String, String] in_bam_to_out_bam = read_map_through_bash.map # Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) -# Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { @@ -47,6 +49,7 @@ workflow aa_merge_bams_bulk { task read_map_through_bash { File table + Map[String, String] hardcoded_map command { cat ${table} @@ -54,6 +57,7 @@ task read_map_through_bash { output { Map[String, String] map = 
read_map(stdout()) + Map[String, String] hardcoded_map = hardcoded_map } } From 48824f5edb2f1718fd9e9db233b7b06aacdbc664 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 19:03:04 -0500 Subject: [PATCH 096/123] resolved reused names oops --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index d09fcb12a..e0f07217d 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -57,7 +57,7 @@ task read_map_through_bash { output { Map[String, String] map = read_map(stdout()) - Map[String, String] hardcoded_map = hardcoded_map + Map[String, String] hardcoded_map_output = hardcoded_map } } From 2819c294936d5ae3cc856d04bd22adf0c53352f4 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 19:18:23 -0500 Subject: [PATCH 097/123] replaced map with map_output --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index e0f07217d..6b0132c58 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -14,7 +14,7 @@ workflow aa_merge_bams_bulk { table = in_bam_out_bam_table, hardcoded_map = in_bam_to_out_bam_hardcoded } - Map[String, String] in_bam_to_out_bam = read_map_through_bash.map + Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output # Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) @@ -56,7 +56,7 @@ task read_map_through_bash { } output { - Map[String, String] map = read_map(stdout()) + Map[String, String] map_output = read_map(stdout()) Map[String, String] hardcoded_map_output = hardcoded_map } } From 
e8136f60c9ff87fe6205c08806ab7d84c08bf30f Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 19:33:12 -0500 Subject: [PATCH 098/123] reading directly from file again --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 6b0132c58..277556b05 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -9,13 +9,13 @@ workflow aa_merge_bams_bulk { # generates map with key: input bam file name -> value: output bam file basename Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} - call read_map_through_bash { - input: - table = in_bam_out_bam_table, - hardcoded_map = in_bam_to_out_bam_hardcoded - } - Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output -# Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) +# call read_map_through_bash { +# input: +# table = in_bam_out_bam_table, +# hardcoded_map = in_bam_to_out_bam_hardcoded +# } +# Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output + Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { From 3f6314bcd07ba5632d02baafafc8b11a3683c421 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 19:33:52 -0500 Subject: [PATCH 099/123] hardcoded map again --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 277556b05..77a49e886 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -8,14 +8,14 @@ workflow aa_merge_bams_bulk { String? docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} + Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} # call read_map_through_bash { # input: # table = in_bam_out_bam_table, # hardcoded_map = in_bam_to_out_bam_hardcoded # } # Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output - Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) +# Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { From e47d6a5aab5a2c3b3368e2f4a2e6714318b17173 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 19:51:34 -0500 Subject: [PATCH 100/123] trying to access elements of map at different scopes --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl 
b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 77a49e886..8fd09ad5f 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -8,14 +8,17 @@ workflow aa_merge_bams_bulk { String? docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} + Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} # call read_map_through_bash { # input: # table = in_bam_out_bam_table, # hardcoded_map = in_bam_to_out_bam_hardcoded # } # Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output -# Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) + Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) + String test_a1 = in_bam_out_bam_table["Hep_WGS19_067"] + String test_a2 = in_bam_out_bam_table["Hep_WGS19_068"] + String test_a3 = in_bam_out_bam_table["Hep_WGS19_069"] # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { @@ -27,8 +30,16 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { + String test_b1 = in_bam_out_bam_table["Hep_WGS19_067"] + String test_b2 = in_bam_out_bam_table["Hep_WGS19_068"] + String test_b3 = 
in_bam_out_bam_table["Hep_WGS19_069"] + # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { + String test_c1 = in_bam_out_bam_table["Hep_WGS19_067"] + String test_c2 = in_bam_out_bam_table["Hep_WGS19_068"] + String test_c3 = in_bam_out_bam_table["Hep_WGS19_069"] + String in_bam_basename = basename(in_bam, ".bam") if(in_bam_to_out_bam[in_bam_basename] == out_bam) { File relevant_in_bam = in_bam From c0f3ab4b1dcb39324ce4f8ec8eedf226dc0ca37b Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 19:54:46 -0500 Subject: [PATCH 101/123] fixed dumb error oops --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 8fd09ad5f..9552a0dc8 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -16,9 +16,8 @@ workflow aa_merge_bams_bulk { # } # Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) - String test_a1 = in_bam_out_bam_table["Hep_WGS19_067"] - String test_a2 = in_bam_out_bam_table["Hep_WGS19_068"] - String test_a3 = in_bam_out_bam_table["Hep_WGS19_069"] + String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_a2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { @@ -30,15 +29,13 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { - String test_b1 = in_bam_out_bam_table["Hep_WGS19_067"] - String test_b2 = in_bam_out_bam_table["Hep_WGS19_068"] - String test_b3 = in_bam_out_bam_table["Hep_WGS19_069"] + String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] # 
retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { - String test_c1 = in_bam_out_bam_table["Hep_WGS19_067"] - String test_c2 = in_bam_out_bam_table["Hep_WGS19_068"] - String test_c3 = in_bam_out_bam_table["Hep_WGS19_069"] + String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_c2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] String in_bam_basename = basename(in_bam, ".bam") if(in_bam_to_out_bam[in_bam_basename] == out_bam) { From e5c81d88fc34cba4b62d3bf1ee102da707a9dd49 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:03:19 -0500 Subject: [PATCH 102/123] put hardcoded test first --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 9552a0dc8..170044ce0 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -16,8 +16,9 @@ workflow aa_merge_bams_bulk { # } # Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) - String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_a2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] + String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { @@ -29,13 +30,13 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { - String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - + String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] + # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { - String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] String test_c2 = 
in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] + String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] String in_bam_basename = basename(in_bam, ".bam") if(in_bam_to_out_bam[in_bam_basename] == out_bam) { From 4281ef33df7ad982d55b99360a779e9f4d362610 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:11:21 -0500 Subject: [PATCH 103/123] removed references to hardcoded map --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 170044ce0..f7d195055 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -8,16 +8,10 @@ workflow aa_merge_bams_bulk { String? docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} -# call read_map_through_bash { -# input: -# table = in_bam_out_bam_table, -# hardcoded_map = in_bam_to_out_bam_hardcoded -# } -# Map[String, String] in_bam_to_out_bam = read_map_through_bash.map_output +# Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) - String test_a2 = 
in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] +# String test_a2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves unique output bam file basenames (no repeats) @@ -30,12 +24,12 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { - String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] +# String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { - String test_c2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] +# String test_c2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] String in_bam_basename = basename(in_bam, ".bam") From 183c550f67e5cfe42cbb42e48d4f2c1070841bde Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:12:07 -0500 Subject: [PATCH 104/123] removed all except the inner-most test map access --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index f7d195055..183e72a6c 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -12,7 +12,7 @@ workflow aa_merge_bams_bulk { Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # String test_a2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] +# String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { @@ -25,7 +25,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { # String test_b2 = 
in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] +# String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { From bd16496959f88a0a22df02a3a6810b3dd7d7c7c1 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:15:09 -0500 Subject: [PATCH 105/123] pulled map access out of if statement --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 183e72a6c..6163dea7d 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -30,10 +30,11 @@ workflow aa_merge_bams_bulk { # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { # String test_c2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] +# String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] String in_bam_basename = basename(in_bam, ".bam") - if(in_bam_to_out_bam[in_bam_basename] == out_bam) { + String this_in_bams_out_bam_basename = in_bam_to_out_bam[in_bam_basename] + if(this_in_bams_out_bam_basename == out_bam) { File relevant_in_bam = in_bam } } From 38e21fee679596fe215d79f06f854dd841416bd8 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:26:28 -0500 Subject: [PATCH 106/123] map access in outer scatter --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 6163dea7d..3cbb46ef4 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -25,7 +25,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output 
bam file scatter (out_bam in out_bams) { # String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] -# String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { @@ -33,8 +33,7 @@ workflow aa_merge_bams_bulk { # String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] String in_bam_basename = basename(in_bam, ".bam") - String this_in_bams_out_bam_basename = in_bam_to_out_bam[in_bam_basename] - if(this_in_bams_out_bam_basename == out_bam) { + if(in_bam_to_out_bam[in_bam_basename] == out_bam) { File relevant_in_bam = in_bam } } From b2ea552f225b25084a7bc643e574e2fe7e28ae50 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:27:02 -0500 Subject: [PATCH 107/123] map access outside of scatter --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 3cbb46ef4..745e49d93 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -12,7 +12,7 @@ workflow aa_merge_bams_bulk { Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) # String test_a2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] -# String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves unique output bam file basenames (no repeats) # call unique_values_in_second_column { @@ -25,7 +25,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { # String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] +# String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { 
From 457549637e0d087669c646e7796cbdd7606d2178 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 20:28:12 -0500 Subject: [PATCH 108/123] map access in inner scatter, outer scatter, and outside of scatter --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 745e49d93..f7d195055 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -25,12 +25,12 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { # String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] -# String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { # String test_c2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] -# String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] + String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] String in_bam_basename = basename(in_bam, ".bam") if(in_bam_to_out_bam[in_bam_basename] == out_bam) { From 3e4541906fdda37db7dca70c461947753c7dca08 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Tue, 19 Nov 2019 23:27:20 -0500 Subject: [PATCH 109/123] reading out_bams from in_bam_out_bam_table instead of its own file or hardcoded --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index f7d195055..e4a5658db 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -8,30 +8,22 @@ workflow aa_merge_bams_bulk { String? 
docker="quay.io/broadinstitute/viral-core" # generates map with key: input bam file name -> value: output bam file basename -# Map[String, String] in_bam_to_out_bam_hardcoded = {"Hep_WGS19_067": "Hep_WGS19_067", "Hep_WGS19_067_ERCC-57.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_067", "Hep_WGS19_068": "Hep_WGS19_068", "Hep_WGS19_068_ERCC-58.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_068", "Hep_WGS19_069": "Hep_WGS19_069", "Hep_WGS19_069_ERCC-61.lExp_8_Hep_A_23_and_spike_pool": "Hep_WGS19_069"} Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) -# String test_a2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - String test_a1 = in_bam_to_out_bam["Hep_WGS19_067"] - # retrieves unique output bam file basenames (no repeats) -# call unique_values_in_second_column { -# input: table = in_bam_out_bam_table -# } -# Array[String] out_bams = unique_values_in_second_column.unique_values + call unique_values_in_second_column { + input: table = in_bam_out_bam_table + } + Array[String] out_bams = unique_values_in_second_column.unique_values # Array[String] out_bams = read_lines(out_bams_file) - Array[String] out_bams = ["Hep_WGS19_067", "Hep_WGS19_068", "Hep_WGS19_069"] +# Array[String] out_bams = ["Hep_WGS19_067", "Hep_WGS19_068", "Hep_WGS19_069"] # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { -# String test_b2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { -# String test_c2 = in_bam_to_out_bam_hardcoded["Hep_WGS19_067"] - String test_c1 = in_bam_to_out_bam["Hep_WGS19_067"] - String in_bam_basename = basename(in_bam, ".bam") if(in_bam_to_out_bam[in_bam_basename] == out_bam) { File relevant_in_bam = in_bam From 20ccbbd54872e2d22a2242fb09ce818bb13fc93f Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 00:22:41 -0500 Subject: [PATCH 110/123] map 
access in outer scatter commented out --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index e4a5658db..0c7fed9ca 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -20,7 +20,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { - String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] +# String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { From 280ff5fc5e18ea2fdceb5ec93ad64ffc9c175665 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 00:31:58 -0500 Subject: [PATCH 111/123] deleted commented out code --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 153 +-------------------- 1 file changed, 1 insertion(+), 152 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 0c7fed9ca..32b0ea85d 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -2,8 +2,7 @@ import "tasks_demux.wdl" as demux workflow aa_merge_bams_bulk { Array[File]+ in_bams # any order - File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename -# File out_bams_file + File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename (one line per INPUT file) File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" @@ -15,8 +14,6 @@ workflow aa_merge_bams_bulk { input: table = in_bam_out_bam_table } Array[String] out_bams = unique_values_in_second_column.unique_values -# Array[String] out_bams = read_lines(out_bams_file) -# Array[String] out_bams = ["Hep_WGS19_067", "Hep_WGS19_068", "Hep_WGS19_069"] # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { @@ -42,20 +39,6 @@ workflow aa_merge_bams_bulk { } } -task read_map_through_bash { - File table - Map[String, String] hardcoded_map - - command { - cat ${table} - } - - output { - Map[String, String] map_output = read_map(stdout()) - Map[String, String] hardcoded_map_output = hardcoded_map - } -} - task unique_values_in_second_column { File table @@ -67,137 +50,3 @@ task unique_values_in_second_column { Array[String] unique_values = read_lines("unique_values") } } - -# workflow merge_bams_bulk { -# -# Array[File]+ in_bams -# File out_basenames # one per line -# File? reheader_table -# String? 
docker="quay.io/broadinstitute/viral-core" -# -# # identifies out_basename for each in_bam file -# Array[String] out_basenames_list = read_lines(out_basenames) -# String test = out_basenames_list[0] -# # Map[String, Int] test = {"a": 1, "b": 2} -# # Int fun = test["b"] -# # Map[File, String] in_bam_to_out_basename = {} -# # scatter (in_bam in in_bams) { -# # call get_out_basename { -# # input: -# # in_bam = in_bam, -# # out_basenames = out_basenames_list -# # } -# # String out_basename = get_out_basename.out_basename -# # in_bam_to_out_basename[in_bam] = out_basename -# # } -# -# # generates an output file for each out_basename -# scatter (out_basename in out_basenames_list) { -# -# # identifies the input bam files containing this output basename -# # (surrounded by start or end of string or any of [._-]) -# scatter (in_bam in in_bams) { -# if(in_bam_to_out_basename[in_bam] == out_basename) { -# File relevant_in_bam = in_bam -# } -# } -# Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers results from the scatter -# -# # merges the relevant input bam files to produce this output file -# call demux.merge_and_reheader_bams { -# input: -# out_basename = out_basename, -# in_bams = relevant_in_bams, -# reheader_table = reheader_table, -# docker = docker -# } -# } -# } -# -# task get_out_basename { -# File in_bam -# Array[String] out_basenames -# -# String in_bam_basename = basename(in_bam, ".bam") -# -# command { -# for out_basename in ${sep=' ' out_basenames}; do -# # basename (exact match) -# if [[ ${in_bam_name} =~ ^$out_basename$ ]]; then -# echo true | tee out_basename -# # something[._-]basename -# elif [[ ${in_bam_name} =~ [._-]$out_basename$ ]]; then -# echo true | tee out_basename -# # basename[._-]something -# elif [[ ${in_bam_name} =~ ^$out_basename[._-] ]]; then -# echo true | tee out_basename -# # something[._-]basename[._-]something -# elif [[ ${in_bam_name} =~ [._-]$out_basename[._-] ]]; then -# echo true | tee out_basename -# else -# 
echo false | tee out_basename -# fi -# done -# } -# output { -# String out_basename = read_string("out_basename") -# } -# } - -# scatter (out_basename in read_lines(out_basenames)) { -# -# # identifies the input bam files containing this output basename -# # (surrounded by start or end of string or any of [._-]) -# scatter (in_bam in in_bams) { -# call does_in_bam_match_out_basename { -# input: -# out_basename = out_basename, -# in_bam = in_bam -# } -# -# if(does_in_bam_match_out_basename.match) { -# File relevant_in_bam = in_bam -# } -# } -# Array[File] relevant_in_bams = select_all(relevant_in_bam) # gathers results from the scatter -# -# # merges the relevant input bam files to produce this output file -# call demux.merge_and_reheader_bams { -# input: -# out_basename = out_basename, -# in_bams = relevant_in_bams, -# reheader_table = reheader_table, -# docker = docker -# } -# } -# -# # returns true if the basename of in_bam contains out_basename, -# # separated from other characters by start or end of string or any of [._-] -# task does_in_bam_match_out_basename { -# String out_basename -# File in_bam -# -# String in_bam_name = basename(in_bam, ".bam") -# -# command { -# # basename (exact match) -# if [[ ${in_bam_name} =~ ^${out_basename}$ ]]; then -# echo true | tee match -# # something[._-]basename -# elif [[ ${in_bam_name} =~ [._-]${out_basename}$ ]]; then -# echo true | tee match -# # basename[._-]something -# elif [[ ${in_bam_name} =~ ^${out_basename}[._-] ]]; then -# echo true | tee match -# # something[._-]basename[._-]something -# elif [[ ${in_bam_name} =~ [._-]${out_basename}[._-] ]]; then -# echo true | tee match -# else -# echo false | tee match -# fi -# } -# -# output { -# Boolean match = read_boolean("match") -# } -# } From a5ecff67bc5df156767fc4f725435bd60df1e31d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 00:34:05 -0500 Subject: [PATCH 112/123] tried adding length of map --- 
pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 1 + 1 file changed, 1 insertion(+) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 32b0ea85d..a10b2266c 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -18,6 +18,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { # String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] + length(in_bam_to_out_bam) # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { From d0f4166d2fa46691522b03dd1df6c88b86a7fbb9 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 00:40:25 -0500 Subject: [PATCH 113/123] added length of map as access to map, instead of accessing an element with a hardcoded key --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index a10b2266c..32f88a81c 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -18,7 +18,8 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { # String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] - length(in_bam_to_out_bam) +# String placeholder = write_map(in_bam_to_out_bam) + Int placeholder = length(in_bam_to_out_bam) # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { From 9456b6f9bc4b764409993c80b341b602eb101bce Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 00:49:46 -0500 Subject: [PATCH 114/123] trying write_map as map touch --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl 
b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 32f88a81c..3ea03c025 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -18,8 +18,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { # String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] -# String placeholder = write_map(in_bam_to_out_bam) - Int placeholder = length(in_bam_to_out_bam) + String placeholder = write_map(in_bam_to_out_bam) # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { From 4e330d3030f41d7da375e349eccb7eb294a2af9f Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 01:11:29 -0500 Subject: [PATCH 115/123] cleaned it up --- pipes/WDL/workflows/aa_merge_bams_bulk.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl index 3ea03c025..dc2bf6b1f 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/aa_merge_bams_bulk.wdl @@ -17,8 +17,7 @@ workflow aa_merge_bams_bulk { # collects and merges input bam files for each output bam file scatter (out_bam in out_bams) { -# String test_b1 = in_bam_to_out_bam["Hep_WGS19_067"] - String placeholder = write_map(in_bam_to_out_bam) + String placeholder = write_map(in_bam_to_out_bam) # need to touch map in outer scatter for it to be seen in inner scatter # retrieves the input bam files for this output bam file scatter (in_bam in in_bams) { From b10c66da58e4f9f914f28c635c0701edd63102c4 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Wed, 20 Nov 2019 02:09:51 -0500 Subject: [PATCH 116/123] renamed aa_merge_bams_bulk.wdl back to merge_bams_bulk.wdl --- .../workflows/{aa_merge_bams_bulk.wdl => merge_bams_bulk.wdl} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename pipes/WDL/workflows/{aa_merge_bams_bulk.wdl => 
merge_bams_bulk.wdl} (98%) diff --git a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl similarity index 98% rename from pipes/WDL/workflows/aa_merge_bams_bulk.wdl rename to pipes/WDL/workflows/merge_bams_bulk.wdl index dc2bf6b1f..754adbded 100644 --- a/pipes/WDL/workflows/aa_merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -1,6 +1,6 @@ import "tasks_demux.wdl" as demux -workflow aa_merge_bams_bulk { +workflow merge_bams_bulk { Array[File]+ in_bams # any order File in_bam_out_bam_table # first column: input bam file basename, second column: output bam file basename (one line per INPUT file) File? reheader_table From c211760f2723c5b1b1533fc0ac4f1ef51b156257 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 14 Dec 2019 19:10:07 -0500 Subject: [PATCH 117/123] added function to remove .bam from inside in_bam_out_bam file if it appears --- pipes/WDL/workflows/merge_bams_bulk.wdl | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 754adbded..07601ecf6 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -6,12 +6,18 @@ workflow merge_bams_bulk { File? reheader_table String? 
docker="quay.io/broadinstitute/viral-core" + # removes ".bam" from input bam file + call clean_in_bam_out_bam_table { + input: table = in_bam_out_bam_table + } + File cleaned_in_bam_out_bam_table = clean_in_bam_out_bam_table.cleaned_in_bam_out_bam_table + # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam = read_map(in_bam_out_bam_table) + Map[String, String] in_bam_to_out_bam = read_map(cleaned_in_bam_out_bam_table) # retrieves unique output bam file basenames (no repeats) call unique_values_in_second_column { - input: table = in_bam_out_bam_table + input: table = cleaned_in_bam_out_bam_table } Array[String] out_bams = unique_values_in_second_column.unique_values @@ -39,6 +45,18 @@ workflow merge_bams_bulk { } } +task clean_in_bam_out_bam_table { + File table + + command { + cat ${table} | sed 's/[.]bam$//g' | sed $'s/[.]bam\t/\t/g' | tee cleaned_in_bam_out_bam_table + } + + output { + File cleaned_in_bam_out_bam_table = "cleaned_in_bam_out_bam_table" + } +} + task unique_values_in_second_column { File table From ad5f69978a91b9fb269a3073c4775da99bd3f32e Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 14 Dec 2019 19:11:18 -0500 Subject: [PATCH 118/123] removed align_and_plot_bulk and assemble_denovo_bulk since the batch runner beta works again --- pipes/WDL/workflows/align_and_plot_bulk.wdl | 37 ------------------- pipes/WDL/workflows/assemble_denovo_bulk.wdl | 39 -------------------- 2 files changed, 76 deletions(-) delete mode 100644 pipes/WDL/workflows/align_and_plot_bulk.wdl delete mode 100644 pipes/WDL/workflows/assemble_denovo_bulk.wdl diff --git a/pipes/WDL/workflows/align_and_plot_bulk.wdl b/pipes/WDL/workflows/align_and_plot_bulk.wdl deleted file mode 100644 index 9548b5696..000000000 --- a/pipes/WDL/workflows/align_and_plot_bulk.wdl +++ /dev/null @@ -1,37 +0,0 @@ -import "tasks_reports.wdl" as reports - -workflow align_and_plot_bulk { - - Array[File]+ 
reads_unmapped_bam_files - File assembly_fasta - File? novocraft_license - - String? aligner="novoalign" # novoalign or bwa - String? aligner_options="-r Random -l 30 -g 40 -x 20 -t 502" - - Boolean? skip_mark_dupes=false - Boolean? plot_only_non_duplicates=false - Boolean? bin_large_plots=false - String? binning_summary_statistic="max" # max or min - - String? docker="quay.io/broadinstitute/viral-core" - - - scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call reports.plot_coverage { - input: - assembly_fasta = assembly_fasta, - reads_unmapped_bam = reads_unmapped_bam, - novocraft_license = novocraft_license, - - aligner = aligner, - aligner_options = aligner_options, - skip_mark_dupes = skip_mark_dupes, - plot_only_non_duplicates = plot_only_non_duplicates, - bin_large_plots = bin_large_plots, - binning_summary_statistic = binning_summary_statistic, - - docker = docker - } - } -} diff --git a/pipes/WDL/workflows/assemble_denovo_bulk.wdl b/pipes/WDL/workflows/assemble_denovo_bulk.wdl deleted file mode 100644 index 89a67dabe..000000000 --- a/pipes/WDL/workflows/assemble_denovo_bulk.wdl +++ /dev/null @@ -1,39 +0,0 @@ -import "tasks_taxon_filter.wdl" as taxon_filter -import "tasks_assembly.wdl" as assembly - -workflow assemble_denovo_bulk { - - Array[File]+ reads_unmapped_bam_files - Array[File]+ reference_genome_fasta - File lastal_db_fasta - File trim_clip_db - File? 
novocraft_license - - scatter(reads_unmapped_bam in reads_unmapped_bam_files) { - call taxon_filter.filter_to_taxon { - input: - reads_unmapped_bam = reads_unmapped_bam, - lastal_db_fasta = lastal_db_fasta - } - - call assembly.assemble { - input: - reads_unmapped_bam = filter_to_taxon.taxfilt_bam, - trim_clip_db = trim_clip_db - } - - call assembly.scaffold { - input: - contigs_fasta = assemble.contigs_fasta, - reads_bam = filter_to_taxon.taxfilt_bam, - reference_genome_fasta = reference_genome_fasta - } - - call assembly.refine_2x_and_plot { - input: - assembly_fasta = scaffold.scaffold_fasta, - reads_unmapped_bam = reads_unmapped_bam, - novocraft_license = novocraft_license - } - } -} \ No newline at end of file From d03aa0e87ae142af6e0bff56661715bbfec66162 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 14 Dec 2019 19:51:42 -0500 Subject: [PATCH 119/123] removed variable declaration for cleaned in_bam_to_out_bam file --- pipes/WDL/workflows/merge_bams_bulk.wdl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 07601ecf6..14a394d9d 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -10,14 +10,13 @@ workflow merge_bams_bulk { call clean_in_bam_out_bam_table { input: table = in_bam_out_bam_table } - File cleaned_in_bam_out_bam_table = clean_in_bam_out_bam_table.cleaned_in_bam_out_bam_table # generates map with key: input bam file name -> value: output bam file basename - Map[String, String] in_bam_to_out_bam = read_map(cleaned_in_bam_out_bam_table) + Map[String, String] in_bam_to_out_bam = read_map(clean_in_bam_out_bam_table.clean_table) # retrieves unique output bam file basenames (no repeats) call unique_values_in_second_column { - input: table = cleaned_in_bam_out_bam_table + input: table = clean_in_bam_out_bam_table.clean_table } Array[String] out_bams = 
unique_values_in_second_column.unique_values @@ -49,11 +48,11 @@ task clean_in_bam_out_bam_table { File table command { - cat ${table} | sed 's/[.]bam$//g' | sed $'s/[.]bam\t/\t/g' | tee cleaned_in_bam_out_bam_table + cat ${table} | sed 's/[.]bam$//g' | sed $'s/[.]bam\t/\t/g' | tee cleaned_table } output { - File cleaned_in_bam_out_bam_table = "cleaned_in_bam_out_bam_table" + File clean_table = "cleaned_table" } } From 888bb2a526f1e6f2896f38d48e2377e3aced5ac9 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sat, 14 Dec 2019 20:13:44 -0500 Subject: [PATCH 120/123] improved comment --- pipes/WDL/workflows/merge_bams_bulk.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index 14a394d9d..fe516601e 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -6,7 +6,7 @@ workflow merge_bams_bulk { File? reheader_table String? docker="quay.io/broadinstitute/viral-core" - # removes ".bam" from input bam file + # removes ".bam"s from ends of filenames in in_bam_out_bam_table call clean_in_bam_out_bam_table { input: table = in_bam_out_bam_table } From 85d8e62b690065399a2fd273e6f40570bfbf4ff4 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sun, 15 Dec 2019 00:30:02 -0500 Subject: [PATCH 121/123] renamed intermediate file cleaned_table --- pipes/WDL/workflows/merge_bams_bulk.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index fe516601e..c926a943a 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -48,11 +48,11 @@ task clean_in_bam_out_bam_table { File table command { - cat ${table} | sed 's/[.]bam$//g' | sed $'s/[.]bam\t/\t/g' | tee cleaned_table + cat ${table} | sed 's/[.]bam$//g' | sed $'s/[.]bam\t/\t/g' | tee in_bam_out_bam_table } 
output { - File clean_table = "cleaned_table" + File clean_table = "in_bam_out_bam_table" } } From 97ddcbd755a9242ef28ce15027658feff5449a4d Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sun, 15 Dec 2019 22:15:33 -0500 Subject: [PATCH 122/123] deleted defaults for assemble_denovo_bulk --- pipes/dnax/dx-defaults-assemble_denovo_bulk.json | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 pipes/dnax/dx-defaults-assemble_denovo_bulk.json diff --git a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json b/pipes/dnax/dx-defaults-assemble_denovo_bulk.json deleted file mode 100644 index 2539f4974..000000000 --- a/pipes/dnax/dx-defaults-assemble_denovo_bulk.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "assemble_denovo_bulk.trim_clip_db": - "dx://file-BXF0vYQ0QyBF509G9J12g927" -} From 4e6d91eebf2ac14850c18c6f225e53158fe50133 Mon Sep 17 00:00:00 2001 From: Lydia Andreyevna Krasilnikova Date: Sun, 15 Dec 2019 22:19:22 -0500 Subject: [PATCH 123/123] removed out_bams variable that is only used once --- pipes/WDL/workflows/merge_bams_bulk.wdl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pipes/WDL/workflows/merge_bams_bulk.wdl b/pipes/WDL/workflows/merge_bams_bulk.wdl index c926a943a..f442c95ee 100644 --- a/pipes/WDL/workflows/merge_bams_bulk.wdl +++ b/pipes/WDL/workflows/merge_bams_bulk.wdl @@ -18,10 +18,9 @@ workflow merge_bams_bulk { call unique_values_in_second_column { input: table = clean_in_bam_out_bam_table.clean_table } - Array[String] out_bams = unique_values_in_second_column.unique_values # collects and merges input bam files for each output bam file - scatter (out_bam in out_bams) { + scatter (out_bam in unique_values_in_second_column.unique_values) { String placeholder = write_map(in_bam_to_out_bam) # need to touch map in outer scatter for it to be seen in inner scatter # retrieves the input bam files for this output bam file