From 4d8b21ccfed5d88c5a36194a329912c34e35cffa Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Thu, 16 Nov 2023 10:35:32 -0500 Subject: [PATCH 01/23] add summary task --- beta-pipelines/skylab/m3c/CondensedSnm3C.wdl | 123 +++++++++++++------ 1 file changed, 87 insertions(+), 36 deletions(-) diff --git a/beta-pipelines/skylab/m3c/CondensedSnm3C.wdl b/beta-pipelines/skylab/m3c/CondensedSnm3C.wdl index d53d32a6e1..98dd357584 100644 --- a/beta-pipelines/skylab/m3c/CondensedSnm3C.wdl +++ b/beta-pipelines/skylab/m3c/CondensedSnm3C.wdl @@ -87,20 +87,20 @@ workflow WDLized_snm3C { chrom_size_path = chromosome_sizes } - # call summary { - # input: - # trimmed_stats = Sort_and_trim_r1_and_r2.trim_stats, - # hisat3n_stats = Hisat_3n_pair_end_mapping_dna_mode.hisat3n_stats, - # r1_hisat3n_stats = hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name.r1_hisat3n_stats, - # r2_hisat3n_stats = hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name.r2_hisat3n_stats, - # dedup_stats = dedup_unique_bam_and_index_unique_bam.dedup_stats, - # chromatin_contact_stats = call_chromatin_contacts.chromatin_contact_stats, - # allc_uniq_reads_stats = unique_reads_allc.allc_uniq_reads_stats, - # unique_reads_cgn_extraction_tbi = unique_reads_cgn_extraction.unique_reads_cgn_extraction_tbi - # } + call summary { + input: + trimmed_stats = Sort_and_trim_r1_and_r2.trim_stats_tar, + hisat3n_stats = Hisat_3n_pair_end_mapping_dna_mode.hisat3n_paired_end_stats_tar, + r1_hisat3n_stats = Hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name.hisat3n_dna_split_reads_summary_R1_tar, + r2_hisat3n_stats = Hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name.hisat3n_dna_split_reads_summary_R2_tar, + dedup_stats = dedup_unique_bam_and_index_unique_bam.dedup_stats_tar, + chromatin_contact_stats = call_chromatin_contacts.chromatin_contact_stats, + allc_uniq_reads_stats = unique_reads_allc.allc_uniq_reads_stats, + unique_reads_cgn_extraction_tbi = unique_reads_cgn_extraction.output_tbi_tar + } output { - #File MappingSummary = summary.MappingSummary + File MappingSummary = summary.mapping_summary #File allcFiles = unique_reads_allc. #File allc_CGNFiles = unique_reads_cgn_extraction. #File UniqueAlign_cell_parser_picard_dedup = dedup_unique_bam_and_index_unique_bam.dedup_stats @@ -545,6 +545,11 @@ task Hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name --threads 11 | samtools view -b -q 10 -o "${sample_id}.hisat3n_dna.split_reads.R2.bam" done + # tar up the r1 and r2 stats files + tar -zcvf ../hisat3n_dna_split_reads_summary.R1.tar.gz *.hisat3n_dna_split_reads_summary.R1.txt + tar -zcvf ../hisat3n_dna_split_reads_summary.R2.tar.gz *.hisat3n_dna_split_reads_summary.R2.txt + + # define lists of r1 and r2 bam files R1_bams=($(ls | grep "\.hisat3n_dna.split_reads.R1.bam")) R2_bams=($(ls | grep "\.hisat3n_dna.split_reads.R2.bam")) @@ -573,6 +578,8 @@ task Hisat_single_end_r1_r2_mapping_dna_mode_and_merge_sort_split_reads_by_name } output { File merge_sorted_bam_tar = "hisat3n_dna.split_reads.name_sort.bam.tar.gz" + File hisat3n_dna_split_reads_summary_R1_tar = "hisat3n_dna_split_reads_summary.R1.tar.gz" + File hisat3n_dna_split_reads_summary_R2_tar = "hisat3n_dna_split_reads_summary.R2.tar.gz" } } @@ -748,7 +755,7 @@ task dedup_unique_bam_and_index_unique_bam { echo $name echo "Call Picard" picard MarkDuplicates I=$file O=/cromwell_root/output_bams/$name.bam \ - M=/cromwell_root/output_bams/$name.matrix.stats \ + M=/cromwell_root/output_bams/$name.matrix.txt \ REMOVE_DUPLICATES=true TMP_DIR=/cromwell_root/temp echo "Call samtools index" samtools index /cromwell_root/output_bams/$name.bam @@ -759,6 +766,9 @@ task dedup_unique_bam_and_index_unique_bam { #tar up the output files tar -zcvf dedup_unique_bam_and_index_unique_bam.tar.gz output_bams + #tar up the stats files + tar -zcvf dedup_unique_bam_and_index_unique_bam_stats.tar.gz output_bams/*.matrix.txt + >>> runtime { docker: docker @@ -768,6 +778,7 @@ task dedup_unique_bam_and_index_unique_bam { } output { File output_tar = "dedup_unique_bam_and_index_unique_bam.tar.gz" + File dedup_stats_tar = "dedup_unique_bam_and_index_unique_bam_stats.tar.gz" } } @@ -893,26 +904,66 @@ task unique_reads_cgn_extraction { } -#task summary { -# input { -# File trimmed_stats -# File hisat3n_stats -# File r1_hisat3n_stats -# File r2_hisat3n_stats -# File dedup_stats -# File chromatin_contact_stats -# File allc_uniq_reads_stats -# File unique_reads_cgn_extraction_tbi -# } -# command <<< -# >>> -# runtime { -# docker: "fill_in" -# disks: "local-disk ${disk_size} HDD" -# cpu: 1 -# memory: "${mem_size} GiB" -# } -# output { -# File mapping_summary = "MappingSummary.csv.gz" -# } -#} +task summary { + input { + File trimmed_stats + File hisat3n_stats + File r1_hisat3n_stats + File r2_hisat3n_stats + File dedup_stats + File chromatin_contact_stats + File allc_uniq_reads_stats + File unique_reads_cgn_extraction_tbi + String docker = "us.gcr.io/broad-gotc-prod/m3c-yap-hisat:1.0.0-2.2.1" + Int disk_size = 80 + Int mem_size = 20 + } + command <<< + set -euo pipefail + mkdir /cromwell_root/fastq + mkdir /cromwell_root/bam + mkdir /cromwell_root/allc + mkdir /cromwell_root/hic + + tar -xf ~{trimmed_stats} + rm ~{trimmed_stats} + tar -xf ~{hisat3n_stats} + rm ~{hisat3n_stats} + tar -xf ~{r1_hisat3n_stats} + rm ~{r1_hisat3n_stats} + tar -xf ~{r2_hisat3n_stats} + rm ~{r2_hisat3n_stats} + tar -xf ~{dedup_stats} + rm ~{dedup_stats} + tar -xf ~{chromatin_contact_stats} + rm ~{chromatin_contact_stats} + tar -xf ~{allc_uniq_reads_stats} + rm ~{allc_uniq_reads_stats} + tar -xf ~{unique_reads_cgn_extraction_tbi} + rm ~{unique_reads_cgn_extraction_tbi} + + mv *.trimmed.stats.txt /cromwell_root/fastq + mv *.hisat3n_dna_summary.txt /cromwell_root/bam + mv *.hisat3n_dna_split_reads_summary.R1.txt /cromwell_root/bam + mv *.hisat3n_dna_split_reads_summary.R2.txt /cromwell_root/bam + mv output_bams/*.hisat3n_dna.all_reads.deduped.matrix.txt /cromwell_root/bam + mv *.hisat3n_dna.all_reads.contact_stats.csv /cromwell_root/hic + mv *.allc.tsv.gz.count.csv /cromwell_root/allc + mv cromwell_root/allc-CGN/*.allc.tsv.gz.tbi /cromwell_root/allc + + python3 <>> + runtime { + docker: docker + disks: "local-disk ${disk_size} HDD" + cpu: 1 + memory: "${mem_size} GiB" + } + output { + File mapping_summary = "MappingSummary.csv.gz" + } +} From d5037438b47e75a982ada01f01523bffe7842b4c Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Mon, 8 Jan 2024 14:19:25 -0500 Subject: [PATCH 02/23] add multimapper option --- pipelines/skylab/multiome/Multiome.wdl | 2 ++ pipelines/skylab/optimus/Optimus.wdl | 4 +++- tasks/skylab/StarAlign.wdl | 29 ++++++++++++++++--------- verification/test-wdls/TestMultiome.wdl | 4 +++- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index cc65819124..341a7ed60e 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -27,6 +27,7 @@ workflow Multiome { String star_strand_mode = "Forward" Boolean count_exons = false File gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" + String? soloMultiMappers # ATAC inputs # Array of input fastq files @@ -67,6 +68,7 @@ workflow Multiome { ignore_r1_read_length = ignore_r1_read_length, star_strand_mode = star_strand_mode, count_exons = count_exons, + soloMultiMappers = soloMultiMappers } # Call the ATAC workflow diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 98d10bbc52..eaa09a6dd8 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -31,6 +31,7 @@ workflow Optimus { File annotations_gtf File ref_genome_fasta File? mt_genes + String? soloMultiMappers # Chemistry options include: 2 or 3 Int tenx_chemistry_version @@ -131,7 +132,8 @@ workflow Optimus { chemistry = tenx_chemistry_version, counting_mode = counting_mode, count_exons = count_exons, - output_bam_basename = output_bam_basename + "_" + idx + output_bam_basename = output_bam_basename + "_" + idx, + soloMultiMappers = soloMultiMappers } } call Merge.MergeSortBamFiles as MergeBam { diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index ce3600dc74..068efe78ab 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -223,6 +223,7 @@ task STARsoloFastq { String counting_mode # when counting_mode = sn_rna, runs Gene and GeneFullEx50pAS in single alignments String output_bam_basename Boolean? count_exons + String? soloMultiMappers # runtime values String docker = "us.gcr.io/broad-gotc-prod/star:1.0.1-2.7.11a-1692706072" @@ -308,7 +309,8 @@ task STARsoloFastq { --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN sF \ --soloBarcodeReadLength 0 \ - --soloCellReadStats Standard + --soloCellReadStats Standard \ + ~{"--soloMultiMappers " + soloMultiMappers} elif [[ "~{counting_mode}" == "sn_rna" ]] then ## single nuclei @@ -333,7 +335,8 @@ task STARsoloFastq { --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN sF \ --soloBarcodeReadLength 0 \ - --soloCellReadStats Standard + --soloCellReadStats Standard \ + ~{"--soloMultiMappers " + soloMultiMappers} else COUNTING_MODE="GeneFull_Ex50pAS Gene" echo "Running in ~{counting_mode} mode. Count_exons is true and the Star parameter --soloFeatures will be set to $COUNTING_MODE" @@ -354,16 +357,14 @@ task STARsoloFastq { --outSAMtype BAM SortedByCoordinate \ --outSAMattributes UB UR UY CR CB CY NH GX GN sF \ --soloBarcodeReadLength 0 \ - --soloCellReadStats Standard + --soloCellReadStats Standard \ + ~{"--soloMultiMappers " + soloMultiMappers} fi else echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. exit 1; fi - - - echo "UMI LEN " $UMILen touch barcodes_sn_rna.tsv @@ -377,9 +378,11 @@ task STARsoloFastq { if [[ "~{counting_mode}" == "sc_rna" ]] then + SoloDirectory="Solo.out/Gene/raw" + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/Gene/raw/features.tsv" features.tsv - mv "Solo.out/Gene/raw/matrix.mtx" matrix.mtx + #mv "Solo.out/Gene/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx mv "Solo.out/Gene/CellReads.stats" CellReads.stats mv "Solo.out/Gene/Features.stats" Features.stats mv "Solo.out/Gene/Summary.csv" Summary.csv @@ -388,24 +391,30 @@ task STARsoloFastq { then if [[ "~{count_exons}" == "false" ]] then + SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv - mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx + #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats mv "Solo.out/GeneFull_Ex50pAS/Features.stats" Features.stats mv "Solo.out/GeneFull_Ex50pAS/Summary.csv" Summary.csv mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt else + SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + SoloDirectory="Solo.out/Gene/raw" + find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv - mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx + #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats mv "Solo.out/GeneFull_Ex50pAS/Features.stats" Features.stats mv "Solo.out/GeneFull_Ex50pAS/Summary.csv" Summary.csv mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt mv "Solo.out/Gene/raw/barcodes.tsv" barcodes_sn_rna.tsv mv "Solo.out/Gene/raw/features.tsv" features_sn_rna.tsv - mv "Solo.out/Gene/raw/matrix.mtx" matrix_sn_rna.mtx + #mv "Solo.out/Gene/raw/matrix.mtx" matrix_sn_rna.mtx mv "Solo.out/Gene/CellReads.stats" CellReads_sn_rna.stats mv "Solo.out/Gene/Features.stats" Features_sn_rna.stats mv "Solo.out/Gene/Summary.csv" Summary_sn_rna.csv diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl index 9f35f7b8ae..1598f66714 100644 --- a/verification/test-wdls/TestMultiome.wdl +++ b/verification/test-wdls/TestMultiome.wdl @@ -27,6 +27,7 @@ workflow TestMultiome { String star_strand_mode = "Forward" Boolean count_exons = false File gex_whitelist = "gs://broad-gotc-test-storage/Multiome/input/737K-arc-v1_gex.txt" + String? soloMultiMappers # ATAC inputs # Array of input fastq files @@ -84,7 +85,8 @@ workflow TestMultiome { adapter_seq_read3 = adapter_seq_read3, chrom_sizes = chrom_sizes, atac_whitelist = atac_whitelist, - run_cellbender = run_cellbender + run_cellbender = run_cellbender, + soloMultiMappers = soloMultiMappers } From 693c9326687b9b930e02a2a02300c416dd109072 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Mon, 8 Jan 2024 15:43:51 -0500 Subject: [PATCH 03/23] update optimus plumbing for ease of testing --- .../test_inputs/Plumbing/human_v3_example.json | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index ff5a02caaf..0e5fafca34 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -1,20 +1,18 @@ { "Optimus.r1_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R1_001.filtered.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L002_R1_001.filtered.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R1_001.filtered.fastq.gz" ], "Optimus.r2_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R2_001.filtered.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L002_R2_001.filtered.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R2_001.filtered.fastq.gz" ], "Optimus.i1_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_I1_001.filtered.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L002_I1_001.filtered.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_I1_001.filtered.fastq.gz" ], "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar", "Optimus.input_id": "pbmc_human_v3", "Optimus.tenx_chemistry_version": "3", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Optimus.star_strand_mode": "Forward", - "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa" + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa", + "Optimus.soloMultiMappers": "EM" } From 50b75ac21e9d89a02c3a3d3f96bdb518f5d0824a Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Mon, 8 Jan 2024 15:47:25 -0500 Subject: [PATCH 04/23] add echos --- .../Plumbing/mouse_v2_snRNA_example.json | 18 ++---------------- tasks/skylab/StarAlign.wdl | 12 ++++++++---- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index 239b7d1fcb..f3cf14c382 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -1,23 +1,9 @@ { "Optimus.r1_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L002_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L003_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L004_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L005_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L006_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L007_R1_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L008_R1_001.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R1_001.fastq.gz" ], "Optimus.r2_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L002_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L003_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L004_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L005_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L006_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L007_R2_001.fastq.gz", - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L008_R2_001.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R2_001.fastq.gz" ], "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_star2.7.10a-Mouse-GENCODE-build-GRCm39-M32.tar", "Optimus.input_id": "nuclei_2k_mouse", diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 068efe78ab..3b6919948e 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -379,7 +379,8 @@ task STARsoloFastq { if [[ "~{counting_mode}" == "sc_rna" ]] then SoloDirectory="Solo.out/Gene/raw" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/Gene/raw/features.tsv" features.tsv #mv "Solo.out/Gene/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx @@ -392,7 +393,8 @@ task STARsoloFastq { if [[ "~{count_exons}" == "false" ]] then SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx @@ -402,9 +404,11 @@ task STARsoloFastq { mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt else SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ SoloDirectory="Solo.out/Gene/raw" - find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx From 10b42486514345078ab9d83b5051f99acc6d6a9b Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Mon, 8 Jan 2024 15:48:23 -0500 Subject: [PATCH 05/23] add to test --- verification/test-wdls/TestOptimus.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl index b162ab1e35..535eb8d530 100644 --- a/verification/test-wdls/TestOptimus.wdl +++ b/verification/test-wdls/TestOptimus.wdl @@ -26,6 +26,7 @@ workflow TestOptimus { File annotations_gtf File ref_genome_fasta File? mt_genes + String? soloMultiMappers # Chemistry options include: 2 or 3 Int tenx_chemistry_version = 2 @@ -84,7 +85,8 @@ workflow TestOptimus { force_no_check = force_no_check, star_strand_mode = star_strand_mode, count_exons = count_exons, - ignore_r1_read_length = ignore_r1_read_length + ignore_r1_read_length = ignore_r1_read_length, + soloMultiMappers = soloMultiMappers } # Collect all of the pipeling output into single Array From b52cbf18395b5cb290d07122c499862967122497 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Tue, 9 Jan 2024 08:54:00 -0500 Subject: [PATCH 06/23] remove some echoes --- tasks/skylab/StarAlign.wdl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 3b6919948e..100d856c81 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -379,8 +379,9 @@ task STARsoloFastq { if [[ "~{counting_mode}" == "sc_rna" ]] then SoloDirectory="Solo.out/Gene/raw" + echo "SoloDirectory is $SoloDirectory" find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/Gene/raw/features.tsv" features.tsv #mv "Solo.out/Gene/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx @@ -393,8 +394,9 @@ task STARsoloFastq { if [[ "~{count_exons}" == "false" ]] then SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" + echo "SoloDirectory is $SoloDirectory" find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx @@ -404,11 +406,13 @@ task STARsoloFastq { mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt else SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" + echo "SoloDirectory is $SoloDirectory" find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ SoloDirectory="Solo.out/Gene/raw" + echo "SoloDirectory is $SoloDirectory" find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' - find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx From 1949d4e675e8eb2e4e7ecdb9eb9727161c9a48d0 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Tue, 9 Jan 2024 13:42:20 -0500 Subject: [PATCH 07/23] make mouse snrna json go back to what is in dev --- .../Plumbing/mouse_v2_snRNA_example.json | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index f3cf14c382..239b7d1fcb 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -1,9 +1,23 @@ { "Optimus.r1_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R1_001.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L002_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L003_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L004_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L005_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L006_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L007_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L008_R1_001.fastq.gz" ], "Optimus.r2_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R2_001.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L001_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L002_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L003_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L004_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L005_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L006_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L007_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/mouse_v2/nuclei_2k_S1_L008_R2_001.fastq.gz" ], "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_star2.7.10a-Mouse-GENCODE-build-GRCm39-M32.tar", "Optimus.input_id": "nuclei_2k_mouse", From a330405def28885d9f271b98ef064bc110f8e633 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Tue, 9 Jan 2024 13:44:10 -0500 Subject: [PATCH 08/23] make mouse snrna json go back to what is in dev --- .../optimus/test_inputs/Plumbing/human_v3_example.json | 9 ++++++--- .../optimus/test_inputs/Plumbing/mouse_v2_example.json | 3 ++- .../test_inputs/Plumbing/mouse_v2_snRNA_example.json | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index 0e5fafca34..240ae49ceb 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -1,12 +1,15 @@ { "Optimus.r1_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R1_001.filtered.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R1_001.filtered.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L002_R1_001.filtered.fastq.gz" ], "Optimus.r2_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R2_001.filtered.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_R2_001.filtered.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L002_R2_001.filtered.fastq.gz" ], "Optimus.i1_fastq": [ - "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_I1_001.filtered.fastq.gz" + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L001_I1_001.filtered.fastq.gz", + "gs://broad-gotc-test-storage/Optimus/input/plumbing/chemistry_10X_V3/pbmc_10k_v3_chr21_genic/pbmc_10k_v3_S1_L002_I1_001.filtered.fastq.gz" ], "Optimus.tar_star_reference": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar", "Optimus.input_id": "pbmc_human_v3", diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json index bbf625ef27..60aa9d4cc6 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json @@ -28,5 +28,6 @@ "Optimus.tenx_chemistry_version": "2", "Optimus.star_strand_mode": "Unstranded", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", - "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz" + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz", + "Optimus.soloMultiMappers": "EM" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index 239b7d1fcb..0afea7854b 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -26,5 +26,6 @@ "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz", "Optimus.counting_mode": "sn_rna", - "Optimus.count_exons": true + "Optimus.count_exons": true, + "Optimus.soloMultiMappers": "EM" } From e4d2fa2dc340184681d4118acd4cd9da4448e872 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Tue, 9 Jan 2024 14:08:23 -0500 Subject: [PATCH 09/23] add as outputs --- pipelines/skylab/multiome/Multiome.wdl | 5 +++++ pipelines/skylab/optimus/Optimus.wdl | 6 ++++++ tasks/skylab/StarAlign.wdl | 15 ++++++++------- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 341a7ed60e..e2d64133b4 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -136,6 +136,11 @@ workflow Multiome { File gene_metrics_gex = Optimus.gene_metrics File? cell_calls_gex = Optimus.cell_calls File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file + Array[File] soloMultiMappers = Optimus.soloMultiMappers + Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix + Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix + Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix + Array[File?] multimappers_PropUnique_matrix = Optimus.multimappers_PropUnique_matrix # cellbender outputs File? cell_barcodes_csv = CellBender.cell_csv diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index eaa09a6dd8..e1dc99c2fc 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -239,6 +239,12 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out + Array[File] soloMultiMappers = STARsoloFastq.matrix_sn_rna + Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix + Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix + Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix + Array[File?] multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix + # h5ad File h5ad_output_file = final_h5ad_output } diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 100d856c81..2cedd9ac6d 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -380,11 +380,10 @@ task STARsoloFastq { then SoloDirectory="Solo.out/Gene/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/Gene/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/Gene/raw/features.tsv" features.tsv - #mv "Solo.out/Gene/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx mv "Solo.out/Gene/CellReads.stats" CellReads.stats mv "Solo.out/Gene/Features.stats" Features.stats mv "Solo.out/Gene/Summary.csv" Summary.csv @@ -395,11 +394,10 @@ task STARsoloFastq { then SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv - #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" Ithinkicandeletehismatrix.mtx mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats mv "Solo.out/GeneFull_Ex50pAS/Features.stats" Features.stats mv "Solo.out/GeneFull_Ex50pAS/Summary.csv" Summary.csv @@ -407,15 +405,14 @@ task STARsoloFastq { else SoloDirectory="Solo.out/GeneFull_Ex50pAS/raw" echo "SoloDirectory is $SoloDirectory" - find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} mv {} /cromwell_root/ + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} echo mv {} /cromwell_root/ find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ SoloDirectory="Solo.out/Gene/raw" echo "SoloDirectory is $SoloDirectory" - find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | echo xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"' find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv - #mv "Solo.out/GeneFull_Ex50pAS/raw/matrix.mtx" matrix.mtx mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats mv "Solo.out/GeneFull_Ex50pAS/Features.stats" Features.stats mv "Solo.out/GeneFull_Ex50pAS/Summary.csv" Summary.csv @@ -462,6 +459,10 @@ task STARsoloFastq { File align_features_sn_rna = "Features_sn_rna.stats" File summary_sn_rna = "Summary_sn_rna.csv" File umipercell_sn_rna = "UMIperCellSorted_sn_rna.txt" + File? multimappers_EM_matrix = "UniqueAndMult-EM.mtx" + File? multimappers_Uniform_matrix = "UniqueAndMult-Uniform.mtx" + File? multimappers_Rescue_matrix = "UniqueAndMult-Rescue.mtx" + File? multimappers_PropUnique_matrix = "UniqueAndMult-PropUnique.mtx" } } From e75a479085637712173a99ed8f8dcd898ce8fd6e Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Wed, 10 Jan 2024 13:22:22 -0500 Subject: [PATCH 10/23] typo --- tasks/skylab/StarAlign.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index 2cedd9ac6d..5dead400a5 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -409,8 +409,8 @@ task STARsoloFastq { find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} mv {} /cromwell_root/ SoloDirectory="Solo.out/Gene/raw" echo "SoloDirectory is $SoloDirectory" - find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"' - find "$directory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; echo mv {} "/cromwell_root/$new_name"' + find "$SoloDirectory" -maxdepth 1 -type f -name "*.mtx" -print0 | xargs -0 -I{} sh -c 'new_name="$(basename {} .mtx)_sn_rna.mtx"; mv {} "/cromwell_root/$new_name"' mv "Solo.out/GeneFull_Ex50pAS/raw/barcodes.tsv" barcodes.tsv mv "Solo.out/GeneFull_Ex50pAS/raw/features.tsv" features.tsv mv "Solo.out/GeneFull_Ex50pAS/CellReads.stats" CellReads.stats From bab93adb9f755436a8a42a0df165f5417e6da9d0 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Thu, 11 Jan 2024 09:03:44 -0500 Subject: [PATCH 11/23] changelogs --- pipelines/skylab/multiome/Multiome.changelog.md | 5 +++++ pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.changelog.md | 4 ++++ pipelines/skylab/optimus/Optimus.wdl | 2 +- pipelines/skylab/paired_tag/PairedTag.changelog.md | 4 ++++ pipelines/skylab/paired_tag/PairedTag.wdl | 2 +- pipelines/skylab/slideseq/SlideSeq.changelog.md | 4 ++++ pipelines/skylab/slideseq/SlideSeq.wdl | 2 +- .../MultiSampleSmartSeq2SingleNucleus.changelog.md | 4 ++++ .../MultiSampleSmartSeq2SingleNucleus.wdl | 2 +- 10 files changed, 26 insertions(+), 5 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 48070c0ec9..457c6cd2de 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,8 @@ +# 3.0.5 +2024-01-11 (Date of Last Commit) + +* Added the --soloMultiMappers flag as an optional input to the StarSoloFastq task in the StarAlign.wdl + # 3.0.4 2024-01-05 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index e2d64133b4..4967be0351 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -6,7 +6,7 @@ import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "https://raw.githubusercontent.com/broadinstitute/CellBender/v0.3.0/wdl/cellbender_remove_background.wdl" as CellBender workflow Multiome { - String pipeline_version = "3.0.4" + String pipeline_version = "3.0.5" input { String input_id diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 489343f6c1..2c0e1cb730 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,3 +1,7 @@ +# 6.3.4 +2024-01-11 (Date of Last Commit) +* Added the --soloMultiMappers flag as an optional input to the StarSoloFastq task in the StarAlign.wdl + # 6.3.3 2024-01-05 (Date of Last Commit) * Modified the STARsoloFastq task in the StarAlign.wdl so STARsolo can run different types of alignments in a single STARsolo command depending on the counting_mode diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index e1dc99c2fc..146be378ee 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -65,7 +65,7 @@ workflow Optimus { # version of this pipeline - String pipeline_version = "6.3.3" + String pipeline_version = "6.3.4" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index eb236702ce..a7a173d46f 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,7 @@ +# 0.0.4 +2024-01-11 (Date of Last Commit) +* Added the --soloMultiMappers flag as an optional input to the StarSoloFastq task in the StarAlign.wdl + # 0.0.3 2024-01-05 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 36816f514d..59d24db8d8 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -5,7 +5,7 @@ import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/skylab/PairedTagUtils.wdl" as Demultiplexing workflow PairedTag { - String pipeline_version = "0.0.3" + String pipeline_version = "0.0.4" input { String input_id diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 3b3f2cec69..969c9782cc 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,7 @@ +# 2.1.5 +2024-01-11 (Date of Last Commit) +* Added the --soloMultiMappers flag as an optional input to the StarSoloFastq task in the StarAlign.wdl; this does affect the SlideSeq workflow + # 2.1.4 2024-01-05 (Date of Last Commit) * Modified the STARsoloFastq task in the StarAlign.wdl so STARsolo can run different types of alignments in a single STARsolo command depending on the counting_mode; this does affect the SlideSeq workflow diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index fcd7238683..e088379862 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -23,7 +23,7 @@ import "../../../tasks/skylab/MergeSortBam.wdl" as Merge workflow SlideSeq { - String pipeline_version = "2.1.4" + String pipeline_version = "2.1.5" input { Array[File] r1_fastq diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index f64b820dd3..6ab189b9ce 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,7 @@ +# 1.2.28 +2024-01-11 (Date of Last Commit) +* Added the --soloMultiMappers flag as an optional input to the StarSoloFastq task in the StarAlign.wdl; this does affect the MultiSampleSmartSeq2SingleNucleus workflow + # 1.2.27 2024-01-05 (Date of Last Commit) * Modified the STARsoloFastq task in the StarAlign.wdl so STARsolo can run different types of alignments in a single STARsolo command depending on the counting_mode; this does affect the MultiSampleSmartSeq2SingleNucleus workflow diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index 23fa349662..d0bf9dbb2f 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -40,7 +40,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { String? input_id_metadata_field } # Version of this pipeline - String pipeline_version = "1.2.27" + String pipeline_version = "1.2.28" if (false) { String? none = "None" From 3aeca876694a4c1c8b8291d2345d3950b53fd57c Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Thu, 11 Jan 2024 10:46:02 -0500 Subject: [PATCH 12/23] changelogs --- .../multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json index c394ae790c..352e345c47 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -23,5 +23,6 @@ "Multiome.run_cellbender":"false", "Multiome.Atac.BWAPairedEndAlignment.cpu_platform":"Intel Cascade Lake", "Multiome.Atac.BWAPairedEndAlignment.mem_size":"64", - "Multiome.Atac.BWAPairedEndAlignment.nthreads":"16" + "Multiome.Atac.BWAPairedEndAlignment.nthreads":"16", + "Multiome.Optimus.STARsoloFastq.soloMultiMappers":"Uniform" } From e8fde9f00bac07a86d2867fc2ed6b0379a59d46c Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Thu, 11 Jan 2024 11:42:57 -0500 Subject: [PATCH 13/23] changelogs --- .../multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json index 352e345c47..3c3b223256 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -24,5 +24,5 @@ "Multiome.Atac.BWAPairedEndAlignment.cpu_platform":"Intel Cascade Lake", "Multiome.Atac.BWAPairedEndAlignment.mem_size":"64", "Multiome.Atac.BWAPairedEndAlignment.nthreads":"16", - "Multiome.Optimus.STARsoloFastq.soloMultiMappers":"Uniform" + "Multiome.soloMultiMappers":"Uniform" } From 660c1ec372fee282e5ab6b536091baf947c4f2ef Mon Sep 17 00:00:00 2001 From: kayleemathews Date: Fri, 12 Jan 2024 09:28:57 -0500 Subject: [PATCH 14/23] update pipeline docs --- website/docs/Pipelines/Multiome_Pipeline/README.md | 8 +++++++- website/docs/Pipelines/Optimus_Pipeline/README.md | 8 +++++++- website/docs/Pipelines/PairedTag_Pipeline/README.md | 2 +- .../README.md | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index 413e6addcf..757457170f 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -7,7 +7,7 @@ slug: /Pipelines/Multiome_Pipeline/README | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [Multiome v3.0.4](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) | +| [Multiome v3.0.5](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact the [WARP Pipeline Development team](mailto:warp-pipelines-help@broadinstitute.org) | ![Multiome_diagram](./multiome_diagram.png) @@ -70,6 +70,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta | star_strand_mode | Optional string for the Optimus (GEX) pipeline for performing STARsolo alignment on forward stranded, reverse stranded, or unstranded data; default is "Forward". | String | | count_exons | Optional boolean for the Optimus (GEX) pipeline indicating if the workflow should calculate exon counts **when in single-nucleus (sn_rna) mode**; if "true" in sc_rna mode, the workflow will return an error; default is "false". | Boolean | | gex_whitelist | Optional file containing the list of valid barcodes for 10x multiome GEX data; default is "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt". | File | +| soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | String | | atac_r1_fastq | Array of read 1 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] | | atac_r2_fastq | Array of barcodes FASTQ files representing a single 10x multiome ATAC library. | Array[File] | | atac_r3_fastq | Array of read 2 paired-end FASTQ files representing a single 10x multiome ATAC library. | Array[File] | @@ -114,6 +115,11 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | gene_metrics_gex | `_gex.gene_metrics.csv.gz` | CSV file containing the per-gene metrics. | | cell_calls_gex | `_gex.emptyDrops` | TSV file containing the EmptyDrops results when the Optimus workflow is run in sc_rna mode. | | h5ad_output_file_gex | `_gex.h5ad` | h5ad (Anndata) file containing the raw cell-by-gene count matrix, gene metrics, cell metrics, and global attributes. Also contains equivalent ATAC barcode for each gene expression barcode in the `atac_barcodes` column of the `h5ad.obs` property. See the [Optimus Count Matrix Overview](../Optimus_Pipeline/Loom_schema.md) for more details. | +| soloMultiMappers | +| multimappers_EM_matrix | +| multimappers_Uniform_matrix | +| multimappers_Rescue_matrix | +| multimappers_PropUnique_matrix | | cell_barcodes_csv | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | checkpoint_file | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index d59477bf45..2032919528 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -7,7 +7,7 @@ slug: /Pipelines/Optimus_Pipeline/README | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [optimus_v6.3.2](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | December, 2023 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | +| [optimus_v6.3.4](https://github.com/broadinstitute/warp/releases?q=optimus&expanded=true) | January, 2024 | Elizabeth Kiernan | Please file GitHub issues in warp or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![Optimus_diagram](Optimus_diagram.png) @@ -93,6 +93,7 @@ The example configuration files also contain metadata for the reference files, d | annotations_gtf | GTF containing gene annotations used for gene tagging (must match GTF in STAR reference). | N/A | | tenx_chemistry_version | Integer that specifies if data was generated with 10x v2 or v3 chemistry. Optimus validates this chemistry by examining the UMIs and CBs in the first read 1 FASTQ file. If the chemistry does not match, the pipeline will fail. You can remove the check by setting "ignore_r1_read_length = true" in the input JSON. | 2 or 3 | | mt_genes | Optional file containing mitochondrial gene names for a specific species. This is used for calculating gene metrics. | N/A | +| soloMultiMappers | Optional string describing whether or not the Optimus (GEX) pipeline should run STARsolo with the `--soloMultiMappers` flag. | N/A | | counting_mode | String describing whether data is single-cell or single-nucleus. Single-cell mode counts reads aligned to the gene transcript, whereas single-nucleus counts whole transcript to account for nuclear pre-mRNA. | "sc_rna" or "sn_rna" | | output_bam_basename | String used as a basename for output BAM file; the default is set to the string used for the `input_id` parameter. | N/A | | star_strand_mode | Optional string for running the workflow on forward stranded, reverse stranded, or unstranded data; default is "Forward". | "Forward" (default), "Reverse", and "Unstranded" | @@ -252,6 +253,11 @@ The following table lists the output files produced from the pipeline. For sampl | cell_metrics | `.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV | | gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | | aligner_metrics | `.cell_reads.txt` | Per barcode metrics (CellReads.stats) produced by the STARsolo aligner. | TXT | +| soloMultiMappers | +| multimappers_EM_matrix | +| multimappers_Uniform_matrix | +| multimappers_Rescue_matrix | +| multimappers_PropUnique_matrix | | cell_calls | empty_drops_result.csv | emptyDrops results from the RunEmptyDrops task. | CSV | | h5ad_output_file | `.h5ad` | h5ad file with count data (exonic or whole transcript depending on the counting_mode) and metadata. | H5AD | diff --git a/website/docs/Pipelines/PairedTag_Pipeline/README.md b/website/docs/Pipelines/PairedTag_Pipeline/README.md index 5b7e7b0a02..86ddd12d87 100644 --- a/website/docs/Pipelines/PairedTag_Pipeline/README.md +++ b/website/docs/Pipelines/PairedTag_Pipeline/README.md @@ -7,7 +7,7 @@ slug: /Pipelines/PairedTag_Pipeline/README | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [PairedTag_v0.0.3](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) | +| [PairedTag_v0.0.4](https://github.com/broadinstitute/warp/releases) | January, 2024 | Kaylee Mathews | Please file GitHub issues in warp or contact [documentation authors](mailto:warp-pipelines-help@broadinstitute.org) | ## Introduction to the Paired-Tag workflow diff --git a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md index bbfffebe12..e21fe808ee 100644 --- a/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md +++ b/website/docs/Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README.md @@ -7,7 +7,7 @@ slug: /Pipelines/Smart-seq2_Single_Nucleus_Multi_Sample_Pipeline/README | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback | | :----: | :---: | :----: | :--------------: | -| [MultiSampleSmartSeq2SingleNuclei_v1.2.26](https://github.com/broadinstitute/warp/releases) | December, 2023 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | +| [MultiSampleSmartSeq2SingleNuclei_v1.2.28](https://github.com/broadinstitute/warp/releases) | January, 2024 | [Elizabeth Kiernan](mailto:ekiernan@broadinstitute.org) | Please file GitHub issues in WARP or contact [the WARP team](mailto:warp-pipelines-help@broadinstitute.org) | ![](./snSS2.png) From fc80cdbf262bfd4e1dd44f7673004ec8bbcad5b9 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 13:32:30 -0500 Subject: [PATCH 15/23] optional output --- pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 4967be0351..8e91900157 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -136,7 +136,7 @@ workflow Multiome { File gene_metrics_gex = Optimus.gene_metrics File? cell_calls_gex = Optimus.cell_calls File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file - Array[File] soloMultiMappers = Optimus.soloMultiMappers + Array[File?] soloMultiMappers = Optimus.soloMultiMappers Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 146be378ee..45c2c6a5d7 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -239,7 +239,7 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out - Array[File] soloMultiMappers = STARsoloFastq.matrix_sn_rna + Array[File?] soloMultiMappers = STARsoloFastq.matrix_sn_rna Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix From 500ff512dfda2782366d229ab4eb4ccef356045d Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 13:36:07 -0500 Subject: [PATCH 16/23] optional output --- pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.wdl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 8e91900157..4967be0351 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -136,7 +136,7 @@ workflow Multiome { File gene_metrics_gex = Optimus.gene_metrics File? cell_calls_gex = Optimus.cell_calls File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file - Array[File?] soloMultiMappers = Optimus.soloMultiMappers + Array[File] soloMultiMappers = Optimus.soloMultiMappers Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 45c2c6a5d7..146be378ee 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -239,7 +239,7 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out - Array[File?] soloMultiMappers = STARsoloFastq.matrix_sn_rna + Array[File] soloMultiMappers = STARsoloFastq.matrix_sn_rna Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix From e8d911151b065184e8dfa40f8c03db93e68acfe6 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 13:49:49 -0500 Subject: [PATCH 17/23] optional output --- pipelines/skylab/multiome/Multiome.wdl | 2 +- pipelines/skylab/optimus/Optimus.wdl | 2 +- tasks/skylab/StarAlign.wdl | 1 - website/docs/Pipelines/Multiome_Pipeline/README.md | 10 +++++----- website/docs/Pipelines/Optimus_Pipeline/README.md | 8 ++++---- 5 files changed, 11 insertions(+), 12 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 4967be0351..5634f9bf03 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -136,7 +136,7 @@ workflow Multiome { File gene_metrics_gex = Optimus.gene_metrics File? cell_calls_gex = Optimus.cell_calls File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file - Array[File] soloMultiMappers = Optimus.soloMultiMappers + Array[File] matrix_sn_rna = Optimus.soloMultiMappers Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 146be378ee..7c6b01034c 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -239,7 +239,7 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out - Array[File] soloMultiMappers = STARsoloFastq.matrix_sn_rna + Array[File] matrix_sn_rna = STARsoloFastq.matrix_sn_rna Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index e9688bbdf7..8ab0c8d615 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -419,7 +419,6 @@ task STARsoloFastq { mv "Solo.out/GeneFull_Ex50pAS/UMIperCellSorted.txt" UMIperCellSorted.txt mv "Solo.out/Gene/raw/barcodes.tsv" barcodes_sn_rna.tsv mv "Solo.out/Gene/raw/features.tsv" features_sn_rna.tsv - #mv "Solo.out/Gene/raw/matrix.mtx" matrix_sn_rna.mtx mv "Solo.out/Gene/CellReads.stats" CellReads_sn_rna.stats mv "Solo.out/Gene/Features.stats" Features_sn_rna.stats mv "Solo.out/Gene/Summary.csv" Summary_sn_rna.csv diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index 757457170f..2f5fb9fab7 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -115,11 +115,11 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | gene_metrics_gex | `_gex.gene_metrics.csv.gz` | CSV file containing the per-gene metrics. | | cell_calls_gex | `_gex.emptyDrops` | TSV file containing the EmptyDrops results when the Optimus workflow is run in sc_rna mode. | | h5ad_output_file_gex | `_gex.h5ad` | h5ad (Anndata) file containing the raw cell-by-gene count matrix, gene metrics, cell metrics, and global attributes. Also contains equivalent ATAC barcode for each gene expression barcode in the `atac_barcodes` column of the `h5ad.obs` property. See the [Optimus Count Matrix Overview](../Optimus_Pipeline/Loom_schema.md) for more details. | -| soloMultiMappers | -| multimappers_EM_matrix | -| multimappers_Uniform_matrix | -| multimappers_Rescue_matrix | -| multimappers_PropUnique_matrix | +| soloMultiMappers | +| multimappers_EM_matrix | UniqueAndMult-EM.mtx | +| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | +| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | +| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | | cell_barcodes_csv | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | checkpoint_file | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 2032919528..bb8fa67ebb 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -254,10 +254,10 @@ The following table lists the output files produced from the pipeline. For sampl | gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | | aligner_metrics | `.cell_reads.txt` | Per barcode metrics (CellReads.stats) produced by the STARsolo aligner. | TXT | | soloMultiMappers | -| multimappers_EM_matrix | -| multimappers_Uniform_matrix | -| multimappers_Rescue_matrix | -| multimappers_PropUnique_matrix | +| multimappers_EM_matrix | UniqueAndMult-EM.mtx | +| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | +| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | +| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | | cell_calls | empty_drops_result.csv | emptyDrops results from the RunEmptyDrops task. | CSV | | h5ad_output_file | `.h5ad` | h5ad file with count data (exonic or whole transcript depending on the counting_mode) and metadata. | H5AD | From 875fa4a9f27d82a0f428733ab0e2764ae6ea6f99 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 13:51:14 -0500 Subject: [PATCH 18/23] optional output --- pipelines/skylab/multiome/Multiome.wdl | 1 - pipelines/skylab/optimus/Optimus.wdl | 1 - website/docs/Pipelines/Multiome_Pipeline/README.md | 1 - website/docs/Pipelines/Optimus_Pipeline/README.md | 1 - 4 files changed, 4 deletions(-) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index 5634f9bf03..e8c92430d5 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -136,7 +136,6 @@ workflow Multiome { File gene_metrics_gex = Optimus.gene_metrics File? cell_calls_gex = Optimus.cell_calls File h5ad_output_file_gex = JoinBarcodes.gex_h5ad_file - Array[File] matrix_sn_rna = Optimus.soloMultiMappers Array[File?] multimappers_EM_matrix = Optimus.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = Optimus.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = Optimus.multimappers_Rescue_matrix diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 7c6b01034c..fb01379032 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -239,7 +239,6 @@ workflow Optimus { File gene_metrics = GeneMetrics.gene_metrics File? cell_calls = RunEmptyDrops.empty_drops_result File? aligner_metrics = MergeStarOutputs.cell_reads_out - Array[File] matrix_sn_rna = STARsoloFastq.matrix_sn_rna Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index 2f5fb9fab7..c99d5e89d5 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -115,7 +115,6 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | gene_metrics_gex | `_gex.gene_metrics.csv.gz` | CSV file containing the per-gene metrics. | | cell_calls_gex | `_gex.emptyDrops` | TSV file containing the EmptyDrops results when the Optimus workflow is run in sc_rna mode. | | h5ad_output_file_gex | `_gex.h5ad` | h5ad (Anndata) file containing the raw cell-by-gene count matrix, gene metrics, cell metrics, and global attributes. Also contains equivalent ATAC barcode for each gene expression barcode in the `atac_barcodes` column of the `h5ad.obs` property. See the [Optimus Count Matrix Overview](../Optimus_Pipeline/Loom_schema.md) for more details. | -| soloMultiMappers | | multimappers_EM_matrix | UniqueAndMult-EM.mtx | | multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | | multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index bb8fa67ebb..70b50069fc 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -253,7 +253,6 @@ The following table lists the output files produced from the pipeline. For sampl | cell_metrics | `.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV | | gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | | aligner_metrics | `.cell_reads.txt` | Per barcode metrics (CellReads.stats) produced by the STARsolo aligner. | TXT | -| soloMultiMappers | | multimappers_EM_matrix | UniqueAndMult-EM.mtx | | multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | | multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | From 5ce21a44ce290aa89d7794a2c389f15838db9057 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 14:01:23 -0500 Subject: [PATCH 19/23] docs --- .../Pipelines/Multiome_Pipeline/README.md | 22 +++++++++---------- .../docs/Pipelines/Optimus_Pipeline/README.md | 8 +++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index c99d5e89d5..ea05e7238b 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -115,18 +115,18 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | gene_metrics_gex | `_gex.gene_metrics.csv.gz` | CSV file containing the per-gene metrics. | | cell_calls_gex | `_gex.emptyDrops` | TSV file containing the EmptyDrops results when the Optimus workflow is run in sc_rna mode. | | h5ad_output_file_gex | `_gex.h5ad` | h5ad (Anndata) file containing the raw cell-by-gene count matrix, gene metrics, cell metrics, and global attributes. Also contains equivalent ATAC barcode for each gene expression barcode in the `atac_barcodes` column of the `h5ad.obs` property. See the [Optimus Count Matrix Overview](../Optimus_Pipeline/Loom_schema.md) for more details. | -| multimappers_EM_matrix | UniqueAndMult-EM.mtx | -| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | -| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | -| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | -| cell_barcodes_csv | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| multimappers_EM_matrix | UniqueAndMult-EM.mtx | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| +| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| +| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | +| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| +| cell_barcodes_csv | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| | checkpoint_file | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| html_report_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| log | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| metrics_csv_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| output_directory | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| +| html_report_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| +| log | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| +| metrics_csv_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| +| output_directory | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| +| summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 70b50069fc..8df2979d25 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -253,10 +253,10 @@ The following table lists the output files produced from the pipeline. For sampl | cell_metrics | `.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV | | gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | | aligner_metrics | `.cell_reads.txt` | Per barcode metrics (CellReads.stats) produced by the STARsolo aligner. | TXT | -| multimappers_EM_matrix | UniqueAndMult-EM.mtx | -| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | -| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | -| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | +| multimappers_EM_matrix | UniqueAndMult-EM.mtx | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | +| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | +| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | +| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| MTX | | cell_calls | empty_drops_result.csv | emptyDrops results from the RunEmptyDrops task. | CSV | | h5ad_output_file | `.h5ad` | h5ad file with count data (exonic or whole transcript depending on the counting_mode) and metadata. | H5AD | From a8223df0889c4e37f6d9ff8947f0af7ce57c485c Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 14:07:44 -0500 Subject: [PATCH 20/23] docs --- website/docs/Pipelines/Multiome_Pipeline/README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index ea05e7238b..a1ae09b092 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -121,12 +121,12 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| | cell_barcodes_csv | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| | checkpoint_file | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| -| html_report_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| -| log | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| -| metrics_csv_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| -| output_directory | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| -| summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.| +| h5_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| html_report_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| log | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| metrics_csv_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| output_directory | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | +| summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.n | From 21e8d378313c1e76d9dacadc21400fe31365b7e2 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Fri, 12 Jan 2024 14:08:52 -0500 Subject: [PATCH 21/23] docs --- website/docs/Pipelines/Multiome_Pipeline/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md index a1ae09b092..3057f89507 100644 --- a/website/docs/Pipelines/Multiome_Pipeline/README.md +++ b/website/docs/Pipelines/Multiome_Pipeline/README.md @@ -126,7 +126,7 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt | log | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | metrics_csv_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | | output_directory | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | -| summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information.n | +| summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. | From 47a6197f1b17f463e7975c708993af3769da4ad6 Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo <38223776+nikellepetrillo@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:13:50 -0500 Subject: [PATCH 22/23] Update website/docs/Pipelines/Optimus_Pipeline/README.md Co-authored-by: Kaylee Mathews <95316074+kayleemathews@users.noreply.github.com> --- website/docs/Pipelines/Optimus_Pipeline/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/website/docs/Pipelines/Optimus_Pipeline/README.md b/website/docs/Pipelines/Optimus_Pipeline/README.md index 8df2979d25..71b18e89c5 100644 --- a/website/docs/Pipelines/Optimus_Pipeline/README.md +++ b/website/docs/Pipelines/Optimus_Pipeline/README.md @@ -253,10 +253,10 @@ The following table lists the output files produced from the pipeline. For sampl | cell_metrics | `.cell-metrics.csv.gz` | Matrix of metrics by cells. | Compressed CSV | | gene_metrics | `.gene-metrics.csv.gz` | Matrix of metrics by genes. | Compressed CSV | | aligner_metrics | `.cell_reads.txt` | Per barcode metrics (CellReads.stats) produced by the STARsolo aligner. | TXT | -| multimappers_EM_matrix | UniqueAndMult-EM.mtx | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | -| multimappers_Uniform_matrix | UniqueAndMult-Uniform.mtx | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | -| multimappers_Rescue_matrix | UniqueAndMult-Rescue.mtx | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | -| multimappers_PropUnique_matrix | UniqueAndMult-PropUnique.mtx | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| MTX | +| multimappers_EM_matrix | `UniqueAndMult-EM.mtx` | Optional output produced when `soloMultiMappers` is "EM"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | +| multimappers_Uniform_matrix | `UniqueAndMult-Uniform.mtx` | Optional output produced when `soloMultiMappers` is "Uniform"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | +| multimappers_Rescue_matrix | `UniqueAndMult-Rescue.mtx` | Optional output produced when `soloMultiMappers` is "Rescue"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information. | MTX | +| multimappers_PropUnique_matrix | `UniqueAndMult-PropUnique.mtx` | Optional output produced when `soloMultiMappers` is "PropUnique"; see STARsolo [documentation](https://github.com/alexdobin/STAR/blob/master/docs/STARsolo.md#multi-gene-reads) for more information.| MTX | | cell_calls | empty_drops_result.csv | emptyDrops results from the RunEmptyDrops task. | CSV | | h5ad_output_file | `.h5ad` | h5ad file with count data (exonic or whole transcript depending on the counting_mode) and metadata. | H5AD | From e72f185aa54c3f92038489d250443699a7970fda Mon Sep 17 00:00:00 2001 From: Nikelle Petrillo Date: Tue, 16 Jan 2024 09:16:18 -0500 Subject: [PATCH 23/23] remove optional input to tests --- .../skylab/optimus/test_inputs/Plumbing/human_v3_example.json | 3 +-- .../skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json | 3 +-- .../optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json index 240ae49ceb..ff5a02caaf 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/human_v3_example.json @@ -16,6 +16,5 @@ "Optimus.tenx_chemistry_version": "3", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", "Optimus.star_strand_mode": "Forward", - "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa", - "Optimus.soloMultiMappers": "EM" + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/hg38/v0/GRCh38.primary_assembly.genome.fa" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json index 60aa9d4cc6..bbf625ef27 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_example.json @@ -28,6 +28,5 @@ "Optimus.tenx_chemistry_version": "2", "Optimus.star_strand_mode": "Unstranded", "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", - "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz", - "Optimus.soloMultiMappers": "EM" + "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz" } diff --git a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json index 0afea7854b..239b7d1fcb 100644 --- a/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json +++ b/pipelines/skylab/optimus/test_inputs/Plumbing/mouse_v2_snRNA_example.json @@ -26,6 +26,5 @@ "Optimus.annotations_gtf": "gs://gcp-public-data--broad-references/GRCm39/star/v2_7_10a/modified_vM32.annotation.gtf", "Optimus.ref_genome_fasta": "gs://gcp-public-data--broad-references/GRCm39/GRCm39.primary_assembly.genome.fa.gz", "Optimus.counting_mode": "sn_rna", - "Optimus.count_exons": true, - "Optimus.soloMultiMappers": "EM" + "Optimus.count_exons": true }