Skip to content

Commit

Permalink
Merge branch 'develop' into np_jw_test_illumina_genotyping_arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
nikellepetrillo authored Feb 13, 2025
2 parents 981002e + 16affa2 commit bcaeb26
Show file tree
Hide file tree
Showing 19 changed files with 142 additions and 52 deletions.
58 changes: 29 additions & 29 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
Pipeline Name Version Date of Last Commit
IlluminaGenotypingArray 1.12.24 2024-11-04
WholeGenomeReprocessing 3.3.3 2024-11-04
ExternalWholeGenomeReprocessing 2.3.3 2024-11-04
ExternalExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
ExomeReprocessing 3.3.3 2024-11-04
GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04
UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04
WholeGenomeGermlineSingleSample 3.3.3 2024-11-04
UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05
ExomeGermlineSingleSample 3.2.3 2024-11-04
snm3C 4.0.4 2024-08-06
BuildIndices 4.0.0 2025-01-17
scATAC 1.3.2 2023-08-03
MultiSampleSmartSeq2SingleNucleus 2.0.8 2025-02-12
atac 2.7.1 2025-02-12
Optimus 7.9.2 2025-02-12
SmartSeq2SingleSample 5.1.21 2024-09-11
Multiome 5.11.0 2025-02-05
PairedTag 1.10.2 2025-02-06
SlideSeq 3.4.9 2025-02-12
MultiSampleSmartSeq2 2.2.22 2024-09-11
AnnotationFiltration 1.2.7 2024-11-04
RNAWithUMIsPipeline 1.0.18 2024-11-04
Imputation 1.1.15 2024-11-04
Arrays 2.6.30 2024-11-04
MultiSampleArrays 1.6.2 2024-08-02
ValidateChip 1.16.7 2024-11-04
JointGenotyping 1.7.2 2024-11-04
ReblockGVCF 2.4.0 2024-12-05
UltimaGenomicsJointGenotyping 1.2.2 2024-11-04
JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04
JointGenotypingByChromosomePartOne 1.5.2 2024-11-04
ExomeGermlineSingleSample 3.2.3 2024-11-04
WholeGenomeGermlineSingleSample 3.3.3 2024-11-04
UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05
VariantCalling 2.2.4 2024-11-04
GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04
UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04
CheckFingerprint 1.0.22 2024-10-28
RNAWithUMIsPipeline 1.0.18 2024-11-04
BroadInternalUltimaGenomics 1.1.3 2024-12-05
BroadInternalRNAWithUMIs 1.0.36 2024-11-04
BroadInternalImputation 1.1.14 2024-11-04
BroadInternalArrays 1.1.14 2024-11-04
Imputation 1.1.15 2024-11-04
MultiSampleArrays 1.6.2 2024-08-02
ValidateChip 1.16.7 2024-11-04
Arrays 2.6.30 2024-11-04
AnnotationFiltration 1.2.7 2024-11-04
Multiome 5.10.0 2025-02-03
snm3C 4.0.4 2024-08-06
SlideSeq 3.4.8 2025-01-13
scATAC 1.3.2 2023-08-03
BuildIndices 4.0.0 2025-01-17
MultiSampleSmartSeq2 2.2.22 2024-09-11
Optimus 7.9.1 2025-01-13
atac 2.7.0 2025-02-03
PairedTag 1.10.1 2025-02-03
SmartSeq2SingleSample 5.1.21 2024-09-11
MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13
BroadInternalUltimaGenomics 1.1.3 2024-12-05
IlluminaGenotypingArray 1.12.24 2024-11-04
ExternalExomeReprocessing 3.3.3 2024-11-04
ExternalWholeGenomeReprocessing 2.3.3 2024-11-04
ExomeReprocessing 3.3.3 2024-11-04
CramToUnmappedBams 1.1.3 2024-08-02
WholeGenomeReprocessing 3.3.3 2024-11-04
7 changes: 7 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# 2.7.1
2025-02-12 (Date of Last Commit)

* Added a new warning for the peak calling step if the probability_threshold is too low, resulting in a null matrix after doublet filtering
* Updated the probability threshold default to 0.5
* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline

# 2.7.0
2025-02-03 (Date of Last Commit)

Expand Down
10 changes: 7 additions & 3 deletions pipelines/skylab/atac/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.7.0"
String pipeline_version = "2.7.1"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Docker image names
String warp_tools_docker = "warp-tools:2.6.0"
String warp_tools_docker = "warp-tools:2.6.1"
String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
String samtools_docker = "samtools-dist-bwa:3.0.0"
String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
Expand Down Expand Up @@ -668,7 +668,7 @@ task PeakCalling {
Int min_counts = 5000
Int min_tsse = 10
Int max_counts = 100000
Float probability_threshold = 1
Float probability_threshold = 0.5
# Runtime attributes/docker
String docker_path
Expand Down Expand Up @@ -751,6 +751,10 @@ task PeakCalling {
print("Filter doublets based on scrublet scores")
snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold)
print(atac_data_mod)
# Check if the matrix is empty
if atac_data_mod.n_obs == 0:
raise ValueError("Matrix is empty after filtering doublets: Try increasing the probability_threshold.")
# Perform graph-based clustering to identify cell clusters.
# Build a k-nearest neighbour graph using snap.pp.knn
Expand Down
6 changes: 6 additions & 0 deletions pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 5.11.0
2025-02-05 (Date of Last Commit)

* Refactored the Peak Calling step of Multiome to use the JoinBarcodes output h5ad as the input for peak calling, ensuring the h5ad files have both GEX and ATAC barcodes
* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline

# 5.10.0
2025-02-03 (Date of Last Commit)

Expand Down
22 changes: 17 additions & 5 deletions pipelines/skylab/multiome/Multiome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ import "../../../pipelines/skylab/atac/atac.wdl" as atac
import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus
import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils
import "../../../tasks/broad/Utilities.wdl" as utils
#import "../../../pipelines/skylab/atac/atac.wdl" as PeakCalling
workflow Multiome {

String pipeline_version = "5.10.0"
String pipeline_version = "5.11.0"

input {
String cloud_provider
Expand Down Expand Up @@ -60,7 +61,7 @@ workflow Multiome {
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Define docker images
String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229"
String snap_atac_docker_image = "snapatac2:2.0.0"

# Define all whitelist files
File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt"
Expand Down Expand Up @@ -124,7 +125,8 @@ workflow Multiome {
atac_nhash_id = atac_nhash_id,
adapter_seq_read3 = adapter_seq_read3,
atac_expected_cells = expected_cells,
peak_calling = run_peak_calling
peak_calling = false

}
call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes {
input:
Expand All @@ -136,6 +138,16 @@ workflow Multiome {
atac_fragment = Atac.fragment_file
}

if (run_peak_calling) {
call atac.PeakCalling as PeakCalling {
input:
annotations_gtf = annotations_gtf,
metrics_h5ad = JoinBarcodes.atac_h5ad_file,
chrom_sizes = chrom_sizes,
output_base_name = input_id,
docker_path = docker_prefix + snap_atac_docker_image,
}
}

meta {
allowNestedInputs: true
Expand All @@ -152,8 +164,8 @@ workflow Multiome {
File fragment_file_index = JoinBarcodes.atac_fragment_tsv_index
File snap_metrics_atac = JoinBarcodes.atac_h5ad_file
File atac_library_metrics = Atac.library_metrics_file
File? cellbybin_h5ad_file = Atac.cellbybin_h5ad_file
File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad_file
File? cellbybin_h5ad_file = PeakCalling.cellbybin_h5ad
File? cellbypeak_h5ad_file = PeakCalling.cellbypeak_h5ad

# optimus outputs
File genomic_reference_version_gex = Optimus.genomic_reference_version
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,6 @@
"Multiome.soloMultiMappers":"Uniform",
"Multiome.gex_nhash_id":"example_1234",
"Multiome.atac_nhash_id":"example_1234",
"Multiome.run_peak_calling":"true"
"Multiome.run_peak_calling": true,
"Multiome.PeakCalling.probability_threshold":"1.00"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
{
"Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf",
"Multiome.gex_i1_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_I1_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz"
],
"Multiome.input_id":"10k_PBMC",
"Multiome.cloud_provider":"gcp",
"Multiome.gex_r1_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz"
],
"Multiome.gex_r2_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R2_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R2_001.fastq.gz"
],
"Multiome.atac_r1_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R1_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R1_001.fastq.gz"
],
"Multiome.atac_r2_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R2_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R2_001.fastq.gz"
],
"Multiome.atac_r3_fastq":[
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R3_001.fastq.gz",
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R3_001.fastq.gz"
],
"Multiome.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar",
"Multiome.tar_star_reference":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar",
"Multiome.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes",
"Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake",
"Multiome.Atac.num_threads_bwa":"24",
"Multiome.Atac.mem_size_bwa":"175",
"Multiome.gex_nhash_id":"example_1234",
"Multiome.atac_nhash_id":"example_1234",
"Multiome.run_peak_calling":true,
"Multiome.PeakCalling.probability_threshold":"0.5"
}
6 changes: 5 additions & 1 deletion pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# 7.9.2
2025-02-12 (Date of Last Commit)

* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline

# 7.9.1
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags; set to false by default, but set to true if the Slide-Tags pipeline is calling Optimus

* Added reference_gtf_file to the output h5ad unstructured metadata

# 7.9.0
Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ workflow Optimus {
}

# version of this pipeline
String pipeline_version = "7.9.1"
String pipeline_version = "7.9.2"

# this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays
Array[Int] indices = range(length(r1_fastq))
Expand All @@ -99,7 +99,7 @@ workflow Optimus {
String pytools_docker = "pytools:1.0.0-1661263730"
String empty_drops_docker = "empty-drops:1.0.1-4.2"
String star_docker = "star:1.0.1-2.7.11a-1692706072"
String warp_tools_docker = "warp-tools:2.6.0"
String warp_tools_docker = "warp-tools:2.6.1"
String star_merge_docker = "star-merge-npz:1.3.0"
String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196"

Expand Down
6 changes: 6 additions & 0 deletions pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 1.10.2
2025-02-06 (Date of Last Commit)

* Updated the SnapATAC2 docker image to the latest SnapATAC2, allowing for future peak calling implementation
* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline

# 1.10.1
2025-02-03 (Date of Last Commit)

Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/paired_tag/PairedTag.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow PairedTag {

String pipeline_version = "1.10.1"
String pipeline_version = "1.10.2"


input {
Expand Down Expand Up @@ -63,7 +63,7 @@ workflow PairedTag {

# All docker images that are needed for tasks in this workflow
String upstools_docker = "upstools:2.0.0"
String snapatac_docker = "snapatac2:1.0.4-2.3.1-1700590229"
String snapatac_docker = "snapatac2:2.0.0"

# Prefixes based on cloud env
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
Expand Down
4 changes: 4 additions & 0 deletions pipelines/skylab/slideseq/SlideSeq.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# 3.4.9
2025-02-12 (Date of Last Commit)

* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline

# 3.4.8
2025-01-13 (Date of Last Commit)

Expand Down
4 changes: 2 additions & 2 deletions pipelines/skylab/slideseq/SlideSeq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils

workflow SlideSeq {

String pipeline_version = "3.4.8"
String pipeline_version = "3.4.9"

input {
Array[File] r1_fastq
Expand All @@ -48,7 +48,7 @@ workflow SlideSeq {
# docker images
String pytools_docker = "pytools:1.0.0-1661263730"
String picard_cloud_docker = "picard-cloud:2.26.10"
String warp_tools_docker = "warp-tools:2.6.0"
String warp_tools_docker = "warp-tools:2.6.1"
String star_merge_docker = "star-merge-npz:1.3.0"

String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.0.8
2025-02-12 (Date of Last Commit)

* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline

# 2.0.7
2025-01-13 (Date of Last Commit)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus {
}

# Version of this pipeline
String pipeline_version = "2.0.7"
String pipeline_version = "2.0.8"

if (false) {
String? none = "None"
Expand Down
2 changes: 1 addition & 1 deletion tasks/skylab/FastqProcessing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq {
# Runtime attributes
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.0"
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.1"
Int cpu = 16
Int machine_mb = 40000
Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50
Expand Down
4 changes: 2 additions & 2 deletions tasks/skylab/H5adUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ task SingleNucleusSlideseqH5adOutput {
task SingleNucleusSmartSeq2H5adOutput {
input {
#runtime values
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0"
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.1"
Array[File] alignment_summary_metrics
Array[File] dedup_metrics
Expand Down Expand Up @@ -631,7 +631,7 @@ task AggregateSmartSeq2H5ad {
Array[File] h5ad_input
String batch_id
String pipeline_version
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0"
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.1"
Int disk = 200
Int machine_mem_mb = 4000
Int cpu = 1
Expand Down
2 changes: 1 addition & 1 deletion verification/VerifyTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ task CompareTabix {
fi
>>>
runtime {
docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229"
docker: "us.gcr.io/broad-gotc-prod/snapatac2:2.0.0"
disks: "local-disk 100 HDD"
memory: "50 GiB"
preemptible: 3
Expand Down
Loading

0 comments on commit bcaeb26

Please sign in to comment.