diff --git a/.dockstore.yml b/.dockstore.yml
index 366840f7d4..08449df04d 100644
--- a/.dockstore.yml
+++ b/.dockstore.yml
@@ -19,14 +19,14 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /pipelines/skylab/smartseq2_single_sample/SmartSeq2SingleSample.wdl

-  - name: Smartseq2_Single_Nucleus_Multisample
-    subclass: WDL
-    primaryDescriptorPath: /pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
-
   - name: Smartseq2_Single_Nucleus
     subclass: WDL
     primaryDescriptorPath: /pipelines/skylab/smartseq2_single_nucleus/SmartSeq2SingleNucleus.wdl

+  - name: Smartseq2_Single_Nucleus_Multisample
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl
+
   - name: IlluminaGenotypingArray
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl
@@ -43,10 +43,18 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl

+  - name: ExomeReprocessing
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl
+
   - name: WholeGenomeGermlineSingleSample
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl

+  - name: WholeGenomeReprocessing
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl
+
   - name: OptimusHcaAdapter
     subclass: WDL
     primaryDescriptorPath: /projects/optimus/CreateOptimusAdapterMetadata.wdl
@@ -59,6 +67,10 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /pipelines/cemba/cemba_methylcseq/CEMBA.wdl

+  - name: CramToUnmappedBams
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl
+
   - name: ReblockGVCF
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl
@@ -71,6 +83,14 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/arrays/imputation/Imputation.wdl

+  - name: ImputationBeagle
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl
+
+  - name: ArrayImputationQuotaConsumed
+    subclass: WDL
+    primaryDescriptorPath: /pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.wdl
+
   - name: RNAWithUMIsPipeline
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl
@@ -123,10 +143,90 @@ workflows:
     subclass: WDL
     primaryDescriptorPath: /pipelines/skylab/atac/atac.wdl

+  - name: TestCramToUnmappedBams
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestCramToUnmappedBams.wdl
+
+  - name: TestExomeGermlineSingleSample
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestExomeGermlineSingleSample.wdl
+
+  - name: TestExomeReprocessing
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestExomeReprocessing.wdl
+
   - name: TestIlluminaGenotypingArray
     subclass: WDL
     primaryDescriptorPath: /verification/test-wdls/TestIlluminaGenotypingArray.wdl
+
+  - name: TestImputation
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestImputation.wdl
+
+  - name: TestImputationBeagle
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestImputationBeagle.wdl
+
+  - name: TestJointGenotyping
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestJointGenotyping.wdl
+
+  - name: TestPairedTag
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestPairedTag.wdl
+  - name: TestOptimus
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestOptimus.wdl
+
+  - name: TestMultiSampleSmartSeq2SingleNucleus
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
+
+  - name: TestMultiome
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestMultiome.wdl
+
+  - name: TestSlideSeq
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestSlideSeq.wdl
+
+  - name: TestReblockGVCF
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestReblockGVCF.wdl
+
+  - name: TestRNAWithUMIsPipeline
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestRNAWithUMIsPipeline.wdl
+
+  - name: Testsnm3C
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/Testsnm3C.wdl
+
+  - name: TestUltimaGenomicsJointGenotyping
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl
+
+  - name: TestUltimaGenomicsWholeGenomeGermline
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl
+
+  - name: TestUltimaGenomicsWholeGenomeCramOnly
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
+
+  - name: TestVariantCalling
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestVariantCalling.wdl
+
+  - name: TestWholeGenomeGermlineSingleSample
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
+
+  - name: TestWholeGenomeReprocessing
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestWholeGenomeReprocessing.wdl
+
   - name: VariantCalling
     subclass: WDL
     primaryDescriptorPath: /pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl
@@ -134,3 +234,7 @@ workflows:
   - name: SlideTags
     subclass: WDL
     primaryDescriptorPath: /beta-pipelines/skylab/slidetags/SlideTags.wdl
+
+  - name: TestATAC
+    subclass: WDL
+    primaryDescriptorPath: /verification/test-wdls/TestATAC.wdl
diff --git a/.github/workflows/test_atac.yml b/.github/workflows/test_atac.yml
new file mode 100644
index 0000000000..1962068303
--- /dev/null
+++ b/.github/workflows/test_atac.yml
@@ -0,0 +1,63 @@
+name: Test ATAC
+
+# Controls when the workflow will run
+on:
+  pull_request:
+    branches: [ "develop", "staging", "master" ]
+    # Only run if files in these paths changed:
+    ####################################
+    # SET PIPELINE SPECIFIC PATHS HERE #
+    ####################################
+    paths:
+      # anything in the pipelines folder
+      - 'pipelines/skylab/atac/**'
+      # tasks from the pipeline WDL and their dependencies
+      - 'tasks/skylab/MergeSortBam.wdl'
+      - 'tasks/skylab/FastqProcessing.wdl'
+      - 'tasks/skylab/PairedTagUtils.wdl'
+      - 'tasks/broad/Utilities.wdl'
+      # verification WDL and its dependencies
+      - 'verification/VerifyATAC.wdl'
+      - 'verification/VerifyTasks.wdl'
+      # test WDL and its dependencies
+      - 'verification/test-wdls/TestATAC.wdl'
+      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
+      # this file, the subworkflow file, and the firecloud_api script
+      - '.github/workflows/test_atac.yml'
+      - '.github/workflows/warp_test_workflow.yml'
+      - 'scripts/firecloud_api/firecloud_api.py'
+
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      useCallCache:
+        description: 'Use call cache (default: true)'
+        required: false
+        default: "true"
+      updateTruth:
+        description: 'Update truth files (default: false)'
+        required: false
+        default: "false"
+      testType:
+        description: 'Specify the type of test (Plumbing or Scientific)'
+        required: false
+      truthBranch:
+        description: 'Specify the branch for truth files (default: master)'
+        required: false
+        default: "master"
+
+jobs:
+  TestATAC:
+    uses: ./.github/workflows/warp_test_workflow.yml
+    with:
+      pipeline_name: TestATAC
+      dockstore_pipeline_name: atac
+      pipeline_dir: pipelines/skylab/atac
+      use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }}
+      update_truth: ${{ github.event.inputs.updateTruth || 'false' }}
+      test_type: ${{ github.event.inputs.testType }}
+      truth_branch: ${{ github.event.inputs.truthBranch || 'master' }}
+    secrets:
+      PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
+      DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/test_cram_to_unmapped_bams.yml b/.github/workflows/test_cram_to_unmapped_bams.yml
new file mode 100644
index 0000000000..952ac0aa5f
--- /dev/null
+++ b/.github/workflows/test_cram_to_unmapped_bams.yml
@@ -0,0 +1,63 @@
+name: Test CramToUnmappedBams
+
+# Controls when the workflow will run
+on:
+  pull_request:
+    branches: [ "develop", "staging", "master" ]
+    # Only run if files in these paths changed:
+    ####################################
+    # SET PIPELINE SPECIFIC PATHS HERE #
+    ####################################
+    paths:
+      # anything in the pipelines folder
+      - 'pipelines/broad/reprocessing/cram_to_unmapped_bams/**'
+      # tasks from the pipeline WDL and their dependencies
+      - 'tasks/broad/Utilities.wdl'
+      # verification WDL and its dependencies
+      - 'verification/VerifyCramToUnmappedBams.wdl'
+      # test WDL and its dependencies
+      - 'verification/test-wdls/TestCramToUnmappedBams.wdl'
+      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
+      # this file, the subworkflow file, and the firecloud_api script
+      - '.github/workflows/test_cram_to_unmapped_bams.yml'
+      - '.github/workflows/warp_test_workflow.yml'
+      - 'scripts/firecloud_api/firecloud_api.py'
+
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      useCallCache:
+        description: 'Use call cache (default: true)'
+        required: false
+        default: "true"
+      updateTruth:
+        description: 'Update truth files (default: false)'
+        required: false
+        default: "false"
+      testType:
+        description: 'Specify the type of test (Plumbing or Scientific)'
+        required: false
+        type: choice
+        options:
+          - Plumbing
+          - Scientific
+      truthBranch:
+        description: 'Specify the branch for truth files (default: master)'
+        required: false
+        default: "master"
+
+jobs:
+  TestCramToUnmappedBams:
+    uses: ./.github/workflows/warp_test_workflow.yml
+    with:
+      pipeline_name: TestCramToUnmappedBams
+      dockstore_pipeline_name: CramToUnmappedBams
+      pipeline_dir: pipelines/broad/reprocessing/cram_to_unmapped_bams
+      use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }}
+      update_truth: ${{ github.event.inputs.updateTruth || 'false' }}
+      test_type: ${{ github.event.inputs.testType }}
+      truth_branch: ${{ github.event.inputs.truthBranch || 'master' }}
+    secrets:
+      PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
+      DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/test_exome_germline_single_sample.yml b/.github/workflows/test_exome_germline_single_sample.yml
new file mode 100644
index 0000000000..c5f9763f59
--- /dev/null
+++ b/.github/workflows/test_exome_germline_single_sample.yml
@@ -0,0 +1,76 @@
+name: Test ExomeGermlineSingleSample
+
+# Controls when the workflow will run
+on:
+  pull_request:
+    branches: [ "develop", "staging", "master" ]
+    # Only run if files in these paths changed:
+    ####################################
+    # SET PIPELINE SPECIFIC PATHS HERE #
+    ####################################
+    paths:
+      # anything in the pipelines folder
+      - 'pipelines/broad/dna_seq/germline/single_sample/exome/**'
+      # tasks from the pipeline WDL and their dependencies
+      - 'tasks/broad/UnmappedBamToAlignedBam.wdl'
+      - 'tasks/broad/AggregatedBamQC.wdl'
+      - 'tasks/broad/Qc.wdl'
+      - 'tasks/broad/BamProcessing.wdl'
+      - 'tasks/broad/BamToCram.wdl'
+      - 'structs/dna_seq/DNASeqStructs.wdl'
+      - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl'
+      - 'tasks/broad/GermlineVariantDiscovery.wdl'
+      - 'tasks/broad/Utilities.wdl'
+      - 'tasks/broad/DragenTasks.wdl'
+      - 'tasks/broad/Qc.wdl'
+      - 'tasks/broad/Utilities.wdl'
+      # verification WDL and its dependencies
+      - 'verification/VerifyGermlineSingleSample.wdl'
+      - 'verification/VerifyMetrics.wdl'
+      - 'verification/VerifyTasks.wdl'
+      # test WDL and its dependencies
+      - 'verification/test-wdls/TestExomeGermlineSingleSample.wdl'
+      - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl'
+      # this file, the subworkflow file, and the firecloud_api script
+      - '.github/workflows/test_exome_germline_single_sample.yml'
+      - '.github/workflows/warp_test_workflow.yml'
+      - 'scripts/firecloud_api/firecloud_api.py'
+
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+    inputs:
+      useCallCache:
+        description: 'Use call cache (default: true)'
+        required: false
+        default: "true"
+      updateTruth:
+        description: 'Update truth files (default: false)'
+        required: false
+        default: "false"
+      testType:
+        description: 'Specify the type of test (Plumbing or Scientific)'
+        required: false
+        type: choice
+        options:
+          - Plumbing
+          - Scientific
+      truthBranch:
+        description: 'Specify the branch for truth files (default: master)'
+        required: false
+        default: "master"
+
+jobs:
+  TestExomeGermlineSingleSample:
+    uses: ./.github/workflows/warp_test_workflow.yml
+    with:
+      pipeline_name: TestExomeGermlineSingleSample
+      dockstore_pipeline_name: ExomeGermlineSingleSample
+      pipeline_dir: pipelines/broad/dna_seq/germline/single_sample/exome
+      use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }}
+      update_truth: ${{ github.event.inputs.updateTruth || 'false' }}
+      test_type: ${{ github.event.inputs.testType }}
+      truth_branch: ${{ github.event.inputs.truthBranch || 'master' }}
+    secrets:
+      PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }}
+      DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/test_exome_reprocessing.yml b/.github/workflows/test_exome_reprocessing.yml
new file mode 100644
index 0000000000..a5e3976eb5
--- /dev/null
+++ b/.github/workflows/test_exome_reprocessing.yml
@@ -0,0 +1,79 @@
+name: Test ExomeReprocessing
+
+# Controls when the workflow will run
+on:
+  pull_request:
+    branches: [ "develop", "staging", "master" ]
+    # Only run if files in these paths changed:
+    ####################################
+    # SET PIPELINE SPECIFIC PATHS HERE #
+    ####################################
+    paths:
+      # anything in the pipelines folder
+      - 'pipelines/broad/reprocessing/exome/**'
+      - 'pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl'
+      - 'pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl'
+      # tasks from the pipeline WDL and their
dependencies + - 'tasks/broad/UnmappedBamToAlignedBam.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/DragenTasks.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifyExomeReprocessing.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestCramToUnmappedBams.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_exome_reprocessing.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestExomeReprocessing: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestExomeReprocessing + dockstore_pipeline_name: ExomeReprocessing + pipeline_dir: pipelines/broad/reprocessing/exome + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_illumina_genotyping_array.yml b/.github/workflows/test_illumina_genotyping_array.yml index e1774240bb..2250b2e281 100644 --- a/.github/workflows/test_illumina_genotyping_array.yml +++ b/.github/workflows/test_illumina_genotyping_array.yml @@ -1,20 +1,31 @@ - name: Test Illumina Genotyping Array # Controls when the workflow will run on: - #run on push to feature branch "kp_GHA_Terra_auth_PD-2682" - REMOVE WHEN DONE TESTING - # push: - # branches: - # - kp_GHA_Terra_auth_PD-2682 pull_request: branches: [ "develop", "staging", "master" ] - # Only run if files in these paths changed: pipelines/broad/genotyping/illumina, tasks, verification, .github/workflows/test_illumina_genotyping_array.yml + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### paths: + # anything in the pipelines folder - 'pipelines/broad/genotyping/illumina/**' - - 'tasks/**' - - 'verification/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/IlluminaGenotypingArrayTasks.wdl' + - 'tasks/broad/Qc.wdl' + # verification WDL and its dependencies + - 'verification/VerifyIlluminaGenotypingArray.wdl' + # test WDL and its dependencies + - 
'verification/test-wdls/TestIlluminaGenotypingArray.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script - '.github/workflows/test_illumina_genotyping_array.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: inputs: @@ -22,170 +33,33 @@ on: description: 'Use call cache (default: true)' required: false default: "true" -env: - PROJECT_NAME: WARP - # Github repo name - REPOSITORY_NAME: ${{ github.event.repository.name }} + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" jobs: - run_pipeline: - runs-on: ubuntu-latest - # Add "id-token" with the intended permissions. - permissions: - contents: 'read' - id-token: 'write' - - steps: - # actions/checkout MUST come before auth - - uses: 'actions/checkout@v3' - - - id: 'auth' - name: 'Authenticate to Google Cloud' - uses: 'google-github-actions/auth@v2' - with: - token_format: 'access_token' - # Centralized in dsp-tools-k8s; ask in #dsp-devops-champions for help troubleshooting - # This is provided by the DevOps team - do not change! - workload_identity_provider: 'projects/1038484894585/locations/global/workloadIdentityPools/github-wi-pool/providers/github-wi-provider' - # This is our tester service account - service_account: 'pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com' - access_token_lifetime: '3600' #seconds, default is 3600 - access_token_scopes: 'profile, email, openid' - - # ... further steps are automatically authenticated - - name: Check working directory - run: | - echo "Current directory:" - pwd - ls -lht - - - name: Submit job, poll status, and get outputs - id: pipeline_run - run: | - # Set these environment variables - TOKEN="${{ steps.auth.outputs.access_token }}" - NAMESPACE="warp-pipelines" - WORKSPACE="WARP Tests" - PIPELINE_NAME="IlluminaGenotypingArray" - USE_CALL_CACHE="${{ github.event.inputs.useCallCache }}" - - # Function to call the Firecloud API using the firecloud_api.py script - firecloud_action() { - python3 scripts/firecloud_api/firecloud_api.py --token "$TOKEN" --namespace "$NAMESPACE" --workspace "$WORKSPACE" --action "$1" "${@:2}" - } - - # Create the submission_data.json file - SUBMISSION_DATA_FILE="submission_data.json" - # Convert USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) - if [ "$USE_CALL_CACHE" = "true" ]; then - USE_CALL_CACHE_BOOL=true - else - USE_CALL_CACHE_BOOL=false - fi - # Use a heredoc to generate the JSON file content dynamically - cat < "$SUBMISSION_DATA_FILE" - { - "methodConfigurationNamespace": "warp-pipelines", - "methodConfigurationName": "$PIPELINE_NAME", - "useCallCache": $USE_CALL_CACHE_BOOL, - "deleteIntermediateOutputFiles": true, - "useReferenceDisks": true, - "memoryRetryMultiplier": 1.2, - "workflowFailureMode": "NoNewCalls", - "userComment": "Automated submission", - "ignoreEmptyOutputs": false - } - EOF - - echo "Created submission data file: $SUBMISSION_DATA_FILE" - - # 1. 
Submit a new workflow using the generated submission_data.json - SUBMISSION_ID=$(firecloud_action submit --submission_data_file "$SUBMISSION_DATA_FILE") - - # Check if submission was successful - if [ -z "$SUBMISSION_ID" ]; then - echo "Submission failed." # Log failure to stdout - echo "submission_id=" >> $GITHUB_OUTPUT # Set empty submission id - exit 1 - fi - - echo "Submission ID: $SUBMISSION_ID" - echo "submission_id=$SUBMISSION_ID" >> $GITHUB_OUTPUT # Write the submission ID to GITHUB_OUTPUT - - # 2. Poll submission status and get workflow IDs and statuses - echo "Polling submission status..." - RESPONSE=$(firecloud_action poll_status --submission_id "$SUBMISSION_ID") - - # Parse the JSON response to get the workflow ID and statuses - echo "Workflows and their statuses:" - echo "$RESPONSE" | jq - - # Check if RESPONSE is empty - if [ -z "$RESPONSE" ]; then - echo "Failed to retrieve Workflow IDs." # Log failure to stdout - exit 1 - fi - - # Extract workflows and their statuses - WORKFLOW_STATUSES=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + ": " + .value) | .[]') - echo "workflow_statuses=$WORKFLOW_STATUSES" >> $GITHUB_OUTPUT # Write workflow statuses to GITHUB_OUTPUT - - # Generate markdown summary table for workflows and statuses - WORKFLOW_TABLE=$(echo "$RESPONSE" | jq -r 'to_entries | ["Workflow ID | Status", "--- | ---"] + map(.key + " | " + .value) | .[]') - - # Print workflow table to stdout - echo "$WORKFLOW_TABLE" - - # 3. Iterate over the Workflow IDs to get outputs - OUTPUTS="" - echo "Retrieving workflow outputs..." - for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do - WORKFLOW_OUTPUT=$(firecloud_action get_outputs --submission_id "$SUBMISSION_ID" --workflow_id "$WORKFLOW_ID" --pipeline_name "$PIPELINE_NAME") - OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' - done - echo "Workflow outputs retrieved successfully." 
- echo "Raw output before jq:" - echo "$OUTPUTS" - echo "outputs=$OUTPUTS" >> $GITHUB_OUTPUT # Write the outputs to GITHUB_OUTPUT - - # Handle null values, strings, and numbers in the outputs by converting everything to a string and replacing null with '-' - OUTPUTS_TABLE=$(echo "$OUTPUTS" | jq -r 'to_entries | ["Output | Value", "--- | ---"] + map(.key + " | " + (if .value == null then "-" else (.value | tostring) end)) | .[]') - #print outputs table to stdout - echo "$OUTPUTS_TABLE" - - - name: Print Summary on Success - if: success() - run: | - echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY - echo "- **Pipeline Name**: IlluminaGenotypingArray" >> $GITHUB_STEP_SUMMARY - echo "- **Submission ID**: ${{ steps.pipeline_run.outputs.submission_id }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - echo "## Workflows and their statuses" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.workflow_statuses }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - - echo "## Workflow Outputs" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.outputs }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo " :shipit: " >> $GITHUB_STEP_SUMMARY - - - name: Print Summary on Failure - if: failure() - run: | - echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY - echo "- **Pipeline Name**: IlluminaGenotypingArray" >> $GITHUB_STEP_SUMMARY - echo "- **Submission ID**: ${{ steps.pipeline_run.outputs.submission_id }}" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - - echo "## Workflows and their statuses (if available)" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.workflow_statuses }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - - echo "## Workflow Outputs (if available)" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY - echo "${{ steps.pipeline_run.outputs.outputs }}" >> $GITHUB_STEP_SUMMARY - echo "\`\`\`" >> $GITHUB_STEP_SUMMARY \ No newline at end of file + TestIlluminaGenotypingArray: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestIlluminaGenotypingArray + dockstore_pipeline_name: IlluminaGenotypingArray + pipeline_dir: pipelines/broad/genotyping/illumina + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_imputation.yml b/.github/workflows/test_imputation.yml new file mode 100644 index 0000000000..629cd00f9f --- /dev/null +++ b/.github/workflows/test_imputation.yml @@ -0,0 +1,65 @@ +name: Test Imputation + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/arrays/imputation/**' + - 'structs/imputation/ImputationStructs.wdl' + # tasks from the pipeline WDL and their dependencies + - 
'tasks/broad/ImputationTasks.wdl' + # verification WDL and its dependencies + - 'verification/VerifyImputation.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestImputation.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_imputation.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestImputation: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestImputation + dockstore_pipeline_name: Imputation + pipeline_dir: pipelines/broad/arrays/imputation + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_imputation_beagle.yml b/.github/workflows/test_imputation_beagle.yml new file mode 100644 index 0000000000..f9a627f02f --- /dev/null +++ b/.github/workflows/test_imputation_beagle.yml @@ -0,0 +1,75 @@ +name: Test ImputationBeagle + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + - 'pipelines/broad/arrays/imputation_beagle/**' + - 'structs/imputation/ImputationBeagleStructs.wdl' + - 'tasks/broad/ImputationTasks.wdl' + - 'tasks/broad/ImputationBeagleTasks.wdl' + - 'verification/VerifyImputationBeagle.wdl' + - 'verification/test-wdls/TestImputationBeagle.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + - '.github/workflows/test_imputation_beagle.yml' + - '.github/workflows/warp_test_workflow.yml' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +env: + # pipeline configuration + PIPELINE_NAME: TestImputationBeagle + DOCKSTORE_PIPELINE_NAME: ImputationBeagle + PIPELINE_DIR: "pipelines/broad/arrays/imputation_beagle" + + # workspace configuration + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + + # service 
account configuration + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + + +jobs: + TestImputationBeagle: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestImputationBeagle + dockstore_pipeline_name: ImputationBeagle + pipeline_dir: pipelines/broad/arrays/imputation_beagle + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} diff --git a/.github/workflows/test_joint_genotyping.yml b/.github/workflows/test_joint_genotyping.yml new file mode 100644 index 0000000000..f2122fed0e --- /dev/null +++ b/.github/workflows/test_joint_genotyping.yml @@ -0,0 +1,68 @@ +name: Test JointGenotyping + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/germline/joint_genotyping/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/JointGenotypingTasks.wdl' + # verification WDL and its dependencies + - 'verification/VerifyJointGenotyping.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyNA12878.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestJointGenotyping.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_joint_genotyping.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestJointGenotyping: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestJointGenotyping + dockstore_pipeline_name: JointGenotyping + pipeline_dir: pipelines/broad/dna_seq/germline/joint_genotyping + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_multiome.yml b/.github/workflows/test_multiome.yml new file mode 100644 index 0000000000..831aabf270 --- /dev/null +++ b/.github/workflows/test_multiome.yml @@ -0,0 +1,76 @@ +name: Test Multiome + +# Controls when 
the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/skylab/multiome/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/PairedTagUtils.wdl' + - 'pipelines/skylab/optimus/Optimus.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/RunEmptyDrops.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # verification WDL and its dependencies + - 'verification/VerifyMultiome.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestMultiome.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_multiome.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +jobs: + TestMultiome: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestMultiome + dockstore_pipeline_name: Multiome + pipeline_dir: pipelines/skylab/multiome + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_multisamplesmartseq2singlenucleus.yml b/.github/workflows/test_multisamplesmartseq2singlenucleus.yml new file mode 100644 index 0000000000..d420e6575f --- /dev/null +++ b/.github/workflows/test_multisamplesmartseq2singlenucleus.yml @@ -0,0 +1,68 @@ +name: Test Multi Sample Smart Seq 2 Single Nucleus +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/skylab/smartseq2_single_nucleus_multisample/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/TrimAdapters.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Picard.wdl' + - 'tasks/skylab/FeatureCounts.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl' + - 
'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_multisamplesmartseq2singlenucleus.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestMultiSampleSmartSeq2SingleNucleus: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestMultiSampleSmartSeq2SingleNucleus + dockstore_pipeline_name: Smartseq2_Single_Nucleus_Multisample + pipeline_dir: pipelines/skylab/smartseq2_single_nucleus_multisample + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_optimus.yml b/.github/workflows/test_optimus.yml new file mode 100644 index 0000000000..ae2418705c --- /dev/null +++ b/.github/workflows/test_optimus.yml @@ -0,0 +1,73 @@ +name: Test Optimus + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/skylab/optimus/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/RunEmptyDrops.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifyOptimus.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestOptimus.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_optimus.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + 
description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + + +jobs: + TestOptimus: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestOptimus + dockstore_pipeline_name: Optimus + pipeline_dir: pipelines/skylab/optimus + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_pairedtag.yml b/.github/workflows/test_pairedtag.yml new file mode 100644 index 0000000000..37f53630e7 --- /dev/null +++ b/.github/workflows/test_pairedtag.yml @@ -0,0 +1,74 @@ +name: Test PairedTag + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/skylab/paired_tag/**' + - 'pipelines/skylab/optimus/Optimus.wdl' + # tasks from the pipeline WDL and their dependencies + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/skylab/PairedTagUtils.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/RunEmptyDrops.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + # verification WDL and its dependencies + - 'verification/VerifyPairedTag.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestPairedTag.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_pairedtag.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestPairedTag: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestPairedTag + dockstore_pipeline_name: PairedTag + pipeline_dir: pipelines/skylab/paired_tag + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_reblockGVCF.yml b/.github/workflows/test_reblockGVCF.yml new file mode 100644 index 0000000000..92be99d298 --- /dev/null +++ b/.github/workflows/test_reblockGVCF.yml @@ -0,0 
+1,66 @@ +name: Test ReblockGVCF + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifyGvcf.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestReblockGVCF.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_reblockGVCF.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestReblockGVCF: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestReblockGVCF + dockstore_pipeline_name: ReblockGVCF + pipeline_dir: pipelines/broad/dna_seq/germline/joint_genotyping/reblocking + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_rna_with_umis.yml b/.github/workflows/test_rna_with_umis.yml new file mode 100644 index 0000000000..82debfaa18 --- /dev/null +++ b/.github/workflows/test_rna_with_umis.yml @@ -0,0 +1,67 @@ +name: Test RNA with UMIs + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/rna_seq/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/UMIAwareDuplicateMarking.wdl' + - 'tasks/broad/RNAWithUMIsTasks.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifyRNAWithUMIs.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestRNAWithUMIsPipeline.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_rna_with_umis.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this 
workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestRNAWithUMIsPipeline: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestRNAWithUMIsPipeline + dockstore_pipeline_name: RNAWithUMIsPipeline + pipeline_dir: pipelines/broad/rna_seq + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_slideseq.yml b/.github/workflows/test_slideseq.yml new file mode 100644 index 0000000000..ee66e000ea --- /dev/null +++ b/.github/workflows/test_slideseq.yml @@ -0,0 +1,70 @@ +name: Test Slide Seq + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/skylab/slideseq/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/skylab/StarAlign.wdl' + - 'tasks/skylab/FastqProcessing.wdl' + - 'tasks/skylab/Metrics.wdl' + - 'tasks/skylab/H5adUtils.wdl' + - 'tasks/skylab/CheckInputs.wdl' + - 'tasks/skylab/MergeSortBam.wdl' + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/VerifySlideSeq.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestSlideSeq.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_slideseq.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestSlideSeq: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestSlideSeq + dockstore_pipeline_name: SlideSeq + pipeline_dir: pipelines/skylab/slideseq + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ 
secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_snm3c.yml b/.github/workflows/test_snm3c.yml new file mode 100644 index 0000000000..f22a88ae00 --- /dev/null +++ b/.github/workflows/test_snm3c.yml @@ -0,0 +1,63 @@ +name: Test snm3C + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/skylab/snm3C/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/Utilities.wdl' + # verification WDL and its dependencies + - 'verification/Verifysnm3C.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/Testsnm3C.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_snm3c.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + Testsnm3C: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: Testsnm3C + dockstore_pipeline_name: snm3C-seq + pipeline_dir: pipelines/skylab/snm3C + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_ultima_genomics_joint_genotyping.yml b/.github/workflows/test_ultima_genomics_joint_genotyping.yml new file mode 100644 index 0000000000..d37674c7fd --- /dev/null +++ b/.github/workflows/test_ultima_genomics_joint_genotyping.yml @@ -0,0 +1,70 @@ +name: Test UltimaGenomicsJointGenotyping + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/JointGenotypingTasks.wdl' + - 'tasks/broad/UltimaGenomicsGermlineFilteringThreshold.wdl' + - 'tasks/broad/JointGenotypingTasks.wdl' + # verification WDL and its dependencies + - 'verification/VerifyUltimaGenomicsJointGenotyping.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyMetrics.wdl' + - 
'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyNA12878.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_ultima_genomics_joint_genotyping.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestUltimaGenomicsJointGenotyping: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestUltimaGenomicsJointGenotyping + dockstore_pipeline_name: UltimaGenomicsJointGenotyping + pipeline_dir: pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml b/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml new file mode 100644 index 0000000000..25439c9a99 --- /dev/null +++ b/.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml @@ -0,0 +1,76 @@ +name: Test UltimaGenomicsWholeGenomeCramOnly + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/somatic/single_sample/ugwgs/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineQC.wdl' + - 'structs/dna_seq/UltimaGenomicsWholeGenomeGermlineStructs.wdl' + - 'tasks/broad/InternalTasks.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineAlignmentMarkDuplicates.wdl' + - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl' + # verification WDL and its dependencies + - 'verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyNA12878.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_ultima_genomics_whole_genome_cram_only.yml' + - 
'.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestUltimaGenomicsWholeGenomeCramOnly: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestUltimaGenomicsWholeGenomeCramOnly + dockstore_pipeline_name: UltimaGenomicsWholeGenomeCramOnly + pipeline_dir: pipelines/broad/dna_seq/somatic/single_sample/ugwgs + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_ultima_genomics_whole_genome_germline.yml b/.github/workflows/test_ultima_genomics_whole_genome_germline.yml new file mode 100644 index 0000000000..2f7acc0193 --- /dev/null +++ b/.github/workflows/test_ultima_genomics_whole_genome_germline.yml @@ -0,0 +1,76 @@ +name: Test UltimaGenomicsWholeGenomeGermline + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/germline/single_sample/ugwgs/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineTasks.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineQC.wdl' + - 'structs/dna_seq/UltimaGenomicsWholeGenomeGermlineStructs.wdl' + - 'tasks/broad/InternalTasks.wdl' + - 'tasks/broad/UltimaGenomicsWholeGenomeGermlineAlignmentMarkDuplicates.wdl' + - 'pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl' + # verification WDL and its dependencies + - 'verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + - 'verification/VerifyNA12878.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_ultima_genomics_whole_genome_germline.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: 
false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestUltimaGenomicsWholeGenomeGermline: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestUltimaGenomicsWholeGenomeGermline + dockstore_pipeline_name: UltimaGenomicsWholeGenomeGermline + pipeline_dir: pipelines/broad/dna_seq/germline/single_sample/ugwgs + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_variant_calling.yml b/.github/workflows/test_variant_calling.yml new file mode 100644 index 0000000000..2f5f41cd92 --- /dev/null +++ b/.github/workflows/test_variant_calling.yml @@ -0,0 +1,68 @@ +name: Test Variant Calling + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/germline/variant_calling/**' + # tasks from the pipeline WDL and their dependencies + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/DragenTasks.wdl' + # verification WDL and its dependencies + - 'verification/VerifyGvcf.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestVariantCalling.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_variant_calling.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestVariantCalling: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestVariantCalling + dockstore_pipeline_name: VariantCalling + pipeline_dir: pipelines/broad/dna_seq/germline/variant_calling + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git 
a/.github/workflows/test_whole_genome_germline_single_sample.yml b/.github/workflows/test_whole_genome_germline_single_sample.yml new file mode 100644 index 0000000000..c79c30de75 --- /dev/null +++ b/.github/workflows/test_whole_genome_germline_single_sample.yml @@ -0,0 +1,76 @@ +name: Test WholeGenomeGermlineSingleSample + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/dna_seq/germline/single_sample/wgs/**' + # tasks from the pipeline WDL and their dependencies + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/DragmapAlignment.wdl' + - 'tasks/broad/SplitLargeReadGroup.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/DragenTasks.wdl' + # verification WDL and its dependencies + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_whole_genome_germline_single_sample.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestWholeGenomeGermlineSingleSample: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestWholeGenomeGermlineSingleSample + dockstore_pipeline_name: WholeGenomeGermlineSingleSample + pipeline_dir: pipelines/broad/dna_seq/germline/single_sample/wgs + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/test_whole_genome_reprocessing.yml b/.github/workflows/test_whole_genome_reprocessing.yml new file mode 100644 index 0000000000..0abff328cc --- /dev/null +++ b/.github/workflows/test_whole_genome_reprocessing.yml @@ -0,0 +1,79 @@ +name: Test WholeGenomeReprocessing + +# Controls when the workflow will run +on: + pull_request: + branches: [ "develop", "staging", "master" ] + # Only run if files in these paths changed: + #################################### + # SET PIPELINE SPECIFIC 
PATHS HERE # + #################################### + paths: + # anything in the pipelines folder + - 'pipelines/broad/reprocessing/wgs/**' + - 'pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl' + # tasks from the pipeline WDL and their dependencies + - 'structs/dna_seq/DNASeqStructs.wdl' + - 'tasks/broad/Alignment.wdl' + - 'tasks/broad/DragmapAlignment.wdl' + - 'tasks/broad/SplitLargeReadGroup.wdl' + - 'tasks/broad/BamProcessing.wdl' + - 'tasks/broad/Utilities.wdl' + - 'tasks/broad/Qc.wdl' + - 'tasks/broad/AggregatedBamQC.wdl' + - 'tasks/broad/BamToCram.wdl' + - 'pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl' + - 'pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl' + - 'tasks/broad/GermlineVariantDiscovery.wdl' + - 'tasks/broad/DragenTasks.wdl' + # verification WDL and its dependencies + - 'verification/VerifyExomeReprocessing.wdl' + - 'verification/VerifyGermlineSingleSample.wdl' + - 'verification/VerifyMetrics.wdl' + - 'verification/VerifyTasks.wdl' + # test WDL and its dependencies + - 'verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl' + - 'tasks/broad/TerraCopyFilesFromCloudToCloud.wdl' + # this file, the subworkflow file, and the firecloud_api script + - '.github/workflows/test_whole_genome_reprocessing.yml' + - '.github/workflows/warp_test_workflow.yml' + - 'scripts/firecloud_api/firecloud_api.py' + + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + inputs: + useCallCache: + description: 'Use call cache (default: true)' + required: false + default: "true" + updateTruth: + description: 'Update truth files (default: false)' + required: false + default: "false" + testType: + description: 'Specify the type of test (Plumbing or Scientific)' + required: false + type: choice + options: + - Plumbing + - Scientific + truthBranch: + description: 'Specify the branch for truth files (default: master)' + required: false + default: "master" + +jobs: + TestWholeGenomeReprocessing: + uses: ./.github/workflows/warp_test_workflow.yml + with: + pipeline_name: TestWholeGenomeReprocessing + dockstore_pipeline_name: WholeGenomeReprocessing + pipeline_dir: pipelines/broad/reprocessing/wgs + use_call_cache: ${{ github.event.inputs.useCallCache || 'true' }} + update_truth: ${{ github.event.inputs.updateTruth || 'false' }} + test_type: ${{ github.event.inputs.testType }} + truth_branch: ${{ github.event.inputs.truthBranch || 'master' }} + secrets: + PDT_TESTER_SA_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/warp_test_workflow.yml b/.github/workflows/warp_test_workflow.yml new file mode 100644 index 0000000000..7bbded37aa --- /dev/null +++ b/.github/workflows/warp_test_workflow.yml @@ -0,0 +1,443 @@ +name: Reusable WARP Test Workflow + +on: + workflow_call: + inputs: + pipeline_name: + required: true + type: string + description: 'Name of the pipeline to test' + dockstore_pipeline_name: + required: true + type: string + description: 'Name of the pipeline in Dockstore' + pipeline_dir: + required: true + type: string + description: 'Directory containing the pipeline' + use_call_cache: + required: false + type: string + default: 'true' + description: 'Use call cache' + update_truth: + required: false + type: string + default: 'false' + description: 'Update truth files' + test_type: + required: false + type: string + default: 'Plumbing' + description: 'Type of test (Plumbing or 
Scientific)' + truth_branch: + required: false + type: string + default: 'master' + description: 'Branch for truth files' + + secrets: + PDT_TESTER_SA_B64: + required: true + DOCKSTORE_TOKEN: + required: true + +env: + TESTING_WORKSPACE: WARP Tests + WORKSPACE_NAMESPACE: warp-pipelines + SA_JSON_B64: ${{ secrets.PDT_TESTER_SA_B64 }} + USER: pdt-tester@warp-pipeline-dev.iam.gserviceaccount.com + +jobs: + test_pipeline: + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + actions: write + + steps: + # Step 1: Checkout code + # Purpose: Clones the repository code at the specified reference + - uses: actions/checkout@v3 + with: + ref: ${{ github.ref }} + + # Step 2: Setup Python + # Purpose: Installs Python 3.11 for running pipeline scripts + - name: Set up python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + # Step 3: Install Dependencies + # Purpose: Installs required Python packages for the pipeline + - name: Install dependencies + run: | + cd scripts/firecloud_api/ + pip install -r requirements.txt + + # Step 4: Set Branch Name + # Purpose: Determines and sets the correct branch name for either PR or direct commits + - name: Set Branch Name + id: set_branch + run: | + if [ -z "${{ github.head_ref }}" ]; then + echo "Branch name is missing, using ${GITHUB_REF##*/}" + echo "BRANCH_NAME=${GITHUB_REF##*/}" >> $GITHUB_ENV + else + echo "Branch name from PR: ${{ github.head_ref }}" + echo "BRANCH_NAME=${{ github.head_ref }}" >> $GITHUB_ENV + fi + + # Step 5: Set Test Type + # Purpose: Determines and sets the correct test type based on the branch name + - name: Set Test Type + id: set_test_type + run: | + if [ "${{ github.event_name }}" == "pull_request" ]; then + # For PRs, set based on target branch + if [ "${{ github.base_ref }}" == "master" ]; then + # If PR is targeting master branch, run Scientific tests + echo "testType=Scientific" >> $GITHUB_ENV + echo "testType=Scientific" + else + # If PR targets any other branch (develop, staging), run Plumbing tests + echo "testType=Plumbing" >> $GITHUB_ENV + echo "testType=Plumbing" + fi + else + # For manual workflow runs (workflow_dispatch) + echo "testType=${{ inputs.test_type }}" >> $GITHUB_ENV + echo "testType=${{ inputs.test_type }}" + fi + + # Step 6: Create Method Configuration + # Purpose: Sets up the testing configuration in Terra workspace + - name: Create new method configuration + run: | + # Wait 5.5 minutes for Dockstore to update + echo "Waiting for Dockstore to update..." 
+ sleep 330 + + echo "Creating new method configuration for branch: $BRANCH_NAME" + + METHOD_CONFIG_NAME=$(python3 scripts/firecloud_api/firecloud_api.py \ + create_new_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "${{ inputs.pipeline_name }}" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER") + + echo "METHOD_CONFIG_NAME=$METHOD_CONFIG_NAME" >> $GITHUB_ENV + + # Step 7: Cancel Previous Runs + # Purpose: Cancels previous GHA workflows from the same branch (regardless of plumbing or scientific test type) + # to avoid running multiple tests at the same time + - name: Cancel Previous GHA Runs + uses: styfle/cancel-workflow-action@0.11.0 + with: + access_token: ${{ github.token }} + all_but_latest: true + ignore_sha: true + + # Step 8: Cancel Previous Terra Submissions + # Purpose: Abort previous Terra submissions from the same branch to avoid running multiple tests at the same time + # Will not abort a Terra submission if it is a scientific test + - name: Cancel Previous Terra Submissions + if: ${{ !contains(env.METHOD_CONFIG_NAME, '_Scientific_') }} + run: | + python3 scripts/firecloud_api/firecloud_api.py \ + --workspace-namespace "${{ env.WORKSPACE_NAMESPACE }}" \ + --workspace-name "${{ env.TESTING_WORKSPACE }}" \ + --pipeline_name "${{ inputs.pipeline_name }}" \ + --branch_name "${{ env.BRANCH_NAME }}" \ + --sa-json-b64 "${{ secrets.PDT_TESTER_SA_B64 }}" \ + --user "${{ env.USER }}" \ + --test_type "$testType" \ + cancel_old_submissions + + # Step 9: Handle Git Commit Hash + # Purpose: Gets the correct Github commit hash for version tracking + - name: Determine Github Commit Hash + id: determine_github_commit_hash + run: | + if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then + echo "Using github.sha for manually triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.sha }}" >> $GITHUB_ENV + elif [ "${{ github.event_name }}" == "pull_request" ]; then + echo "Using github.event.pull_request.head.sha for PR-triggered workflow." + echo "GITHUB_COMMIT_HASH=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV + else + echo "Unsupported event type: ${{ github.event_name }}" + exit 1 + fi + + # Step 10: Compare Hashes + # Purpose: Compares the Dockstore and Github commit hashes to ensure they match + - name: Compare Dockstore and Github Commit Hashes with Retry + id: compare_hashes + run: | + + MAX_WAIT_TIME=$((15 * 60)) # 15 minutes in seconds + WAIT_INTERVAL=60 # 1 minute in seconds + TOTAL_WAITED=0 + + echo "Starting hash comparison with retry mechanism..." + + while [ $TOTAL_WAITED -lt $MAX_WAIT_TIME ]; do + echo "Fetching Dockstore Commit Hash..." + DOCKSTORE_COMMIT_HASH=$(python scripts/dockstore_api/fetch_dockstore_commit.py \ + $DOCKSTORE_TOKEN \ + ${{ inputs.dockstore_pipeline_name }} \ + $BRANCH_NAME) + echo "Fetched Dockstore Commit Hash: $DOCKSTORE_COMMIT_HASH" + + echo "GitHub Commit Hash: $GITHUB_COMMIT_HASH" + + if [ "$DOCKSTORE_COMMIT_HASH" == "$GITHUB_COMMIT_HASH" ]; then + echo "Success: The Dockstore Commit Hash matches the GitHub Commit Hash." + exit 0 + else + echo "Mismatch found: $DOCKSTORE_COMMIT_HASH != $GITHUB_COMMIT_HASH" + echo "Retrying in $WAIT_INTERVAL seconds..." + sleep $WAIT_INTERVAL + TOTAL_WAITED=$((TOTAL_WAITED + WAIT_INTERVAL)) + fi + done + + echo "Error: The Dockstore Commit Hash does not match the GitHub Commit Hash after 15 minutes of retries!" 
+ exit 1 + env: + GITHUB_COMMIT_HASH: ${{ env.GITHUB_COMMIT_HASH }} + DOCKSTORE_TOKEN: ${{ secrets.DOCKSTORE_TOKEN }} + + # Step 11: Run Tests + # Purpose: Main testing step - runs the pipeline and collects results + - name: Update test inputs, Upload to Terra, Submit, Monitor and Retrieve Outputs + run: | + UPDATE_TRUTH="${{ inputs.update_truth }}" + USE_CALL_CACHE="${{ inputs.use_call_cache }}" + TRUTH_BRANCH="${{ inputs.truth_branch }}" + CURRENT_TIME=$(date +"%Y-%m-%d-%H-%M-%S") + MAX_RETRIES=2 + RETRY_DELAY=300 # 300 seconds = 5 minutes + # Initialize variables to aggregate statuses and outputs + ALL_WORKFLOW_STATUSES="Workflow ID | Status"$'\n'"--- | ---" + ALL_OUTPUTS="" + declare -a SUBMISSION_IDS + declare -A WORKFLOW_STATUSES + OVERALL_SUCCESS=true + + # Convert UPDATE_TRUTH and USE_CALL_CACHE to a boolean-friendly format ("true" -> true, "false" -> false) + if [ "$UPDATE_TRUTH" = "true" ]; then + UPDATE_TRUTH_BOOL=true + else + UPDATE_TRUTH_BOOL=false + fi + + if [ "$USE_CALL_CACHE" == "true" ]; then + USE_CALL_CACHE_BOOL=true + else + USE_CALL_CACHE_BOOL=false + fi + + TEST_TYPE="${{ env.testType }}" + INPUTS_DIR="${{ inputs.pipeline_dir }}/test_inputs/$TEST_TYPE" + echo "Running tests with test type: $TEST_TYPE" + + TRUTH_PATH="gs://broad-gotc-test-storage/${{ inputs.dockstore_pipeline_name }}/truth/$(echo "$TEST_TYPE" | tr '[:upper:]' '[:lower:]')/$TRUTH_BRANCH" + echo "Truth path: $TRUTH_PATH" + RESULTS_PATH="gs://broad-gotc-test-storage/${{ inputs.dockstore_pipeline_name }}/results/$CURRENT_TIME" + + # Submit all jobs first and store their submission IDs + for input_file in "$INPUTS_DIR"/*.json; do + test_input_file=$(python3 scripts/firecloud_api/UpdateTestInputs.py --truth_path "$TRUTH_PATH" \ + --results_path "$RESULTS_PATH" \ + --inputs_json "$input_file" \ + --update_truth "$UPDATE_TRUTH_BOOL" \ + --branch_name "$BRANCH_NAME" ) + echo "Uploading the test input file: $test_input_file" + + # Create the submission_data.json file for this input_file + input_file_filename=$(basename $input_file) + SUBMISSION_DATA_FILE="submission_data.json" + printf '{ + "methodConfigurationNamespace": "%s", + "methodConfigurationName": "%s_%s_%s", + "useCallCache": %s, + "deleteIntermediateOutputFiles": false, + "useReferenceDisks": true, + "memoryRetryMultiplier": 1.2, + "workflowFailureMode": "NoNewCalls", + "userComment": "%s", + "ignoreEmptyOutputs": false + }' "$WORKSPACE_NAMESPACE" "${{ inputs.pipeline_name }}" "$TEST_TYPE" "$BRANCH_NAME" "$USE_CALL_CACHE_BOOL" "$input_file_filename" > "$SUBMISSION_DATA_FILE" + + echo "Created submission data file: $SUBMISSION_DATA_FILE" + cat "$SUBMISSION_DATA_FILE" + + # Upload the test inputs to Terra + python3 scripts/firecloud_api/firecloud_api.py \ + upload_test_inputs \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "${{ inputs.pipeline_name }}" \ + --test_input_file "$test_input_file" \ + --branch_name "$BRANCH_NAME" \ + --sa-json-b64 "$SA_JSON_B64" \ + --test_type "$TEST_TYPE" \ + --user "$USER" + + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + SUBMISSION_ID=$(python3 scripts/firecloud_api/firecloud_api.py submit_job \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --submission_data_file "$SUBMISSION_DATA_FILE") + + echo "Submission ID: $SUBMISSION_ID" + + if [[ "$SUBMISSION_ID" == *"404"* || -z "$SUBMISSION_ID" ]]; then + echo "Error in submission, retrying in $RETRY_DELAY 
seconds..." + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + echo "Submission successful. Submission ID: $SUBMISSION_ID" + SUBMISSION_IDS+=("$SUBMISSION_ID") + break + done + done + + echo "All jobs have been submitted. Starting to poll for statuses..." + + # Poll for statuses of all jobs + for SUBMISSION_ID in "${SUBMISSION_IDS[@]}"; do + attempt=1 + while [ $attempt -le $MAX_RETRIES ]; do + echo "Polling for Submission ID: $SUBMISSION_ID" + RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py poll_job_status \ + --submission_id "$SUBMISSION_ID" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --workspace-namespace "$WORKSPACE_NAMESPACE" \ + --workspace-name "$TESTING_WORKSPACE") + + if [ -z "$RESPONSE" ]; then + echo "Failed to retrieve Workflow IDs for submission: $SUBMISSION_ID" + OVERALL_SUCCESS=false + ((attempt++)) + if [ $attempt -gt $MAX_RETRIES ]; then + echo "Max retries reached. Exiting..." + exit 1 + fi + sleep $RETRY_DELAY + continue + fi + + WORKFLOW_STATUSES_FOR_SUBMISSION=$(echo "$RESPONSE" | jq -r 'to_entries | map(.key + " | " + .value) | .[]') + WORKFLOW_STATUSES["$SUBMISSION_ID"]="$WORKFLOW_STATUSES_FOR_SUBMISSION" + + # Check if any workflow failed or errored + FAILED_WORKFLOWS=$(echo "$RESPONSE" | jq -r 'to_entries | .[] | select(.value == "Failed" or .value == "Aborted" or .value == "Aborting") | .key') + if [ ! -z "$FAILED_WORKFLOWS" ]; then + echo "Failed workflows detected:" + echo "$FAILED_WORKFLOWS" + OVERALL_SUCCESS=false + fi + + # retrieve workflow outputs + echo "Retrieving workflow outputs for Submission ID: $SUBMISSION_ID..." + + for WORKFLOW_ID in $(echo "$RESPONSE" | jq -r 'keys[]'); do + WORKFLOW_OUTPUT=$(python3 scripts/firecloud_api/firecloud_api.py get_workflow_outputs \ + --user "$USER" \ + --sa-json-b64 "$SA_JSON_B64" \ + --submission_id "$SUBMISSION_ID" \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --workflow_id "$WORKFLOW_ID" \ + --pipeline_name "${{ inputs.pipeline_name }}") + ALL_OUTPUTS+="$WORKFLOW_OUTPUT"$'\n' + done + break + done + done + + # Generate final summary + FINAL_SUMMARY="## Combined Workflow Statuses\n\n" + + # Add all workflow statuses to the summary + for SUBMISSION_ID in "${!WORKFLOW_STATUSES[@]}"; do + SUBMISSION_URL="https://app.terra.bio/#workspaces/$WORKSPACE_NAMESPACE/WARP%20Tests/job_history/$SUBMISSION_ID" + # Add the Submission ID as a hyperlink + FINAL_SUMMARY+="[Submission ID: $SUBMISSION_ID]($SUBMISSION_URL)\n" + # Add the workflows and statuses for this submission + FINAL_SUMMARY+="${WORKFLOW_STATUSES[$SUBMISSION_ID]}\n\n" + done + + echo -e "$FINAL_SUMMARY" >> $GITHUB_STEP_SUMMARY + + if [ "$OVERALL_SUCCESS" = false ]; then + echo "" + echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "One or more workflows failed in Terra. Check the workflow status summary for details." 
+ echo "" + echo "****************************************************************************************" + echo "****************************************************************************************" + echo "" + echo "" + exit 1 + fi + + # Step 12: Cleanup + # Purpose: Ensures cleanup of Terra method configurations regardless of test outcome + - name: Delete Method Configuration + if: always() + run: | + echo "Deleting method configuration for branch: $BRANCH_NAME" + DELETE_RESPONSE=$(python3 scripts/firecloud_api/firecloud_api.py delete_method_config \ + --workspace-namespace $WORKSPACE_NAMESPACE \ + --workspace-name "$TESTING_WORKSPACE" \ + --pipeline_name "${{ inputs.pipeline_name }}" \ + --branch_name "$BRANCH_NAME" \ + --test_type "$testType" \ + --sa-json-b64 "$SA_JSON_B64" \ + --user "$USER" \ + --method_config_name "$METHOD_CONFIG_NAME") + + echo "Delete response: $DELETE_RESPONSE" + if [ "$DELETE_RESPONSE" == "True" ]; then + echo "Method configuration deleted successfully." + else + echo "Error: Method configuration deletion failed." + exit 1 + fi + + # Step 13: Print Summary on Success + # Purpose: Prints the final summary of the pipeline execution in case of success + - name: Print Summary on Success + if: success() + run: | + echo "# :white_check_mark: Pipeline Execution Summary :white_check_mark:" >> $GITHUB_STEP_SUMMARY + + # Step 14: Print Summary on Failure + # Purpose: Prints the final summary of the pipeline execution in case of failure + - name: Print Summary on Failure + if: failure() + run: | + echo "# :x: Pipeline Execution Summary (on Failure) :x: " >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/pipeline_versions.txt b/pipeline_versions.txt index 0aafa27cf9..bc8fa78585 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -1,40 +1,42 @@ Pipeline Name Version Date of Last Commit -IlluminaGenotypingArray 1.12.24 2024-11-04 +Arrays 2.6.30 2024-11-04 +ValidateChip 1.16.7 2024-11-04 +ArrayImputationQuotaConsumed 1.0.0 2025-02-24 +ImputationBeagle 1.0.0 2025-02-26 +Imputation 1.1.16 2025-02-24 +MultiSampleArrays 1.6.2 2024-08-02 WholeGenomeReprocessing 3.3.3 2024-11-04 +ExomeReprocessing 3.3.3 2024-11-04 +CramToUnmappedBams 1.1.3 2024-08-02 ExternalWholeGenomeReprocessing 2.3.3 2024-11-04 ExternalExomeReprocessing 3.3.3 2024-11-04 -CramToUnmappedBams 1.1.3 2024-08-02 -ExomeReprocessing 3.3.3 2024-11-04 -GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04 +BroadInternalArrays 1.1.14 2024-11-04 +BroadInternalImputation 1.1.15 2025-02-24 +BroadInternalRNAWithUMIs 1.0.36 2024-11-04 +BroadInternalUltimaGenomics 1.1.3 2024-12-05 +RNAWithUMIsPipeline 1.0.18 2024-11-04 +IlluminaGenotypingArray 1.12.24 2024-11-04 +AnnotationFiltration 1.2.7 2024-11-04 UltimaGenomicsWholeGenomeCramOnly 1.0.23 2024-11-04 -WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 +GDCWholeGenomeSomaticSingleSample 1.3.4 2024-11-04 UltimaGenomicsWholeGenomeGermline 1.1.3 2024-12-05 +WholeGenomeGermlineSingleSample 3.3.3 2024-11-04 ExomeGermlineSingleSample 3.2.3 2024-11-04 -JointGenotyping 1.7.2 2024-11-04 +VariantCalling 2.2.4 2024-11-04 ReblockGVCF 2.4.0 2024-12-05 UltimaGenomicsJointGenotyping 1.2.2 2024-11-04 -JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 JointGenotypingByChromosomePartOne 1.5.2 2024-11-04 -VariantCalling 2.2.4 2024-11-04 +JointGenotypingByChromosomePartTwo 1.5.2 2024-11-04 +JointGenotyping 1.7.2 2024-11-04 CheckFingerprint 1.0.22 2024-10-28 -RNAWithUMIsPipeline 1.0.18 2024-11-04 -BroadInternalUltimaGenomics 1.1.3 2024-12-05 
-BroadInternalRNAWithUMIs 1.0.36 2024-11-04 -BroadInternalImputation 1.1.14 2024-11-04 -BroadInternalArrays 1.1.14 2024-11-04 -Imputation 1.1.15 2024-11-04 -MultiSampleArrays 1.6.2 2024-08-02 -ValidateChip 1.16.7 2024-11-04 -Arrays 2.6.30 2024-11-04 -AnnotationFiltration 1.2.7 2024-11-04 -Multiome 5.10.0 2025-02-03 -snm3C 4.0.4 2024-08-06 -SlideSeq 3.4.8 2025-01-13 scATAC 1.3.2 2023-08-03 -BuildIndices 4.0.0 2025-01-17 MultiSampleSmartSeq2 2.2.22 2024-09-11 -Optimus 7.9.1 2025-01-13 -atac 2.7.0 2025-02-03 -PairedTag 1.10.1 2025-02-03 +BuildIndices 4.0.0 2025-01-17 +SlideSeq 3.4.9 2025-02-25 +PairedTag 1.10.2 2025-02-25 +MultiSampleSmartSeq2SingleNucleus 2.0.8 2025-02-25 +atac 2.7.1 2025-02-25 +snm3C 4.0.4 2024-08-06 SmartSeq2SingleSample 5.1.21 2024-09-11 -MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 +Optimus 7.9.2 2025-02-25 +Multiome 5.11.0 2025-02-25 diff --git a/pipelines/broad/arrays/imputation/Imputation.changelog.md b/pipelines/broad/arrays/imputation/Imputation.changelog.md index 52765e4ec1..5030cf3f05 100644 --- a/pipelines/broad/arrays/imputation/Imputation.changelog.md +++ b/pipelines/broad/arrays/imputation/Imputation.changelog.md @@ -1,3 +1,8 @@ +# 1.1.16 +2025-02-24 (Date of Last Commit) + +* Updated runtime parameters in some ImputationTasks, and added an explicit definition of a vcf_index. + # 1.1.15 2024-11-04 (Date of Last Commit) diff --git a/pipelines/broad/arrays/imputation/Imputation.wdl b/pipelines/broad/arrays/imputation/Imputation.wdl index 4a44ba4ac5..3466169b64 100644 --- a/pipelines/broad/arrays/imputation/Imputation.wdl +++ b/pipelines/broad/arrays/imputation/Imputation.wdl @@ -6,7 +6,7 @@ import "../../../../tasks/broad/Utilities.wdl" as utils workflow Imputation { - String pipeline_version = "1.1.15" + String pipeline_version = "1.1.16" input { Int chunkLength = 25000000 @@ -242,6 +242,7 @@ workflow Imputation { call tasks.SelectVariantsByIds { input: vcf = SetIdsVcfToImpute.output_vcf, + vcf_index = SetIdsVcfToImpute.output_vcf_index, ids = FindSitesUniqueToFileTwoOnly.missing_sites, basename = "imputed_sites_to_recover" } diff --git a/pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.changelog.md b/pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.changelog.md new file mode 100644 index 0000000000..978888b711 --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.changelog.md @@ -0,0 +1,4 @@ +# 1.0.0 +2025-02-24 (Date of Last Commit) + +* Initial release of pipeline to calculate the number of samples, i.e. quota used by an imputation service that uses ImputationBeagle.wdl. 
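For context, the quota metric here is simply the number of sample columns in the input multi-sample VCF; the QuotaConsumed workflow below calls tasks.CountSamples and returns its nSamples output. A minimal Python sketch of that count, assuming a standard VCF header with the nine fixed columns through FORMAT (the real CountSamples task lives in ImputationTasks.wdl and is not part of this diff):

```python
import gzip

def count_vcf_samples(vcf_path: str) -> int:
    """Count sample columns in a VCF by reading its #CHROM header line.
    Illustrative only; the pipeline's CountSamples task is defined in ImputationTasks.wdl."""
    opener = gzip.open if vcf_path.endswith(".gz") else open
    with opener(vcf_path, "rt") as vcf:
        for line in vcf:
            if line.startswith("#CHROM"):
                # Columns CHROM..FORMAT are fixed (9); everything after them is a sample.
                return max(0, len(line.rstrip("\n").split("\t")) - 9)
    raise ValueError("No #CHROM header line found")

# The service's quota_consumed output is just this sample count.
```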
diff --git a/pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.wdl b/pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.wdl new file mode 100644 index 0000000000..a4cd6e8d09 --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/ArrayImputationQuotaConsumed.wdl @@ -0,0 +1,29 @@ +version 1.0 + +import "../../../../tasks/broad/ImputationTasks.wdl" as tasks + +workflow QuotaConsumed { + String pipeline_version = "1.0.0" + + input { + Int chunkLength = 25000000 + Int chunkOverlaps = 5000000 + + File multi_sample_vcf + + File ref_dict + Array[String] contigs + String reference_panel_path_prefix + String genetic_maps_path + String output_basename + } + + call tasks.CountSamples { + input: + vcf = multi_sample_vcf + } + + output { + Int quota_consumed = CountSamples.nSamples + } +} diff --git a/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.changelog.md b/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.changelog.md new file mode 100644 index 0000000000..ddc7604697 --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.changelog.md @@ -0,0 +1,5 @@ +# 1.0.0 +2025-02-26 (Date of Last Commit) + +* Initial public release of the ImputationBeagle pipeline. + * The ImputationBeagle pipeline imputes missing genotypes from a multi-sample VCF using a large genomic reference panel. It is based on the Michigan Imputation Server pipeline but uses the Beagle imputation tool instead of minimac. Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. It outputs the imputed VCF. diff --git a/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl b/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl new file mode 100644 index 0000000000..64d058b965 --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl @@ -0,0 +1,226 @@ +version 1.0 + +import "../../../../structs/imputation/ImputationBeagleStructs.wdl" as structs +import "../../../../tasks/broad/ImputationTasks.wdl" as tasks +import "../../../../tasks/broad/ImputationBeagleTasks.wdl" as beagleTasks + +workflow ImputationBeagle { + + String pipeline_version = "1.0.0" + + input { + Int chunkLength = 25000000 + Int chunkOverlaps = 2000000 # this is the padding that will be added to the beginning and end of each chunk to reduce edge effects + + File multi_sample_vcf + + File ref_dict # for reheadering / adding contig lengths in the header of the ouptut VCF, and calculating contig lengths + Array[String] contigs + String reference_panel_path_prefix # path + file prefix to the bucket where the reference panel files are stored for all contigs + String genetic_maps_path # path to the bucket where genetic maps are stored for all contigs + String output_basename # the basename for intermediate and output files + + # file extensions used to find reference panel files + String bed_suffix = ".bed" + String bref3_suffix = ".bref3" + + String gatk_docker = "broadinstitute/gatk:4.6.0.0" + String ubuntu_docker = "ubuntu:20.04" + + Int? error_count_override + } + + call tasks.CountSamples { + input: + vcf = multi_sample_vcf + } + + call beagleTasks.CreateVcfIndex { + input: + vcf_input = multi_sample_vcf, + gatk_docker = gatk_docker + } + + Float chunkLengthFloat = chunkLength + + scatter (contig in contigs) { + # these are specific to hg38 - contig is format 'chr1' + String reference_basename = reference_panel_path_prefix + "." + contig + String genetic_map_filename = genetic_maps_path + "plink." 
+ contig + ".GRCh38.withchr.map" + + ReferencePanelContig referencePanelContig = { + "bed": reference_basename + bed_suffix, + "bref3": reference_basename + bref3_suffix, + "contig": contig, + "genetic_map": genetic_map_filename + } + + + call tasks.CalculateChromosomeLength { + input: + ref_dict = ref_dict, + chrom = referencePanelContig.contig, + ubuntu_docker = ubuntu_docker + } + + Int num_chunks = ceil(CalculateChromosomeLength.chrom_length / chunkLengthFloat) + + scatter (i in range(num_chunks)) { + String chunk_contig = referencePanelContig.contig + + Int start = (i * chunkLength) + 1 + Int startWithOverlaps = if (start - chunkOverlaps < 1) then 1 else start - chunkOverlaps + Int end = if (CalculateChromosomeLength.chrom_length < ((i + 1) * chunkLength)) then CalculateChromosomeLength.chrom_length else ((i + 1) * chunkLength) + Int endWithOverlaps = if (CalculateChromosomeLength.chrom_length < end + chunkOverlaps) then CalculateChromosomeLength.chrom_length else end + chunkOverlaps + String chunk_basename = referencePanelContig.contig + "_chunk_" + i + + # generate the chunked vcf file that will be used for imputation, including overlaps + call tasks.GenerateChunk { + input: + vcf = CreateVcfIndex.vcf, + vcf_index = CreateVcfIndex.vcf_index, + start = startWithOverlaps, + end = endWithOverlaps, + chrom = referencePanelContig.contig, + basename = chunk_basename, + gatk_docker = gatk_docker + } + + call beagleTasks.CountVariantsInChunks { + input: + vcf = GenerateChunk.output_vcf, + vcf_index = GenerateChunk.output_vcf_index, + panel_bed_file = referencePanelContig.bed, + gatk_docker = gatk_docker + } + + call beagleTasks.CheckChunks { + input: + var_in_original = CountVariantsInChunks.var_in_original, + var_also_in_reference = CountVariantsInChunks.var_also_in_reference + } + } + + Array[File] chunkedVcfsWithOverlapsForImputation = GenerateChunk.output_vcf + + call tasks.StoreChunksInfo as StoreContigLevelChunksInfo { + input: + chroms = chunk_contig, + starts = start, + ends = end, + vars_in_array = CountVariantsInChunks.var_in_original, + vars_in_panel = CountVariantsInChunks.var_also_in_reference, + valids = CheckChunks.valid, + basename = output_basename + } + + # if any chunk for any chromosome fail CheckChunks, then we will not impute run any task in the next scatter, + # namely phasing and imputing which would be the most costly to throw away + Int n_failed_chunks_int = select_first([error_count_override, read_int(StoreContigLevelChunksInfo.n_failed_chunks)]) + call beagleTasks.ErrorWithMessageIfErrorCountNotZero as FailQCNChunks { + input: + errorCount = n_failed_chunks_int, + message = "contig " + referencePanelContig.contig + " had " + n_failed_chunks_int + " failing chunks" + } + + scatter (i in range(num_chunks)) { + String chunk_basename_imputed = referencePanelContig.contig + "_chunk_" + i + "_imputed" + + # max amount of cpus you can ask for is 96 so at a max of 10k samples we can only ask for 9 cpu a sample. 
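+ # Worked sizing example (illustrative, not part of this diff): a 5,000-sample input gives
+ # beagle_cpu = floor(5000 / 1000) * 9 = 45, beagle_phase_memory_in_gb = ceil(45 * 1.5) = 68,
+ # and beagle_impute_memory_in_gb = ceil(45 * 4.3) = 194, staying under the 96-CPU ceiling.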
+ # these values are based on trying to optimize for pre-emptibility using a 400k sample reference panel + # and up to a 10k sample input vcf + Int beagle_cpu = if (CountSamples.nSamples <= 1000) then 8 else floor(CountSamples.nSamples / 1000) * 9 + Int beagle_phase_memory_in_gb = if (CountSamples.nSamples <= 1000) then 22 else ceil(beagle_cpu * 1.5) + Int beagle_impute_memory_in_gb = if (CountSamples.nSamples <= 1000) then 30 else ceil(beagle_cpu * 4.3) + + call beagleTasks.Phase { + input: + dataset_vcf = chunkedVcfsWithOverlapsForImputation[i], + ref_panel_bref3 = referencePanelContig.bref3, + chrom = referencePanelContig.contig, + basename = chunk_basename_imputed, + genetic_map_file = referencePanelContig.genetic_map, + start = start[i], + end = end[i], + cpu = beagle_cpu, + memory_mb = beagle_phase_memory_in_gb * 1024, + for_dependency = FailQCNChunks.done + } + + call beagleTasks.Impute { + input: + dataset_vcf = Phase.vcf, + ref_panel_bref3 = referencePanelContig.bref3, + chrom = referencePanelContig.contig, + basename = chunk_basename_imputed, + genetic_map_file = referencePanelContig.genetic_map, + start = start[i], + end = end[i], + cpu = beagle_cpu, + memory_mb = beagle_impute_memory_in_gb * 1024 + } + + call beagleTasks.CreateVcfIndex as IndexImputedBeagle { + input: + vcf_input = Impute.vcf, + gatk_docker = gatk_docker + } + + call tasks.UpdateHeader { + input: + vcf = IndexImputedBeagle.vcf, + vcf_index = IndexImputedBeagle.vcf_index, + ref_dict = ref_dict, + basename = chunk_basename_imputed, + disable_sequence_dictionary_validation = false, + gatk_docker = gatk_docker + } + + call tasks.SeparateMultiallelics { + input: + original_vcf = UpdateHeader.output_vcf, + original_vcf_index = UpdateHeader.output_vcf_index, + output_basename = chunk_basename_imputed + } + + call tasks.RemoveSymbolicAlleles { + input: + original_vcf = SeparateMultiallelics.output_vcf, + original_vcf_index = SeparateMultiallelics.output_vcf_index, + output_basename = chunk_basename_imputed, + gatk_docker = gatk_docker + } + } + + Array[File] chromosome_vcfs = select_all(RemoveSymbolicAlleles.output_vcf) + } + + call tasks.GatherVcfs { + input: + input_vcfs = flatten(chromosome_vcfs), + output_vcf_basename = output_basename + ".imputed", + gatk_docker = gatk_docker + } + + call tasks.StoreChunksInfo { + input: + chroms = flatten(chunk_contig), + starts = flatten(start), + ends = flatten(end), + vars_in_array = flatten(CountVariantsInChunks.var_in_original), + vars_in_panel = flatten(CountVariantsInChunks.var_also_in_reference), + valids = flatten(CheckChunks.valid), + basename = output_basename + } + + output { + File imputed_multi_sample_vcf = GatherVcfs.output_vcf + File imputed_multi_sample_vcf_index = GatherVcfs.output_vcf_index + File chunks_info = StoreChunksInfo.chunks_info + } + + meta { + allowNestedInputs: true + } + +} diff --git a/pipelines/broad/arrays/imputation_beagle/README.md b/pipelines/broad/arrays/imputation_beagle/README.md new file mode 100644 index 0000000000..754e416b5a --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/README.md @@ -0,0 +1,7 @@ +### ImputationBeagle summary + +The ImputationBeagle pipeline imputes missing genotypes from a multi-sample VCF using the [Beagle imputation tool](https://faculty.washington.edu/browning/beagle/beagle.html) and a large genomic reference panel. Overall, the pipeline filters, phases, and performs imputation on a multi-sample VCF. This pipeline was created for use by the All of Us/AnVIL Imputation Service. 
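As context for the scatter logic in ImputationBeagle.wdl above: each contig is split into chunkLength-sized windows, and chunkOverlaps of padding is added to both sides of every chunk to reduce edge effects before phasing and imputation. A minimal Python sketch of that boundary arithmetic, mirroring the start/end/startWithOverlaps/endWithOverlaps expressions in the WDL (illustrative only; the chromosome length shown is the approximate GRCh38 chr21 length):

```python
import math

def chunk_boundaries(chrom_length: int, chunk_length: int = 25_000_000, overlap: int = 2_000_000):
    """Yield 1-based (start_with_overlap, start, end, end_with_overlap) per chunk,
    mirroring the chunking arithmetic in ImputationBeagle.wdl."""
    num_chunks = math.ceil(chrom_length / chunk_length)
    for i in range(num_chunks):
        start = i * chunk_length + 1
        end = min(chrom_length, (i + 1) * chunk_length)
        yield (max(1, start - overlap), start, end, min(chrom_length, end + overlap))

# GRCh38 chr21 (~46.7 Mb) yields two chunks; padding is clipped at the chromosome ends,
# so only interior chunk edges receive the full overlap.
for bounds in chunk_boundaries(46_709_983):
    print(bounds)
```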
+ +### ArrayImputationQuotaConsumed summary + +The ArrayImputationQuotaConsumed pipeline is used by the All of Us/AnVIL Imputation Service and calculates the number of samples in the input multi-sample VCF, which is the metric used by the service for ImputationBeagle pipeline quota. diff --git a/pipelines/broad/arrays/imputation_beagle/test_inputs/Plumbing/NA12878_x10_hg38_arrays.json b/pipelines/broad/arrays/imputation_beagle/test_inputs/Plumbing/NA12878_x10_hg38_arrays.json new file mode 100644 index 0000000000..bdf5a00597 --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/test_inputs/Plumbing/NA12878_x10_hg38_arrays.json @@ -0,0 +1,8 @@ +{ + "ImputationBeagle.multi_sample_vcf": "gs://broad-gotc-test-storage/imputation_beagle/scientific/vcfs/NA12878_10_duplicate.merged.cleaned.vcf.gz", + "ImputationBeagle.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ImputationBeagle.reference_panel_path_prefix": "gs://broad-gotc-test-storage/imputation_beagle/scientific/1000G_HGDP_no_singletons_reference_panel/hgdp.tgp.gwaspy.AN_added.bcf.ac2", + "ImputationBeagle.contigs": ["chr21","chr22"], + "ImputationBeagle.genetic_maps_path": "gs://broad-gotc-test-storage/imputation_beagle/scientific/plink-genetic-maps/", + "ImputationBeagle.output_basename": "plumbing_test" +} diff --git a/pipelines/broad/arrays/imputation_beagle/test_inputs/Scientific/NA12878_x10_hg38_arrays.json b/pipelines/broad/arrays/imputation_beagle/test_inputs/Scientific/NA12878_x10_hg38_arrays.json new file mode 100644 index 0000000000..4263609e29 --- /dev/null +++ b/pipelines/broad/arrays/imputation_beagle/test_inputs/Scientific/NA12878_x10_hg38_arrays.json @@ -0,0 +1,8 @@ +{ + "ImputationBeagle.multi_sample_vcf": "gs://broad-gotc-test-storage/imputation_beagle/scientific/vcfs/NA12878_10_duplicate.merged.cleaned.vcf.gz", + "ImputationBeagle.ref_dict": "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", + "ImputationBeagle.reference_panel_path_prefix": "gs://broad-gotc-test-storage/imputation_beagle/scientific/1000G_HGDP_no_singletons_reference_panel/hgdp.tgp.gwaspy.AN_added.bcf.ac2", + "ImputationBeagle.contigs": ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22"], + "ImputationBeagle.genetic_maps_path": "gs://broad-gotc-test-storage/imputation_beagle/scientific/plink-genetic-maps/", + "ImputationBeagle.output_basename": "scientific_test" +} diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md index a0930046d7..e4f328a7fb 100644 --- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md +++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.changelog.md @@ -1,3 +1,8 @@ +# 1.1.15 +2025-02-24 (Date of Last Commit) + +* Updated runtime parameters in some ImputationTasks, and added an explicit definition of a vcf_index. 
+ # 1.1.14 2024-11-04 (Date of Last Commit) diff --git a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl index 525ce85e00..27e16fa28e 100644 --- a/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl +++ b/pipelines/broad/internal/arrays/imputation/BroadInternalImputation.wdl @@ -9,7 +9,7 @@ workflow BroadInternalImputation { description: "Push outputs of Imputation.wdl to TDR dataset table ImputationOutputsTable and split out Imputation arrays into ImputationWideOutputsTable." allowNestedInputs: true } - String pipeline_version = "1.1.14" + String pipeline_version = "1.1.15" input { # inputs to wrapper task diff --git a/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json b/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json index 4a75091350..896592d9b6 100644 --- a/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json +++ b/pipelines/broad/reprocessing/cram_to_unmapped_bams/test_inputs/Plumbing/G96830.NA12878.WGS.json @@ -1,5 +1,5 @@ { - "CramToUnmappedBams.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/{TRUTH_BRANCH}/20k/NA12878_PLUMBING.cram", + "CramToUnmappedBams.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/master/20k/NA12878_PLUMBING.cram", "CramToUnmappedBams.output_map": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/G96830.NA12878/readgroupid_to_bamfilename_map.txt", "CramToUnmappedBams.base_file_name": "G96830.NA12878.WGS", "CramToUnmappedBams.unmapped_bam_suffix": ".unmapped.bam", diff --git a/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json b/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json index 6a0f7293ca..da81415881 100644 --- a/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json +++ b/pipelines/broad/reprocessing/wgs/test_inputs/Plumbing/G96830.NA12878.json @@ -1,5 +1,5 @@ { - "WholeGenomeReprocessing.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/{TRUTH_BRANCH}/20k/NA12878_PLUMBING.cram", + "WholeGenomeReprocessing.input_cram": "gs://broad-gotc-test-storage/single_sample/plumbing/truth/master/20k/NA12878_PLUMBING.cram", "WholeGenomeReprocessing.output_map": "gs://broad-gotc-test-storage/germline_single_sample/wgs/plumbing/bams/G96830.NA12878/readgroupid_to_bamfilename_map.txt", "WholeGenomeReprocessing.sample_name": "NA12878 PLUMBING", diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 636f57ac71..d82db1d96b 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,11 @@ +# 2.7.1 +2025-02-25 (Date of Last Commit) + +* Added a new warning for peak calling step if the probability_threshold is too low, resutling in a null matrix after doublet filtering +* Updated the probability threshold default to 0.5 +* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline +* Added reference information to the BAM header + # 2.7.0 2025-02-03 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index 3c74822c93..4262e8321c 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -51,7 +51,7 @@ workflow ATAC { String 
adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.7.0" + String pipeline_version = "2.7.1" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" @@ -59,7 +59,7 @@ workflow ATAC { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Docker image names - String warp_tools_docker = "warp-tools:2.6.0" + String warp_tools_docker = "warp-tools:2.6.1" String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" @@ -353,6 +353,7 @@ task BWAPairedEndAlignment { Array[File] read1_fastq Array[File] read3_fastq File tar_bwa_reference + String reference_path = tar_bwa_reference String read_group_id = "RG1" String read_group_sample_name = "RGSN1" String suffix = "trimmed_adapters.fastq.gz" @@ -477,7 +478,11 @@ task BWAPairedEndAlignment { ls # rename file to this - mv final.sorted.bam ~{bam_aligned_output_name} + echo "Reheading BAM with reference" + /usr/temp/Open-Omics-Acceleration-Framework/applications/samtools/samtools view -H final.sorted.bam > header.txt + echo -e "@CO\tReference genome used: ~{reference_path}" >> header.txt + /usr/temp/Open-Omics-Acceleration-Framework/applications/samtools/samtools reheader header.txt final.sorted.bam > final.sorted.reheader.bam + mv final.sorted.reheader.bam ~{bam_aligned_output_name} echo "the present working dir" pwd @@ -668,7 +673,7 @@ task PeakCalling { Int min_counts = 5000 Int min_tsse = 10 Int max_counts = 100000 - Float probability_threshold = 1 + Float probability_threshold = 0.5 # Runtime attributes/docker String docker_path @@ -751,6 +756,10 @@ task PeakCalling { print("Filter doublets based on scrublet scores") snap.pp.filter_doublets(atac_data_mod, probability_threshold=probability_threshold) print(atac_data_mod) + + # Check if the matrix is empty + if atac_data_mod.n_obs == 0: + raise ValueError("Matrix is empty after filtering doublets: Try increasing the probability_threshold.") # Perform graph-based clustering to identify cell clusters. 
# Build a k-nearest neighbour graph using snap.pp.knn diff --git a/pipelines/skylab/atac/test_inputs/Plumbing/10k_pbmc_downsampled.json b/pipelines/skylab/atac/test_inputs/Plumbing/10k_pbmc_downsampled.json new file mode 100644 index 0000000000..2b0ead12a3 --- /dev/null +++ b/pipelines/skylab/atac/test_inputs/Plumbing/10k_pbmc_downsampled.json @@ -0,0 +1,23 @@ +{ + "ATAC.input_id":"10k_PBMC_downsampled", + "ATAC.cloud_provider":"gcp", + "ATAC.read1_fastq_gzipped":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R1_atac.fastq.gz" + ], + "ATAC.read2_fastq_gzipped":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R2_atac.fastq.gz" + ], + "ATAC.read3_fastq_gzipped":[ + "gs://broad-gotc-test-storage/Multiome/input/plumbing/fastq_R3_atac.fastq.gz" + ], + "ATAC.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar", + "ATAC.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes", + "ATAC.cpu_platform_bwa":"Intel Cascade Lake", + "ATAC.num_threads_bwa":"16", + "ATAC.mem_size_bwa":"64", + "ATAC.atac_nhash_id":"example_1234", + "ATAC.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", + "ATAC.whitelist":"gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_atac.txt", + "ATAC.vm_size":"Standard_M128s" + } + \ No newline at end of file diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index 2dd910c46a..2755db43a2 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,3 +1,10 @@ +# 5.11.0 +2025-02-25 (Date of Last Commit) + +* Refactored the Peak Calling step of Multiome to use the JoinBarcodes output h5ad as the input for peak calling, ensuring the h5ad files have both GEX and ATAC barcodes +* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline +* Added reference information to the BAM headers + # 5.10.0 2025-02-03 (Date of Last Commit) diff --git a/pipelines/skylab/multiome/Multiome.wdl b/pipelines/skylab/multiome/Multiome.wdl index d079e5c58a..855bb9df91 100644 --- a/pipelines/skylab/multiome/Multiome.wdl +++ b/pipelines/skylab/multiome/Multiome.wdl @@ -4,10 +4,11 @@ import "../../../pipelines/skylab/atac/atac.wdl" as atac import "../../../pipelines/skylab/optimus/Optimus.wdl" as optimus import "../../../tasks/skylab/H5adUtils.wdl" as H5adUtils import "../../../tasks/broad/Utilities.wdl" as utils +#import "../../../pipelines/skylab/atac/atac.wdl" as PeakCalling workflow Multiome { - String pipeline_version = "5.10.0" + String pipeline_version = "5.11.0" input { String cloud_provider @@ -60,7 +61,7 @@ workflow Multiome { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Define docker images - String snap_atac_docker_image = "snapatac2:1.0.4-2.3.1-1700590229" + String snap_atac_docker_image = "snapatac2:2.0.0" # Define all whitelist files File gcp_gex_whitelist = "gs://gcp-public-data--broad-references/RNA/resources/arc-v1/737K-arc-v1_gex.txt" @@ -124,7 +125,8 @@ workflow Multiome { atac_nhash_id = atac_nhash_id, adapter_seq_read3 = adapter_seq_read3, atac_expected_cells = expected_cells, - peak_calling = run_peak_calling + peak_calling = false + } call H5adUtils.JoinMultiomeBarcodes as JoinBarcodes { input: @@ -136,6 +138,16 @@ workflow Multiome { 
atac_fragment = Atac.fragment_file } + if (run_peak_calling) { + call atac.PeakCalling as PeakCalling { + input: + annotations_gtf = annotations_gtf, + metrics_h5ad = JoinBarcodes.atac_h5ad_file, + chrom_sizes = chrom_sizes, + output_base_name = input_id, + docker_path = docker_prefix + snap_atac_docker_image, + } + } meta { allowNestedInputs: true @@ -152,8 +164,8 @@ workflow Multiome { File fragment_file_index = JoinBarcodes.atac_fragment_tsv_index File snap_metrics_atac = JoinBarcodes.atac_h5ad_file File atac_library_metrics = Atac.library_metrics_file - File? cellbybin_h5ad_file = Atac.cellbybin_h5ad_file - File? cellbypeak_h5ad_file = Atac.cellbypeak_h5ad_file + File? cellbybin_h5ad_file = PeakCalling.cellbybin_h5ad + File? cellbypeak_h5ad_file = PeakCalling.cellbypeak_h5ad # optimus outputs File genomic_reference_version_gex = Optimus.genomic_reference_version diff --git a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json index a567f330b8..52232154c3 100644 --- a/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json +++ b/pipelines/skylab/multiome/test_inputs/Plumbing/10k_pbmc_downsampled_peakcall.json @@ -27,5 +27,6 @@ "Multiome.soloMultiMappers":"Uniform", "Multiome.gex_nhash_id":"example_1234", "Multiome.atac_nhash_id":"example_1234", - "Multiome.run_peak_calling":"true" + "Multiome.run_peak_calling": true, + "Multiome.PeakCalling.probability_threshold":"1.00" } diff --git a/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc_peakcall.json b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc_peakcall.json new file mode 100644 index 0000000000..9e923bd49e --- /dev/null +++ b/pipelines/skylab/multiome/test_inputs/Scientific/10k_pbmc_peakcall.json @@ -0,0 +1,39 @@ +{ + "Multiome.annotations_gtf":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf", + "Multiome.gex_i1_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_I1_001.fastq.gz", + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_I1_001.fastq.gz" + ], + "Multiome.input_id":"10k_PBMC", + "Multiome.cloud_provider":"gcp", + "Multiome.gex_r1_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R1_001.fastq.gz" + ], + "Multiome.gex_r2_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L001_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_gex_S1_L002_R2_001.fastq.gz" + ], + "Multiome.atac_r1_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R1_001.fastq.gz", + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R1_001.fastq.gz" + ], + "Multiome.atac_r2_fastq":[ + 
"gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R2_001.fastq.gz", + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R2_001.fastq.gz" + ], + "Multiome.atac_r3_fastq":[ + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L001_R3_001.fastq.gz", + "gs://broad-gotc-test-storage/Multiome/input/scientific/10k_PBMC_Multiome/10k_PBMC_Multiome_nextgem_Chromium_Controller_atac_S1_L002_R3_001.fastq.gz" + ], + "Multiome.tar_bwa_reference":"gs://gcp-public-data--broad-references/hg38/v0/bwa/v2_2_1/bwa-mem2-2.2.1-Human-GENCODE-build-GRCh38.tar", + "Multiome.tar_star_reference":"gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_star2.7.10a-Human-GENCODE-build-GRCh38-43.tar", + "Multiome.chrom_sizes":"gs://broad-gotc-test-storage/Multiome/input/hg38.chrom.sizes", + "Multiome.Atac.cpu_platform_bwa":"Intel Cascade Lake", + "Multiome.Atac.num_threads_bwa":"24", + "Multiome.Atac.mem_size_bwa":"175", + "Multiome.gex_nhash_id":"example_1234", + "Multiome.atac_nhash_id":"example_1234", + "Multiome.run_peak_calling":true, + "Multiome.PeakCalling.probability_threshold":"0.5" +} diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index a1df2a0575..3caaaba6c1 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,8 +1,13 @@ +# 7.9.2 +2025-02-25 (Date of Last Commit) + +* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline +* Added reference information to the BAM header + # 7.9.1 2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; set to false by default, but set to true if the Slide-Tags pipeline is calling Optimus - * Added reference_gtf_file to the output h5ad unstructured metadata # 7.9.0 diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index a975931245..02bb352300 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -80,7 +80,7 @@ workflow Optimus { } # version of this pipeline - String pipeline_version = "7.9.1" + String pipeline_version = "7.9.2" # this is used to scatter matched [r1_fastq, r2_fastq, i1_fastq] arrays Array[Int] indices = range(length(r1_fastq)) @@ -99,7 +99,7 @@ workflow Optimus { String pytools_docker = "pytools:1.0.0-1661263730" String empty_drops_docker = "empty-drops:1.0.1-4.2" String star_docker = "star:1.0.1-2.7.11a-1692706072" - String warp_tools_docker = "warp-tools:2.6.0" + String warp_tools_docker = "warp-tools:2.6.1" String star_merge_docker = "star-merge-npz:1.3.0" String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196" diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index 403d6ce5b5..7784bae401 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,3 +1,10 @@ +# 1.10.2 +2025-02-25 (Date of Last Commit) + +* Updated the SnapATAC2 docker image to the latest SnapATAC2, allowing for future peak calling implementation +* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline +* 
Added reference information to the BAM headers + # 1.10.1 2025-02-03 (Date of Last Commit) diff --git a/pipelines/skylab/paired_tag/PairedTag.wdl b/pipelines/skylab/paired_tag/PairedTag.wdl index 05d1b74b96..e475c9c650 100644 --- a/pipelines/skylab/paired_tag/PairedTag.wdl +++ b/pipelines/skylab/paired_tag/PairedTag.wdl @@ -8,7 +8,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow PairedTag { - String pipeline_version = "1.10.1" + String pipeline_version = "1.10.2" input { @@ -63,7 +63,7 @@ workflow PairedTag { # All docker images that are needed for tasks in this workflow String upstools_docker = "upstools:2.0.0" - String snapatac_docker = "snapatac2:1.0.4-2.3.1-1700590229" + String snapatac_docker = "snapatac2:2.0.0" # Prefixes based on cloud env String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json index 418063d6eb..e1e025c4eb 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BC011_BC015_downsampled.json @@ -22,7 +22,7 @@ "PairedTag.preindex":"true", "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake", "PairedTag.Atac_preindex.num_threads_bwa":"16", - "PairedTag.Atac_preindex.mem_size_bwa":"64", + "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", "PairedTag.cloud_provider": "gcp", "PairedTag.gex_nhash_id":"example_1234", diff --git a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json index f682f59a1c..102acb73ab 100644 --- a/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json +++ b/pipelines/skylab/paired_tag/test_inputs/Plumbing/BI015_downsampled.json @@ -22,7 +22,7 @@ "PairedTag.preindex":"false", "PairedTag.Atac_preindex.cpu_platform_bwa":"Intel Cascade Lake", "PairedTag.Atac_preindex.num_threads_bwa":"16", - "PairedTag.Atac_preindex.mem_size_bwa":"64", + "PairedTag.Atac_preindex.mem_size_bwa":"64", "PairedTag.soloMultiMappers":"Uniform", "PairedTag.cloud_provider": "gcp", "PairedTag.gex_nhash_id":"example_1234", diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index cc1ba27ed1..4f5b477c55 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,3 +1,8 @@ +# 3.4.9 +2025-02-25 (Date of Last Commit) +* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline +* Added reference information to the BAM header for Optimus and ATAC workflows; this does not impact Slideseq + # 3.4.8 2025-01-13 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index 94ba0a2ca4..0ac98d6063 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -25,7 +25,7 @@ import "../../../tasks/broad/Utilities.wdl" as utils workflow SlideSeq { - String pipeline_version = "3.4.8" + String pipeline_version = "3.4.9" input { Array[File] r1_fastq @@ -48,7 +48,7 @@ workflow SlideSeq { # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" - String warp_tools_docker = "warp-tools:2.6.0" + String warp_tools_docker 
= "warp-tools:2.6.1" String star_merge_docker = "star-merge-npz:1.3.0" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 2bfa234f11..99bed0d904 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,3 +1,9 @@ +# 2.0.8 +2025-02-25 (Date of Last Commit) + +* Updated the warp-tools docker image to include an update to the GroupQCs function in sctools; this does not affect the outputs of the pipeline +* Added reference information to the BAM header for Optimus and ATAC; does not impact snSS2 + # 2.0.7 2025-01-13 (Date of Last Commit) diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl index ded336b418..b576dca832 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl @@ -57,7 +57,7 @@ workflow MultiSampleSmartSeq2SingleNucleus { } # Version of this pipeline - String pipeline_version = "2.0.7" + String pipeline_version = "2.0.8" if (false) { String? none = "None" diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json index db8f68b114..587a4aa5e4 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json +++ b/pipelines/skylab/smartseq2_single_nucleus_multisample/test_inputs/Plumbing/mouse_example.json @@ -4,14 +4,14 @@ "MultiSampleSmartSeq2SingleNucleus.annotations_gtf": "gs://gcp-public-data--broad-references/mm10/v0/single_nucleus/modified_gencode.vM23.primary_assembly.annotation.gtf", "MultiSampleSmartSeq2SingleNucleus.adapter_list": "gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/adapters/Illumina_adapters_list.fa", "MultiSampleSmartSeq2SingleNucleus.fastq1_input_files": [ - "gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/plumbing/master/pr_test_truth/SM-GE644_S110_E1-50_GGAGCTAC-CTAAGCCT_R1.fastq.gz", - "gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/plumbing/master/pr_test_truth/SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA_R1.fastq.gz", - "gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/plumbing/master/pr_test_truth/SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT_R1.fastq.gz" + "gs://broad-gotc-test-storage/Smartseq2_Single_Nucleus_Multisample/truth/plumbing/master/pr_test_truth/SM-GE644_S110_E1-50_GGAGCTAC-CTAAGCCT_R1.fastq.gz", + "gs://broad-gotc-test-storage/Smartseq2_Single_Nucleus_Multisample/truth/plumbing/master/pr_test_truth/SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA_R1.fastq.gz", + "gs://broad-gotc-test-storage/Smartseq2_Single_Nucleus_Multisample/truth/plumbing/master/pr_test_truth/SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT_R1.fastq.gz" ], "MultiSampleSmartSeq2SingleNucleus.fastq2_input_files": [ - 
"gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/plumbing/master/pr_test_truth/SM-GE644_S110_E1-50_GGAGCTAC-CTAAGCCT_R2.fastq.gz", - "gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/plumbing/master/pr_test_truth/SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA_R2.fastq.gz", - "gs://broad-gotc-test-storage/MultiSampleSmartSeq2SingleNucleus/truth/plumbing/master/pr_test_truth/SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT_R2.fastq.gz" + "gs://broad-gotc-test-storage/Smartseq2_Single_Nucleus_Multisample/truth/plumbing/master/pr_test_truth/SM-GE644_S110_E1-50_GGAGCTAC-CTAAGCCT_R2.fastq.gz", + "gs://broad-gotc-test-storage/Smartseq2_Single_Nucleus_Multisample/truth/plumbing/master/pr_test_truth/SM-GE644_S117_E1-50_GCGTAGTA-AAGGAGTA_R2.fastq.gz", + "gs://broad-gotc-test-storage/Smartseq2_Single_Nucleus_Multisample/truth/plumbing/master/pr_test_truth/SM-GE644_S118_E1-50_GCGTAGTA-CTAAGCCT_R2.fastq.gz" ], "MultiSampleSmartSeq2SingleNucleus.input_ids": [ "SM-GE644_S110_E1-50_GGAGCTAC-CTAAGCCT", diff --git a/scripts/dockstore_api/fetch_dockstore_commit.py b/scripts/dockstore_api/fetch_dockstore_commit.py new file mode 100644 index 0000000000..430cd80a2d --- /dev/null +++ b/scripts/dockstore_api/fetch_dockstore_commit.py @@ -0,0 +1,51 @@ +import requests +import sys + +def fetch_commit_id(token, repository, version_name): + # Fetch the workflow data + url = f"https://dockstore.org/api/workflows/path/workflow/github.com%2Fbroadinstitute%2Fwarp%2F{repository}/published" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + } + + response = requests.get(url, headers=headers) + response.raise_for_status() + data = response.json() + + # Extract workflow ID and version ID + workflow_id = data.get("id") + version_id = next( + (version["id"] for version in data.get("workflowVersions", []) + if version["name"] == version_name), + None + ) + + if not workflow_id or not version_id: + raise ValueError("Workflow ID or Version ID could not be found.") + + # Fetch the specific version details to get the commit ID + version_url = f"https://dockstore.org/api/workflows/{workflow_id}/workflowVersions/{version_id}" + version_response = requests.get(version_url, headers=headers) + version_response.raise_for_status() + version_data = version_response.json() + + # Extract commit ID + commit_id = version_data.get("commitID") + if not commit_id: + raise ValueError("Commit ID could not be found.") + + return commit_id + +if __name__ == "__main__": + if len(sys.argv) != 4: + print("Usage: python fetch_dockstore_commit.py ") + sys.exit(1) + + _, token, repository, version_name = sys.argv + + try: + commit_id = fetch_commit_id(token, repository, version_name) + print(commit_id) + except Exception as e: + print(f"Error: {e}") \ No newline at end of file diff --git a/scripts/firecloud_api/UpdateTestInputs.py b/scripts/firecloud_api/UpdateTestInputs.py new file mode 100644 index 0000000000..1eb45d85e9 --- /dev/null +++ b/scripts/firecloud_api/UpdateTestInputs.py @@ -0,0 +1,128 @@ +import argparse +import json +import os +import ast +from decimal import Decimal + +def format_float(value): + """Format float to avoid scientific notation for small numbers.""" + if isinstance(value, (float, int)): + # Convert to Decimal for precise string representation + return str(Decimal(str(value))) + return value + +def update_test_inputs(inputs_json, truth_path, results_path, update_truth, branch_name): + with open(inputs_json, 'r') as file: + test_inputs = json.load(file) + + # Get the sample 
name from the test inputs JSON + sample_name = os.path.splitext(os.path.basename(inputs_json))[0] + + # Get the pipeline name from the test inputs JSON + pipeline_name = next(iter(test_inputs)).split('.')[0] + + # Append "Test" in front of the pipeline name + test_name = f"Test{pipeline_name}" + + # Update all keys and ensure nested inputs are handled correctly + updated_inputs = {} + for key, value in test_inputs.items(): + # Split the key to analyze its structure + key_parts = key.split('.') + + # Replace the top-level component with the test_name + key_parts[0] = test_name + + # For nested keys (more than two parts), append the original pipeline name with a `.` + if len(key_parts) > 2: + key_parts[1] = f"{pipeline_name}.{key_parts[1]}" + + # Reconstruct the updated key + new_key = '.'.join(key_parts) + + # Handle different value types appropriately + if isinstance(value, list): + processed_value = [] + for item in value: + if isinstance(item, str) and item.startswith('[') and item.endswith(']'): + try: + inner_list = ast.literal_eval(item) + processed_value.extend(inner_list) + except (ValueError, SyntaxError): + processed_value.append(item) + else: + processed_value.append(item) + updated_inputs[new_key] = processed_value + elif isinstance(value, float): + # Format float values to avoid scientific notation + updated_inputs[new_key] = format_float(value) + else: + updated_inputs[new_key] = value + + # Add the truth_path and results_path to the updated inputs + updated_inputs[f"{test_name}.results_path"] = f"{results_path}/{sample_name}/" + updated_inputs[f"{test_name}.truth_path"] = f"{truth_path}/{sample_name}/" + updated_inputs[f"{test_name}.update_truth"] = update_truth + + # Convert the dictionary to JSON string with explicit float formatting + json_str = json.dumps(updated_inputs, indent=4) + + # Save the updated test inputs JSON + output_name = f"updated_{sample_name}_{branch_name}.json" + with open(output_name, 'w') as file: + file.write(json_str) + + print(f"{output_name}") + return output_name + +def main(): + description = """This script updates the test inputs JSON to work with the test wrapper WDL, + which runs the pipeline and verification""" + + parser = argparse.ArgumentParser(description=description) + + parser.add_argument( + "--truth_path", + dest="truth_path", + required=True, + help="The base path where the truth data is stored", + ) + + parser.add_argument( + "--results_path", + dest="results_path", + required=True, + help="The base path where the test data will be stored", + ) + + parser.add_argument( + "--inputs_json", + dest="inputs_json", + required=True, + help="The JSON file containing the test inputs, formatted to run the pipeline WDL. " + "This will be updated to run the wrapper Test WDL", + ) + + parser.add_argument( + "--update_truth", + dest="update_truth", + default="false", + required=False, + choices=["true", "false"], + help="Boolean flag to update the truth data. If true, the truth data will be updated with the test data. 
", + ) + + parser.add_argument( + "--branch_name", + required=True, + help="Branch name of the current pipeline run") + + args = parser.parse_args() + # convert the update_truth flag to a boolean + update_truth_bool = args.update_truth.lower() == "true" + + # Update the test inputs to work with the test wrapper WDL + update_test_inputs(args.inputs_json, args.truth_path, args.results_path, update_truth_bool, args.branch_name) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/scripts/firecloud_api/firecloud_api.py b/scripts/firecloud_api/firecloud_api.py index 95d5e42b29..6679f3ebc1 100644 --- a/scripts/firecloud_api/firecloud_api.py +++ b/scripts/firecloud_api/firecloud_api.py @@ -1,42 +1,440 @@ -""" -firecloud_api.py -Author: Kevin Palis - -This module provides an object-oriented interface for interacting with the Firecloud REST API. -It includes functionalities to submit workflows, retrieve workflow outputs, and monitor -workflow statuses. - -Classes: - - FirecloudAPI: A class to handle Firecloud API interactions. - -Usage: - Initialize the FirecloudAPI class with API token, namespace, and workspace details, and - call its methods to interact with the Firecloud service. -""" - +import base64 +import json import requests +from datetime import datetime, timezone +from urllib.parse import quote +from google.auth.transport.requests import Request +from google.oauth2 import service_account +from google.auth import credentials +import argparse +import logging import time -import json import sys +# Configure logging to display INFO level and above messages +logging.basicConfig( + level=logging.INFO, # This will show INFO and higher levels (INFO, WARNING, ERROR, CRITICAL) + format='%(asctime)s - %(levelname)s - %(message)s' +) + class FirecloudAPI: - def __init__(self, token, namespace, workspace_name): + def __init__(self, workspace_namespace, workspace_name, sa_json_b64, user, action, method_namespace, method_name): + self.sa_json_b64 = sa_json_b64 + self.namespace = workspace_namespace + self.workspace_name = workspace_name + self.user = user # Store the user email + self.base_url = "https://api.firecloud.org/api" + self.action = action + self.method_namespace = method_namespace + self.method_name = method_name + + # Setup credentials once during initialization + scopes = ['profile', 'email', 'openid'] + decoded_sa = base64.b64decode(sa_json_b64).decode('utf-8') + sa_credentials = service_account.Credentials.from_service_account_info( + json.loads(decoded_sa), + scopes=scopes + ) + self.delegated_creds = sa_credentials.with_subject(user) + + def get_method_config_name(self, pipeline_name, branch_name, test_type): """ - Initializes the FirecloudAPI object with authentication and workspace details. 
+ Helper method to consistently generate method configuration names - :param token: API access token - :param namespace: Workspace namespace - :param workspace_name: Workspace name + :param pipeline_name: Name of the pipeline + :param branch_name: Name of the branch + :param test_type: Type of test (Scientific or Plumbing) + :return: Formatted method configuration name """ - self.token = token - self.namespace = namespace - self.workspace_name = workspace_name - self.base_url = "https://api.firecloud.org/api" - self.headers = { - 'accept': '*/*', - 'Authorization': f'Bearer {self.token}', + return f"{pipeline_name}_{test_type}_{branch_name}" + + def build_auth_headers(self, token: str): + if not self.delegated_creds.valid: + logging.info("Refreshing credentials.") + self.delegated_creds.refresh(Request()) + token = self.delegated_creds.token + return { + "content-type": "application/json", + "Authorization": f"Bearer {token}", } + def get_user_token(self, credentials: credentials): + """ + Get test user's access token + """ + # if token is expired or about to expire in 10 seconds, refresh and then use it + if not credentials.valid: + logging.info("Fetching user's new access token") + credentials.refresh(Request()) + logging.info("Token refreshed.") + else: + expiry_timestamp = credentials.expiry.replace(tzinfo=timezone.utc).timestamp() + now_timestamp = datetime.now(timezone.utc).timestamp() + # if token is about to expire in 1 minute, refresh and then use it + if expiry_timestamp - now_timestamp < 60: + logging.info("Fetching user's new access token") + credentials.refresh(Request()) + logging.info("Token refreshed.") + + return credentials.token + + def submit_job(self, submission_data_file): + """ + Submits a job to Terra/Firecloud with retry logic for intermittent 500 errors. 
+ + :param submission_data_file: The JSON data for the submission + :return: The submission ID if successful, None otherwise + """ + # Set up retry parameters + max_retry_duration = 15 * 60 # 15 minutes in seconds + start_time = time.time() + retry_delay = 5 # Start with a 5-second delay between retries + max_retry_delay = 30 # Maximum retry delay in seconds + max_attempts = 10 # Maximum number of retry attempts + + attempts = 0 + while attempts < max_attempts: + attempts += 1 + + # Check if we've exceeded the maximum retry duration + current_time = time.time() + if current_time - start_time > max_retry_duration: + logging.error(f"Exceeded maximum retry duration of {max_retry_duration/60} minutes.") + return None + + try: + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/submissions" + + logging.info(f"Submitting job, attempt {attempts}/{max_attempts}") + response = requests.post(url, json=submission_data_file, headers=headers) + + # Print status code and response body for debugging + logging.info(f"Response status code for submitting job: {response.status_code}") + + # Handle different response codes + if response.status_code == 201: # Success + try: + # Parse the response as JSON + response_json = response.json() + logging.info(f"Response body: {response.text}") + + # Extract the submissionId + submission_id = response_json.get("submissionId", None) + if submission_id: + logging.info(f"Submission ID extracted: {submission_id}") + return submission_id + else: + logging.error("Error: submissionId not found in the response.") + return None + except json.JSONDecodeError: + logging.error("Error: Failed to parse JSON response.") + logging.error(f"Response body: {response.text}") + # If we can't parse the JSON but got a 201, we might still want to retry + if attempts < max_attempts: + time.sleep(retry_delay) + retry_delay = min(retry_delay * 1.5, max_retry_delay) + continue + return None + + elif response.status_code == 500: # Server error, retry + logging.warning(f"Received 500 error. Retrying in {retry_delay} seconds...") + logging.warning(f"Response body: {response.text}") + time.sleep(retry_delay) + # Implement exponential backoff with a cap + retry_delay = min(retry_delay * 1.5, max_retry_delay) + continue + + elif response.status_code >= 400 and response.status_code < 500: # Client error + # For 4xx errors, only retry a few times as they might be temporary auth issues + logging.error(f"Client error (4xx): {response.status_code}") + logging.error(f"Response body: {response.text}") + if response.status_code == 401 or response.status_code == 403: + # Auth errors might be temporary, retry with token refresh + self.delegated_creds.refresh(Request()) + if attempts < 3: # Only retry auth errors a few times + time.sleep(retry_delay) + continue + return None + + else: # Other error codes + logging.error(f"Failed to submit job. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + if attempts < max_attempts: + time.sleep(retry_delay) + retry_delay = min(retry_delay * 1.5, max_retry_delay) + continue + return None + + except requests.exceptions.RequestException as e: + # Handle network errors + logging.warning(f"Network error occurred: {e}. 
Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + # Implement exponential backoff with a cap + retry_delay = min(retry_delay * 1.5, max_retry_delay) + continue + + logging.error(f"Failed to submit job after {max_attempts} attempts.") + return None + + + def create_new_method_config(self, branch_name, pipeline_name): + """ + Creates a new method configuration in the workspace via Firecloud API. + Includes a retry mechanism for 404 errors from Dockstore. + + :param branch_name: The branch name + :param pipeline_name: The name of the pipeline + :return: The name of the created method configuration or None if failed + """ + # Create method config name with test type + method_config_name = self.get_method_config_name(pipeline_name, branch_name, args.test_type) + + # Flag to track if we've already retried for a 404 error + dockstore_404_retried = False + + # Function to create the payload + def create_payload(): + return { + "deleted": False, + "inputs": {}, + "methodConfigVersion": 0, + "methodRepoMethod": { + "methodUri": f"dockstore://github.com/broadinstitute/warp/{pipeline_name}/{branch_name}", + "sourceRepo": "dockstore", + "methodPath": f"github.com/broadinstitute/warp/{pipeline_name}", + "methodVersion": f"{branch_name}" + }, + "name": method_config_name, + "namespace": "warp-pipelines", + "outputs": {}, + "prerequisites": {} + } + + # Attempt to create the method configuration + def attempt_creation(): + payload = create_payload() + logging.info(f"Creating new method configuration: {json.dumps(payload, indent=2)}") + + # Construct the API endpoint URL for creating a new method configuration + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/method_configs/{self.namespace}/{method_config_name}" + + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + # Create the new method configuration in the workspace + response = requests.put(url, headers=headers, json=payload) + + return response + + # First attempt + response = attempt_creation() + + # Check if we got a 404 error (likely from Dockstore) + if response.status_code == 404 and not dockstore_404_retried: + error_message = response.text + logging.warning(f"Received 404 error, possibly from Dockstore: {error_message}") + logging.info(f"Waiting 5 minutes before retrying...") + + # Wait for 5 minutes (300 seconds) + time.sleep(300) + + # Mark that we've retried for this error + dockstore_404_retried = True + + # Retry the creation + logging.info("Retrying method configuration creation after 5-minute wait") + response = attempt_creation() + + # Final check if the method configuration was created successfully + if response.status_code == 200: + logging.info(f"Method configuration {method_config_name} created successfully.") + return method_config_name + else: + logging.error(f"Failed to create method configuration. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + raise Exception(f"Failed to create method configuration for {pipeline_name} on the branch {branch_name}") + + + def upload_test_inputs(self, pipeline_name, test_inputs, branch_name, test_type): + """ + Uploads test inputs to the workspace via Firecloud API. 
+ + :param test_inputs: JSON data containing test inputs + :param pipeline_name: The name of the pipeline + :param branch_name: The name of the branch + :param test_type: The type of test (Scientific or Plumbing) + :return: True if successful, False otherwise + """ + + method_config_name = self.get_method_config_name(pipeline_name, branch_name, test_type) + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/method_configs/{self.namespace}/{method_config_name}" + + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + # get the current method configuration + response = requests.get(url, headers=headers) + + if response.status_code == 404: + logging.info(f"Method config {method_config_name} not found. Creating new config...") + if not self.create_new_method_config(branch_name, pipeline_name): + logging.error("Failed to create new method configuration.") + return False + response = requests.get(url, headers=headers) + if response.status_code != 200: + logging.error(f"Failed to get method configuration. Status code: {response.status_code}") + return False + + config = response.json() + print(f"Current method configuration: {json.dumps(config, indent=2)}") + # update the config with the new inputs + print(f"Opening test inputs file: {test_inputs}") + with open(test_inputs, 'r') as file: + inputs_json = json.load(file) + print("Test inputs loaded successfully.") + inputs_json = self.quote_values(inputs_json) + print(f"here is test json after quote_values: {json.dumps(inputs_json, indent=2)}") + config["inputs"] = inputs_json + + # Construct the methodUri with the branch name + base_url = f"github.com/broadinstitute/warp/{pipeline_name}" + method_uri = f"dockstore://{quote(base_url)}/{branch_name}" + print(f"Updating methodUri with branch name: {method_uri}") + config["methodRepoMethod"]["methodUri"] = method_uri + + print(f"Updating methodVersion with branch name: {branch_name}") + config["methodRepoMethod"]["methodVersion"] = branch_name + + # We need to increment the methodConfigVersion by 1 every time we update the method configuration + config["methodConfigVersion"] += 1 # Increment version number by 1 + print(f"Updated method configuration: {json.dumps(config, indent=2)}") + + + # post the updated method config to the workspace + response = requests.post(url, headers=headers, json=config) + print(f"Response status code for uploading inputs: {response.status_code}") + print(f"Response text: {response.text}") + + # Check if the test inputs were uploaded successfully + if response.status_code == 200: + print("Test inputs uploaded successfully.") + return True + else: + print(f"Failed to upload test inputs. Status code: {response.status_code}") + return False + + def poll_job_status(self, submission_id): + """ + Polls the status of a submission until it is complete and returns a dictionary of workflow IDs and their statuses. + Includes retry mechanism for handling intermittent 500 errors. 
+ + :param submission_id: The ID of the submission to poll + :return: Dictionary with workflow IDs as keys and their statuses as values + """ + # Construct the API endpoint URL for polling submission status + status_url = f"{self.base_url}/workspaces/{self.namespace}/{self.workspace_name}/submissions/{submission_id}" + workflow_status_map = {} + + # Set up retry parameters + start_time = time.time() + retry_delay = 5 # Start with a 5-second delay between retries + max_retry_delay = 30 # Maximum retry delay in seconds + + # Continuously poll the status of the submission until completion + while True: + try: + # Get the token and headers + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + status_response = requests.get(status_url, headers=headers) + + # Check for 500 errors and retry if necessary + if status_response.status_code == 500: + logging.warning(f"Received 500 error. Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + # Implement exponential backoff with a cap + retry_delay = min(retry_delay * 1.5, max_retry_delay) + continue + + # Check if the response status code is successful (200) + if status_response.status_code != 200: + logging.error(f"Error: Received status code {status_response.status_code}") + logging.info(f"Response content: {status_response.text}") + # For non-500 errors, wait and retry a few times + if time.time() - start_time <= 60: # Only retry for the first minute for non-500 errors + logging.warning(f"Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + continue + return {} + + try: + # Parse the response as JSON + status_data = status_response.json() + # Reset retry delay after successful request + retry_delay = 5 + except json.JSONDecodeError: + logging.error("Error decoding JSON response.") + logging.info(f"Response content: {status_response.text}") + time.sleep(retry_delay) + continue + + # Retrieve workflows and their statuses + workflows = status_data.get("workflows", []) + for workflow in workflows: + workflow_id = workflow.get("workflowId") + workflow_status = workflow.get("status") + if workflow_id and workflow_status: + workflow_status_map[workflow_id] = workflow_status + + # Check if the submission is complete + submission_status = status_data.get("status", "") + if submission_status == "Done": + logging.info("Submission is done.") + break + + # Wait for 20 seconds before polling again + time.sleep(20) + + except requests.exceptions.RequestException as e: + # Handle network errors + logging.warning(f"Network error occurred: {e}. 
Retrying in {retry_delay} seconds...") + time.sleep(retry_delay) + # Implement exponential backoff with a cap + retry_delay = min(retry_delay * 1.5, max_retry_delay) + + return workflow_status_map + + def quote_values(self, inputs_json): + """ + Format JSON values with proper handling of nested structures + """ + def format_value(val): + if isinstance(val, bool): + return str(val).lower() + elif isinstance(val, dict): + return json.dumps(val, indent=2) + elif isinstance(val, list): + if all(isinstance(x, str) for x in val): + return json.dumps(val) + return json.dumps([format_value(x) for x in val]) + elif isinstance(val, (int, float)): + return str(val) + elif val is None: + return "" + elif isinstance(val, str): + if val.startswith("{") and val.endswith("}"): + try: + parsed = json.loads(val) + return json.dumps(parsed, indent=2) + except json.JSONDecodeError: + return f'"{val}"' + return f'"{val}"' + return f'"{str(val)}"' + + return {key: format_value(value) for key, value in inputs_json.items()} + def get_workflow_outputs(self, submission_id, workflow_id, pipeline_name): """ Fetches workflow outputs from the Firecloud API. @@ -58,129 +456,264 @@ def get_workflow_outputs(self, submission_id, workflow_id, pipeline_name): output_values = list(outputs.values()) return outputs, output_values else: - print(f"Failed to retrieve workflow outputs. Status code: {response.status_code}") + logging.error(f"Failed to retrieve workflow outputs. Status code: {response.status_code}") return None, None - def create_submission(self, submission_data): + def delete_method_config(self, method_config_name): """ - Submits a workflow to the Firecloud API. + Deletes a method configuration from the workspace. - :param submission_data: JSON data containing submission details - :return: Submission ID if successful, None otherwise + :param method_config_name: The name of the method configuration to delete + :return: True if deletion is successful, False otherwise """ - # Construct the API endpoint URL for creating a new submission - url = f"{self.base_url}/workspaces/{self.namespace}/{self.workspace_name}/submissions" - response = requests.post(url, headers=self.headers, json=submission_data) + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/method_configs/{self.namespace}/{method_config_name}" - # Check if the submission was created successfully - if response.status_code == 201: - submission_id = response.json().get('submissionId') - #print(f"Submission created with ID: {submission_id}") - return submission_id + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) + + # Send a DELETE request to delete the method configuration + response = requests.delete(url, headers=headers) + + if response.status_code == 204: + logging.info(f"Method configuration {method_config_name} deleted successfully.") + print("True") + return True else: - print(f"Failed to create submission. Status code: {response.status_code}") - return None + logging.error(f"Failed to delete method configuration {method_config_name}. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return False - - def poll_submission_status(self, submission_id): + def get_active_submissions(self, method_config_name=None): """ - Polls the status of a submission until it is complete and returns a dictionary of workflow IDs and their statuses. + Get all active workflow submissions for the workspace. + Optionally filter by method configuration name. 
+ """ + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/submissions" + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) - :param submission_id: The ID of the submission to poll - :return: Dictionary with workflow IDs as keys and their statuses as values + response = requests.get(url, headers=headers) + + if response.status_code != 200: + logging.error(f"Failed to get submissions. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return [] + + submissions = response.json() + active_submissions = [] + + for submission in submissions: + # Check if submission is active (not Done, Aborted, or Failed) + if submission['status'] in ['Submitted', 'Running', 'Queued']: + config_name = submission.get('methodConfigurationName', '') + if config_name.startswith(method_config_name): + active_submissions.append(submission) + + return active_submissions + + def cancel_submission(self, submission_id): """ - # Construct the API endpoint URL for polling submission status - status_url = f"{self.base_url}/workspaces/{self.namespace}/{self.workspace_name}/submissions/{submission_id}" - workflow_status_map = {} + Cancel a specific workflow submission. + """ + url = f"{self.base_url}/workspaces/{self.namespace}/{quote(self.workspace_name)}/submissions/{submission_id}" + token = self.get_user_token(self.delegated_creds) + headers = self.build_auth_headers(token) - # Continuously poll the status of the submission until completion - while True: - status_response = requests.get(status_url, headers=self.headers) + response = requests.delete(url, headers=headers) - # Check if the response status code is successful (200) - if status_response.status_code != 200: - print(f"Error: Received status code {status_response.status_code}", file=sys.stderr) - print(f"Response content: {status_response.text}", file=sys.stderr) - return {} + if response.status_code not in [204]: + logging.error(f"Failed to cancel submission {submission_id}. Status code: {response.status_code}") + logging.error(f"Response body: {response.text}") + return False - try: - # Parse the response as JSON - status_data = status_response.json() - except json.JSONDecodeError: - print("Error decoding JSON response.", file=sys.stderr) - print(f"Response content: {status_response.text}", file=sys.stderr) - return {} - - # Retrieve workflows and their statuses - workflows = status_data.get("workflows", []) - for workflow in workflows: - workflow_id = workflow.get("workflowId") - workflow_status = workflow.get("status") - if workflow_id and workflow_status: - workflow_status_map[workflow_id] = workflow_status - - # Check if the submission is complete - submission_status = status_data.get("status", "") - if submission_status == "Done": - break - - # Wait for 60 seconds before polling again - time.sleep(60) + logging.info(f"Successfully cancelled submission {submission_id}") + return True + + def cancel_old_submissions(self, pipeline_name, branch_name): + """ + Cancel all active submissions for a pipeline's method configuration. + Returns the number of cancelled submissions. 
+ """ + method_config_name = self.get_method_config_name(pipeline_name, branch_name, args.test_type) + active_submissions = self.get_active_submissions(method_config_name) + cancelled_count = 0 + + for submission in active_submissions: + if self.cancel_submission(submission['submissionId']): + cancelled_count += 1 + logging.info(f"Cancelled submission {submission['submissionId']}") + + return cancelled_count + + + def main(self): + logging.info("Starting process based on action.") + + if self.action == "submit_job": + submission_id = self.submit_job() + logging.info(f"Job submission complete with ID: {submission_id}") + elif self.action == "create_new_method_config": + if not args.pipeline_name or not args.branch_name: + parser.error("Arguments --pipeline_name and --branch_name are required for 'create_new_method_config'") + method_config_name = self.create_new_method_config(args.branch_name, args.pipeline_name) + print(method_config_name) + if method_config_name: + logging.info(f"Method configuration created with name: {method_config_name}") + else: + logging.error("Failed to create method configuration.") + elif self.action == "delete_method_config": + if not args.method_config_name: + if not all([args.pipeline_name, args.branch_name]): + parser.error("Either --method_config_name or both --pipeline_name and --branch_name are required") + method_config_name = self.get_method_config_name(args.pipeline_name, args.branch_name, args.test_type) + else: + method_config_name = args.method_config_name + result = self.delete_method_config(method_config_name) + print(str(result).lower()) + elif self.action == "upload_test_inputs": + success = self.upload_test_inputs(self.pipeline_name, self.test_input_file, self.branch_name, self.test_type) + if success: + logging.info("Test inputs uploaded successfully.") + else: + logging.error("Failed to upload test inputs.") + elif self.action == "poll_job_status": + status = self.poll_job_status() + logging.info(f"Final job status: {status}") + elif self.action == "create_new_method_config": + method_config_name = self.create_new_method_config(self.branch_name, self.pipeline_name) + if method_config_name: + logging.info("Method configuration created successfully.") + else: + logging.error("Failed to create method configuration.") + elif self.action == "delete_method_config": + if not args.method_config_name: + parser.error("Argument --method_config_name is required for 'delete_method_config'") + else: + # Delete the method configuration + result = self.delete_method_config(args.method_config_name) + if result: + logging.info("Method configuration deleted successfully.") + else: + logging.error("Failed to delete method configuration.") + elif self.action == "get_workflow_outputs": + if not args.submission_id or not args.workflow_id or not args.pipeline_name: + parser.error("Arguments --submission_id, --workflow_id, and --pipeline_name are required for 'get_workflow_outputs'") + # Fetch workflow outputs + outputs, output_values = self.get_workflow_outputs(args.submission_id, args.workflow_id, args.pipeline_name) + if outputs: + logging.info(f"Workflow outputs: {json.dumps(outputs, indent=2)}") + logging.info(f"Output values: {output_values}") + else: + logging.error("Failed to retrieve workflow outputs.") + else: + logging.error(f"Unknown action: {self.action}") - return workflow_status_map -# Bash Script Interaction if __name__ == "__main__": - import argparse - - # Set up command-line argument parsing - parser = argparse.ArgumentParser(description='Interact 
with Firecloud API.') - parser.add_argument('--token', required=True, help='API access token') - parser.add_argument('--namespace', required=True, help='Workspace namespace') - parser.add_argument('--workspace', required=True, help='Workspace name') - parser.add_argument('--action', required=True, choices=['get_outputs', 'submit', 'poll_status'], help='Action to perform') - parser.add_argument('--submission_id', help='Submission ID (required for get_outputs and poll_status)') - parser.add_argument('--workflow_id', help='Workflow ID (required for get_outputs)') - parser.add_argument('--pipeline_name', help='Pipeline name (required for get_outputs)') + parser = argparse.ArgumentParser() + parser.add_argument("--sa-json-b64", required=True, help="Base64 encoded service account JSON") + parser.add_argument("--user", required=True, help="User email for impersonation") + parser.add_argument("--workspace-namespace", required=True, help="Namespace of the workspace.") + parser.add_argument("--workspace-name", required=True, help="Name of the workspace.") + parser.add_argument("--pipeline_name", help="Pipeline name (required for 'upload_test_inputs')") + parser.add_argument("--test_input_file", help="Path to test input file (required for 'upload_test_inputs')") + parser.add_argument("--branch_name", help="Branch name for the method repository (required for 'upload_test_inputs')") + parser.add_argument("--method_namespace", help="Method namespace") + parser.add_argument("--method_name", help="Method name") parser.add_argument('--submission_data_file', help='Path to submission data JSON file (required for submit)') + parser.add_argument('--submission_id', help='Submission ID (required for poll_job_status)') + parser.add_argument('--workflow_id', help='Workflow ID (required for get_workflow_outputs)') + parser.add_argument("--source", help="Source GCS path for gsutil copy") + parser.add_argument("--destination", help="Destination GCS path for gsutil copy") + parser.add_argument("--method_config_name", help="Name of the method configuration to delete") + parser.add_argument("--test_type", help="Test type (Scientific or Plumbing)") + parser.add_argument("action", choices=["submit_job", "upload_test_inputs", "poll_job_status", "get_workflow_outputs", "create_new_method_config", "delete_method_config", "cancel_old_submissions"], + help="Action to perform: 'submit_job', 'upload_test_inputs', 'poll_job_status', 'get_workflow_outputs', 'create_new_method_config', or 'delete_method_config'") args = parser.parse_args() - # Initialize the FirecloudAPI instance with provided arguments - firecloud_api = FirecloudAPI(args.token, args.namespace, args.workspace) + # Pass action to the FirecloudAPI constructor + api = FirecloudAPI( + sa_json_b64=args.sa_json_b64, + user=args.user, + workspace_namespace=args.workspace_namespace, + workspace_name=args.workspace_name, + action=args.action, + method_namespace=args.method_namespace, + method_name=args.method_name + ) - # Perform actions based on the specified action argument - if args.action == 'get_outputs': - if not all([args.submission_id, args.workflow_id, args.pipeline_name]): - print("For 'get_outputs', --submission_id, --workflow_id, and --pipeline_name are required.") - else: - outputs, output_values = firecloud_api.get_workflow_outputs(args.submission_id, args.workflow_id, args.pipeline_name) - #print(outputs) - # Convert the dictionary, outputs, to a JSON string and print it - if outputs: - print(json.dumps(outputs)) # Output the dictionary as a JSON string for 
bash parsing - else: - print("No outputs found or an error occurred.", file=sys.stderr) - elif args.action == 'submit': + if args.action == "upload_test_inputs": + # Check for required arguments for upload_test_inputs action + if not args.pipeline_name or not args.test_input_file or not args.branch_name: + parser.error("Arguments --pipeline_name, --test_input_file, and --branch_name are required for 'upload_test_inputs'") + # Call the function to upload test inputs + api.upload_test_inputs(args.pipeline_name, args.test_input_file, args.branch_name, args.test_type) + + elif args.action == "submit_job": + # Check for required argument for submit_job action if not args.submission_data_file: - print("For 'submit', --submission_data_file is required.") + parser.error("Argument --submission_data_file is required for 'submit_job'") + # Load the submission data from the provided file else: - # Load submission data from the specified JSON file with open(args.submission_data_file, 'r') as file: submission_data = json.load(file) - submission_id = firecloud_api.create_submission(submission_data) - print(submission_id) - - elif args.action == 'poll_status': - if not args.submission_id: - print("For 'poll_status', --submission_id is required.", file=sys.stderr) - else: - workflow_status_map = firecloud_api.poll_submission_status(args.submission_id) - - # Convert the dictionary to a JSON string and print it - if workflow_status_map: - print(json.dumps(workflow_status_map)) # Output the dictionary as a JSON string for bash parsing - else: - print("No workflows found or an error occurred.", file=sys.stderr) \ No newline at end of file + # Submit the job with the loaded submission data + submission_id = api.submit_job(submission_data) + if submission_id: + print(submission_id) + logging.info("Submission successful.") + else: + logging.error("Submission failed.") + sys.exit(1) + + elif args.action == "poll_job_status": + if not args.submission_id: + parser.error("Argument --submission_id is required for 'poll_job_status'") + else: + # Poll the job status with the provided submission ID + workflow_status_map = api.poll_job_status(args.submission_id) + + # Convert the dictionary to a JSON string and print it + if workflow_status_map: + print(json.dumps(workflow_status_map)) # Output the dictionary as a JSON string for bash parsing + else: + print("No workflows found or an error occurred.") + + elif args.action == "create_new_method_config": + # Check for required arguments for create_new_method_config action + if not args.pipeline_name or not args.branch_name: + parser.error("Arguments --pipeline_name and --branch_name are required for 'create_new_method_config'") + # Call the function to create a new method configuration + method_config_name = api.create_new_method_config(args.branch_name, args.pipeline_name) + print(method_config_name) + if method_config_name: + logging.info(f"Method configuration created with name: {method_config_name}") + else: + logging.error("Failed to create method configuration.") + elif args.action == "delete_method_config": + if not args.method_config_name: + parser.error("Argument --method_config_name is required for 'delete_method_config'") + else: + # Delete the method configuration + result = api.delete_method_config(args.method_config_name) + if result: + logging.info("Method configuration deleted successfully.") + else: + logging.error("Failed to delete method configuration.") + elif args.action == "cancel_old_submissions": + if not all([args.pipeline_name, args.branch_name]): 
+ parser.error("Arguments --pipeline_name and --branch_name are required for 'cancel_old_submissions'") + + # Cancel old submissions + cancelled_count = api.cancel_old_submissions( + args.pipeline_name, + args.branch_name + ) + print(f"Cancelled {cancelled_count} old submissions") + + + + diff --git a/scripts/firecloud_api/requirements.txt b/scripts/firecloud_api/requirements.txt new file mode 100644 index 0000000000..9d92243289 --- /dev/null +++ b/scripts/firecloud_api/requirements.txt @@ -0,0 +1,2 @@ +requests==2.32.2 +google-auth==2.23.3 \ No newline at end of file diff --git a/structs/imputation/ImputationBeagleStructs.wdl b/structs/imputation/ImputationBeagleStructs.wdl new file mode 100644 index 0000000000..6aaaaa061a --- /dev/null +++ b/structs/imputation/ImputationBeagleStructs.wdl @@ -0,0 +1,8 @@ +version 1.0 + +struct ReferencePanelContig { + File bed + File bref3 + String contig + File genetic_map +} diff --git a/tasks/broad/ImputationBeagleTasks.wdl b/tasks/broad/ImputationBeagleTasks.wdl new file mode 100644 index 0000000000..af4363e600 --- /dev/null +++ b/tasks/broad/ImputationBeagleTasks.wdl @@ -0,0 +1,217 @@ +version 1.0 + +task CountVariantsInChunks { + input { + File vcf + File vcf_index + File panel_bed_file + + String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + Int cpu = 1 + Int memory_mb = 16000 + Int disk_size_gb = 2 * ceil(size([vcf, vcf_index, panel_bed_file], "GiB")) + 10 + } + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 + + command <<< + set -e -o pipefail + + ln -sf ~{vcf} input.vcf.gz + ln -sf ~{vcf_index} input.vcf.gz.tbi + + gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" CountVariants -V input.vcf.gz | tail -n 1 > var_in_original + bedtools intersect -a ~{vcf} -b ~{panel_bed_file} | wc -l > var_also_in_reference + >>> + + output { + Int var_in_original = read_int("var_in_original") + Int var_also_in_reference = read_int("var_also_in_reference") + } + runtime { + docker: gatk_docker + disks: "local-disk ${disk_size_gb} HDD" + memory: "${memory_mb} MiB" + cpu: cpu + preemptible: 3 + } +} + +task CheckChunks { + input { + Int var_in_original + Int var_also_in_reference + + String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" + Int cpu = 1 + Int memory_mb = 4000 + } + command <<< + set -e -o pipefail + + if [ $(( ~{var_also_in_reference} * 2 - ~{var_in_original})) -gt 0 ] && [ ~{var_also_in_reference} -gt 3 ]; then + echo true > valid_file.txt + else + echo false > valid_file.txt + fi + >>> + output { + Boolean valid = read_boolean("valid_file.txt") + } + runtime { + docker: bcftools_docker + disks: "local-disk 10 HDD" + memory: "${memory_mb} MiB" + cpu: cpu + preemptible: 3 + } +} + +task Phase { + input { + File dataset_vcf + File ref_panel_bref3 + File genetic_map_file + String basename + String chrom + Int start + Int end + + String beagle_docker = "us.gcr.io/broad-gotc-prod/imputation-beagle:1.0.0-17Dec24.224-1740423035" + Int cpu = 8 # This parameter is used as the nthreads input to Beagle which is part of how we make it determinstic. 
Changing this value may change the output generated by the tool + Int memory_mb = 32000 # value depends on chunk size, the number of samples in ref and target panel, and whether imputation is performed + Int xmx_mb = memory_mb - 5000 # I suggest setting this parameter to be 85-90% of the memory_mb parameter + Int disk_size_gb = ceil(3 * size([dataset_vcf, ref_panel_bref3], "GiB")) + 10 # value may need to be adjusted + + Boolean for_dependency # used for task dependency management + } + + command <<< + set -e -o pipefail + + java -ea -Xmx~{xmx_mb}m \ + -jar /usr/gitc/beagle.17Dec24.224.jar \ + gt=~{dataset_vcf} \ + ref=~{ref_panel_bref3} \ + map=~{genetic_map_file} \ + out=phased_~{basename} \ + chrom=~{chrom}:~{start}-~{end} \ + impute=false \ + nthreads=~{cpu} \ + seed=-99999 + + >>> + output { + File vcf = "phased_~{basename}.vcf.gz" + File log = "phased_~{basename}.log" + } + runtime { + docker: beagle_docker + disks: "local-disk ${disk_size_gb} HDD" + memory: "${memory_mb} MiB" + cpu: cpu + preemptible: 3 + } +} + +task Impute { + input { + File dataset_vcf + File ref_panel_bref3 + File genetic_map_file + String basename + String chrom + Int start + Int end + + String beagle_docker = "us.gcr.io/broad-gotc-prod/imputation-beagle:1.0.0-17Dec24.224-1740423035" + Int cpu = 8 # This parameter is used as the nthreads input to Beagle which is part of how we make it determinstic. Changing this value may change the output generated by the tool + Int memory_mb = 32000 # value depends on chunk size, the number of samples in ref and target panel, and whether imputation is performed + Int xmx_mb = memory_mb - 5000 # I suggest setting this parameter to be 85-90% of the memory_mb parameter + Int disk_size_gb = ceil(3 * size([dataset_vcf, ref_panel_bref3], "GiB")) + 10 # value may need to be adjusted + } + + command <<< + set -e -o pipefail + + java -ea -Xmx~{xmx_mb}m \ + -jar /usr/gitc/beagle.17Dec24.224.jar \ + gt=~{dataset_vcf} \ + ref=~{ref_panel_bref3} \ + map=~{genetic_map_file} \ + out=imputed_~{basename} \ + chrom=~{chrom}:~{start}-~{end} \ + impute=true \ + nthreads=~{cpu} \ + seed=-99999 + + >>> + output { + File vcf = "imputed_~{basename}.vcf.gz" + File log = "imputed_~{basename}.log" + } + runtime { + docker: beagle_docker + disks: "local-disk ${disk_size_gb} HDD" + memory: "${memory_mb} MiB" + cpu: cpu + preemptible: 3 + } +} + +task ErrorWithMessageIfErrorCountNotZero { + input { + Int errorCount + String message + } + command <<< + if [[ ~{errorCount} -gt 0 ]]; then + >&2 echo "Error: ~{message}" + exit 1 + else + exit 0 + fi + >>> + + runtime { + docker: "ubuntu:20.04" + preemptible: 3 + } + output { + Boolean done = true + } +} + +task CreateVcfIndex { + input { + File vcf_input + + Int disk_size_gb = ceil(1.2*size(vcf_input, "GiB")) + 10 + Int cpu = 1 + Int memory_mb = 6000 + String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.5.0.0" + } + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 + + String vcf_basename = basename(vcf_input) + + command { + set -e -o pipefail + + ln -sf ~{vcf_input} ~{vcf_basename} + + bcftools index -t ~{vcf_basename} + } + runtime { + docker: gatk_docker + disks: "local-disk ${disk_size_gb} HDD" + memory: "${memory_mb} MiB" + cpu: cpu + preemptible: 3 + } + output { + File vcf = "~{vcf_basename}" + File vcf_index = "~{vcf_basename}.tbi" + } +} diff --git a/tasks/broad/ImputationTasks.wdl b/tasks/broad/ImputationTasks.wdl index 793ae119b2..1a89954535 100644 --- a/tasks/broad/ImputationTasks.wdl +++ b/tasks/broad/ImputationTasks.wdl @@ 
-12,6 +12,8 @@ task CalculateChromosomeLength { } command { + set -e -o pipefail + grep -P "SN:~{chrom}\t" ~{ref_dict} | sed 's/.*LN://' | sed 's/\t.*//' } runtime { @@ -19,6 +21,7 @@ task CalculateChromosomeLength { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { Int chrom_length = read_int(stdout()) @@ -37,6 +40,8 @@ task GetMissingContigList { } command <<< + set -e -o pipefail + grep "@SQ" ~{ref_dict} | sed 's/.*SN://' | sed 's/\t.*//' > contigs.txt awk 'NR==FNR{arr[$0];next} !($0 in arr)' ~{included_contigs} contigs.txt > missing_contigs.txt >>> @@ -62,13 +67,13 @@ task GenerateChunk { File vcf File vcf_index - Int disk_size_gb = ceil(2*size(vcf, "GiB")) + 50 # not sure how big the disk size needs to be since we aren't downloading the entire VCF here + Int disk_size_gb = ceil(2*size(vcf, "GiB")) + 10 Int cpu = 1 Int memory_mb = 8000 String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 command { gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \ @@ -114,17 +119,17 @@ task CountVariantsInChunks { String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" Int cpu = 1 - Int memory_mb = 4000 + Int memory_mb = 6000 Int disk_size_gb = 2 * ceil(size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB")) + 20 } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 command <<< set -e -o pipefail echo $(gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" CountVariants -V ~{vcf} | sed 's/Tool returned://') > var_in_original - echo $(gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" CountVariants -V ~{vcf} -L ~{panel_vcf} | sed 's/Tool returned://') > var_in_reference + echo $(gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" CountVariants -V ~{vcf} -L ~{panel_vcf} | sed 's/Tool returned://') > var_in_reference >>> output { Int var_in_original = read_int("var_in_original") @@ -147,7 +152,7 @@ task CheckChunks { Int var_in_original Int var_in_reference - Int disk_size_gb = ceil(2*size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB")) + Int disk_size_gb = ceil(2*size([vcf, vcf_index, panel_vcf, panel_vcf_index], "GiB")) + 10 String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 4000 @@ -191,7 +196,7 @@ task PhaseVariantsEagle { String eagle_docker = "us.gcr.io/broad-gotc-prod/imputation-eagle:1.0.0-2.4-1690199702" Int cpu = 8 Int memory_mb = 32000 - Int disk_size_gb = ceil(3 * size([dataset_bcf, reference_panel_bcf, dataset_bcf_index, reference_panel_bcf_index], "GiB")) + 50 + Int disk_size_gb = ceil(3 * size([dataset_bcf, reference_panel_bcf, dataset_bcf_index, reference_panel_bcf_index], "GiB")) + 10 } command <<< /usr/gitc/eagle \ @@ -269,10 +274,10 @@ task GatherVcfs { String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" Int cpu = 1 Int memory_mb = 16000 - Int disk_size_gb = ceil(3*size(input_vcfs, "GiB")) + Int disk_size_gb = ceil(3*size(input_vcfs, "GiB")) + 10 } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 command <<< set -e -o pipefail @@ -285,7 +290,6 @@ task GatherVcfs { gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \ IndexFeatureFile -I ~{output_vcf_basename}.vcf.gz - >>> runtime { 
docker: gatk_docker @@ -304,6 +308,8 @@ task ReplaceHeader { File vcf_to_replace_header File vcf_with_new_header + Int cpu = 1 + Int memory_mb = 6000 String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" } @@ -321,6 +327,9 @@ task ReplaceHeader { runtime { docker: bcftools_docker disks: "local-disk ${disk_size_gb} HDD" + memory: "${memory_mb} MiB" + cpu: cpu + preemptible: 3 } output { @@ -334,30 +343,32 @@ task UpdateHeader { File vcf_index File ref_dict String basename + Boolean disable_sequence_dictionary_validation = true Int disk_size_gb = ceil(4*(size(vcf, "GiB") + size(vcf_index, "GiB"))) + 20 String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" Int cpu = 1 - Int memory_mb = 8000 + Int memory_mb = 6000 } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 + String disable_sequence_dict_validation_flag = if disable_sequence_dictionary_validation then "--disable-sequence-dictionary-validation" else "" command <<< - ## update the header of the merged vcf gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \ UpdateVCFSequenceDictionary \ --source-dictionary ~{ref_dict} \ --output ~{basename}.vcf.gz \ --replace -V ~{vcf} \ - --disable-sequence-dictionary-validation + ~{disable_sequence_dict_validation_flag} >>> runtime { docker: gatk_docker disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { File output_vcf = "~{basename}.vcf.gz" @@ -376,8 +387,8 @@ task RemoveSymbolicAlleles { Int cpu = 1 Int memory_mb = 4000 } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 command { gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \ @@ -392,6 +403,7 @@ task RemoveSymbolicAlleles { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } } @@ -401,7 +413,7 @@ task SeparateMultiallelics { File original_vcf_index String output_basename - Int disk_size_gb = ceil(2*(size(original_vcf, "GiB") + size(original_vcf_index, "GiB"))) + Int disk_size_gb = ceil(2*(size(original_vcf, "GiB") + size(original_vcf_index, "GiB"))) + 10 String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 4000 @@ -421,6 +433,7 @@ task SeparateMultiallelics { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } } @@ -435,7 +448,7 @@ task OptionalQCSites { String bcftools_vcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 16000 - Int disk_size_gb = ceil(2*(size(input_vcf, "GiB") + size(input_vcf_index, "GiB"))) + Int disk_size_gb = ceil(2*(size(input_vcf, "GiB") + size(input_vcf_index, "GiB"))) + 10 } Float max_missing = select_first([optional_qc_max_missing, 0.05]) @@ -443,8 +456,11 @@ task OptionalQCSites { command <<< set -e -o pipefail + ln -sf ~{input_vcf} input.vcf.gz + ln -sf ~{input_vcf_index} input.vcf.gz.tbi + # site missing rate < 5% ; hwe p > 1e-6 - vcftools --gzvcf ~{input_vcf} --max-missing ~{max_missing} --hwe ~{hwe} --recode -c | bgzip -c > ~{output_vcf_basename}.vcf.gz + vcftools --gzvcf input.vcf.gz --max-missing ~{max_missing} --hwe ~{hwe} --recode -c | bgzip -c > ~{output_vcf_basename}.vcf.gz bcftools index -t ~{output_vcf_basename}.vcf.gz # Note: this is necessary because vcftools doesn't have a 
way to output a zipped vcf, nor a way to index one (hence needing to use bcf). >>> runtime { @@ -472,6 +488,7 @@ task MergeSingleSampleVcfs { } command <<< set -e -o pipefail + # Move the index file next to the vcf with the corresponding name declare -a VCFS=(~{sep=' ' input_vcfs}) @@ -507,9 +524,12 @@ task CountSamples { String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 3000 - Int disk_size_gb = 100 + ceil(size(vcf, "GiB")) + Int disk_size_gb = ceil(size(vcf, "GiB")) + 10 } + command <<< + set -e -o pipefail + bcftools query -l ~{vcf} | wc -l >>> runtime { @@ -517,6 +537,7 @@ task CountSamples { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { Int nSamples = read_int(stdout()) @@ -532,7 +553,7 @@ task AggregateImputationQCMetrics { String rtidyverse_docker = "rocker/tidyverse:4.1.0" Int cpu = 1 Int memory_mb = 2000 - Int disk_size_gb = 100 + ceil(size(infoFile, "GiB")) + Int disk_size_gb = ceil(size(infoFile, "GiB")) + 10 } command <<< Rscript -<< "EOF" @@ -560,7 +581,7 @@ task AggregateImputationQCMetrics { disks : "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu - preemptible : 3 + preemptible: 3 } output { File aggregated_metrics = "~{basename}_aggregated_imputation_metrics.tsv" @@ -600,7 +621,7 @@ task StoreChunksInfo { disks : "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu - preemptible : 3 + preemptible: 3 } output { File chunks_info = "~{basename}_chunk_info.tsv" @@ -617,7 +638,7 @@ task MergeImputationQCMetrics { String rtidyverse_docker = "rocker/tidyverse:4.1.0" Int cpu = 1 Int memory_mb = 2000 - Int disk_size_gb = 100 + ceil(size(metrics, "GiB")) + Int disk_size_gb = ceil(size(metrics, "GiB")) + 10 } command <<< Rscript -<< "EOF" @@ -638,7 +659,7 @@ task MergeImputationQCMetrics { disks : "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu - preemptible : 3 + preemptible: 3 } output { File aggregated_metrics = "~{basename}_aggregated_imputation_metrics.tsv" @@ -656,13 +677,13 @@ task SubsetVcfToRegion { Int end Boolean exclude_filtered = false - Int disk_size_gb = ceil(2*size(vcf, "GiB")) + 50 # not sure how big the disk size needs to be since we aren't downloading the entire VCF here + Int disk_size_gb = ceil(2*size(vcf, "GiB")) + 10 Int cpu = 1 Int memory_mb = 8000 String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 command { gatk --java-options "-Xms~{command_mem}m -Xmx~{max_heap}m" \ @@ -705,10 +726,11 @@ task SetIDs { String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 4000 - Int disk_size_gb = 100 + ceil(2.2 * size(vcf, "GiB")) + Int disk_size_gb = ceil(2.2 * size(vcf, "GiB")) + 10 } command <<< set -e -o pipefail + bcftools annotate ~{vcf} --set-id '%CHROM\:%POS\:%REF\:%FIRST_ALT' -Oz -o ~{output_basename}.vcf.gz bcftools index -t ~{output_basename}.vcf.gz >>> @@ -717,6 +739,7 @@ task SetIDs { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { File output_vcf = "~{output_basename}.vcf.gz" @@ -729,7 +752,7 @@ task ExtractIDs { File vcf String output_basename - Int disk_size_gb = 2*ceil(size(vcf, "GiB")) + 100 + Int disk_size_gb = 2*ceil(size(vcf, "GiB")) + 10 String bcftools_docker = 
"us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 4000 @@ -745,28 +768,34 @@ task ExtractIDs { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } } task SelectVariantsByIds { input { File vcf + File vcf_index File ids String basename String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" Int cpu = 1 Int memory_mb = 16000 - Int disk_size_gb = ceil(1.2*size(vcf, "GiB")) + 100 + Int disk_size_gb = ceil(1.2*size(vcf, "GiB")) + 10 } parameter_meta { vcf: { description: "vcf", localization_optional: true } + vcf_index: { + description: "vcf", + localization_optional: true + } } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 2000 + Int max_heap = memory_mb - 1500 command <<< set -e -o pipefail @@ -780,6 +809,7 @@ task SelectVariantsByIds { disks: "local-disk ${disk_size_gb} SSD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { File output_vcf = "~{basename}.vcf.gz" @@ -795,7 +825,7 @@ task RemoveAnnotations { String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 Int memory_mb = 3000 - Int disk_size_gb = ceil(2.2*size(vcf, "GiB")) + 100 + Int disk_size_gb = ceil(2.2*size(vcf, "GiB")) + 10 } command <<< set -e -o pipefail @@ -808,6 +838,7 @@ task RemoveAnnotations { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { File output_vcf = "~{basename}.vcf.gz" @@ -823,10 +854,10 @@ task InterleaveVariants { String gatk_docker = "us.gcr.io/broad-gatk/gatk:4.6.1.0" Int cpu = 1 Int memory_mb = 16000 - Int disk_size_gb = ceil(3.2*size(vcfs, "GiB")) + 100 + Int disk_size_gb = ceil(3.2*size(vcfs, "GiB")) + 10 } - Int command_mem = memory_mb - 1000 - Int max_heap = memory_mb - 500 + Int command_mem = memory_mb - 1500 + Int max_heap = memory_mb - 1000 command <<< set -e -o pipefail @@ -839,6 +870,7 @@ task InterleaveVariants { disks: "local-disk ${disk_size_gb} SSD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { File output_vcf = "~{basename}.vcf.gz" @@ -854,9 +886,11 @@ task FindSitesUniqueToFileTwoOnly { String ubuntu_docker = "ubuntu:20.04" Int cpu = 1 Int memory_mb = 4000 - Int disk_size_gb = ceil(size(file1, "GiB") + 2*size(file2, "GiB")) + 100 + Int disk_size_gb = ceil(size(file1, "GiB") + 2*size(file2, "GiB")) + 10 } command <<< + set -e -o pipefail + comm -13 <(sort ~{file1} | uniq) <(sort ~{file2} | uniq) > missing_sites.ids >>> runtime { @@ -864,6 +898,7 @@ task FindSitesUniqueToFileTwoOnly { disks: "local-disk ${disk_size_gb} HDD" memory: "${memory_mb} MiB" cpu: cpu + preemptible: 3 } output { File missing_sites = "missing_sites.ids" @@ -877,10 +912,10 @@ task SplitMultiSampleVcf { String bcftools_docker = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" Int cpu = 1 - Int memory_mb = 8000 + Int memory_mb = 6000 # This calculation is explained in https://github.com/broadinstitute/warp/pull/937 - Int disk_size_gb = ceil(21*nSamples*size(multiSampleVcf, "GiB")/(nSamples+20)) + 100 + Int disk_size_gb = ceil(21*nSamples*size(multiSampleVcf, "GiB")/(nSamples+20)) + 10 } command <<< set -e -o pipefail @@ -901,4 +936,4 @@ task SplitMultiSampleVcf { Array[File] single_sample_vcfs = glob("out_dir/*.vcf.gz") Array[File] single_sample_vcf_indices = glob("out_dir/*.vcf.gz.tbi") } -} \ No newline at end of file +} diff --git a/tasks/broad/TerraCopyFilesFromCloudToCloud.wdl 
b/tasks/broad/TerraCopyFilesFromCloudToCloud.wdl new file mode 100644 index 0000000000..66b6eb69a4 --- /dev/null +++ b/tasks/broad/TerraCopyFilesFromCloudToCloud.wdl @@ -0,0 +1,49 @@ +version 1.0 + +## Copyright Broad Institute, 2024 +## +## This WDL defines tasks used for moving files from place to place on Terra Platform. +## +## Runtime parameters are often optimized for Broad's Google Cloud Platform implementation. +## For program versions, see docker containers. +## +## LICENSING : +## This script is released under the WDL source code license (BSD-3) (see LICENSE in +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may +## be subject to different licenses. Users are responsible for checking that they are +## authorized to run all programs before running this script. Please see the docker +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed +## licensing information pertaining to the included programs. + +task TerraCopyFilesFromCloudToCloud { + input { + Array[String] files_to_copy + String destination_cloud_path + Float? contamination + } + + command { + set -euo pipefail + + gcloud config set storage/process_count 16 + gcloud config set storage/thread_count 2 + echo ~{default='no_contamination' contamination} > contamination + + if ! grep -q no_contamination contamination; then + gcloud storage cp -L cp.log contamination ~{destination_cloud_path}.contamination + fi + gcloud storage cp ~{sep=' ' files_to_copy} ~{destination_cloud_path} + } + + output { + Boolean done = true + } + + runtime { + memory: "16 GiB" + cpu: "1" + disks: "local-disk 32 HDD" + docker: "gcr.io/google.com/cloudsdktool/google-cloud-cli:499.0.0-slim" + preemptible: 3 + } +} diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 530eee652b..94f29544c4 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.1" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50 diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index dedd01a509..b840dec944 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -536,7 +536,7 @@ task SingleNucleusSlideseqH5adOutput { task SingleNucleusSmartSeq2H5adOutput { input { #runtime values - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.1" Array[File] alignment_summary_metrics Array[File] dedup_metrics @@ -631,7 +631,7 @@ task AggregateSmartSeq2H5ad { Array[File] h5ad_input String batch_id String pipeline_version - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.3.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.1" Int disk = 200 Int machine_mem_mb = 4000 Int cpu = 1 diff --git a/tasks/skylab/StarAlign.wdl b/tasks/skylab/StarAlign.wdl index b2a07a4d0a..172bc3b4c9 100644 --- a/tasks/skylab/StarAlign.wdl +++ b/tasks/skylab/StarAlign.wdl @@ -224,6 +224,7 @@ task STARsoloFastq { String output_bam_basename Boolean? count_exons String? 
soloMultiMappers + String reference_path = tar_star_reference # runtime values String samtools_star_docker_path @@ -339,6 +340,10 @@ task STARsoloFastq { # validate the bam with samtools quickcheck samtools quickcheck -v Aligned.sortedByCoord.out.bam + # reheader the BAM + samtools view -H Aligned.sortedByCoord.out.bam > header.txt + echo -e "@CO\tReference genome used: ~{reference_path}" >> header.txt + samtools reheader header.txt Aligned.sortedByCoord.out.bam > Aligned.sortedByCoord.out.reheader.bam echo "UMI LEN " $UMILen @@ -419,7 +424,7 @@ task STARsoloFastq { else echo Error: unknown counting mode: "$counting_mode". Should be either sn_rna or sc_rna. fi - mv Aligned.sortedByCoord.out.bam ~{output_bam_basename}.bam + mv Aligned.sortedByCoord.out.reheader.bam ~{output_bam_basename}.bam >>> diff --git a/verification/VerifyATAC.wdl b/verification/VerifyATAC.wdl new file mode 100644 index 0000000000..daf84254be --- /dev/null +++ b/verification/VerifyATAC.wdl @@ -0,0 +1,44 @@ +version 1.0 + +import "../verification/VerifyTasks.wdl" as VerifyTasks + +workflow VerifyATAC { + + input { + File test_atac_bam + File truth_atac_bam + + File test_fragment_file + File truth_fragment_file + + File test_atac_h5ad + File truth_atac_h5ad + + File test_atac_library_metrics + File truth_atac_library_metrics + + Boolean? done + } + + call VerifyTasks.CompareBams as CompareAtacBams { + input: + test_bam = test_atac_bam, + truth_bam = truth_atac_bam, + lenient_header = true + } + call VerifyTasks.CompareTabix as CompareFragment { + input: + test_fragment_file = test_fragment_file, + truth_fragment_file = truth_fragment_file + } + call VerifyTasks.CompareH5adFilesATAC as CompareH5adFilesATAC { + input: + test_h5ad = test_atac_h5ad, + truth_h5ad = truth_atac_h5ad + } + call VerifyTasks.CompareLibraryFiles as CompareLibraryMetrics { + input: + test_text_file = test_atac_library_metrics, + truth_text_file = truth_atac_library_metrics + } +} \ No newline at end of file diff --git a/verification/VerifyImputationBeagle.wdl b/verification/VerifyImputationBeagle.wdl new file mode 100644 index 0000000000..e99f1767c1 --- /dev/null +++ b/verification/VerifyImputationBeagle.wdl @@ -0,0 +1,79 @@ +version 1.0 + +import "../verification/VerifyTasks.wdl" as Tasks + +## Copyright Broad Institute, 2018 +## +## This WDL script is designed to verify (compare) the outputs of an ArrayWf wdl. +## +## +## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. +## For program versions, see docker containers. +## +## LICENSING : +## This script is released under the WDL source code license (BSD-3) (see LICENSE in +## https://github.com/broadinstitute/wdl). Note however that the programs it calls may +## be subject to different licenses. Users are responsible for checking that they are +## authorized to run all programs before running this script. Please see the docker +## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed +## licensing information pertaining to the included programs. + +workflow VerifyImputationBeagle { + input { + Array[File] truth_metrics + Array[File] test_metrics + + File truth_vcf + File test_vcf + File test_vcf_index + File truth_vcf_index + + Boolean? 
done + } + + String bcftools_docker_tag = "us.gcr.io/broad-gotc-prod/imputation-bcf-vcf:1.0.7-1.10.2-0.1.16-1669908889" + + scatter (idx in range(length(truth_metrics))) { + call CompareImputationMetrics { + input: + test_metrics = test_metrics[idx], + truth_metrics = truth_metrics[idx] + } + } + + call Tasks.CompareVcfs as CompareOutputVcfs { + input: + file1 = truth_vcf, + file2 = test_vcf, + patternForLinesToExcludeFromComparison = "##" # ignore headers + } + + output { + } + meta { + allowNestedInputs: true + } +} + +task CompareImputationMetrics { + input { + File test_metrics + File truth_metrics + } + command <<< + set -eo pipefail + + if ! diff "~{test_metrics}" "~{truth_metrics}"; + then + >&2 echo "Error: ~{test_metrics} and ~{truth_metrics} differ" + exit 1 + fi + >>> + + runtime { + docker: "ubuntu:20.04" + cpu: 1 + memory: "3.75 GiB" + disks: "local-disk 10 HDD" + } +}
diff --git a/verification/VerifyTasks.wdl b/verification/VerifyTasks.wdl index b4664611cf..46ed24373c 100644 --- a/verification/VerifyTasks.wdl +++ b/verification/VerifyTasks.wdl @@ -10,7 +10,7 @@ task CompareVcfs { command { set -eo pipefail - if [ -z ~{patternForLinesToExcludeFromComparison} ]; then + if [ -z '~{patternForLinesToExcludeFromComparison}' ]; then diff <(gunzip -c -f ~{file1}) <(gunzip -c -f ~{file2}) else echo "It's defined!" @@ -134,7 +134,7 @@ task CompareTabix { fi >>> runtime { - docker: "us.gcr.io/broad-gotc-prod/snapatac2:1.0.4-2.3.1-1700590229" + docker: "us.gcr.io/broad-gotc-prod/snapatac2:2.0.0" disks: "local-disk 100 HDD" memory: "50 GiB" preemptible: 3
diff --git a/verification/test-wdls/TestATAC.wdl b/verification/test-wdls/TestATAC.wdl new file mode 100644 index 0000000000..3ad0a1322e --- /dev/null +++ b/verification/test-wdls/TestATAC.wdl @@ -0,0 +1,157 @@ +version 1.0 + +import "../../pipelines/skylab/atac/atac.wdl" as ATAC +import "../../verification/VerifyATAC.wdl" as VerifyATAC +import "../../tasks/broad/Utilities.wdl" as Utilities +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy + +workflow TestATAC { + + input { + # Fastq inputs + Array[String] read1_fastq_gzipped + Array[String] read2_fastq_gzipped + Array[String] read3_fastq_gzipped + + # Output prefix/base name for all intermediate files and pipeline outputs + String input_id + String cloud_provider + # Additional library aliquot ID + String? atac_nhash_id + + #Expected cells from library preparation + Int atac_expected_cells = 3000 + + # Option for running files with preindex + Boolean preindex = false + + # BWA ref + File tar_bwa_reference + # BWA machine type -- to select number of splits + Int num_threads_bwa = 128 + Int mem_size_bwa = 512 + String cpu_platform_bwa = "Intel Ice Lake" + String vm_size + + # Text file containing chrom_sizes for genome build (i.e. 
hg38) + File chrom_sizes + #File for annotations for calculating ATAC TSSE + File annotations_gtf + # Whitelist + File whitelist + + # TrimAdapters input + String adapter_seq_read1 = "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG" + String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" + + # These values will be determined and injected into the inputs by the scala test framework + String truth_path + String results_path + Boolean update_truth + Boolean run_cellbender = false + } + + meta { + allowNestedInputs: true + } + + call ATAC.ATAC { + input: + read1_fastq_gzipped = read1_fastq_gzipped, + read2_fastq_gzipped = read2_fastq_gzipped, + read3_fastq_gzipped = read3_fastq_gzipped, + input_id = input_id, + cloud_provider = cloud_provider, + atac_nhash_id = atac_nhash_id, + atac_expected_cells = atac_expected_cells, + preindex = preindex, + tar_bwa_reference = tar_bwa_reference, + num_threads_bwa = num_threads_bwa, + mem_size_bwa = mem_size_bwa, + cpu_platform_bwa = cpu_platform_bwa, + vm_size = vm_size, + chrom_sizes = chrom_sizes, + annotations_gtf = annotations_gtf, + whitelist = whitelist, + adapter_seq_read1 = adapter_seq_read1, + adapter_seq_read3 = adapter_seq_read3 + } + + + # Collect all of the pipeline outputs into single Array[String] + Array[String] pipeline_outputs = flatten([ + [ # atac file outputs + ATAC.fragment_file, + ATAC.bam_aligned_output, + ATAC.snap_metrics, + ATAC.library_metrics_file + ], + ]) + + + # Collect all of the pipeline metrics into single Array[String] + Array[String] pipeline_metrics = flatten([ + [ # File outputs + ATAC.fragment_file, + ATAC.bam_aligned_output, + ATAC.snap_metrics, + ATAC.library_metrics_file + ], + ]) + + # Copy results of pipeline to test results bucket + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + destination_cloud_path = results_path + } + + # If updating truth then copy output to truth bucket + if (update_truth){ + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + destination_cloud_path = truth_path + } + } + + # This is achieved by passing each desired file/array[files] to GetValidationInputs + if (!update_truth){ + call Utilities.GetValidationInputs as GetAtacBam { + input: + input_file = ATAC.bam_aligned_output, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetFragmentFile { + input: + input_file = ATAC.fragment_file, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetSnapMetrics { + input: + input_file = ATAC.snap_metrics, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetAtacLibraryMetrics { + input: + input_file = ATAC.library_metrics_file, + results_path = results_path, + truth_path = truth_path + } + call VerifyATAC.VerifyATAC as Verify { + input: + truth_atac_bam = GetAtacBam.truth_file, + test_atac_bam = GetAtacBam.results_file, + truth_fragment_file = GetFragmentFile.truth_file, + test_fragment_file = GetFragmentFile.results_file, + truth_atac_h5ad = GetSnapMetrics.truth_file, + test_atac_h5ad = GetSnapMetrics.results_file, + truth_atac_library_metrics = GetAtacLibraryMetrics.truth_file, + test_atac_library_metrics = GetAtacLibraryMetrics.results_file, + done = CopyToTestResults.done + } + } +} diff --git a/verification/test-wdls/TestCramToUnmappedBams.wdl 
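Note: every Test* wrapper in this PR leans on Utilities.GetValidationInputs, whose implementation is not shown in this diff. The sketch below captures only its assumed behaviour, inferred from how the wrappers use it (truth_file/results_file for a single path, truth_files/results_files for arrays): each pipeline output is mapped to the copy with the same base name under results_path and under truth_path. The task body here is an illustrative stand-in, not the contents of tasks/broad/Utilities.wdl.

version 1.0

# Assumed behaviour only; see tasks/broad/Utilities.wdl for the real task.
task GetValidationInputs {
  input {
    String? input_file
    Array[String] input_files = []
    String results_path
    String truth_path
  }

  command <<<
    set -e -o pipefail
    touch results_files.txt truth_files.txt
    # Callers pass either input_file or input_files; at least one is expected.
    # For each pipeline output, point at the copy of that file under the
    # results bucket and under the truth bucket (same base name in both).
    for path in ~{default="" input_file} ~{sep=" " input_files}; do
      name=$(basename "${path}")
      echo "~{results_path}/${name}" >> results_files.txt
      echo "~{truth_path}/${name}" >> truth_files.txt
    done
  >>>

  output {
    String results_file = read_lines("results_files.txt")[0]
    String truth_file = read_lines("truth_files.txt")[0]
    Array[String] results_files = read_lines("results_files.txt")
    Array[String] truth_files = read_lines("truth_files.txt")
  }

  runtime {
    docker: "ubuntu:20.04"
    preemptible: 3
  }
}

Under that assumption, GetAtacBam.truth_file and GetAtacBam.results_file in TestATAC above refer to the same file name under truth_path and results_path respectively, which is exactly the pair VerifyATAC then compares once CopyToTestResults.done signals that the copy finished.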
b/verification/test-wdls/TestCramToUnmappedBams.wdl index 4a9927642e..ea80f74bda 100644 --- a/verification/test-wdls/TestCramToUnmappedBams.wdl +++ b/verification/test-wdls/TestCramToUnmappedBams.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/reprocessing/cram_to_unmapped_bams/CramToUnmappedBams.wdl" as CramToUnmappedBams import "../../verification/VerifyCramToUnmappedBamsUpdated.wdl" as VerifyCramToUnmappedBamsUpdated import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestCramToUnmappedBams { @@ -22,8 +22,6 @@ workflow TestCramToUnmappedBams { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -55,21 +53,17 @@ workflow TestCramToUnmappedBams { # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestExomeGermlineSingleSample.wdl b/verification/test-wdls/TestExomeGermlineSingleSample.wdl index 59110d09be..bb6424100b 100644 --- a/verification/test-wdls/TestExomeGermlineSingleSample.wdl +++ b/verification/test-wdls/TestExomeGermlineSingleSample.wdl @@ -3,187 +3,181 @@ version 1.0 import "../../pipelines/broad/dna_seq/germline/single_sample/exome/ExomeGermlineSingleSample.wdl" as ExomeGermlineSingleSample import "../../verification/VerifyGermlineSingleSample.wdl" as VerifyGermlineSingleSample import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestExomeGermlineSingleSample { - input { - PapiSettings papi_settings - SampleAndUnmappedBams sample_and_unmapped_bams - DNASeqSingleSampleReferences references - VariantCallingScatterSettings scatter_settings - - File? fingerprint_genotypes_file - File? 
fingerprint_genotypes_index - - File target_interval_list - File bait_interval_list - String bait_set_name - - Boolean provide_bam_output = false - - # These values will be determined and injected into the inputs by the scala test framework - String truth_path - String results_path - Boolean update_truth - String vault_token_path - String google_account_vault_path - String cloud_provider - } - - meta { - allowNestedInputs: true - } - - # Run the pipeline - call ExomeGermlineSingleSample.ExomeGermlineSingleSample { - input: - sample_and_unmapped_bams = sample_and_unmapped_bams, - references = references, - scatter_settings = scatter_settings, - fingerprint_genotypes_file = fingerprint_genotypes_file, - fingerprint_genotypes_index = fingerprint_genotypes_index, - papi_settings = papi_settings, - target_interval_list = target_interval_list, - bait_interval_list = bait_interval_list, - bait_set_name = bait_set_name, - provide_bam_output = provide_bam_output, - cloud_provider = cloud_provider - } - - # Collect all of the pipeline outputs into a single Array[String]] - Array[String] pipeline_outputs = flatten([ - [ # File outputs - ExomeGermlineSingleSample.selfSM, - ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, - ExomeGermlineSingleSample.agg_quality_distribution_pdf, - ExomeGermlineSingleSample.calculate_read_group_checksum_md5, - ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, - ExomeGermlineSingleSample.agg_quality_distribution_pdf, - ExomeGermlineSingleSample.output_cram, - ExomeGermlineSingleSample.output_cram_index, - ExomeGermlineSingleSample.output_cram_md5, - ExomeGermlineSingleSample.validate_cram_file_report, - ExomeGermlineSingleSample.output_vcf, - ExomeGermlineSingleSample.output_vcf_index - ], # Array[File] outputs - ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_pdf, - ExomeGermlineSingleSample.unsorted_read_group_insert_size_histogram_pdf, - ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_pdf, - ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_pdf, - # File? outputs - select_all([ExomeGermlineSingleSample.output_bqsr_reports]), - select_all([ExomeGermlineSingleSample.output_bam]), - select_all([ExomeGermlineSingleSample.output_bam_index]), - ]) - - # Collect all of the pipeline metrics into a single Array[String] - Array[String] pipeline_metrics = flatten([ - [ # File outputs - ExomeGermlineSingleSample.read_group_alignment_summary_metrics, - ExomeGermlineSingleSample.agg_alignment_summary_metrics, - ExomeGermlineSingleSample.agg_bait_bias_detail_metrics, - ExomeGermlineSingleSample.agg_bait_bias_summary_metrics, - ExomeGermlineSingleSample.agg_insert_size_metrics, - ExomeGermlineSingleSample.agg_pre_adapter_detail_metrics, - ExomeGermlineSingleSample.agg_pre_adapter_summary_metrics, - ExomeGermlineSingleSample.agg_quality_distribution_metrics, - ExomeGermlineSingleSample.agg_error_summary_metrics, - ExomeGermlineSingleSample.duplicate_metrics, - ExomeGermlineSingleSample.gvcf_summary_metrics, - ExomeGermlineSingleSample.gvcf_detail_metrics, - ExomeGermlineSingleSample.hybrid_selection_metrics, - ], # Array[File] outputs - ExomeGermlineSingleSample.quality_yield_metrics, - ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_metrics, - ExomeGermlineSingleSample.unsorted_read_group_insert_size_metrics, - ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_metrics, - ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_metrics, - # File? 
outputs - select_all([ExomeGermlineSingleSample.cross_check_fingerprints_metrics]), - select_all([ExomeGermlineSingleSample.fingerprint_summary_metrics]), - select_all([ExomeGermlineSingleSample.fingerprint_detail_metrics]), - ]) - - # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { - input: - files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, - contamination = ExomeGermlineSingleSample.contamination, - destination_cloud_path = results_path - } - - # If updating truth then copy pipeline results to truth bucket - if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { - input: - files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, - contamination = ExomeGermlineSingleSample.contamination, - destination_cloud_path = truth_path - } - } - - # If not updating truth then we need to collect all input for the validation WDL - # This is achieved by passing each desired file/array[files] to GetValidationInputs - if (!update_truth){ - call Utilities.GetValidationInputs as GetMetricsInputs { - input: - input_files = pipeline_metrics, - results_path = results_path, - truth_path = truth_path + input { + PapiSettings papi_settings + SampleAndUnmappedBams sample_and_unmapped_bams + DNASeqSingleSampleReferences references + VariantCallingScatterSettings scatter_settings + + File? fingerprint_genotypes_file + File? fingerprint_genotypes_index + + File target_interval_list + File bait_interval_list + String bait_set_name + + Boolean provide_bam_output = false + + # These values will be determined and injected into the inputs by the scala test framework + String truth_path + String results_path + Boolean update_truth + String cloud_provider } - call Utilities.GetValidationInputs as GetCrams { - input: - input_file = ExomeGermlineSingleSample.output_cram, - results_path = results_path, - truth_path = truth_path + meta { + allowNestedInputs: true } - call Utilities.GetValidationInputs as GetCrais { - input: - input_file = ExomeGermlineSingleSample.output_cram_index, - results_path = results_path, - truth_path = truth_path + # Run the pipeline + call ExomeGermlineSingleSample.ExomeGermlineSingleSample { + input: + sample_and_unmapped_bams = sample_and_unmapped_bams, + references = references, + scatter_settings = scatter_settings, + fingerprint_genotypes_file = fingerprint_genotypes_file, + fingerprint_genotypes_index = fingerprint_genotypes_index, + papi_settings = papi_settings, + target_interval_list = target_interval_list, + bait_interval_list = bait_interval_list, + bait_set_name = bait_set_name, + provide_bam_output = provide_bam_output, + cloud_provider = cloud_provider } - call Utilities.GetValidationInputs as GetGVCFs { - input: - input_file = ExomeGermlineSingleSample.output_vcf, - results_path = results_path, - truth_path = truth_path + # Collect all of the pipeline outputs into a single Array[String]] + Array[String] pipeline_outputs = flatten([ + [ # File outputs + ExomeGermlineSingleSample.selfSM, + ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, + ExomeGermlineSingleSample.agg_quality_distribution_pdf, + ExomeGermlineSingleSample.calculate_read_group_checksum_md5, + ExomeGermlineSingleSample.agg_insert_size_histogram_pdf, + ExomeGermlineSingleSample.agg_quality_distribution_pdf, + 
ExomeGermlineSingleSample.output_cram, + ExomeGermlineSingleSample.output_cram_index, + ExomeGermlineSingleSample.output_cram_md5, + ExomeGermlineSingleSample.validate_cram_file_report, + ExomeGermlineSingleSample.output_vcf, + ExomeGermlineSingleSample.output_vcf_index + ], # Array[File] outputs + ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_pdf, + ExomeGermlineSingleSample.unsorted_read_group_insert_size_histogram_pdf, + ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_pdf, + ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_pdf, + # File? outputs + select_all([ExomeGermlineSingleSample.output_bqsr_reports]), + select_all([ExomeGermlineSingleSample.output_bam]), + select_all([ExomeGermlineSingleSample.output_bam_index]), + ]) + + # Collect all of the pipeline metrics into a single Array[String] + Array[String] pipeline_metrics = flatten([ + [ # File outputs + ExomeGermlineSingleSample.read_group_alignment_summary_metrics, + ExomeGermlineSingleSample.agg_alignment_summary_metrics, + ExomeGermlineSingleSample.agg_bait_bias_detail_metrics, + ExomeGermlineSingleSample.agg_bait_bias_summary_metrics, + ExomeGermlineSingleSample.agg_insert_size_metrics, + ExomeGermlineSingleSample.agg_pre_adapter_detail_metrics, + ExomeGermlineSingleSample.agg_pre_adapter_summary_metrics, + ExomeGermlineSingleSample.agg_quality_distribution_metrics, + ExomeGermlineSingleSample.agg_error_summary_metrics, + ExomeGermlineSingleSample.duplicate_metrics, + ExomeGermlineSingleSample.gvcf_summary_metrics, + ExomeGermlineSingleSample.gvcf_detail_metrics, + ExomeGermlineSingleSample.hybrid_selection_metrics, + ], # Array[File] outputs + ExomeGermlineSingleSample.quality_yield_metrics, + ExomeGermlineSingleSample.unsorted_read_group_base_distribution_by_cycle_metrics, + ExomeGermlineSingleSample.unsorted_read_group_insert_size_metrics, + ExomeGermlineSingleSample.unsorted_read_group_quality_by_cycle_metrics, + ExomeGermlineSingleSample.unsorted_read_group_quality_distribution_metrics, + # File? 
outputs + select_all([ExomeGermlineSingleSample.cross_check_fingerprints_metrics]), + select_all([ExomeGermlineSingleSample.fingerprint_summary_metrics]), + select_all([ExomeGermlineSingleSample.fingerprint_detail_metrics]), + ]) + + # Copy results of pipeline to test results bucket + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + contamination = ExomeGermlineSingleSample.contamination, + destination_cloud_path = results_path } - call Utilities.GetValidationInputs as GetGVCFIndexes { - input: - input_file = ExomeGermlineSingleSample.output_vcf_index, - results_path = results_path, - truth_path = truth_path + # If updating truth then copy pipeline results to truth bucket + if (update_truth){ + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + contamination = ExomeGermlineSingleSample.contamination, + destination_cloud_path = truth_path + } } - - # done is dummy input to force copy completion before verification - call VerifyGermlineSingleSample.VerifyGermlineSingleSample as Verify { - input: - truth_metrics = GetMetricsInputs.truth_files, - truth_cram = GetCrams.truth_file, - truth_crai = GetCrais.truth_file, - truth_gvcf = GetGVCFs.truth_file, - truth_gvcf_index = GetGVCFIndexes.truth_file, - test_metrics = GetMetricsInputs.results_files, - test_cram = GetCrams.results_file, - test_crai = GetCrais.results_file, - test_gvcf = GetGVCFs.results_file, - test_gvcf_index = GetGVCFIndexes.results_file, - done = CopyToTestResults.done + + # If not updating truth then we need to collect all input for the validation WDL + # This is achieved by passing each desired file/array[files] to GetValidationInputs + if (!update_truth){ + call Utilities.GetValidationInputs as GetMetricsInputs { + input: + input_files = pipeline_metrics, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetCrams { + input: + input_file = ExomeGermlineSingleSample.output_cram, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetCrais { + input: + input_file = ExomeGermlineSingleSample.output_cram_index, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetGVCFs { + input: + input_file = ExomeGermlineSingleSample.output_vcf, + results_path = results_path, + truth_path = truth_path + } + + call Utilities.GetValidationInputs as GetGVCFIndexes { + input: + input_file = ExomeGermlineSingleSample.output_vcf_index, + results_path = results_path, + truth_path = truth_path + } + + # done is dummy input to force copy completion before verification + call VerifyGermlineSingleSample.VerifyGermlineSingleSample as Verify { + input: + truth_metrics = GetMetricsInputs.truth_files, + truth_cram = GetCrams.truth_file, + truth_crai = GetCrais.truth_file, + truth_gvcf = GetGVCFs.truth_file, + truth_gvcf_index = GetGVCFIndexes.truth_file, + test_metrics = GetMetricsInputs.results_files, + test_cram = GetCrams.results_file, + test_crai = GetCrais.results_file, + test_gvcf = GetGVCFs.results_file, + test_gvcf_index = GetGVCFIndexes.results_file, + done = CopyToTestResults.done + } } - } - output { - Array[File]? metric_comparison_report_files = Verify.metric_comparison_report_files - } + output { + Array[File]? 
metric_comparison_report_files = Verify.metric_comparison_report_files + } -} +} \ No newline at end of file diff --git a/verification/test-wdls/TestExomeReprocessing.wdl b/verification/test-wdls/TestExomeReprocessing.wdl index 44905716ad..17d56a44ef 100644 --- a/verification/test-wdls/TestExomeReprocessing.wdl +++ b/verification/test-wdls/TestExomeReprocessing.wdl @@ -3,7 +3,7 @@ version 1.0 import "../../pipelines/broad/reprocessing/exome/ExomeReprocessing.wdl" as ExomeReprocessing import "../../verification/VerifyExomeReprocessing.wdl" as VerifyExomeReprocessing import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy import "../../structs/dna_seq/DNASeqStructs.wdl" @@ -39,8 +39,6 @@ workflow TestExomeReprocessing { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } @@ -124,21 +122,17 @@ workflow TestExomeReprocessing { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy pipeline results to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } diff --git a/verification/test-wdls/TestIlluminaGenotypingArray.wdl b/verification/test-wdls/TestIlluminaGenotypingArray.wdl index f70710653f..46b8c680e6 100644 --- a/verification/test-wdls/TestIlluminaGenotypingArray.wdl +++ b/verification/test-wdls/TestIlluminaGenotypingArray.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/genotyping/illumina/IlluminaGenotypingArray.wdl" as IlluminaGenotypingArray import "../../verification/VerifyIlluminaGenotypingArray.wdl" as VerifyIlluminaGenotypingArray import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestIlluminaGenotypingArray { @@ -46,14 +46,13 @@ workflow TestIlluminaGenotypingArray { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { allowNestedInputs: true } - + + call IlluminaGenotypingArray.IlluminaGenotypingArray { input: sample_alias = sample_alias, @@ -88,10 +87,9 @@ workflow TestIlluminaGenotypingArray { disk_size = disk_size, preemptible_tries = preemptible_tries, genotype_concordance_threshold = genotype_concordance_threshold - } - + # Collect all of the pipeline outputs into single Array[String] Array[String] pipeline_outputs = flatten([ [ # File outputs @@ -108,7 +106,7 @@ workflow TestIlluminaGenotypingArray { select_all([IlluminaGenotypingArray.output_vcf_md5_cloud_path]), ]) - + # Collect all of the pipeline metrics into single Array[String] Array[String] pipeline_metrics = flatten([ # File? 
outputs @@ -127,21 +125,17 @@ workflow TestIlluminaGenotypingArray { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } - + # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { - input: + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = truth_path } } @@ -185,29 +179,25 @@ workflow TestIlluminaGenotypingArray { results_path = results_path, truth_path = truth_path } - call VerifyIlluminaGenotypingArray.VerifyIlluminaGenotypingArray as Verify { input: - truth_metrics = GetMetrics.truth_files, + truth_metrics = GetMetrics.truth_files, test_metrics = GetMetrics.results_files, - truth_gtc = GetGtc.truth_file, + truth_gtc = GetGtc.truth_file, test_gtc = GetGtc.results_file, - truth_vcf = GetVcf.truth_file, + truth_vcf = GetVcf.truth_file, test_vcf = GetVcf.results_file, - truth_fp_vcf = GetFpVcf.truth_file, + truth_fp_vcf = GetFpVcf.truth_file, test_fp_vcf = GetFpVcf.results_file, - truth_red_idat_md5 = GetRedIdatMd5.truth_file, + truth_red_idat_md5 = GetRedIdatMd5.truth_file, test_red_idat_md5 = GetRedIdatMd5.results_file, - truth_green_idat_md5 = GetGreenIdatMd5.truth_file, + truth_green_idat_md5 = GetGreenIdatMd5.truth_file, test_green_idat_md5 = GetGreenIdatMd5.results_file, bead_pool_manifest_file = bead_pool_manifest_file, done = CopyToTestResults.done } - } - - - - + output { + } } \ No newline at end of file diff --git a/verification/test-wdls/TestImputation.wdl b/verification/test-wdls/TestImputation.wdl index 5d340b333d..98b987b99c 100644 --- a/verification/test-wdls/TestImputation.wdl +++ b/verification/test-wdls/TestImputation.wdl @@ -4,7 +4,7 @@ version 1.0 import "../../pipelines/broad/arrays/imputation/Imputation.wdl" as Imputation import "../../verification/VerifyImputation.wdl" as VerifyImputation import "../../tasks/broad/Utilities.wdl" as Utilities -import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy workflow TestImputation { @@ -37,8 +37,6 @@ workflow TestImputation { String truth_path String results_path Boolean update_truth - String vault_token_path - String google_account_vault_path } meta { @@ -98,21 +96,17 @@ workflow TestImputation { ]) # Copy results of pipeline to test results bucket - call Copy.CopyFilesFromCloudToCloud as CopyToTestResults { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = results_path } # If updating truth then copy output to truth bucket if (update_truth){ - call Copy.CopyFilesFromCloudToCloud as CopyToTruth { + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { input: files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), - vault_token_path = vault_token_path, - google_account_vault_path = google_account_vault_path, destination_cloud_path = 
truth_path } } diff --git a/verification/test-wdls/TestImputationBeagle.wdl b/verification/test-wdls/TestImputationBeagle.wdl new file mode 100644 index 0000000000..2d56d858aa --- /dev/null +++ b/verification/test-wdls/TestImputationBeagle.wdl @@ -0,0 +1,111 @@ +version 1.0 + + +import "../../pipelines/broad/arrays/imputation_beagle/ImputationBeagle.wdl" as ImputationBeagle +import "../../verification/VerifyImputationBeagle.wdl" as VerifyImputationBeagle +import "../../tasks/broad/Utilities.wdl" as Utilities +import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy + +workflow TestImputationBeagle { + + input { + Int chunkLength = 25000000 + Int chunkOverlaps = 5000000 # this is the padding that will be added to the beginning and end of each chunk to reduce edge effects + + File multi_sample_vcf + + File ref_dict # for reheadering / adding contig lengths in the header of the ouptut VCF, and calculating contig lengths + Array[String] contigs + String reference_panel_path_prefix # path + file prefix to the bucket where the reference panel files are stored for all contigs + String genetic_maps_path # path to the bucket where genetic maps are stored for all contigs + String output_basename # the basename for intermediate and output files + + # These values will be determined and injected into the inputs by the scala test framework + String truth_path + String results_path + Boolean update_truth + } + + meta { + allowNestedInputs: true + } + + call ImputationBeagle.ImputationBeagle { + input: + chunkLength = chunkLength, + chunkOverlaps = chunkOverlaps, + multi_sample_vcf = multi_sample_vcf, + ref_dict = ref_dict, + contigs = contigs, + reference_panel_path_prefix = reference_panel_path_prefix, + genetic_maps_path = genetic_maps_path, + output_basename = output_basename, + } + + + # Collect all of the pipeline outputs into single Array[String] + Array[String] pipeline_outputs = flatten([ + [ # File outputs + ImputationBeagle.imputed_multi_sample_vcf, + ImputationBeagle.imputed_multi_sample_vcf_index, + ] + ]) + + + # Collect all of the pipeline metrics into single Array[String] + Array[String] pipeline_metrics = flatten([ + [ # File outputs + ImputationBeagle.chunks_info, + ] + ]) + + # Copy results of pipeline to test results bucket + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + destination_cloud_path = results_path + } + + # If updating truth then copy output to truth bucket + if (update_truth){ + call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth { + input: + files_to_copy = flatten([pipeline_outputs, pipeline_metrics]), + destination_cloud_path = truth_path + } + } + + # This is achieved by passing each desired file/array[files] to GetValidationInputs + if (!update_truth){ + call Utilities.GetValidationInputs as GetMetrics { + input: + input_files = pipeline_metrics, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetVcf { + input: + input_file = ImputationBeagle.imputed_multi_sample_vcf, + results_path = results_path, + truth_path = truth_path + } + call Utilities.GetValidationInputs as GetVcfIndex { + input: + input_file = ImputationBeagle.imputed_multi_sample_vcf_index, + results_path = results_path, + truth_path = truth_path + } + + + call VerifyImputationBeagle.VerifyImputationBeagle as Verify { + input: + truth_metrics = GetMetrics.truth_files, + test_metrics = GetMetrics.results_files, + truth_vcf = GetVcf.truth_file, + 
test_vcf = GetVcf.results_file,
+      truth_vcf_index = GetVcfIndex.truth_file,
+      test_vcf_index = GetVcfIndex.results_file,
+      done = CopyToTestResults.done
+    }
+  }
+}
diff --git a/verification/test-wdls/TestJointGenotyping.wdl b/verification/test-wdls/TestJointGenotyping.wdl
index 389d7307b6..6951be0056 100644
--- a/verification/test-wdls/TestJointGenotyping.wdl
+++ b/verification/test-wdls/TestJointGenotyping.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/germline/joint_genotyping/JointGenotyping.wdl" as JointGenotyping
 import "../../verification/VerifyJointGenotyping.wdl" as VerifyJointGenotyping
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestJointGenotyping {
@@ -60,8 +60,6 @@ workflow TestJointGenotyping {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -142,21 +140,17 @@ workflow TestJointGenotyping {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
index 3c98269a4b..f40494dc12 100644
--- a/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
+++ b/verification/test-wdls/TestMultiSampleSmartSeq2SingleNucleus.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.wdl" as MultiSampleSmartSeq2SingleNucleus
 import "../../verification/VerifyMultiSampleSmartSeq2SingleNucleus.wdl" as VerifyMultiSampleSmartSeq2SingleNucleus
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestMultiSampleSmartSeq2SingleNucleus {
@@ -31,8 +31,6 @@ workflow TestMultiSampleSmartSeq2SingleNucleus {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
     String cloud_provider
   }
@@ -79,21 +77,17 @@ workflow TestMultiSampleSmartSeq2SingleNucleus {
 
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
        files_to_copy = flatten([pipeline_outputs]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
        destination_cloud_path = truth_path
    }
  }
diff --git a/verification/test-wdls/TestMultiome.wdl b/verification/test-wdls/TestMultiome.wdl
index e710bb1942..9479a9c031 100644
--- a/verification/test-wdls/TestMultiome.wdl
+++ b/verification/test-wdls/TestMultiome.wdl
@@ -4,7 +4,8 @@ version 1.0
 import "../../pipelines/skylab/multiome/Multiome.wdl" as Multiome
 import "../../verification/VerifyMultiome.wdl" as VerifyMultiome
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
+
 
 
 workflow TestMultiome {
@@ -49,8 +50,6 @@ workflow TestMultiome {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
     Boolean run_cellbender = false
     Boolean run_peak_calling = false
@@ -124,21 +123,17 @@ workflow TestMultiome {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestOptimus.wdl b/verification/test-wdls/TestOptimus.wdl
index c980face75..79487d446e 100644
--- a/verification/test-wdls/TestOptimus.wdl
+++ b/verification/test-wdls/TestOptimus.wdl
@@ -3,7 +3,7 @@ version 1.0
 import "../../tasks/broad/Utilities.wdl" as Utilities
 import "../../pipelines/skylab/optimus/Optimus.wdl" as Optimus
 import "../../verification/VerifyOptimus.wdl" as VerifyOptimus
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestOptimus {
@@ -57,8 +57,6 @@ workflow TestOptimus {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
     String cloud_provider
 
@@ -120,21 +118,17 @@ Array[String] pipeline_outputs = flatten([
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy pipeline results to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
      input:
        files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
        destination_cloud_path = truth_path
    }
  }
diff --git a/verification/test-wdls/TestPairedTag.wdl b/verification/test-wdls/TestPairedTag.wdl
index 9fcb2ebbd5..8ec9ae364a 100644
--- a/verification/test-wdls/TestPairedTag.wdl
+++ b/verification/test-wdls/TestPairedTag.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/skylab/paired_tag/PairedTag.wdl" as PairedTag
 import "../../verification/VerifyPairedTag.wdl" as VerifyPairedTag
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestPairedTag {
@@ -52,8 +52,6 @@ workflow TestPairedTag {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
     Boolean run_cellbender = false
     String cloud_provider
@@ -127,21 +125,17 @@ workflow TestPairedTag {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl
index e9eedd5aa2..babae40fdc 100644
--- a/verification/test-wdls/TestRNAWithUMIsPipeline.wdl
+++ b/verification/test-wdls/TestRNAWithUMIsPipeline.wdl
@@ -2,7 +2,7 @@ version 1.0
 import "../../tasks/broad/Utilities.wdl" as Utilities
 import "../../verification/VerifyRNAWithUMIs.wdl" as VerifyRNAWithUMIs
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 import "../../pipelines/broad/rna_seq/RNAWithUMIsPipeline.wdl" as RNAWithUMIsPipeline
 
 
 workflow TestRNAWithUMIsPipeline {
@@ -48,8 +48,6 @@ workflow TestRNAWithUMIsPipeline {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -110,21 +108,17 @@ workflow TestRNAWithUMIsPipeline {
   Array[String] pipeline_text_metrics = select_all([RNAWithUMIsPipeline.rnaseqc2_metrics])
 
   #Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics, pipeline_text_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy pipeline results to truth bucket
   if (update_truth) {
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics, pipeline_text_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestReblockGVCF.wdl b/verification/test-wdls/TestReblockGVCF.wdl
index e35ccad0af..eac450ebfb 100644
--- a/verification/test-wdls/TestReblockGVCF.wdl
+++ b/verification/test-wdls/TestReblockGVCF.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/germline/joint_genotyping/reblocking/ReblockGVCF.wdl" as ReblockGVCF
 import "../../verification/VerifyGvcf.wdl" as VerifyGvcf
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestReblockGVCF {
@@ -25,8 +25,6 @@ workflow TestReblockGVCF {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
     String cloud_provider
   }
@@ -63,21 +61,17 @@ workflow TestReblockGVCF {
 
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestSlideSeq.wdl b/verification/test-wdls/TestSlideSeq.wdl
index b0523fee21..96a53bd7c2 100644
--- a/verification/test-wdls/TestSlideSeq.wdl
+++ b/verification/test-wdls/TestSlideSeq.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/skylab/slideseq/SlideSeq.wdl" as SlideSeq
 import "../../verification/VerifySlideSeq.wdl" as VerifySlideSeq
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestSlideSeq {
@@ -24,8 +24,6 @@ workflow TestSlideSeq {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
    String cloud_provider
  }
@@ -75,21 +73,17 @@ workflow TestSlideSeq {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl b/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl
index de9899439b..e8908b92de 100644
--- a/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl
+++ b/verification/test-wdls/TestUltimaGenomicsJointGenotyping.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/germline/joint_genotyping/UltimaGenomics/UltimaGenomicsJointGenotyping.wdl" as UltimaGenomicsJointGenotyping
 import "../../verification/VerifyUltimaGenomicsJointGenotyping.wdl" as VerifyUltimaGenomicsJointGenotyping
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestUltimaGenomicsJointGenotyping {
@@ -46,8 +46,6 @@ workflow TestUltimaGenomicsJointGenotyping {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -115,21 +113,17 @@ workflow TestUltimaGenomicsJointGenotyping {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl b/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
index 5203abb500..5275b62cee 100644
--- a/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
+++ b/verification/test-wdls/TestUltimaGenomicsWholeGenomeCramOnly.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/somatic/single_sample/ugwgs/UltimaGenomicsWholeGenomeCramOnly.wdl" as UltimaGenomicsWholeGenomeCramOnly
 import "../../verification/VerifyUltimaGenomicsWholeGenomeCramOnly.wdl" as VerifyUltimaGenomicsWholeGenomeCramOnly
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestUltimaGenomicsWholeGenomeCramOnly {
@@ -23,8 +23,6 @@ workflow TestUltimaGenomicsWholeGenomeCramOnly {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -80,21 +78,17 @@ workflow TestUltimaGenomicsWholeGenomeCramOnly {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl b/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl
index 9e1af52645..5842a52acf 100644
--- a/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl
+++ b/verification/test-wdls/TestUltimaGenomicsWholeGenomeGermline.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/germline/single_sample/ugwgs/UltimaGenomicsWholeGenomeGermline.wdl" as UltimaGenomicsWholeGenomeGermline
 import "../../verification/VerifyUltimaGenomicsWholeGenomeGermline.wdl" as VerifyUltimaGenomicsWholeGenomeGermline
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestUltimaGenomicsWholeGenomeGermline {
@@ -26,8 +26,6 @@ workflow TestUltimaGenomicsWholeGenomeGermline {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -92,21 +90,17 @@ workflow TestUltimaGenomicsWholeGenomeGermline {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestVariantCalling.wdl b/verification/test-wdls/TestVariantCalling.wdl
index 3054e0a1b9..9a79ac4d68 100644
--- a/verification/test-wdls/TestVariantCalling.wdl
+++ b/verification/test-wdls/TestVariantCalling.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/germline/variant_calling/VariantCalling.wdl" as VariantCalling
 import "../../verification/VerifyGvcf.wdl" as VerifyGvcf
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestVariantCalling {
@@ -37,8 +37,6 @@ workflow TestVariantCalling {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
 
     String cloud_provider
   }
@@ -99,21 +97,17 @@ workflow TestVariantCalling {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
index 16b54c3876..9c5c44cf97 100644
--- a/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
+++ b/verification/test-wdls/TestWholeGenomeGermlineSingleSample.wdl
@@ -3,7 +3,7 @@ version 1.0
 import "../../pipelines/broad/dna_seq/germline/single_sample/wgs/WholeGenomeGermlineSingleSample.wdl" as WholeGenomeGermlineSingleSample
 import "../../verification/VerifyGermlineSingleSample.wdl" as VerifyGermlineSingleSample
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestWholeGenomeGermlineSingleSample {
@@ -38,8 +38,6 @@ workflow TestWholeGenomeGermlineSingleSample {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -132,22 +130,18 @@ workflow TestWholeGenomeGermlineSingleSample {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       contamination = WholeGenomeGermlineSingleSample.contamination,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy pipeline results to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         contamination = WholeGenomeGermlineSingleSample.contamination,
         destination_cloud_path = truth_path
     }
diff --git a/verification/test-wdls/TestWholeGenomeReprocessing.wdl b/verification/test-wdls/TestWholeGenomeReprocessing.wdl
index bc5566d18e..12f3db9bfb 100644
--- a/verification/test-wdls/TestWholeGenomeReprocessing.wdl
+++ b/verification/test-wdls/TestWholeGenomeReprocessing.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/broad/reprocessing/wgs/WholeGenomeReprocessing.wdl" as WholeGenomeReprocessing
 import "../../verification/VerifyExomeReprocessing.wdl" as VerifyExomeReprocessing
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow TestWholeGenomeReprocessing {
@@ -30,8 +30,6 @@ workflow TestWholeGenomeReprocessing {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -124,21 +122,17 @@ workflow TestWholeGenomeReprocessing {
   ])
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs, pipeline_metrics]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/verification/test-wdls/Testsnm3C.wdl b/verification/test-wdls/Testsnm3C.wdl
index b8bfccb705..c65ee4471b 100644
--- a/verification/test-wdls/Testsnm3C.wdl
+++ b/verification/test-wdls/Testsnm3C.wdl
@@ -4,7 +4,7 @@ version 1.0
 import "../../pipelines/skylab/snm3C/snm3C.wdl" as snm3C
 import "../../verification/Verifysnm3C.wdl" as Verifysnm3C
 import "../../tasks/broad/Utilities.wdl" as Utilities
-import "../../tasks/broad/CopyFilesFromCloudToCloud.wdl" as Copy
+import "../../tasks/broad/TerraCopyFilesFromCloudToCloud.wdl" as Copy
 
 
 workflow Testsnm3C {
@@ -33,8 +33,6 @@ workflow Testsnm3C {
     String truth_path
     String results_path
     Boolean update_truth
-    String vault_token_path
-    String google_account_vault_path
   }
 
   meta {
@@ -84,21 +82,17 @@ workflow Testsnm3C {
 
 
   # Copy results of pipeline to test results bucket
-  call Copy.CopyFilesFromCloudToCloud as CopyToTestResults {
+  call Copy.TerraCopyFilesFromCloudToCloud as CopyToTestResults {
     input:
       files_to_copy = flatten([pipeline_outputs]),
-      vault_token_path = vault_token_path,
-      google_account_vault_path = google_account_vault_path,
       destination_cloud_path = results_path
   }
 
   # If updating truth then copy output to truth bucket
   if (update_truth){
-    call Copy.CopyFilesFromCloudToCloud as CopyToTruth {
+    call Copy.TerraCopyFilesFromCloudToCloud as CopyToTruth {
       input:
         files_to_copy = flatten([pipeline_outputs]),
-        vault_token_path = vault_token_path,
-        google_account_vault_path = google_account_vault_path,
         destination_cloud_path = truth_path
     }
   }
diff --git a/website/docs/Pipelines/Multiome_Pipeline/README.md b/website/docs/Pipelines/Multiome_Pipeline/README.md
index 2952e36c0c..a695713d83 100644
--- a/website/docs/Pipelines/Multiome_Pipeline/README.md
+++ b/website/docs/Pipelines/Multiome_Pipeline/README.md
@@ -7,7 +7,7 @@ slug: /Pipelines/Multiome_Pipeline/README
 
 | Pipeline Version | Date Updated | Documentation Author | Questions or Feedback |
 | :----: | :---: | :----: | :--------------: |
-| [Multiome v5.9.6](https://github.com/broadinstitute/warp/releases) | January, 2025 | WARP Pipelines | Please [file an issue in WARP](https://github.com/broadinstitute/warp/issues). |
+| [Multiome v5.11.0](https://github.com/broadinstitute/warp/releases) | February, 2025 | WARP Pipelines | Please [file an issue in WARP](https://github.com/broadinstitute/warp/issues). |
 
 ![Multiome_diagram](./multiome_diagram.png)
 
@@ -82,7 +82,7 @@ Multiome can be deployed using [Cromwell](https://cromwell.readthedocs.io/en/sta
 | adapter_seq_read1 | Optional string describing the adapter sequence for ATAC read 1 paired-end reads to be used during adapter trimming with Cutadapt; default is "GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG". | String |
 | adapter_seq_read3 | Optional string describing the adapter sequence for ATAC read 2 paired-end reads to be used during adapter trimming with Cutadapt; default is "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG". | String |
 | run_cellbender | Optional boolean used to determine if the Optimus (GEX) pipeline should run CellBender on the output gene expression h5ad file, `h5ad_output_file_gex`; default is "false". | Boolean |
-| run_peak_calling | Optional boolean used to determine if the ATAC pipeline should run Peak Calling; default is "false". | Boolean |
+| run_peak_calling | Optional boolean used to determine if the ATAC pipeline should run Peak Calling; default is `false`. When set to true, the pipeline takes the ATAC h5ad produced by the JoinBarcodes task and performs peak calling to produce a cell by bin matrix and a cell by peak matrix. | Boolean |
 | vm_size | String defining the Azure virtual machine family for the workflow (default: "Standard_M128s"). | String |
@@ -136,6 +136,8 @@ The Multiome workflow calls two WARP subworkflows, one external subworkflow (opt
 | metrics_csv_array | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. |
 | output_directory | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. |
 | summary_pdf | `` | Optional output produced when `run_cellbender` is "true"; see CellBender [documentation](https://cellbender.readthedocs.io/en/latest/usage/index.html) and [GitHub repository](https://github.com/broadinstitute/CellBender/tree/master) for more information. |
+| cellbybin_h5ad_file | h5ad | Cell by bin matrix produced by SnapATAC2 peak calling. This matrix contains (unmerged) peaks in the MACS3 unstructured metadata (adata.uns['MACS3']). The matrix consists of insertion counts per 500 bp genomic bin and cell barcode. |
+| cellbypeak_h5ad_file | h5ad | Cell by peak matrix produced by SnapATAC2 peak calling. This matrix contains insertion counts per (merged) peak coordinates and per cell barcode. |
diff --git a/website/yarn.lock b/website/yarn.lock
index a31c2843fd..ed982ba74d 100644
--- a/website/yarn.lock
+++ b/website/yarn.lock
@@ -8443,17 +8443,10 @@ send@0.19.0:
     range-parser "~1.2.1"
     statuses "2.0.1"
 
-serialize-javascript@^6.0.0:
-  version "6.0.0"
-  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.0.tgz#efae5d88f45d7924141da8b5c3a7a7e663fefeb8"
-  integrity sha512-Qr3TosvguFt8ePWqsvRfrKyQXIiW+nGbYpy8XK24NQHE83caxWt+mIymTT19DGFbNWNLfEwsrkSmN64lVWB9ag==
-  dependencies:
-    randombytes "^2.1.0"
-
-serialize-javascript@^6.0.1:
-  version "6.0.1"
-  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.1.tgz#b206efb27c3da0b0ab6b52f48d170b7996458e5c"
-  integrity sha512-owoXEFjWRllis8/M1Q+Cw5k8ZH40e3zhp/ovX+Xr/vi1qj6QesbyXXViFbpNvWvPNAD62SutwEXavefrLJWj7w==
+serialize-javascript@^6.0.0, serialize-javascript@^6.0.1:
+  version "6.0.2"
+  resolved "https://registry.yarnpkg.com/serialize-javascript/-/serialize-javascript-6.0.2.tgz#defa1e055c83bf6d59ea805d8da862254eb6a6c2"
+  integrity sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==
   dependencies:
     randombytes "^2.1.0"