From 4023c51408bfc7c31b54841456001e001a719d00 Mon Sep 17 00:00:00 2001
From: hocinebendou
Date: Tue, 23 Aug 2016 09:36:32 +0200
Subject: [PATCH 1/8] CWL wrapper for snpEff. VCF Annotation tool.

---
Review note (commentary only, ignored by git am): in this revision the
`stdout` expression reads `inputs.inputfile`, but no input with that name is
declared, and the `summary_html` glob reads `input.stats_filename` rather than
`inputs.stats_filename`. `type: num` is corrected to `enum` in 3/8; both
expressions are replaced in 4/8 and 8/8.

 snpEff.cwl | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 snpEff.cwl

diff --git a/snpEff.cwl b/snpEff.cwl
new file mode 100644
index 000000000..d2f5dd770
--- /dev/null
+++ b/snpEff.cwl
@@ -0,0 +1,70 @@
+#!/usr/bin/env cwl-runner
+
+cwlVersion: v1.0
+class: CommandLineTool
+
+requirements:
+  - class: InlineJavascriptRequirement
+
+stdout: $(inputs.inputfile.path.replace(/^.*[\\\/]/, '').replace(/\.[^/.]+$/, '') + '.ann.vcf')
+
+inputs:
+
+  genome:
+    type: string
+    default: "GRCh37.75"
+    inputBinding:
+      position: 1
+
+  variant_calling_file:
+    type: File
+    format: "http://edamontology.org/format_3016"
+    inputBinding:
+      position: 2
+
+  stats_filename:
+    type: string?
+    default: "snpEff_summary.html"
+    inputBinding:
+      prefix: "-stats"
+
+  no_stats:
+    type: boolean?
+    inputBinding:
+      prefix: "-noStats"
+
+  csvStats:
+    type: boolean?
+    inputBinding:
+      prefix: "-csvStats"
+
+  output_format:
+    type:
+      type: num
+      symbols: [ vcf, gatk, bed, bedAnn ]
+    inputBinding:
+      prefix: -o
+
+  verbose:
+    type: boolean?
+    inputBinding:
+      prefix: -v
+
+outputs:
+  annotated_vcf:
+    type: stdout
+
+  summary_html:
+    type: File?
+    outputBinding:
+      glob: $(input.stats_filename)
+
+  summary_txt:
+    type: File?
+    outputBinding:
+      glob: "snpEff_genes.txt"
+
+baseCommand: [ "java", "-Xmx4g" ]
+
+arguments: [ "-jar", "snpEff.jar" ]
+

From 50df226b3692c891ae35cf98f36eb14d720bfae9 Mon Sep 17 00:00:00 2001
From: hocinebendou
Date: Tue, 23 Aug 2016 09:38:01 +0200
Subject: [PATCH 2/8] move snpEff to tools folder.

---
 snpEff.cwl => tools/snpEff.cwl | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename snpEff.cwl => tools/snpEff.cwl (100%)

diff --git a/snpEff.cwl b/tools/snpEff.cwl
similarity index 100%
rename from snpEff.cwl
rename to tools/snpEff.cwl

From c2a7e6354847abdb86863992b8fa51ff79934c08 Mon Sep 17 00:00:00 2001
From: hocinebendou
Date: Tue, 23 Aug 2016 10:24:48 +0200
Subject: [PATCH 3/8] syntax error changed.

---
 tools/snpEff.cwl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/snpEff.cwl b/tools/snpEff.cwl
index d2f5dd770..02312d5cd 100644
--- a/tools/snpEff.cwl
+++ b/tools/snpEff.cwl
@@ -40,7 +40,7 @@ inputs:
 
   output_format:
     type:
-      type: num
+      type: enum
       symbols: [ vcf, gatk, bed, bedAnn ]
     inputBinding:
       prefix: -o

From 70882d47da5fa576e37cf671a01c346eee9fe873 Mon Sep 17 00:00:00 2001
From: hocinebendou
Date: Tue, 23 Aug 2016 16:47:34 +0200
Subject: [PATCH 4/8] snpEff problem importing Database in CWL

---
 tools/snpEff.cwl        | 32 +++++++++++++++++++-------------
 tools/snpEff_Dockerfile | 20 ++++++++++++++++++++
 2 files changed, 39 insertions(+), 13 deletions(-)
 create mode 100644 tools/snpEff_Dockerfile

diff --git a/tools/snpEff.cwl b/tools/snpEff.cwl
index 02312d5cd..48ab09379 100644
--- a/tools/snpEff.cwl
+++ b/tools/snpEff.cwl
@@ -3,16 +3,20 @@
 cwlVersion: v1.0
 class: CommandLineTool
 
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/snpeff:4.3
+
 requirements:
   - class: InlineJavascriptRequirement
 
-stdout: $(inputs.inputfile.path.replace(/^.*[\\\/]/, '').replace(/\.[^/.]+$/, '') + '.ann.vcf')
+#stdout: $(inputs.inputfile.path.replace(/^.*[\\\/]/, '').replace(/\.[^/.]+$/, '') + '.ann.vcf')
 
 inputs:
 
   genome:
-    type: string
-    default: "GRCh37.75"
+    type: string?
+    #default: "GRCh37.75"
     inputBinding:
       position: 1
 
@@ -22,11 +26,10 @@ inputs:
     inputBinding:
       position: 2
 
-  stats_filename:
-    type: string?
-    default: "snpEff_summary.html"
+  genome_dir:
+    type: Directory
     inputBinding:
-      prefix: "-stats"
+      position: 2
 
   no_stats:
     type: boolean?
@@ -45,11 +48,16 @@ inputs:
     inputBinding:
       prefix: -o
 
+  nodownload:
+    type: boolean?
+    inputBinding:
+      prefix: -nodownload
+
   verbose:
     type: boolean?
     inputBinding:
       prefix: -v
-
+  
 outputs:
   annotated_vcf:
     type: stdout
@@ -57,14 +65,12 @@ outputs:
   summary_html:
     type: File?
     outputBinding:
-      glob: $(input.stats_filename)
+      glob: "snpEff_summary.html"
 
   summary_txt:
     type: File?
     outputBinding:
       glob: "snpEff_genes.txt"
 
-baseCommand: [ "java", "-Xmx4g" ]
-
-arguments: [ "-jar", "snpEff.jar" ]
-
+baseCommand: [ snpEff ]
+arguments: [ "-stats", "snpEff_summary.html" ]
diff --git a/tools/snpEff_Dockerfile b/tools/snpEff_Dockerfile
new file mode 100644
index 000000000..7baa5fe92
--- /dev/null
+++ b/tools/snpEff_Dockerfile
@@ -0,0 +1,20 @@
+FROM java:8-jdk
+
+RUN apt-get update && apt-get install -y unzip wget
+
+WORKDIR /usr/local
+RUN wget https://sourceforge.net/projects/snpeff/files/snpEff_v4_3_core.zip
+RUN unzip snpEff_v4_3_core.zip
+RUN rm snpEff_v4_3_core.zip
+
+RUN chmod a+x /usr/local/snpEff/scripts/snpEff
+
+ENV PATH /usr/local/snpEff/scripts:$PATH
+
+RUN mkdir /data
+WORKDIR /data
+
+RUN groupadd -r -g 1000 ubuntu && useradd -r -g ubuntu -u 1000 ubuntu
+USER ubuntu
+
+CMD ["/bin/bash"]

From 49f1390fe05a35cf9156d03b21083bf687065d39 Mon Sep 17 00:00:00 2001
From: hocinebendou
Date: Wed, 24 Aug 2016 09:57:11 +0200
Subject: [PATCH 5/8] Sample test for snpEff in yml

---
 test/snpEff-job.yml | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 test/snpEff-job.yml

diff --git a/test/snpEff-job.yml b/test/snpEff-job.yml
new file mode 100644
index 000000000..6a48fb28e
--- /dev/null
+++ b/test/snpEff-job.yml
@@ -0,0 +1,9 @@
+variant_calling_file:
+  class: File
+  path: "../data/NA12878.sorted.vcf"
+
+nodownload: true
+output_format: vcf
+genome_dir:
+  class: Directory
+  location: /cip0/software/x86_64/miniconda-3.19.0/share/snpeff-4.3-2/GRCh37.75

From cf362f3c2c6717c5cfc8b21ea736bbaa33074566 Mon Sep 17 00:00:00 2001
From: hocinebendou
Date: Wed, 24 Aug 2016 10:24:10 +0200
Subject: [PATCH 6/8] dataDir: a path to database

---
 test/snpEff-job.yml |  4 +---
 tools/snpEff.cwl    | 13 +++++++++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/test/snpEff-job.yml b/test/snpEff-job.yml
index 6a48fb28e..bbde9e324 100644
--- a/test/snpEff-job.yml
+++ b/test/snpEff-job.yml
@@ -4,6 +4,4 @@ variant_calling_file:
 
 nodownload: true
 output_format: vcf
-genome_dir:
-  class: Directory
-  location: /cip0/software/x86_64/miniconda-3.19.0/share/snpeff-4.3-2/GRCh37.75
+data_dir: /cip0/software/x86_64/miniconda-3.19.0/share/snpeff-4.3-2
diff --git a/tools/snpEff.cwl b/tools/snpEff.cwl
index 48ab09379..007492c25 100644
--- a/tools/snpEff.cwl
+++ b/tools/snpEff.cwl
@@ -26,11 +26,16 @@ inputs:
     inputBinding:
       position: 2
 
-  genome_dir:
-    type: Directory
-    inputBinding:
-      position: 2
+  #genome_dir:
+  #  type: Directory
+  #  inputBinding:
+  #    position: 2
 
+  data_dir:
+    type: string
+    inputBinding:
+      prefix: "-dataDir"
+    
   no_stats:
     type: boolean?
     inputBinding:

From 067a359ba46b9c9adfb1a88ae61bf94c08ad0c27 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 24 Aug 2016 08:25:10 +0000
Subject: [PATCH 7/8] edits to the test file

---
Review note (commentary only, ignored by git am): this patch starts from blob
6a48fb28e, the same pre-image as 6/8, and its context still contains the
`genome_dir` entry that 6/8 removes, so it will not apply on top of 6/8
unchanged. 8/8 starts from blob 2d4d246ae, which no patch in this series
produces -- presumably the result of merging 6/8 and 7/8; confirm against the
repository history.

 test/snpEff-job.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/snpEff-job.yml b/test/snpEff-job.yml
index 6a48fb28e..eaace0597 100644
--- a/test/snpEff-job.yml
+++ b/test/snpEff-job.yml
@@ -1,9 +1,9 @@
 variant_calling_file:
   class: File
-  path: "../data/NA12878.sorted.vcf"
+  path: "/data_small/small_vcf/NA12878.sorted.vcf"
 
 nodownload: true
 output_format: vcf
 genome_dir:
   class: Directory
-  location: /cip0/software/x86_64/miniconda-3.19.0/share/snpeff-4.3-2/GRCh37.75
+  location: "/data_small/snpEff_hg19/data/hg19"

From f0bb8c4c631f45b805d43ca6a489c135092ce271 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 24 Aug 2016 11:31:26 +0000
Subject: [PATCH 8/8] I think this is working now

---
 test/snpEff-job.yml |  8 ++++----
 tools/snpEff.cwl    | 45 +++++++++++++++++++++++----------------------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/test/snpEff-job.yml b/test/snpEff-job.yml
index 2d4d246ae..8d1b4f8a9 100644
--- a/test/snpEff-job.yml
+++ b/test/snpEff-job.yml
@@ -1,3 +1,5 @@
+genome: hg19
+
 variant_calling_file:
   class: File
   path: "/data_small/small_vcf/NA12878.sorted.vcf"
@@ -6,9 +8,7 @@ nodownload: true
 
 output_format: vcf
 
-genome_dir:
+data_dir:
   class: Directory
-  location: "/data_small/snpEff_hg19/data/hg19"
-
-data_dir: /cip0/software/x86_64/miniconda-3.19.0/share/snpeff-4.3-2
+  location: "/data_small/snpEff_hg19/data"
 
diff --git a/tools/snpEff.cwl b/tools/snpEff.cwl
index 007492c25..2fd0e8d47 100644
--- a/tools/snpEff.cwl
+++ b/tools/snpEff.cwl
@@ -10,59 +10,60 @@ hints:
 requirements:
   - class: InlineJavascriptRequirement
 
-#stdout: $(inputs.inputfile.path.replace(/^.*[\\\/]/, '').replace(/\.[^/.]+$/, '') + '.ann.vcf')
-
 inputs:
 
-  genome:
-    type: string?
-    #default: "GRCh37.75"
-    inputBinding:
-      position: 1
-
-  variant_calling_file:
-    type: File
-    format: "http://edamontology.org/format_3016"
-    inputBinding:
-      position: 2
-
-  #genome_dir:
-  #  type: Directory
-  #  inputBinding:
-  #    position: 2
-
   data_dir:
-    type: string
+    type: Directory
     inputBinding:
       prefix: "-dataDir"
-    
+      position: 1
+
   no_stats:
     type: boolean?
     inputBinding:
       prefix: "-noStats"
+      position: 2
 
   csvStats:
     type: boolean?
     inputBinding:
       prefix: "-csvStats"
+      position: 3
 
   output_format:
     type:
       type: enum
       symbols: [ vcf, gatk, bed, bedAnn ]
+    default: vcf
     inputBinding:
       prefix: -o
+      position: 4
 
   nodownload:
     type: boolean?
     inputBinding:
       prefix: -nodownload
+      position: 5
 
   verbose:
     type: boolean?
     inputBinding:
       prefix: -v
+      position: 6
+
+  genome:
+    type: string
+    inputBinding:
+      position: 7
+
+  variant_calling_file:
+    type: File
+    format: "http://edamontology.org/format_3016"
+    inputBinding:
+      position: 8
   
+stdout: $(inputs.variant_calling_file.path.replace(/^.*[\\\/]/, '').replace(/\.[^/.]+$/, '') + '.ann.vcf')
+
 outputs:
   annotated_vcf:
     type: stdout
@@ -78,4 +79,4 @@ outputs:
       glob: "snpEff_genes.txt"
 
 baseCommand: [ snpEff ]
-arguments: [ "-stats", "snpEff_summary.html" ]
+arguments: [ "ann", "-stats", "snpEff_summary.html" ]