Patch summary: drop the `cleanup` rule from the Snakefile (its inputs become
the inputs of `rule all`) and have each tool rule remove its own temporary
files; replace the `2> >(tee ...)` process substitutions with plain
redirections into each rule's log file.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy, using the hunk line counts as the guide;
verify against the target tree or apply with `git apply --ignore-whitespace`.
NOTE(review): three added lines were corrected relative to that copy (the
abricate redirection, the deeparg log name, the sraX thread count), so the
post-image blob ids on the affected `index` lines are stale.

diff --git a/Snakefile b/Snakefile
index 953a9f0..d7962ed 100644
--- a/Snakefile
+++ b/Snakefile
@@ -16,10 +16,6 @@ def _get_seqdir(wildcards):
     return os.path.dirname(samples.loc[(wildcards.sample), ["assembly"]].dropna()[0])
 
 rule all:
-    input:
-        "pipeline_finished.txt"
-
-rule cleanup:
     input:
         expand("results/{sample}/amrplusplus/{amrplusplus_outputs}", sample=samples.index, amrplusplus_outputs=amrplusplus_exts),
         expand("results/{sample}/rgi/rgi.json", sample=samples.index),
@@ -32,18 +28,11 @@ rule cleanup:
         expand("results/{sample}/resfams/resfams.tblout", sample=samples.index),
         expand("results/{sample}/mykrobe/report.json", sample=samples.index),
         expand("results/{sample}/resfinder/data_resfinder.json", sample=samples.index),
-        expand("results/{sample}/srax/Results/sraX_analysis.html", sample=samples.index),
+        expand("results/{sample}/srax/sraX_analysis.html", sample=samples.index),
         expand("results/{sample}/sstar/report.tsv", sample=samples.index),
         expand("results/{sample}/kmerresistance/results.KmerRes", sample=samples.index),
         expand("results/{sample}/deeparg/output.mapping.ARG", sample=samples.index),
         #expand("results/{sample}/srst2/srst2__fullgenes__ResFinder__results.txt", sample=samples.index)
-    output:
-        "pipeline_finished.txt"
-    shell:
-        """
-        rm -r results/*/groot/graphs results/*/deeparg/*.fasta results/*/amrplusplus/tmp results/*/staramr/hits/ results/*/ariba/*.gz results/*/srax/tmp results/*/mykrobe/skels || echo "tempfiles already absent"
-        touch pipeline_finished.txt
-        """
 
 #include: "rules/srst2.smk"
 include: "rules/deeparg.smk"
diff --git a/rules/abricate.smk b/rules/abricate.smk
index e323717..1316d4a 100644
--- a/rules/abricate.smk
+++ b/rules/abricate.smk
@@ -17,5 +17,5 @@ rule run_abricate:
     shell:
         """
         abricate --list > {log}
-        abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} 2> >(tee -a {log} >&2)
+        abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} 2>> {log}
         """
diff --git a/rules/amrfinder.smk b/rules/amrfinder.smk
index 8c7e4e4..7f6fbfb 100644
--- a/rules/amrfinder.smk
+++ b/rules/amrfinder.smk
@@ -20,8 +20,12 @@ rule run_amrfinder:
     conda:
         "../envs/amrfinder.yaml"
     params:
-        organism = config["params"]["amrfinder"]["organism"]
+        organism = config["params"]["amrfinder"]["organism"],
+        output_tmp_dir = "results/{sample}/amrfinder/tmp"
     threads:
         config["params"]["threads"]
     shell:
-        "amrfinder -n {input.contigs} -o {output.report} -O {params.organism} -d {input.db}/latest 2> >(tee {log} >&2) "
+        """
+        amrfinder -n {input.contigs} -o {output.report} -O {params.organism} -d {input.db}/latest >{log} 2>&1
+        rm -rf {params.output_tmp_dir}
+        """
diff --git a/rules/amrplusplus.smk b/rules/amrplusplus.smk
index e3e1822..14bd2ba 100644
--- a/rules/amrplusplus.smk
+++ b/rules/amrplusplus.smk
@@ -66,9 +66,10 @@ rule run_amrplusplus:
     shell:
         """
         mkdir -p {params.output_prefix_tmp}
-        trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 2> >(tee {log} >&2)
-        bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2> >(tee -a {log} >&2)
-        {input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 2> >(tee -a {log} >&2)
-        {input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 2> >(tee -a {log} >&2)
-        {input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} 2> >(tee -a {log} >&2)
+        trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 >{log} 2>&1
+        bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq 2>> {log} | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2>>{log}
+        {input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 >>{log} 2>&1
+        {input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 >>{log} 2>&1
+        {input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} >>{log} 2>&1
+        rm -rf {params.output_prefix_tmp}
         """
diff --git a/rules/ariba.smk b/rules/ariba.smk
index e29cbed..daac8af 100644
--- a/rules/ariba.smk
+++ b/rules/ariba.smk
@@ -25,12 +25,11 @@ rule run_ariba:
         "logs/ariba_{sample}.log"
     conda:
         "../envs/ariba.yaml"
-    threads:
-        config["params"]["threads"]
+    threads: 1
     params:
         output_folder = "results/{sample}/ariba/"
     shell:
         """
-        rm -r {params.output_folder};
-        ariba run --threads 1 {input.ref_db} {input.read1} {input.read2} {params.output_folder} 2> >(tee {log} >&2)
+        rm -r {params.output_folder}
+        ariba run --threads {threads} {input.ref_db} {input.read1} {input.read2} {params.output_folder} > {log} 2>&1
         """
diff --git a/rules/deeparg.smk b/rules/deeparg.smk
index 1243e18..14811f2 100644
--- a/rules/deeparg.smk
+++ b/rules/deeparg.smk
@@ -4,11 +4,14 @@ rule run_deeparg:
     output:
         report = "results/{sample}/deeparg/output.mapping.ARG",
         report_potential = "results/{sample}/deeparg/output.mapping.potential.ARG"
+    log:
+        "logs/deeparg_{sample}.log"
     singularity:
         "docker://gaarangoa/deeparg:v1.0.1"
     shell:
         """
-        python /deeparg/deepARG.py --align --type nucl --reads --input /data/results/{wildcards.sample}/deeparg/reads.fasta --output /data/results/{wildcards.sample}/deeparg/output
+        python /deeparg/deepARG.py --align --type nucl --reads --input /data/results/{wildcards.sample}/deeparg/reads.fasta --output /data/results/{wildcards.sample}/deeparg/output > {log} 2>&1
+        rm /data/results/{wildcards.sample}/deeparg/reads.fasta
         """
 
 rule prepare_deeparg_reads:
diff --git a/rules/groot.smk b/rules/groot.smk
index 809d1f6..179d55a 100644
--- a/rules/groot.smk
+++ b/rules/groot.smk
@@ -36,4 +36,7 @@ rule run_groot:
         max_read_length = config['params']['groot']['read_length'] + 5,
         graph_dir = "results/{sample}/groot/graphs"
     shell:
-        "zcat {input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report}"
+        """
+        zcat {input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report}
+        rm -rf {params.graph_dir}
+        """
diff --git a/rules/kmerresistance.smk b/rules/kmerresistance.smk
index f40a9c2..4338829 100644
--- a/rules/kmerresistance.smk
+++ b/rules/kmerresistance.smk
@@ -51,6 +51,6 @@ rule run_kmerresistance:
     shell:
         """
         zcat {input.read1} {input.read2} > {params.output_folder}/temp_all_reads.fq
-        kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results 2> >(tee {log} >&2)
+        kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results > {log} 2>&1
         rm {params.output_folder}/temp_all_reads.fq
         """
diff --git a/rules/mykrobe.smk b/rules/mykrobe.smk
index 10810b9..5a2989f 100644
--- a/rules/mykrobe.smk
+++ b/rules/mykrobe.smk
@@ -13,6 +13,10 @@ rule run_mykrobe:
         config["params"]["threads"]
     params:
         tmp = "results/{sample}/mykrobe/tmp/",
-        skel_dir = "results/{sample}/mykrobe/skels"
+        skel_dir = "results/{sample}/mykrobe/skels",
+        tmp_dir = "results/{sample}/mykrobe/tmp"
     shell:
-        "mykrobe predict {wildcards.sample} tb -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp} 2> >(tee {log} >&2) "
+        """
+        mykrobe predict {wildcards.sample} tb -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp} > {log} 2>&1
+        rm -rf {params.skel_dir} {params.tmp_dir}
+        """
diff --git a/rules/pointfinder.smk b/rules/pointfinder.smk
index 28629db..97d3123 100644
--- a/rules/pointfinder.smk
+++ b/rules/pointfinder.smk
@@ -38,9 +38,10 @@ rule run_pointfinder:
         config["params"]["threads"]
     params:
         species = config["params"]["pointfinder"]["species"],
-
+        output_tmp_dir = "results/{sample}/pointfinder/tmp"
     shell:
         """
-        python {input.pointfinder_script} -i {input.contigs} -p {input.pointfinder_db} -s {params.species} -m blastn -m_p $(which blastn) -o results/{wildcards.sample}/pointfinder 2> >(tee {log} >&2)
+        python {input.pointfinder_script} -i {input.contigs} -p {input.pointfinder_db} -s {params.species} -m blastn -m_p $(which blastn) -o results/{wildcards.sample}/pointfinder > {log} 2>&1
         cp {output.raw_report} {output.report}
+        rm -rf {params.output_tmp_dir}
         """
diff --git a/rules/resfams.smk b/rules/resfams.smk
index 3660d71..8dafc11 100644
--- a/rules/resfams.smk
+++ b/rules/resfams.smk
@@ -21,6 +21,6 @@ rule run_resfams:
         output_prefix = "results/{sample}/resfams"
     shell:
         """
-        prodigal -i {input.contigs} -a {params.output_prefix}/protein_seqs.faa 2> >(tee {log} >&2);
-        hmmsearch --cpu {threads} --tblout {output.report} {input.resfams_hmms} {params.output_prefix}/protein_seqs.faa > {log} 2>&1
+        prodigal -i {input.contigs} -a {params.output_prefix}/protein_seqs.faa > {log} 2>&1
+        hmmsearch --cpu {threads} --tblout {output.report} {input.resfams_hmms} {params.output_prefix}/protein_seqs.faa >>{log} 2>&1
         """
diff --git a/rules/resfinder.smk b/rules/resfinder.smk
index 19cb7aa..9786a7b 100644
--- a/rules/resfinder.smk
+++ b/rules/resfinder.smk
@@ -25,9 +25,11 @@ rule run_resfinder:
     threads:
         config["params"]["threads"]
     params:
-        outdir = "results/{sample}/resfinder"
+        outdir = "results/{sample}/resfinder",
+        output_tmp_dir = "results/{sample}/resfinder/tmp"
     shell:
         """
-        mkdir -p {params.outdir};
-        resfinder.py -p {input.resfinder_db} -i {input.contigs} -o {params.outdir} 2> >(tee {log} >&2)
+        mkdir -p {params.outdir}
+        resfinder.py -p {input.resfinder_db} -i {input.contigs} -o {params.outdir} > {log} 2>&1
+        rm -rf {params.output_tmp_dir}
         """
diff --git a/rules/rgi.smk b/rules/rgi.smk
index 4e37a1d..d98276b 100644
--- a/rules/rgi.smk
+++ b/rules/rgi.smk
@@ -30,6 +30,6 @@ rule run_rgi:
         output_prefix = "results/{sample}/rgi/rgi"
     shell:
         """
-        rgi load --card_json {input.card_db}
-        rgi main --input_sequence {input.contigs} --output_file {params.output_prefix} --clean --num_threads {threads} 2> >(tee {log} >&2)
+        rgi load --card_json {input.card_db} > {log} 2>&1
+        rgi main --input_sequence {input.contigs} --output_file {params.output_prefix} --clean --num_threads {threads} >>{log} 2>&1
         """
diff --git a/rules/srax.smk b/rules/srax.smk
index ef474ae..886ba7c 100644
--- a/rules/srax.smk
+++ b/rules/srax.smk
@@ -2,7 +2,7 @@ rule run_srax:
     input:
         genome_dir = lambda wildcards: _get_seqdir(wildcards),
     output:
-        report = "results/{sample}/srax/Results/sraX_analysis.html"
+        report = "results/{sample}/srax/sraX_analysis.html"
     message: "Running rule run_srax on {wildcards.sample} with contigs"
     log:
         "logs/srax_{sample}.log"
@@ -12,6 +12,15 @@ rule run_srax:
         config["params"]["threads"]
     params:
         dbtype = config["params"]["srax"]["dbtype"],
-        outdir = "results/{sample}/srax"
+        outdir = "results/{sample}/srax",
+        tmp_output_dir = "results/{sample}/srax/tmp",
+        log_output_dir = "results/{sample}/srax/Log",
+        ARG_DB_output_dir = "results/{sample}/srax/ARG_DB",
+        analysis_output_dir = "results/{sample}/srax/Analysis",
+        result_output_dir = "results/{sample}/srax/Results"
     shell:
-        "sraX -i {input.genome_dir} -t 4 -db {params.dbtype} -o {params.outdir} 2> >(tee {log} >&2)"
+        """
+        sraX -i {input.genome_dir} -t {threads} -db {params.dbtype} -o {params.outdir} > {log} 2>&1
+        mv {params.result_output_dir}/* {params.outdir}
+        rm -rf {params.tmp_output_dir} {params.log_output_dir} {params.ARG_DB_output_dir} {params.analysis_output_dir} {params.result_output_dir}
+        """
diff --git a/rules/srst2.smk b/rules/srst2.smk
index 9b3d2f0..4e2a6b5 100644
--- a/rules/srst2.smk
+++ b/rules/srst2.smk
@@ -17,4 +17,4 @@ rule run_srst2:
         max_divergence = config["params"]["srst2"]["max_divergence"],
         output_prefix = "results/{sample}/srst2/srst2"
     shell:
-        "srst2 --threads {threads} --gene_db {params.gene_db} --forward '_R1' --reverse '_R2' --input_pe {input.read1} {input.read2} --min_depth {params.min_depth} --output {params.output_prefix} 2> >(tee {log} >&2)"
+        "srst2 --threads {threads} --gene_db {params.gene_db} --forward '_R1' --reverse '_R2' --input_pe {input.read1} {input.read2} --min_depth {params.min_depth} --output {params.output_prefix} > {log} 2>&1"
diff --git a/rules/sstar.smk b/rules/sstar.smk
index 7ac4689..585a4c6 100644
--- a/rules/sstar.smk
+++ b/rules/sstar.smk
@@ -35,5 +35,5 @@ rule run_sstar:
         outdir = 'results/{sample}/sstar'
     shell:
         """
-        {input.sstar} -g {input.contigs} -d {input.resgannot_db} --outdir {params.outdir} > {output.report}
+        {input.sstar} -g {input.contigs} -d {input.resgannot_db} --outdir {params.outdir} > {output.report} 2>{log}
         """
diff --git a/rules/staramr.smk b/rules/staramr.smk
index 8139c8a..e95a750 100644
--- a/rules/staramr.smk
+++ b/rules/staramr.smk
@@ -16,6 +16,6 @@ rule run_staramr:
     shell:
         """
         rm -r {params.output_folder};
-        staramr search -o {params.output_folder} --nproc {threads} {input.contigs} 2> >(tee {log} >&2)
+        staramr search -o {params.output_folder} --nproc {threads} {input.contigs} >{log} 2>&1
         """
 # only support salmonella/campylobacter