Skip to content

Commit

Permalink
Tidy up tmp outputs throughout and capture output
Browse files Browse the repository at this point in the history
- Capture all tool output into log files, as it otherwise overwhelms the
  terminal when running lots of samples/cores

- Closes #17 by removing temporary output at the end of each tool
  invocation instead of at the end of the workflow.

- This reduces per-sample hard-drive usage by 20x
  • Loading branch information
fmaguire committed Jun 30, 2020
1 parent 8a3634b commit 28bfdf3
Show file tree
Hide file tree
Showing 17 changed files with 59 additions and 44 deletions.
13 changes: 1 addition & 12 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@ def _get_seqdir(wildcards):
return os.path.dirname(samples.loc[(wildcards.sample), ["assembly"]].dropna()[0])

rule all:
input:
"pipeline_finished.txt"

rule cleanup:
input:
expand("results/{sample}/amrplusplus/{amrplusplus_outputs}", sample=samples.index, amrplusplus_outputs=amrplusplus_exts),
expand("results/{sample}/rgi/rgi.json", sample=samples.index),
Expand All @@ -32,18 +28,11 @@ rule cleanup:
expand("results/{sample}/resfams/resfams.tblout", sample=samples.index),
expand("results/{sample}/mykrobe/report.json", sample=samples.index),
expand("results/{sample}/resfinder/data_resfinder.json", sample=samples.index),
expand("results/{sample}/srax/Results/sraX_analysis.html", sample=samples.index),
expand("results/{sample}/srax/sraX_analysis.html", sample=samples.index),
expand("results/{sample}/sstar/report.tsv", sample=samples.index),
expand("results/{sample}/kmerresistance/results.KmerRes", sample=samples.index),
expand("results/{sample}/deeparg/output.mapping.ARG", sample=samples.index),
#expand("results/{sample}/srst2/srst2__fullgenes__ResFinder__results.txt", sample=samples.index)
output:
"pipeline_finished.txt"
shell:
"""
rm -r results/*/groot/graphs results/*/deeparg/*.fasta results/*/amrplusplus/tmp results/*/staramr/hits/ results/*/ariba/*.gz results/*/srax/tmp results/*/mykrobe/skels || echo "tempfiles already absent"
touch pipeline_finished.txt
"""

#include: "rules/srst2.smk"
include: "rules/deeparg.smk"
Expand Down
2 changes: 1 addition & 1 deletion rules/abricate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ rule run_abricate:
shell:
"""
abricate --list > {log}
abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} 2> >(tee -a {log} >&2)
abricate --threads {threads} --nopath --db {params.dbname} --minid {params.minid} --mincov {params.mincov} {input.contigs} > {output.report} > {log} 2>&1
"""
8 changes: 6 additions & 2 deletions rules/amrfinder.smk
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,12 @@ rule run_amrfinder:
conda:
"../envs/amrfinder.yaml"
params:
organism = config["params"]["amrfinder"]["organism"]
organism = config["params"]["amrfinder"]["organism"],
output_tmp_dir = "results/{sample}/amrfinder/tmp"
threads:
config["params"]["threads"]
shell:
"amrfinder -n {input.contigs} -o {output.report} -O {params.organism} -d {input.db}/latest 2> >(tee {log} >&2) "
"""
amrfinder -n {input.contigs} -o {output.report} -O {params.organism} -d {input.db}/latest >{log} 2>&1
rm -rf {params.output_tmp_dir}
"""
11 changes: 6 additions & 5 deletions rules/amrplusplus.smk
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ rule run_amrplusplus:
shell:
"""
mkdir -p {params.output_prefix_tmp}
trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 2> >(tee {log} >&2)
bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2> >(tee -a {log} >&2)
{input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 2> >(tee -a {log} >&2)
{input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 2> >(tee -a {log} >&2)
{input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} 2> >(tee -a {log} >&2)
trimmomatic PE {input.read1} {input.read2} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r1_se_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_se_trimmed.fq SLIDINGWINDOW:4:15 LEADING:3 TRAILING:3 MINLEN:36 >{log} 2>&1
bwa mem {input.megares_db} {params.output_prefix_tmp}/{wildcards.sample}_r1_pe_trimmed.fq {params.output_prefix_tmp}/{wildcards.sample}_r2_pe_trimmed.fq 2>> {log} | samtools sort -n -O sam > {params.output_prefix_tmp}/{wildcards.sample}.sam 2>>{log}
{input.resistome_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene} -group_fp {output.amr_group} -class_fp {output.amr_class} -mech_fp {output.amr_mech} -t 80 >>{log} 2>&1
{input.rarefaction_tool} -ref_fp {input.megares_db} -annot_fp {input.megares_annot} -sam_fp {params.output_prefix_tmp}/{wildcards.sample}.sam -gene_fp {output.amr_gene}_rare -group_fp {output.amr_group}_rare -class_fp {output.amr_class}_rare -mech_fp {output.amr_mech}_rare -min 5 -max 100 -skip 5 -samples 1 -t 80 >>{log} 2>&1
{input.snp_tool} -amr_fp {input.megares_db} -sampe {params.output_prefix_tmp}/{wildcards.sample}.sam -out_fp {output.amr_snps} >>{log} 2>&1
rm -rf {params.output_prefix_tmp}
"""
7 changes: 3 additions & 4 deletions rules/ariba.smk
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,11 @@ rule run_ariba:
"logs/ariba_{sample}.log"
conda:
"../envs/ariba.yaml"
threads:
config["params"]["threads"]
threads: 1
params:
output_folder = "results/{sample}/ariba/"
shell:
"""
rm -r {params.output_folder};
ariba run --threads 1 {input.ref_db} {input.read1} {input.read2} {params.output_folder} 2> >(tee {log} >&2)
rm -r {params.output_folder}
ariba run --threads {threads} {input.ref_db} {input.read1} {input.read2} {params.output_folder} > {log} 2>&1
"""
5 changes: 4 additions & 1 deletion rules/deeparg.smk
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@ rule run_deeparg:
output:
report = "results/{sample}/deeparg/output.mapping.ARG",
report_potential = "results/{sample}/deeparg/output.mapping.potential.ARG"
log:
"logs/amrfinder_{sample}.log"
singularity:
"docker://gaarangoa/deeparg:v1.0.1"
shell:
"""
python /deeparg/deepARG.py --align --type nucl --reads --input /data/results/{wildcards.sample}/deeparg/reads.fasta --output /data/results/{wildcards.sample}/deeparg/output
python /deeparg/deepARG.py --align --type nucl --reads --input /data/results/{wildcards.sample}/deeparg/reads.fasta --output /data/results/{wildcards.sample}/deeparg/output > {log} 2>&1
rm /data/results/{wildcards.sample}/deeparg/reads.fasta
"""

rule prepare_deeparg_reads:
Expand Down
5 changes: 4 additions & 1 deletion rules/groot.smk
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,7 @@ rule run_groot:
max_read_length = config['params']['groot']['read_length'] + 5,
graph_dir = "results/{sample}/groot/graphs"
shell:
"zcat {input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report}"
"""
zcat {input.read1} {input.read2} | seqkit seq --min-len {params.min_read_length} --max-len {params.max_read_length} | groot align -g {params.graph_dir} -p {threads} -i {input.db_index} --log {log} | groot report --log {log} > {output.report}
rm -rf {params.graph_dir}
"""
2 changes: 1 addition & 1 deletion rules/kmerresistance.smk
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,6 @@ rule run_kmerresistance:
shell:
"""
zcat {input.read1} {input.read2} > {params.output_folder}/temp_all_reads.fq
kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results 2> >(tee {log} >&2)
kmerresistance -i {params.output_folder}/temp_all_reads.fq -t_db {params.kma_resfinder_db} -s_db {params.species_db} -o {params.output_folder}/results > {log} 2>&1
rm {params.output_folder}/temp_all_reads.fq
"""
8 changes: 6 additions & 2 deletions rules/mykrobe.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ rule run_mykrobe:
config["params"]["threads"]
params:
tmp = "results/{sample}/mykrobe/tmp/",
skel_dir = "results/{sample}/mykrobe/skels"
skel_dir = "results/{sample}/mykrobe/skels",
tmp_dir = "results/{sample}/mykrobe/tmp"
shell:
"mykrobe predict {wildcards.sample} tb -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp} 2> >(tee {log} >&2) "
"""
mykrobe predict {wildcards.sample} tb -1 {input.read1} {input.read2} --skeleton_dir {params.skel_dir} --threads {threads} --format json --output {output.report} --tmp {params.tmp} > {log} 2>&1
rm -rf {params.skel_dir} {params.tmp_dir}
"""
5 changes: 3 additions & 2 deletions rules/pointfinder.smk
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,10 @@ rule run_pointfinder:
config["params"]["threads"]
params:
species = config["params"]["pointfinder"]["species"],

output_tmp_dir = "results/{sample}/pointfinder/tmp"
shell:
"""
python {input.pointfinder_script} -i {input.contigs} -p {input.pointfinder_db} -s {params.species} -m blastn -m_p $(which blastn) -o results/{wildcards.sample}/pointfinder 2> >(tee {log} >&2)
python {input.pointfinder_script} -i {input.contigs} -p {input.pointfinder_db} -s {params.species} -m blastn -m_p $(which blastn) -o results/{wildcards.sample}/pointfinder > {log} 2>&1
cp {output.raw_report} {output.report}
rm -rf {params.output_tmp_dir}
"""
4 changes: 2 additions & 2 deletions rules/resfams.smk
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ rule run_resfams:
output_prefix = "results/{sample}/resfams"
shell:
"""
prodigal -i {input.contigs} -a {params.output_prefix}/protein_seqs.faa 2> >(tee {log} >&2);
hmmsearch --cpu {threads} --tblout {output.report} {input.resfams_hmms} {params.output_prefix}/protein_seqs.faa > {log} 2>&1
prodigal -i {input.contigs} -a {params.output_prefix}/protein_seqs.faa > {log} 2>&1
hmmsearch --cpu {threads} --tblout {output.report} {input.resfams_hmms} {params.output_prefix}/protein_seqs.faa >>{log} 2>&1
"""
8 changes: 5 additions & 3 deletions rules/resfinder.smk
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@ rule run_resfinder:
threads:
config["params"]["threads"]
params:
outdir = "results/{sample}/resfinder"
outdir = "results/{sample}/resfinder",
output_tmp_dir = "results/{sample}/resfinder/tmp"
shell:
"""
mkdir -p {params.outdir};
resfinder.py -p {input.resfinder_db} -i {input.contigs} -o {params.outdir} 2> >(tee {log} >&2)
mkdir -p {params.outdir}
resfinder.py -p {input.resfinder_db} -i {input.contigs} -o {params.outdir} > {log} 2>&1
rm -rf {params.output_tmp_dir}
"""
4 changes: 2 additions & 2 deletions rules/rgi.smk
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ rule run_rgi:
output_prefix = "results/{sample}/rgi/rgi"
shell:
"""
rgi load --card_json {input.card_db}
rgi main --input_sequence {input.contigs} --output_file {params.output_prefix} --clean --num_threads {threads} 2> >(tee {log} >&2)
rgi load --card_json {input.card_db} > {log} 2>&1
rgi main --input_sequence {input.contigs} --output_file {params.output_prefix} --clean --num_threads {threads} >>{log} 2>&1
"""
15 changes: 12 additions & 3 deletions rules/srax.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ rule run_srax:
input:
genome_dir = lambda wildcards: _get_seqdir(wildcards),
output:
report = "results/{sample}/srax/Results/sraX_analysis.html"
report = "results/{sample}/srax/sraX_analysis.html"
message: "Running rule run_srax on {wildcards.sample} with contigs"
log:
"logs/srax_{sample}.log"
Expand All @@ -12,6 +12,15 @@ rule run_srax:
config["params"]["threads"]
params:
dbtype = config["params"]["srax"]["dbtype"],
outdir = "results/{sample}/srax"
outdir = "results/{sample}/srax",
tmp_output_dir = "results/{sample}/srax/tmp",
log_output_dir = "results/{sample}/srax/Log",
ARG_DB_output_dir = "results/{sample}/srax/ARG_DB",
analysis_output_dir = "results/{sample}/srax/Analysis",
result_output_dir = "results/{sample}/srax/Results"
shell:
"sraX -i {input.genome_dir} -t 4 -db {params.dbtype} -o {params.outdir} 2> >(tee {log} >&2)"
"""
sraX -i {input.genome_dir} -t 4 -db {params.dbtype} -o {params.outdir} > {log} 2>&1
mv {params.result_output_dir}/* {params.outdir}
rm -rf {params.tmp_output_dir} {params.log_output_dir} {params.ARG_DB_output_dir} {params.analysis_output_dir} {params.result_output_dir}
"""
2 changes: 1 addition & 1 deletion rules/srst2.smk
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ rule run_srst2:
max_divergence = config["params"]["srst2"]["max_divergence"],
output_prefix = "results/{sample}/srst2/srst2"
shell:
"srst2 --threads {threads} --gene_db {params.gene_db} --forward '_R1' --reverse '_R2' --input_pe {input.read1} {input.read2} --min_depth {params.min_depth} --output {params.output_prefix} 2> >(tee {log} >&2)"
"srst2 --threads {threads} --gene_db {params.gene_db} --forward '_R1' --reverse '_R2' --input_pe {input.read1} {input.read2} --min_depth {params.min_depth} --output {params.output_prefix} > {log} 2>&1"
2 changes: 1 addition & 1 deletion rules/sstar.smk
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ rule run_sstar:
outdir = 'results/{sample}/sstar'
shell:
"""
{input.sstar} -g {input.contigs} -d {input.resgannot_db} --outdir {params.outdir} > {output.report}
{input.sstar} -g {input.contigs} -d {input.resgannot_db} --outdir {params.outdir} > {output.report} 2>{log}
"""
2 changes: 1 addition & 1 deletion rules/staramr.smk
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ rule run_staramr:
shell:
"""
rm -r {params.output_folder};
staramr search -o {params.output_folder} --nproc {threads} {input.contigs} 2> >(tee {log} >&2)
staramr search -o {params.output_folder} --nproc {threads} {input.contigs} >{log} 2>&1
"""
# only support salmonella/campylobacter

0 comments on commit 28bfdf3

Please sign in to comment.