-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
60 lines (55 loc) · 1.42 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# First rule in the file, so Snakemake treats it as the default target.
# It produces nothing itself; listing the two aggregated files as inputs
# makes Snakemake schedule every rule needed to build them.
rule all:
    input:
        "data/agg/clinvar.bed.beddb",
        "data/agg/density.multires.mv5",
# Runs `clodius aggregate multivec` on data/density.mv5 (written by
# convert_vcf) and stores the result as data/agg/density.multires.mv5.
# The chromosome sizes file comes from download_chromsizes.
rule aggregate_multivec:
    input:
        chromsizes = "data/hg38.chrom.sizes",
        multivec = "data/density.mv5",
    output:
        "data/agg/density.multires.mv5",
    shell:
        """
        clodius aggregate multivec \
            --chromsizes-filename {input.chromsizes} \
            --starting-resolution 1 \
            --output-file {output} \
            {input.multivec}
        """
# Runs `clodius aggregate bedfile` on data/clinvar.bed (written by
# convert_vcf) and stores the result as data/agg/clinvar.bed.beddb.
# The delimiter argument passes a tab character to clodius.
rule aggregate_bed:
    input:
        chromsizes = "data/hg38.chrom.sizes",
        bed = "data/clinvar.bed",
    output:
        "data/agg/clinvar.bed.beddb",
    shell:
        """
        clodius aggregate bedfile \
            --chromsizes-filename {input.chromsizes} \
            --delimiter $'\t' \
            --importance-column 6 \
            --max-per-tile 80 \
            --output-file {output} \
            {input.bed}
        """
# Calls scripts/prepare.py once to turn the downloaded ClinVar VCF into the
# two intermediate files consumed by the aggregate_* rules: a BED file and a
# multivec file. Both outputs come from the same script invocation.
rule convert_vcf:
    input:
        vcf = "data/clinvar.vcf",
        chromsizes = "data/hg38.chrom.sizes",
    output:
        bed = "data/clinvar.bed",
        multivec = "data/density.mv5",
    shell:
        """
        python scripts/prepare.py \
            --vcf {input.vcf} \
            --chromsizes {input.chromsizes} \
            --bed {output.bed} \
            --multivec {output.multivec}
        """
# Downloads the gzipped GRCh38 ClinVar VCF from the NCBI FTP site and
# decompresses it to data/clinvar.vcf.
#
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as "{output}.gz" (which previously surfaced only as a
# confusing "not in gzip format" failure from gunzip). --location follows
# redirects should the server issue one.
#
# NOTE(review): the URL has no version or date component, so it presumably
# tracks the current ClinVar release and results may differ between runs --
# confirm whether a pinned release is wanted.
rule download_vcf:
    output:
        "data/clinvar.vcf"
    shell:
        """
        curl --fail --location \
            https://ftp.ncbi.nlm.nih.gov/pub/clinvar/vcf_GRCh38/clinvar.vcf.gz \
            -o {output}.gz
        gunzip {output}.gz
        """
# Downloads the hg38 chromosome sizes table from a pinned commit of the
# igvteam/igv repository and keeps only its first 24 lines.
# NOTE(review): the first 24 lines are presumably chr1-22, chrX and chrY --
# confirm against the upstream file.
#
# Snakemake runs shell commands in bash strict mode (pipefail enabled), so a
# pipeline fails if any stage exits non-zero. `head -n 24` stops reading after
# 24 lines, which can make curl die with a write error on the closed pipe and
# fail the rule intermittently. `awk 'NR <= 24'` emits the same lines but
# consumes its whole input, so curl always finishes cleanly.
#
# --fail turns an HTTP error into a non-zero exit instead of writing the error
# page into the output; --location follows redirects.
rule download_chromsizes:
    output:
        "data/hg38.chrom.sizes"
    shell:
        """
        curl --fail --location \
            https://raw.githubusercontent.com/igvteam/igv/329449af409bfb7f60e4db5e7793882bd8b5f602/genomes/sizes/hg38.chrom.sizes \
            | awk 'NR <= 24' > {output}
        """