-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path001C_process_PCAWG_MAF.R
42 lines (31 loc) · 1.89 KB
/
001C_process_PCAWG_MAF.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
date_tag = "210317"
source(paste0("/.mounts/labs/reimandlab/private/users/oocsenas/CA2M/",
# source(paste0("/.mounts/labs/reimandlab/private/users/jreimand/CA2M/",
date_tag,
"/bin/000_HEADER.R"))
source(pff("/bin/999_process_data.R")
input_data_dir = "/.mounts/labs/reimandlab/private/users/oocsenas/CA2M/INPUT_DATA/"
#Load in PCAWG mutation file
PCAWG_mutations_dt = fread("gunzip -c /.mounts/labs/reimandlab/private/users/jreimand/ActiveDriver2/data/mutations/oct2016/October_2016_whitelist_2583.snv_mnv_indel.maf.gz", fill = T, showProgress = T)
#Keep only SNV's
PCAWG_mutations_dt_SNV = PCAWG_mutations_dt[Variant_Type == "SNP"]
#Remove hypermutated samples (>90k mutations)
mutations_per_sample = table(PCAWG_mutations_dt_SNV$Donor_ID)
hypermutated_samples = names(which(mutations_per_sample > 90000))
PCAWG_mutations_dt_SNV_nohyper = PCAWG_mutations_dt_SNV[-which(PCAWG_mutations_dt_SNV$Donor_ID %in% hypermutated_samples)]
seqnames_PCAWG = paste("chr", PCAWG_mutations_dt_SNV_nohyper$Chromosome, sep = "")
#Liftover mutations to hg38
mutation_ranges_hg19 = GRanges(seqnames = seqnames_PCAWG,
IRanges(PCAWG_mutations_dt_SNV_nohyper$Start_position,
PCAWG_mutations_dt_SNV_nohyper$End_position),
index = 1:nrow(PCAWG_mutations_dt_SNV_nohyper))
#Import liftover files
path = system.file(package = "liftOver", "extdata", "hg19ToHg38.over.chain")
ch = import.chain(paste0(input_data_dir, "/hg19ToHg38.over.chain"))
mutation_ranges_hg38 = unlist(liftOver(mutation_ranges_hg19, ch))
PCAWG_mutations_dt_hg38 = PCAWG_mutations_dt_SNV_nohyper[mutation_ranges_hg38$index]
PCAWG_mutations_dt_hg38$Chromosome = as.character(seqnames(mutation_ranges_hg38))
PCAWG_mutations_dt_hg38$Start_position = start(mutation_ranges_hg38)
PCAWG_mutations_dt_hg38$End_position = end(mutation_ranges_hg38)
#Save hg38 PCAWG file
fwrite(PCAWG_mutations_dt_hg38,pff("/data/001C_PCAWG_mutations_hg38.csv"))