Skip to content

Commit 07bce25

Browse files
author
Marta Alcalde-Herraiz
committed
Update
1 parent 9ea5b74 commit 07bce25

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+2573
-0
lines changed

.DS_Store

8 KB
Binary file not shown.

.gitignore

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# RStudio project state and R session files
.Rproj.user
.Rhistory
.RData
.Ruserdata

# macOS Finder metadata. Ignoring it here only prevents new files from being
# added; copies that are already tracked must be removed with `git rm --cached`.
.DS_Store

# Project-specific paths kept out of version control
Results
R_Scripts/Extras.R
RAP_Scripts/Bgen/

CodeToRun.R

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# ============================================================================ #
#                                 CODE TO RUN                                  #
#                            Marta Alcalde Herraiz                             #
# ============================================================================ #
#
# Entry point of the analysis: attaches the packages, defines the data
# directories and sources the numbered scripts in order.
#
# Run it from a fresh R session. The script does not call rm(list = ls()):
# that call only removes objects from the global environment (attached
# packages and options stay as they are) and it wipes the workspace of anyone
# who sources this file.

# Packages ---------------------------------------------------------------------
library("dplyr")
library("here")
library("lubridate")
library("pbatR")
library("tableone")
library("ggplot2")
library("stringr")
library("tidyverse")
library("flextable")
library("ftExtra")
library("coloc")
library("readr")

source(here("R_Scripts", "Functions.R"))

# Directories ------------------------------------------------------------------
# Only the root needs editing when the data moves. The trailing "/" is kept so
# the three values are identical to the previous hard-coded strings.
dir_data    <- "D:/Projects/VaccineResponse_GWAS/"
dir_ukb     <- paste0(dir_data, "UKBiobank/")
dir_results <- paste0(dir_data, "Results/")

# Load codes -------------------------------------------------------------------
source(here("R_Scripts", "1-ImmuneResponse.R"))
source(here("R_Scripts", "2-Breakthrough.R"))

# Run the GWAS in the RAP PLATFORM ---------------------------------------------
source(here("R_Scripts", "3-ComputePVal.R"))

# Save the results within the "GWAS" folder under the names:
#   breakthroughSeverity.txt
#   breakthroughSeverity_Validation.txt

# Run FUMA ---------------------------------------------------------------------

# Colocalisation analysis ------------------------------------------------------
source(here("R_Scripts", "4-Colocalisation.R"))

# Validation -------------------------------------------------------------------
source(here("R_Scripts", "5-Validation.R"))

# SCRIPTS TO MAKE THE TABLES/FIGURES FROM THE PAPER ----------------------------
source(here("R_Scripts", "6-CreateTables.R"))

source(here("R_Scripts", "7-CreateManhattanPlots.R"))
source(here("R_Scripts", "8-CreateValidationPlot.R"))
51+
52+

RAP_Scripts/.DS_Store

10 KB
Binary file not shown.

RAP_Scripts/Validation/.DS_Store

6 KB
Binary file not shown.

RAP_Scripts/Validation/partB_Merge.sh

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/sh

# Merge genotype calls.
#
# Builds a shell command that copies the per-chromosome genotype-call files
# matching ukb22418_c[1-9]* into the job's working directory, merges them with
# plink into ukb22418_c1_22_v2_merged, and submits that command as a
# swiss-army-knife job with `dx run`.
#
# It takes about an hour to run.
#
# How to run:
#   ./partB_Merge.sh    (on the command line)

directory="MAH/one_dose_validation"   # Path, example: MAH/cohorts
phenotype="one_dose_validation"       # Example: one_dose_cohort

# Command executed inside the job.
# - Every continuation ends in " \" so neighbouring tokens stay separated no
#   matter how the next line is indented.
# - The sed expression strips only a literal ".bed" suffix: the dot is escaped
#   and the match is anchored to the end of the name. "\$" keeps the local
#   shell from treating "$" as the start of an expansion.
run_merge="cp /mnt/project/Bulk/Genotype\ Results/Genotype\ calls/ukb22418_c[1-9]* . ; \
ls *.bed | sed -e 's/\.bed\$//' > files_to_merge.txt; \
plink --merge-list files_to_merge.txt --make-bed \
--autosome-xy --out ukb22418_c1_22_v2_merged; \
rm files_to_merge.txt;"

dx run swiss-army-knife -iin="/${directory}/${phenotype}.phe" \
    -icmd="${run_merge}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
    --destination="/${directory}" --brief --yes --name="StepB_${phenotype}"
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#!/bin/sh

# Step C: quality control of the merged genotype calls.
#
# Runs plink2 on ukb22418_c1_22_v2_merged, restricted to the samples listed in
# Initial_input_<phenotype>.phe, and writes the SNP list and sample list of
# what passes the filters under the prefix StepC-snps_qc_pass_<phenotype>.
# The command is submitted as a swiss-army-knife job with `dx run`.

directory_input="Whole_genome/Breakthrough_gwas"
directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
phenotype="twoDose_validation"

# Every continuation ends in " \" so adjacent flags cannot fuse into one token
# when the following line carries no leading whitespace.
run_plink_qc="plink2 --bfile ukb22418_c1_22_v2_merged \
--keep Initial_input_${phenotype}.phe --autosome \
--maf 0.01 --mac 20 --geno 0.1 --hwe 1e-15 \
--mind 0.1 --write-snplist --write-samples \
--no-id-header --out StepC-snps_qc_pass_${phenotype}"

# NOTE(review): ${project} is not assigned in this script. If it is not
# exported by the caller the destination becomes ":/<folder>/" -- presumably
# resolved by dx against the currently selected project; confirm.
dx run swiss-army-knife \
    -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.bed" \
    -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.bim" \
    -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.fam" \
    -iin="/${directory_input}/Initial_input/Initial_input_${phenotype}.phe" \
    -icmd="${run_plink_qc}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
    --destination="${project}:/${directory_output}/" --brief --yes --name="StepC_${phenotype}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#!/bin/sh

# It takes about 9-10h to run
#
# How to Run:
#   ./partD_Step1_Regenie.sh in the command line
#
# What this file does:
#   Builds a regression model (regenie --step 1) using the variants of the
#   Genotype calls that passed the quality control from partC_Step1_QC.sh.
#   The command is submitted as a swiss-army-knife job with `dx run`.

directory_input="Whole_genome/Breakthrough_gwas"
directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
phenotype="breakthroughSeverity_validation"
outcome="severity_index"

# The same .phe file provides both the phenotype column (${outcome}) and the
# covariate columns. Every continuation ends in " \" so that adjacent flags
# stay separate tokens regardless of the indentation of the next line.
# "PC{1:10}" is passed to regenie verbatim: it is inside double quotes, so the
# local shell does not expand the braces.
run_regenie_step1="regenie --step 1 \
--lowmem --out StepD-${phenotype}_results --bed ukb22418_c1_22_v2_merged \
--phenoFile Initial_input_${phenotype}.phe --covarFile Initial_input_${phenotype}.phe \
--extract StepC-snps_qc_pass_${phenotype}.snplist --phenoCol ${outcome} \
--covarCol Sex \
--covarCol Age \
--covarCol Genetic_batch \
--covarCol PC{1:10} \
--bsize 1000 --bt --loocv --gz --threads 16"

dx run swiss-army-knife \
    -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.bed" \
    -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.bim" \
    -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.fam" \
    -iin="/${directory_input}/Intermediary_files/StepC-snps_qc_pass_${phenotype}.snplist" \
    -iin="/${directory_input}/Initial_input/Initial_input_${phenotype}.phe" \
    --name="StepD_${phenotype}" \
    -icmd="${run_regenie_step1}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
    --destination="/${directory_output}" --brief --yes
35+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/bash

# Step E (QC): per-chromosome quality control of the selected SNPs.
#
# For each chromosome in the list below, runs plink2 on the fileset
# StepE-selected_snps_<phenotype>_<chr>, restricted to the samples listed in
# <phenotype>.phe, and writes the passing SNP and sample lists under the prefix
# c<chr>_snps_qc_pass_<phenotype>. Each chromosome is submitted as its own
# swiss-army-knife job with `dx run`.

directory_input="Whole_genome/Breakthrough_gwas"
directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
phenotype="breakthroughSusceptibility"

# The dx paths below are built from ${directory}, which this script never
# assigned. A value exported by the caller is honoured; otherwise the
# intermediary-files folder is used.
# NOTE(review): confirm that the selected-SNP filesets and <phenotype>.phe live
# in ${directory_output}; ${directory_input} is kept above in case they do not.
directory="${directory:-${directory_output}}"

# Plain word list: the previous "{1,2,3,6,10,19" was missing its closing brace,
# so the loop ran once with that literal text as the chromosome.
for chr in 1 2 3 6 10 19; do
    # One prefix shared by the staged inputs and --bfile, so plink2 looks for
    # exactly the files that the job downloads into its working directory.
    bfile_prefix="StepE-selected_snps_${phenotype}_${chr}"

    run_plink_qc="plink2 --bfile ${bfile_prefix} \
        --no-pheno \
        --keep ${phenotype}.phe \
        --autosome \
        --maf 0.01 \
        --mac 20 \
        --geno 0.1 \
        --hwe 1e-15 \
        --mind 0.1 \
        --write-snplist \
        --write-samples \
        --no-id-header \
        --rm-dup force-first \
        --out c${chr}_snps_qc_pass_${phenotype}"

    dx run swiss-army-knife \
        -iin="/${directory}/${bfile_prefix}.bed" \
        -iin="/${directory}/${bfile_prefix}.bim" \
        -iin="/${directory}/${bfile_prefix}.fam" \
        -iin="/${directory}/${phenotype}.phe" \
        -icmd="${run_plink_qc}" --tag="Step2" --instance-type "mem1_ssd1_v2_x16" \
        --name "StepE_${phenotype}_chr${chr}" \
        --destination="/${directory}/" --brief --yes
done
34+
35+
36+
37+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/bin/sh

# The time it takes to run depends on the chromosome you are extracting the
# snps from.
#
# How to Run:
#   Execute this script on the command line.
#
# What this file does:
#   For each chromosome in the list below, extracts the SNPs listed in
#   validation_<chr>.txt from the imputation files (bgen + sample) and writes
#   them as a PLINK bed/bim/fam fileset named
#   StepE-selected_snps_<phenotype>_<chr>, keeping only the samples listed in
#   Initial_input_<phenotype>.phe. Each chromosome is submitted as its own
#   swiss-army-knife job with `dx run`.

directory_input="Whole_genome/Breakthrough_gwas"
directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
imputed_file_dir="/Bulk/Imputation/UKB imputation from genotype"
phenotype="breakthroughSeverity_validation"

# The destination was written against ${directory}, which this script never
# assigned. A value exported by the caller is honoured; otherwise the
# intermediary-files folder is used.
# NOTE(review): confirm ${directory_output} is the intended destination.
directory="${directory:-${directory_output}}"

# Plain word list instead of {1,2,...}: brace expansion is not part of POSIX
# sh, so under a strict /bin/sh the loop would run once with the literal text.
for chr in 1 2 3 6 10 19; do
    run_plink_extract="plink2 --bgen ukb22828_c${chr}_b0_v3.bgen ref-first \
        --sample ukb22828_c${chr}_b0_v3.sample \
        --extract validation_${chr}.txt \
        --make-bed \
        --out StepE-selected_snps_${phenotype}_${chr} \
        --no-pheno \
        --keep Initial_input_${phenotype}.phe"

    # The staged .phe must carry the same file name that --keep refers to.
    # NOTE(review): the Initial_input/ sub-folder mirrors the path used by the
    # step 1 regenie script for this phenotype; confirm the file is there.
    dx run swiss-army-knife \
        -iin="${imputed_file_dir}/ukb22828_c${chr}_b0_v3.bgen" \
        -iin="${imputed_file_dir}/ukb22828_c${chr}_b0_v3.sample" \
        -iin="/${directory_input}/validation_${chr}.txt" \
        -iin="/${directory_input}/Initial_input/Initial_input_${phenotype}.phe" \
        -icmd="${run_plink_extract}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
        --name="StepE_${phenotype}_chr${chr}" \
        --destination="/${directory}/" --brief --yes
done
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#!/bin/bash

# Step F: regenie step 2 on the selected SNPs.
#
# For each chromosome in the loop, runs `regenie --step 2` on the fileset
# selected_snps_<phenotype>_<chr>, reading the phenotype column "out" and the
# covariates from <phenotype>.phe and the step 1 predictions from
# <phenotype>_results_pred.list. Results are written under the prefix
# <phenotype>_assoc.c<chr>. Each chromosome is submitted as its own
# swiss-army-knife job with `dx run`.

directory="MAH/one_dose_validation"   # Output directory, example: MAH/cohorts
phenotype="one_dose_validation"       # Example: one_dose_cohort

# Covariate flags: Sex, Age, Genetic_batch and PC1..PC10, in that order.
covar_flags="--covarCol Sex --covarCol Age --covarCol Genetic_batch"
for pc in 1 2 3 4 5 6 7 8 9 10; do
    covar_flags="${covar_flags} --covarCol PC${pc}"
done

for chr in 6; do
    # Every continuation ends in " \" so adjacent flags stay separate tokens
    # regardless of the indentation of the following line.
    run_regenie_cmd="regenie --step 2 \
        --bed selected_snps_${phenotype}_${chr} \
        --out ${phenotype}_assoc.c${chr} \
        --phenoFile ${phenotype}.phe \
        --covarFile ${phenotype}.phe \
        --bt \
        --approx \
        --firth-se \
        --firth \
        --phenoCol out \
        ${covar_flags} \
        --pred ${phenotype}_results_pred.list \
        --bsize 200 \
        --pThresh 0.05 \
        --minMAC 3 \
        --threads 16 \
        --gz"

    # NOTE(review): ${project} is not assigned in this script. If it is not
    # exported by the caller the destination becomes ":/<folder>/" --
    # presumably resolved by dx against the currently selected project; confirm.
    dx run swiss-army-knife \
        -iin="/${directory}/selected_snps_${phenotype}_${chr}.bed" \
        -iin="/${directory}/selected_snps_${phenotype}_${chr}.bim" \
        -iin="/${directory}/selected_snps_${phenotype}_${chr}.fam" \
        -iin="/${directory}/${phenotype}.phe" \
        -iin="/${directory}/${phenotype}_results_pred.list" \
        -iin="/${directory}/${phenotype}_results_1.loco.gz" \
        -icmd="${run_regenie_cmd}" --tag="Step2" --instance-type "mem1_hdd1_v2_x16" \
        --name "StepF_${phenotype}_chr${chr}" \
        --destination="${project}:/${directory}/" --brief --yes
done
48+
49+
50+
51+
52+
53+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/sh

# Requirements:
# 0-4 - please refer to readme.md
# 5. Must have executed:
#    - partB-merge-files-dx-fuse.sh
#    - partC-step1-qc-filter.sh
#    - partD-step1-regenie.sh
#    - partE-step2-qc-filter.sh
#    - partF-step2-regenie.sh
#
# How to Run:
#   Run this shell script using:
#     sh partG-merge-regenie-files.sh
#   on the command line on your own machine
#
# Inputs:
#   - every /MAH/one_dose_validation/*.regenie.gz file in the project (read
#     through the /mnt/project mount inside the job)
#
# Outputs:
#   - OneDoseValidation.txt - the rows of all result files under a single
#     header, in tab-delimited format, uploaded to /MAH/one_dose_validation/

# Command executed inside the job. It is single-quoted so that $out_file and
# $f are expanded by the shell that runs the job, not by this script.
merge_cmd='out_file="OneDoseValidation.txt"

# Use dxFUSE to copy the regenie files into the container storage
cp /mnt/project/MAH/one_dose_validation/*.regenie.gz .
gunzip *.regenie.gz

# Write the header once at the top of the merged file. printf is used instead
# of "echo -e" because not every shell implements the -e option.
printf "CHROM\tGENPOS\tID\tALLELE0\tALLELE1\tA1FREQ\tN\tTEST\tBETA\tSE\tCHISQ\tLOG10P\tEXTRA\n" > "$out_file"

# Append every result file without its own header line, turning the
# space-separated columns into tab-separated ones.
for f in ./*.regenie
do
    tail -n +2 "$f" | tr " " "\t" >> "$out_file"
done
rm *.regenie'

# NOTE(review): the staged -iin file must exist for the job to be accepted;
# confirm that the c16 result file is present in the project. ${project} is
# not assigned in this script -- if it is not exported by the caller the
# destination becomes ":/MAH/one_dose_validation/", presumably resolved by dx
# against the currently selected project.
dx run swiss-army-knife \
    -iin="/MAH/one_dose_validation/one_dose_validation_assoc.c16_out.regenie.gz" \
    -icmd="${merge_cmd}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
    --destination="${project}:/MAH/one_dose_validation/" --brief --yes
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
rs6676150
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
rs1977829
+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
rs112313064
2+
rs3760775
3+
rs11673136
4+
rs5117
5+
rs681343
6+
rs429358
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
rs79510369
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
rs141045534
2+
rs59776512
3+
rs73062389
4+
rs2531757
5+
rs6789386
6+
rs17347644
7+
rs71322420
8+
rs16861415
9+
rs13097481
10+
rs114664200
11+
rs2550250
+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
rs3134931
2+
rs7754570
3+
rs2395195
4+
rs146281128
5+
rs145945003
6+
rs28802989
7+
rs4713558
8+
rs9275109
9+
rs1794514
10+
rs3129733
11+
rs2071472
12+
rs3130216
13+
rs3094055
14+
rs28752480
15+
rs2261033
16+
rs732162
17+
rs28383322
18+
rs68033958
19+
rs9275766

RAP_Scripts/partB_Merge.sh

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/bin/sh

# Merge genotype calls.
#
# Builds a shell command that copies the per-chromosome genotype-call files
# matching ukb22418_c[1-9]* into the job's working directory, merges them with
# plink into ukb22418_c1_22_v2_merged, and submits that command as a
# swiss-army-knife job with `dx run`.
#
# How to Run:
#   ./partB_Merge.sh on the command line

directory_input="Whole_genome/Breakthrough_gwas"
directory_output="Whole_genome/Breakthrough_gwas/Merged_files"
phenotype="oneDose"

# Command executed inside the job.
# - Every continuation ends in " \" so neighbouring tokens stay separated no
#   matter how the next line is indented.
# - The sed expression strips only a literal ".bed" suffix: the dot is escaped
#   and the match is anchored to the end of the name. "\$" keeps the local
#   shell from treating "$" as the start of an expansion.
run_merge="cp /mnt/project/Bulk/Genotype\ Results/Genotype\ calls/ukb22418_c[1-9]* . ; \
ls *.bed | sed -e 's/\.bed\$//' > files_to_merge.txt; \
plink --merge-list files_to_merge.txt --make-bed \
--autosome-xy --out ukb22418_c1_22_v2_merged; \
rm files_to_merge.txt;"

dx run swiss-army-knife -iin="/${directory_input}/Initial_input_${phenotype}.phe" \
    -icmd="${run_merge}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
    --destination="/${directory_output}" --brief --yes --name="StepB_${phenotype}"

0 commit comments

Comments
 (0)