oxford-pharmacoepi
diff --git a/‎.DS_Store
8 KB b/‎.DS_Store
8 KB
diff --git a/‎.gitignore
+7 b/‎.gitignore
+7
diff --git a/‎CodeToRun.R
+52 b/‎CodeToRun.R
+52
diff --git a/‎RAP_Scripts/.DS_Store
10 KB b/‎RAP_Scripts/.DS_Store
10 KB
diff --git a/‎RAP_Scripts/Validation/.DS_Store
6 KB b/‎RAP_Scripts/Validation/.DS_Store
6 KB
diff --git a/‎RAP_Scripts/Validation/partB_Merge.sh
+22 b/‎RAP_Scripts/Validation/partB_Merge.sh
+22
diff --git a/‎RAP_Scripts/Validation/partC_Step1_QC.sh
+19 b/‎RAP_Scripts/Validation/partC_Step1_QC.sh
+19
diff --git a/‎RAP_Scripts/Validation/partD_Step1_Regenie.sh
+35 b/‎RAP_Scripts/Validation/partD_Step1_Regenie.sh
+35
diff --git a/‎RAP_Scripts/Validation/partE_Step2_QC_Val.sh
+37 b/‎RAP_Scripts/Validation/partE_Step2_QC_Val.sh
+37
diff --git a/‎RAP_Scripts/Validation/partE_Step2_SelectSNPs.sh
+36 b/‎RAP_Scripts/Validation/partE_Step2_SelectSNPs.sh
+36
diff --git a/‎RAP_Scripts/Validation/partF_Step2_Regenie.sh
+53 b/‎RAP_Scripts/Validation/partF_Step2_Regenie.sh
+53
diff --git a/‎RAP_Scripts/Validation/partG-merge-regenie-files.sh
+46 b/‎RAP_Scripts/Validation/partG-merge-regenie-files.sh
+46
diff --git a/‎RAP_Scripts/Validation/validation_1.txt
+1 b/‎RAP_Scripts/Validation/validation_1.txt
+1
diff --git a/‎RAP_Scripts/Validation/validation_10.txt
+1 b/‎RAP_Scripts/Validation/validation_10.txt
+1
diff --git a/‎RAP_Scripts/Validation/validation_19.txt
+6 b/‎RAP_Scripts/Validation/validation_19.txt
+6
diff --git a/‎RAP_Scripts/Validation/validation_2.txt
+1 b/‎RAP_Scripts/Validation/validation_2.txt
+1
diff --git a/‎RAP_Scripts/Validation/validation_3.txt
+11 b/‎RAP_Scripts/Validation/validation_3.txt
+11
diff --git a/‎RAP_Scripts/Validation/validation_6.txt
+19 b/‎RAP_Scripts/Validation/validation_6.txt
+19
diff --git a/‎RAP_Scripts/partB_Merge.sh
+21 b/‎RAP_Scripts/partB_Merge.sh
+21
@@ -0,0 +1,7 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
+# macOS Finder metadata; copies that are already tracked must also be removed
+# with `git rm --cached` before this rule takes effect for them
+.DS_Store
+Results
+R_Scripts/Extras.R
+RAP_Scripts/Bgen/
@@ -0,0 +1,52 @@
+# ============================================================================ #
+#                                 CODE TO RUN                                  #
+#                            Marta Alcalde Herraiz                             #
+# ============================================================================ #
+# Entry point of the analysis: attaches the packages, sets the data
+# directories and sources the numbered scripts under R_Scripts/ in order.
+#
+# Run this file in a fresh R session. It deliberately does not call
+# rm(list = ls()): that only empties the global environment (attached packages
+# and options are left as they were) and deletes the workspace of whoever
+# sources the file.
+
+# Packages ---------------------------------------------------------------------
+library("dplyr")
+library("here")
+library("lubridate")
+library("pbatR")
+library("tableone")
+library("ggplot2")
+library("stringr")
+library("tidyverse")
+library("flextable")
+library("ftExtra")
+library("coloc")
+library("readr")
+
+source(here("R_Scripts", "Functions.R"))
+
+# Directories where the data is ------------------------------------------------
+# Only dir_data needs editing when the project moves; the other two are its
+# sub-folders.
+dir_data    <- "D:/Projects/VaccineResponse_GWAS/"
+dir_ukb     <- paste0(dir_data, "UKBiobank/")
+dir_results <- paste0(dir_data, "Results/")
+
+# Load codes
+source(here("R_Scripts", "1-ImmuneResponse.R"))
+source(here("R_Scripts", "2-Breakthrough.R"))
+
+# Run the GWAS in the RAP PLATFORM ---------------------------------------------
+source(here("R_Scripts", "3-ComputePVal.R"))
+
+# Save the results within the "GWAS" folder under the names:
+# breakthroughSeverity.txt
+# breakthroughSeverity_Validation.txt
+
+# Run FUMA ---------------------------------------------------------------------
+
+# Colocalisation analysis ------------------------------------------------------
+source(here("R_Scripts", "4-Colocalisation.R"))
+
+# Validation -------------------------------------------------------------------
+source(here("R_Scripts", "5-Validation.R"))
+
+# SCRIPTS TO MAKE THE TABLES/FIGURES FROM THE PAPER ----------------------------
+source(here("R_Scripts", "6-CreateTables.R"))
+
+source(here("R_Scripts", "7-CreateManhattanPlots.R"))
+source(here("R_Scripts", "8-CreateValidationPlot.R"))
+
+
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+# It takes about an hour to run
+
+# How to Run:
+# ./partB_Merge.sh in the command line
+
+# What this file does:
+# Merge genotype calls
+
+directory="MAH/one_dose_validation" #Path, example: MAH/cohorts
+phenotype="one_dose_validation" #Example: one_dose_cohort
+
+run_merge="cp /mnt/project/Bulk/Genotype\ Results/Genotype\ calls/ukb22418_c[1-9]* . ;\
+        ls *.bed | sed -e 's/.bed//g'> files_to_merge.txt;\
+        plink --merge-list files_to_merge.txt --make-bed\
+        --autosome-xy --out ukb22418_c1_22_v2_merged;\
+        rm files_to_merge.txt;"
+
+dx run swiss-army-knife -iin="/${directory}/${phenotype}.phe" \
+   -icmd="${run_merge}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16"\
+   --destination="/${directory}" --brief --yes --name="StepB_${phenotype}"
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+
+directory_input="Whole_genome/Breakthrough_gwas"
+directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
+phenotype="twoDose_validation"
+
+run_plink_qc="plink2 --bfile ukb22418_c1_22_v2_merged\
+ --keep Initial_input_${phenotype}.phe --autosome\
+ --maf 0.01 --mac 20 --geno 0.1 --hwe 1e-15\
+ --mind 0.1 --write-snplist --write-samples\
+ --no-id-header --out  StepC-snps_qc_pass_${phenotype}"
+
+dx run swiss-army-knife -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.bed"\
+   -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.bim"\
+   -iin="/${directory_input}/Merged_files/ukb22418_c1_22_v2_merged.fam"\
+   -iin="/${directory_input}/Initial_input/Initial_input_${phenotype}.phe"\
+   -icmd="${run_plink_qc}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16"\
+   --destination="${project}:/${directory_output}/" --brief --yes --name="StepC_${phenotype}"
@@ -0,0 +1,35 @@
+#!/bin/sh
+
+# Runtime: roughly 9-10 hours
+#
+# Usage:
+#   ./partD_Step1_Regenie.sh
+#
+# Purpose:
+#   regenie step 1 - builds the regression model from the genotype-call
+#   variants that passed the quality control of partC_Step1_QC.sh
+
+in_dir="Whole_genome/Breakthrough_gwas"
+out_dir="Whole_genome/Breakthrough_gwas/Intermediary_files"
+pheno="breakthroughSeverity_validation"
+trait="severity_index"
+
+# File names shared by the regenie command and the job inputs
+merged_prefix="ukb22418_c1_22_v2_merged"
+merged_dir="/${in_dir}/Merged_files"
+phe_file="Initial_input_${pheno}.phe"
+snplist="StepC-snps_qc_pass_${pheno}.snplist"
+
+# regenie command executed inside the job; the same file is given as both
+# phenotype file and covariate file
+step1_cmd="regenie --step 1 --lowmem --out StepD-${pheno}_results\
+ --bed ${merged_prefix}\
+ --phenoFile ${phe_file} --covarFile ${phe_file}\
+ --extract ${snplist} --phenoCol ${trait}\
+ --covarCol Sex --covarCol Age --covarCol Genetic_batch --covarCol PC{1:10}\
+ --bsize 1000 --bt --loocv --gz --threads 16"
+
+dx run swiss-army-knife \
+   -iin="${merged_dir}/${merged_prefix}.bed" \
+   -iin="${merged_dir}/${merged_prefix}.bim" \
+   -iin="${merged_dir}/${merged_prefix}.fam" \
+   -iin="/${in_dir}/Intermediary_files/${snplist}" \
+   -iin="/${in_dir}/Initial_input/${phe_file}" \
+   --name="StepD_${pheno}" \
+   -icmd="${step1_cmd}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
+   --destination="/${out_dir}" --brief --yes
+
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+directory_input="Whole_genome/Breakthrough_gwas"
+directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
+imputed_file_dir="/Bulk/Imputation/UKB imputation from genotype"
+phenotype="breakthroughSusceptibility"
+outcome="bt_infection"
+
+
+for chr in {1,2,3,6,10,19; do
+        run_plink_wes="plink2 --bfile StepE-selected_snps_${phenotype}_${chr}\
+            --no-pheno\
+            --keep ${phenotype}.phe\
+            --autosome\
+            --maf 0.01\
+            --mac 20\
+            --geno 0.1\
+            --hwe 1e-15\
+            --mind 0.1\
+            --write-snplist\
+            --write-samples\
+            --no-id-header\
+            --rm-dup force-first\
+            --out c${chr}_snps_qc_pass_${phenotype}"
+
+        dx run swiss-army-knife -iin="/${directory}/selected_snps_${phenotype}_${chr}.bed" \
+            -iin="/${directory}/selected_snps_${phenotype}_${chr}.bim" \
+            -iin="/${directory}/selected_snps_${phenotype}_${chr}.fam"\
+            -iin="/${directory}/${phenotype}.phe" \
+            -icmd="${run_plink_wes}" --tag="Step2" --instance-type "mem1_ssd1_v2_x16"\
+            --name "StepE_${phenotype}_chr${chr}"\
+            --destination="${directory}/" --brief --yes
+done
+
+
+
+
@@ -0,0 +1,36 @@
+#!/bin/sh
+
+# The time it takes to run depends on the chromosome you are extracting the snps from
+
+# How to Run:
+# ./partD_Step1_Regenie_SelectSNPs.sh in the command line
+
+# What this file does:
+# Extracts the SNPs listed in validation.txt from the Imputation files
+
+
+directory_input="Whole_genome/Breakthrough_gwas"
+directory_output="Whole_genome/Breakthrough_gwas/Intermediary_files"
+imputed_file_dir="/Bulk/Imputation/UKB imputation from genotype"
+phenotype="breakthroughSeverity_validation"
+outcome="severity_index"
+
+for chr in {1,2,3,6,10,19}; do
+run_regenie_step1="plink2 --bgen ukb22828_c${chr}_b0_v3.bgen ref-first\
+        --sample ukb22828_c${chr}_b0_v3.sample\
+	-extract validation_${chr}.txt\
+	--make-bed\
+	--out StepE-selected_snps_${phenotype}_${chr}\
+	--no-pheno\
+	--keep Initial_input_${phenotype}.phe"
+
+dx run swiss-army-knife -iin="${imputed_file_dir}/ukb22828_c${chr}_b0_v3.bgen" \
+   -iin="${imputed_file_dir}/ukb22828_c${chr}_b0_v3.sample"\
+   -iin="/${directory_input}/validation_${chr}.txt"\
+   -iin="/${directory_input}/${phenotype}.phe"\
+   -icmd="${run_regenie_step1}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16"\
+   --name="StepE_${phenotype}_chr${chr}"\
+   --destination="/${directory}/" --brief --yes
+ 
+   
+done
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# regenie step 2 on the selected SNPs: submits one Swiss Army Knife job per
+# chromosome listed in the loop below.
+
+imputed_file_dir="/Auxiliary_genetic_dataset/Imputation" # not referenced below
+out_dir="MAH/one_dose_validation" #Output directory, example: MAH/cohorts
+pheno="one_dose_validation" #Example: one_dose_cohort
+
+for chrom in 6; do
+    # File names shared by the regenie command and the job inputs
+    snps_prefix="selected_snps_${pheno}_${chrom}"
+    phe_file="${pheno}.phe"
+    pred_list="${pheno}_results_pred.list"
+
+    # regenie command executed inside the job; the same file is given as both
+    # phenotype file and covariate file
+    step2_cmd="regenie --step 2\
+            --bed ${snps_prefix}\
+            --out ${pheno}_assoc.c${chrom}\
+            --phenoFile ${phe_file}\
+            --covarFile ${phe_file}\
+            --bt\
+            --approx\
+            --firth-se\
+            --firth\
+            --phenoCol out\
+            --covarCol Sex\
+            --covarCol Age\
+            --covarCol Genetic_batch\
+            --covarCol PC1\
+            --covarCol PC2\
+            --covarCol PC3\
+            --covarCol PC4\
+            --covarCol PC5\
+            --covarCol PC6\
+            --covarCol PC7\
+            --covarCol PC8\
+            --covarCol PC9\
+            --covarCol PC10\
+            --pred ${pred_list}\
+            --bsize 200\
+            --pThresh 0.05\
+            --minMAC 3\
+            --threads 16\
+            --gz"
+
+    # NOTE(review): ${project} is not assigned in this script; presumably it
+    # is expected from the calling environment - confirm.
+    dx run swiss-army-knife \
+        -iin="/${out_dir}/${snps_prefix}.bed" \
+        -iin="/${out_dir}/${snps_prefix}.bim" \
+        -iin="/${out_dir}/${snps_prefix}.fam" \
+        -iin="/${out_dir}/${phe_file}" \
+        -iin="/${out_dir}/${pred_list}" \
+        -iin="/${out_dir}/${pheno}_results_1.loco.gz" \
+        -icmd="${step2_cmd}" --tag="Step2" --instance-type "mem1_hdd1_v2_x16" \
+        --name "StepF_${pheno}_chr${chrom}" \
+        --destination="${project}:/${out_dir}/" --brief --yes
+done
+
+
+
+
+
+
@@ -0,0 +1,46 @@
+#!/bin/sh
+
+# Requirements:
+# 0-4 - please refer to readme.md
+# 5. Must have executed:
+# - partB_Merge.sh
+# - partC_Step1_QC.sh
+# - partD_Step1_Regenie.sh
+# - partE_Step2_SelectSNPs.sh
+# - partE_Step2_QC_Val.sh
+# - partF_Step2_Regenie.sh
+
+# How to Run:
+# Run this shell script using:
+#   sh partG-merge-regenie-files.sh
+# on the command line on your own machine
+
+# Inputs:
+# - /MAH/one_dose_validation/*.regenie.gz - regenie result files
+
+# Outputs:
+# - OneDoseValidation.txt - merged results of all input files in tab-delimited
+#   format; the job destination is /MAH/one_dose_validation/
+
+# Command executed inside the job
+merge_cmd='out_file="OneDoseValidation.txt"
+
+# Use dxFUSE to copy the regenie files into the container storage
+cp /mnt/project/MAH/one_dose_validation/*.regenie.gz .
+gunzip *.regenie.gz
+
+# add the header back to the top of the merged file
+echo -e "CHROM\tGENPOS\tID\tALLELE0\tALLELE1\tA1FREQ\tN\tTEST\tBETA\tSE\tCHISQ\tLOG10P\tEXTRA" > "$out_file"
+
+# append every result file without its own header line, turning the
+# space-separated columns into tab-separated ones
+for f in ./*.regenie
+do
+   tail -n+2 "$f" | tr " " "\t" >> "$out_file"
+done
+rm *.regenie'
+
+# NOTE(review): the -iin file name below is hard-coded (..._assoc.c16_out...),
+# while merge_cmd reads its inputs from /mnt/project - confirm this file exists.
+# NOTE(review): ${project} is not assigned in this script; presumably it is
+# expected from the calling environment - confirm.
+dx run swiss-army-knife -iin="/MAH/one_dose_validation/one_dose_validation_assoc.c16_out.regenie.gz" \
+   -icmd="${merge_cmd}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16" \
+   --destination="${project}:/MAH/one_dose_validation/" --brief --yes
@@ -0,0 +1 @@
+rs6676150
@@ -0,0 +1 @@
+rs1977829
@@ -0,0 +1,6 @@
+rs112313064
+rs3760775
+rs11673136
+rs5117
+rs681343
+rs429358
@@ -0,0 +1 @@
+rs79510369
@@ -0,0 +1,11 @@
+rs141045534
+rs59776512
+rs73062389
+rs2531757
+rs6789386
+rs17347644
+rs71322420
+rs16861415
+rs13097481
+rs114664200
+rs2550250
@@ -0,0 +1,19 @@
+rs3134931
+rs7754570
+rs2395195
+rs146281128
+rs145945003
+rs28802989
+rs4713558
+rs9275109
+rs1794514
+rs3129733
+rs2071472
+rs3130216
+rs3094055
+rs28752480
+rs2261033
+rs732162
+rs28383322
+rs68033958
+rs9275766
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+# How to Run:
+# ./partB.sh on the command line
+
+# What this file does:
+# Merge genotype calls
+
+directory_input="Whole_genome/Breakthrough_gwas"
+directory_output="Whole_genome/Breakthrough_gwas/Merged_files" 
+phenotype="oneDose" 
+
+run_merge="cp /mnt/project/Bulk/Genotype\ Results/Genotype\ calls/ukb22418_c[1-9]* . ;\
+        ls *.bed | sed -e 's/.bed//g'> files_to_merge.txt;\
+        plink --merge-list files_to_merge.txt --make-bed\
+        --autosome-xy --out ukb22418_c1_22_v2_merged;\
+        rm files_to_merge.txt;"
+
+dx run swiss-army-knife -iin="/${directory_input}/Initial_input_${phenotype}.phe"\
+   -icmd="${run_merge}" --tag="Step1" --instance-type "mem1_ssd1_v2_x16"\
+   --destination="/${directory_output}" --brief --yes --name="StepB_${phenotype}"