-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathCOVID-19paed_PBMCanalyses.Rmd
2198 lines (1802 loc) · 183 KB
/
COVID-19paed_PBMCanalyses.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
title: "COVID-19paed_PBMCanalyses_RL003"
author: "Rik G.H. Lindeboom"
date: "10/11/2021"
output: pdf_document
---
Code to process and analyse the PBMC data in Yosida et al. Nature, 2021. by Rik Lindeboom. Please reach out if anything is unclear, missing or wrong. Some meta data processing of patient information is omitted to comply with ethics.
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE,fig.height = 10, fig.width = 10,tidy.opts=list(width.cutoff=60),tidy=TRUE)
```
```{r load required packages, message=FALSE}
set.seed(1)
library(Seurat)
library(tidyverse)
library(ggplot2)
library(harmony)
library(ComplexHeatmap)
library(sceasy)
library(reticulate)
library(SoupX)
library(data.table)
library(DoubletFinder)
library(cardelino)
library(reticulate)
source("/mnt/scripts/RL003_function_collection_GitHub.R")
loompy <- import('loompy')
library(randomcoloR)
library(circlize)
library(readr)
library(patchwork)
library(cowplot)
library(lme4)
library(Matrix)
library(numDeriv)
```
```{r label="make sample table",eval = FALSE}
manifest <- read.csv("/mnt/projects/RL003_allCitePbmcsTheta/CV001_KM_COVID Sample tracking table - Manifest_210715.tsv",stringsAsFactors = F,header = T,sep="\t")
sampleTable <- read.csv("/mnt/projects/RL003_allCitePbmcsTheta/Pooled_pbmc_CITEseq_summary_kw_210422_procForR.csv",stringsAsFactors = F,header = T)
manis <- read.csv("/mnt/projects/RL003_allCitePbmcsTheta/RL003_manifest.txt",stringsAsFactors = F,header = T,sep = "\t")
manis$numberOfDonors <- sapply(gsub("^(.).*","\\1",manis$sample_id), function(x) sum(sampleTable$pool_group==x))
manis$sample_name <- manis$sample_id
manis$sample_id <- paste(manis$GEX,manis$CITE,sep="-")
manis$donors <- as.character(sapply(gsub("^(.).*","\\1",manis$sample_name), function(x) unique(sampleTable$Sample.name[sampleTable$pool_group==x])))
manis$location_multiplexed_bam <- paste0("/archive/HCA/10X/",manis$sample_id,"/outs")
manis$irods_or_farm <- "irods"
manis$bam_file <- "possorted_genome_bam.bam"
manis$barcodesLoc <- paste0("/archive/HCA/10X/",manis$sample_id,"/outs/filtered_feature_bc_matrix")
manis$barcode_file <- "barcodes.tsv.gz"
manis$baiFilePresent <- T
manis$bamReady <- NA
for (i in 1:nrow(manis)) {
foo <- tryCatch(system(paste0("ils ",manis$location_multiplexed_bam[i],"/",manis$bam_file[i])))
if (foo==0) { manis$bamReady[i] <- T } else { manis$bamReady[i] <- F }
}
manis$alignment <- "cellranger"
manis$citeFile <- paste0(manis$GEX,"-",manis$CITE)
socTable <- manis[,c("sample_name","sample_id","donors","numberOfDonors","location_multiplexed_bam","irods_or_farm","bam_file","barcodesLoc","barcode_file","baiFilePresent")]
write.table(socTable,file = "/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/sampleTable.txt",col.names = T,row.names = F,quote = F,sep=",")
```
'socTable' is used as input for souporcell:
```{bash,eval = FALSE}
# sample_name,sample_id,donors,numberOfDonors,location_multiplexed_bam,irods_or_farm,bam_file,barcodesLoc,barcode_file,baiFilePresent
# K1-PBMC,CV001_KM9465380-CV001_KM9465395,K1-PBMC;K2-PBMC,5,/archive/HCA/10X/CV001_KM9465380-CV001_KM9465395/outs,irods,possorted_genome_bam.bam,/archive/HCA/10X/CV001_KM9465380-CV001_KM9465395/outs/filtered_feature_bc_matrix,barcodes.tsv.gz,TRUE
dos2unix ${multiplexedSampleTable}
while read -r samplePair; do
sample_id=`echo ${samplePair} | cut -f2 -d','`
selected_k=`echo ${samplePair} | cut -f4 -d','`
location_multiplexed_bam=`echo ${samplePair} | cut -f5 -d','`
irods_or_farm=`echo ${samplePair} | cut -f6 -d','`
bam_file=`echo ${samplePair} | cut -f7 -d','`
barcodesLoc=`echo ${samplePair} | cut -f8 -d','`
barcode_file=`echo ${samplePair} | cut -f9 -d','`
baiFilePresent=`echo ${samplePair} | cut -f10 -d','`
if ! [[ ${sample_id} == 'sample_id' ]] ; then
#sample_id=${sample_id}_extraGenotype
cd ${outDir};
mkdir ${sample_id};
cd ${sample_id};
if [[ ${irods_or_farm} == 'irods' ]]; then
iget -Kr ${barcodesLoc}/${barcode_file};
iget -Kr ${location_multiplexed_bam}/${bam_file};
fi;
if [[ $baiFilePresent == FALSE ]]; then
samtools index -@ ${maxThreads} ${bam_file};
else
iget -Kr ${location_multiplexed_bam}/${bam_file}.bai;
fi;
cd ..;
singularity exec -B $PWD /home/ubuntu/bin/souporcell_latest.sif souporcell_pipeline.py -i ${sample_id}/${bam_file} -b ${sample_id}/${barcode_file} -f /mnt/souporcell/genome.fa -t ${maxThreads} -o ${outDir}/${sample_id} --common_variants ${knownVariantsFile} --skip_remap True -k ${selected_k};
rm ${sample_id}/${bam_file}; rm ${sample_id}/${bam_file}.bai; rm ${sample_id}/${barcode_file};
fi;
done < ${multiplexedSampleTable};
```
Downloading and processing the data from our internal storage systems to create a rds containing the GEX and ADT data using Seurat
``` {r label="download data",eval = FALSE}
for (i in 1:nrow(manis)) {
downloadScData(cite=manis$citeFile[i], bcr=manis$BCR[i], tcr=manis$TCR[i], overwrite=F, alignment=manis$alignment[i], out_dir="/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/")
}
for (i in 1:nrow(manis)) {
currentSample <- try(processCiteSamples(sample=manis$citeFile[i], SoupX_rna=T, SoupX_adt=T, save_raw=F, doSct=F, data_dir="/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cite", min_cells=0, min_features=200))
write_rds(currentSample,file=paste0("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/",manis$citeFile[i],".rds"),compress="gz")
}
sampleList <- list()
for (i in 1:nrow(manis)) {
currentSample <- read_rds(paste0("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/",manis$citeFile[i],".rds"))
currentSample <- RenameCells(currentSample,add.cell.id = manis$citeFile[i])
sampleList[[manis$citeFile[i]]] <- currentSample
rm(currentSample)
}
cov <- merge(sampleList[[1]], y = sampleList[2:length(sampleList)],merge.data = TRUE, project = "covidPbmcs_oldNew")
cv <- multiModal_processing(object=cov,gex=T,adt=T,sct=T,gexAdtWnn=F,sctAdtWnn=F,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=F,doFreshSct=T)
cv@meta.data$dataset <- ifelse(is.na(cv@meta.data$patient_id),"revision","original")
cv@meta.data[,c("GEX","CITE","BCR","TCR","pool_name","pool_patients")] <- NA
for (i in unique(cv@meta.data$orig.ident)) {
sample_name <- manifest$sample_id[manifest$Sanger.Sample.ID==gsub("(.*?)-.*","\\1",i)]
cv@meta.data$pool_name[cv@meta.data$orig.ident==i] <- sample_name
cv@meta.data$GEX[cv@meta.data$orig.ident==i] <- manifest$Sanger.Sample.ID[manifest$sample_id==sample_name & manifest$modality=="GEX"]
cv@meta.data$CITE[cv@meta.data$orig.ident==i] <- manifest$Sanger.Sample.ID[manifest$sample_id==sample_name & manifest$modality=="CITE"]
try(cv@meta.data$BCR[cv@meta.data$orig.ident==i] <- manifest$Sanger.Sample.ID[manifest$sample_id==sample_name & manifest$modality=="BCR"])
try(cv@meta.data$TCR[cv@meta.data$orig.ident==i] <- manifest$Sanger.Sample.ID[manifest$sample_id==sample_name & manifest$modality=="TCR"])
}
for (i in unique(sampleTable$Sample.name)) {
samples <- unlist(str_split(i,";"))
patientIds <- paste(sampleTable$Individal.Samples.ID[sampleTable$Sample.name==i],collapse=";")
cv@meta.data$pool_patients[cv@meta.data$pool_name%in%samples] <- patientIds
}
for (i in unique(cv@meta.data$orig.ident[cv@meta.data$dataset=="original"])) {
cv@meta.data$pool_patients[cv@meta.data$orig.ident==i] <- paste(unique(cv@meta.data$patient_id[cv@meta.data$orig.ident==i]),collapse=";")
}
```
We use souporcell to demultiplex the pooled sequencing libraries based on their genotypes (see bash code above).
Manual inspection and itteration revealed that some failed to detect all genotypes because noise from one genotype is clustered into two, this is fixed by adding once more cluster for some samples where no match is found,
souporcell genotype doublets are used to 'train' DoubletFinder to find more doublets
``` {r label="add souporcell clusters to data for patient id assignment and for doublet detection",eval = FALSE}
for (i in unique(cv@meta.data$orig.ident)) {
soc_out <- read.csv(paste0("/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/",i,"/clusters.tsv"),header=T,stringsAsFactor=F,sep="\t")
soc_out$matched_barcode <- paste(i,soc_out$barcode,sep = "_")
soc_out <- soc_out[soc_out$matched_barcode%in%rownames(cv@meta.data),]
cv@meta.data[soc_out$matched_barcode,colnames(soc_out)[colnames(soc_out)!="matched_barcode"]] <- soc_out[,colnames(soc_out)[colnames(soc_out)!="matched_barcode"]]
}
# Some failed to detect all genotypes because noise from one genotype is clustered into two, this is fixed by adding once more cluster for some samples where no match is found:
for (i in unique(cv@meta.data$orig.ident)) {
if (file.exists(paste0("/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/",i,"_extraGenotype/clusters.tsv"))) {
soc_out <- read.csv(paste0("/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/",i,"_extraGenotype/clusters.tsv"),header=T,stringsAsFactor=F,sep="\t")
} else {
soc_out <- read.csv(paste0("/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/",i,"/clusters.tsv"),header=T,stringsAsFactor=F,sep="\t")
}
soc_out$matched_barcode <- paste(i,soc_out$barcode,sep = "_")
soc_out <- soc_out[soc_out$matched_barcode%in%rownames(cv@meta.data),]
cv@meta.data[soc_out$matched_barcode,colnames(soc_out)[colnames(soc_out)!="matched_barcode"]] <- soc_out[,colnames(soc_out)[colnames(soc_out)!="matched_barcode"]]
}
cv@meta.data$df_classification_onSinglets <- NA
for (i in unique(cv@meta.data$orig.ident)) {
cv_subset <- subset(cv,cells=rownames(cv@meta.data)[cv@meta.data$orig.ident==i])
cv_subset <- runDoubletFinderOnSouporcellOutput(object=cv_subset)
cv@meta.data[rownames(cv_subset@meta.data),c("pANN","DF.classifications","doubletFinder_params","df_classification_onSinglets")] <- cv_subset@meta.data[,c("pANN","DF.classifications","doubletFinder_params","df_classification_onSinglets")]
rm(cv_subset)
}
```
We've used the nasal GEX libraries (which were not multiplexed) to generate patient genotypes, which we match to souporcell clusters to assign patient ids to souporcell clusters. This required some minor manual adjustments for mismatches using the automated assignment.
```{r label="compare souporcell genotypes with nasal genotypes to identify patients",eval = FALSE}
ref_gt <- load_GT_vcf("/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/newNasals.dedup.280421.vcf.gz",na.rm = F)
allCors <- as.data.frame(ref_gt$GT[0,],stringsAsFactors = F)
cv@meta.data$matched_NB_sample <- NA
cv@meta.data$matched_NB_sample_overlap <- NA
for (i in unique(cv@meta.data$orig.ident)) {
if (file.exists(paste0("/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision/",i,"_extraGenotype/clusters.tsv"))) {
myCors <- compareGenotype(ref_gt=ref_gt,souporcell_output_dir="/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision",sample_id=paste0(i,"_extraGenotype"))
} else {
myCors <- compareGenotype(ref_gt=ref_gt,souporcell_output_dir="/mnt/projects/RL003_allCitePbmcsTheta/souporcell_revision",sample_id=i)
}
specificCors <- myCors[,sampleTable$sangerId_matchedSample[!is.na(sampleTable$sangerId_matchedSample) & sampleTable$pool_group%in%gsub("^([A-Z]*?)[0-9].*","\\1",cv@meta.data$pool_name[cv@meta.data$orig.ident==i])]]
if (ncol(specificCors)==0) { print("no matching genotypes present in ref")} else {
specificCors <- specificCors[unique(cv@meta.data$assignment[cv@meta.data$status=="singlet" & cv@meta.data$orig.ident==i]),]
for (myCluster in rownames(specificCors)[order(decreasing=T,apply(specificCors,1,max))]) {
mostLikelyPatient <- names(specificCors[myCluster,][order(specificCors[myCluster,],decreasing = T)])[1]
if (mostLikelyPatient%in%cv@meta.data$matched_NB_sample[cv@meta.data$orig.ident==i & !is.na(cv@meta.data$matched_NB_sample)]) {
cv@meta.data$matched_NB_sample[cv@meta.data$orig.ident==i & cv@meta.data$assignment==myCluster] <- paste0(mostLikelyPatient,"_fail")
rownames(myCors)[rownames(myCors)==myCluster] <- paste0(mostLikelyPatient,"_fail;cluster",myCluster)
} else {
cv@meta.data$matched_NB_sample[cv@meta.data$orig.ident==i & cv@meta.data$assignment==myCluster] <- mostLikelyPatient
rownames(myCors)[rownames(myCors)==myCluster] <- paste0(mostLikelyPatient,";cluster",myCluster)
}
cv@meta.data$matched_NB_sample_overlap[cv@meta.data$orig.ident==i & cv@meta.data$assignment==myCluster] <- as.numeric(specificCors[myCluster,][order(specificCors[myCluster,],decreasing = T)][1])
}
}
rownames(myCors) <- paste0(unique(cv@meta.data$pool_name[cv@meta.data$orig.ident==i]),";",rownames(myCors))
if (any(colnames(myCors)!=colnames(allCors))) { halt }
allCors <- rbind(allCors,myCors)
}
allCors <- allCors[order(rownames(allCors),decreasing = F),]
columnOrder <- c()
for (i in unique(gsub("([A-Z]*?)[0-9]*-.*","\\1",rownames(allCors)))){
columnOrder <- c(columnOrder,which(colnames(allCors)%in%sampleTable$sangerId_matchedSample[sampleTable$pool_group==i & !is.na(sampleTable$sangerId_matchedSample)]))
}
allCors <- allCors[,c(columnOrder,(1:ncol(allCors))[!(1:ncol(allCors))%in%columnOrder])]
#allCors_wNormalGenotypes <- allCors
Heatmap(allCors,cluster_rows = F,cluster_columns = F,row_names_gp = gpar(cex=.55))
#Heatmap(allCors_wNormalGenotypes,cluster_rows = F,cluster_columns = F,row_names_gp = gpar(cex=.55))
for (i in unique(cv@meta.data$matched_NB_sample[!is.na(cv@meta.data$matched_NB_sample) & !grepl("fail",cv@meta.data$matched_NB_sample)])) {
cv@meta.data$patient_id[!is.na(cv@meta.data$matched_NB_sample) & cv@meta.data$matched_NB_sample==i] <- unique(sampleTable$Individal.Samples.ID[sampleTable$sangerId_matchedSample==i & !is.na(sampleTable$sangerId_matchedSample)])
cv@meta.data$state[!is.na(cv@meta.data$matched_NB_sample) & cv@meta.data$matched_NB_sample==i] <- unique(sampleTable$state[sampleTable$sangerId_matchedSample==i & !is.na(sampleTable$sangerId_matchedSample)])
}
#Fix samples where no nasal data is available by assigning the only missing genotype
cv@meta.data$patient_id[cv@meta.data$pool_name=="H2-PBMC" & cv@meta.data$matched_NB_sample=="CV001_KM9166445_fail" & !is.na(cv@meta.data$matched_NB_sample)] <- "PC7"
cv@meta.data$patient_id[cv@meta.data$pool_name=="K1-PBMC" & cv@meta.data$matched_NB_sample=="CV001_KM9465377_fail" & !is.na(cv@meta.data$matched_NB_sample)] <- "PP11"
cv@meta.data$patient_id[cv@meta.data$pool_name=="K2-PBMC" & cv@meta.data$matched_NB_sample=="CV001_KM9465377_fail" & !is.na(cv@meta.data$matched_NB_sample)] <- "PP11"
cv@meta.data$patient_id[cv@meta.data$pool_name%in%c("Q3-PBMC","Q4-PBMC") & cv@meta.data$matched_NB_sample=="CV001_KM9166355_fail" & !is.na(cv@meta.data$matched_NB_sample)] <- "not_ready"
cv@meta.data$patient_id[cv@meta.data$pool_name%in%c("O1-PBMC","O2-PBMC","P1-PBMC","P2-PBMC")] <- "not_ready"
cv@meta.data$state[cv@meta.data$pool_name%in%c("O1-PBMC","O2-PBMC","P1-PBMC","P2-PBMC")] <- "Post-COVID"
```
``` {r label="initial filtering and processing",fig.width=10,fig.height=10,eval = FALSE}
cv[["percent.mt"]] <- PercentageFeatureSet(cv, pattern = "^MT-")
cv <- subset(cv, percent.mt < 10)
cv <- subset(cv,cells=rownames(cv@meta.data[(cv@meta.data$df_classification_onSinglets=="singlet" & cv@meta.data$status=="singlet"),]))
cv <- subset(cv,cells=rownames(cv@meta.data[!grepl("CV001_KM9465",cv@meta.data$orig.ident),])) # Remove bad samples
cv <- FindNeighbors(cv, dims = 1:30,reduction = "harmony_RNA",graph.name="rna_snn")
cv <- FindClusters(cv, graph.name = "rna_snn", resolution = c(.5,4,32),algorithm = 4, method = "igraph", verbose = FALSE)
cv <- FindNeighbors(cv, dims = 1:30,reduction = "harmony_ADT",graph.name="adt_snn")
cv <- FindClusters(cv, graph.name = "adt_snn", resolution = c(.5,4,32),algorithm = 4, method = "igraph", verbose = FALSE)
cv <- FindClusters(cv, graph.name = "wsnn_rnaAdt", resolution = c(.5,4,32),algorithm = 4, method = "igraph", verbose = FALSE)
write_rds(cv,"/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.rds",compress = "gz")
```
We annotate Leiden clusters using cell type markers by subsetting large cell type compartments and reclustering to increase the resolution
``` {r label="subset and subcluster again with new hvgs",fig.height=10,fig.width=10,eval = FALSE}
cv@meta.data$subset <- NA
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("1","6","7","8")] <- "T"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("2","5","10")] <- "TNK"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("11")] <- "cycling"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("3","9")] <- "MonoDCs"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("12")] <- "platelets"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("4")] <- "B"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("14")] <- "HSPC"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("13","15")] <- "separateCelltypes"
resolutions <- c(0.5,4)
for (i in unique(cv@meta.data$subset)) {
if (!file.exists(paste0("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_",i,".rds"))) {
try(rm(cv_subset))
gc()
cv <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.rds")
cv@meta.data$subset <- NA
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("1","6","7","8")] <- "T"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("2","5","10")] <- "TNK"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("11")] <- "cycling"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("3","9")] <- "MonoDCs"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("12")] <- "platelets"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("4")] <- "B"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("14")] <- "HSPC"
cv@meta.data$subset[cv@meta.data$rna_snn_res.0.5%in%c("13","15")] <- "separateCelltypes"
cv_subset <- subset(cv,cells=rownames(cv@meta.data[cv@meta.data$subset==i,]))
rm(cv)
gc()
cv_subset <- multiModal_processing(object=cv_subset,gex=T,adt=T,sct=T,gexAdtWnn=T,sctAdtWnn=T,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=T,doFreshSct=T)
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_RNA",graph.name="rna_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_ADT",graph.name="adt_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_SCT",graph.name="sct_snn")
cv_subset <- FindClusters(cv_subset, graph.name = "rna_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "adt_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "sct_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "wsnn_rnaAdt", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "wsnn_sctAdt", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- combineSmallWnnClusters(object=cv_subset,resolutions=resolutions,graphNames=c("wsnn_rnaAdt","wsnn_sctAdt"),minClusterSize=100)
write_rds(cv_subset,file=paste0("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_",i,".rds"),compress="gz")
}
}
```
In the chunks below we go over each compartment that is subsetted above to annotate cell types manually.
``` {r label="annotate seperate celltypes subset",eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_separateCelltypes.rds")
FeaturePlot(cv_subset,reduction = "umapAfterHarmony_SCT",features = c("pANN","log_prob_doublet","log_prob_singleton"))
gexOnlyList <- c("CD3D","CCR7","SELL","CD27","CD4","CD40LG","CD8A","GZMH","IL2RA","FOXP3","IKZF2","TRGV9","TRDV2","TRAV1-2","SLC4A10","MKI67","NCR1","NCAM1","FXYD7","FCGR3A","CD14","C1QA","CLEC4C","IL3RA","AXL","SIGLEC6","CLEC9A","FCER1A","FCER2","CXCR5","CD19","CCR6","IGHD","MS4A1","TNFRSF13B","ENTPD1","KIT","CD34","PPBP","PF4","HBB")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = T,label=T,group.by = "dataset")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = T,label=T,group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
FeaturePlot(cv_subset,dims = c(1,2),reduction = "umapAfterHarmony_RNA",features = c("HBB","IL3RA","CLEC4C","MKI67"))
foo <- FetchData(cv_subset,c("HBB","IL3RA","CLEC4C","CLEC10A"))
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(HBB+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(IL3RA+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(CLEC4C+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(CLEC10A+1))) + geom_point() + theme_classic()
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$HBB)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$IL3RA)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$CLEC4C)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$CLEC10A)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.0.5
clust7Markers <- FindMarkers(object = cv_subset,ident.1 = "7",assay = "RNA",ident.2 = "1",logfc.threshold = .5,min.pct = .25,only.pos = T) # Seems to be so called AS-DC, markers are AXL SIGLEC6
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("2")] <- "Red Blood Cells"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("1","3","4","5")] <- "pDC"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("7")] <- "AS-DC"
cv_subset@meta.data$cell_annot_revision[is.na(cv_subset@meta.data$cell_annot_revision)] <- "Doublets"
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
DotPlot(cv_subset,features = gexOnlyList,group.by = "cell_annot_revision",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset_nk@meta.data$cell_annot_revision))+2))
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_separateCelltypes_annotMeta.rds")
```
```{r annot hpsc subcluster,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_HSPC.rds")
DefaultAssay(object = cv_subset) <- "RNA"
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "sct_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "sct_snn_res.0.5")
gexOnlyList <- c("CD3D","CCR7","SELL","CD27","CD4","CD40LG","CD8A","GZMH","IL2RA","FOXP3","IKZF2","TRGV9","TRDV2","TRAV1-2","SLC4A10","MKI67","NCR1","NCAM1","FXYD7","FCGR3A","CD14","C1QA","CLEC4C","IL3RA","AXL","SIGLEC6","CLEC9A","FCER1A","FCER2","CXCR5","CD19","CCR6","IGHD","MS4A1","TNFRSF13B","ENTPD1","KIT","CD34","PPBP","PF4","HBB")
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
FeaturePlot(cv_subset,dims = c(1,2),reduction = "umapAfterHarmony_RNA",features = c("HBB","IL3RA","CLEC4C","MKI67"))
foo <- FetchData(cv_subset,c("TPSAB1","CPA3","TPSB2","C1QC","EPX","PRG2"))
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(TPSAB1+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(CPA3+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(TPSB2+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(C1QC+1))) + geom_point() + theme_classic()
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$TPSAB1)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$CPA3)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$TPSB2)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$C1QC)
Idents(cv_subset) <- paste(cv_subset@meta.data$rna_snn_res.0.5)
clust10Markers <- FindMarkers(object = cv_subset,ident.1 = "10",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust1Markers <- FindMarkers(object = cv_subset,ident.1 = "1",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("TPSAB1","CPA3","KIT","PRSS57","HPGDS","GATA2","TNFSF10","TRIM63","IGHA1","IGHA2","FCER1A"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("SEPP1","AMICA1","GNLY","KIAA1598","IL8","FTL","ALAS2","PTPLAD2","MS4A7","APOE"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("SPINK2","MTND1P23","AL450405.1","BEX3","HBG2","GUCY1A1","AL513365.1","SMIM24","LAPTM4B","CRHBP"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("HDC","MS4A2","PRG2","MS4A3","TPSAB1","TPSB2","EPX","CLC"),cluster.idents = T) + RotatedAxis()
#DotPlot(cv,features = c("HDC","MS4A2","PRG2","MS4A3","TPSAB1"),group.by = "rna_snn_res.32",cluster.idents = T) + RotatedAxis()
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision <- "HPSCs"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("11")] <- "HPSCs IFN induced"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("10")] <- "Mast & Eosinophils"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("8")] <- "Doublets"
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset_nk@meta.data$cell_annot_revision))+2))
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_HSPC_annotMeta.rds")
```
```{r annot cycling subcluster,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_cycling.rds")
DefaultAssay(object = cv_subset) <- "RNA"
DimPlot(cv_subset,reduction="wnn.umap_sctAdt",shuffle = T,raster = F,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="wnn.umap_rnaAdt",shuffle = T,raster = F,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_ADT",shuffle = T,raster = F,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "Immune_All_High")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "sct_snn_res.0.5")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "sct_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "sct_snn_res.0.5")
gexOnlyList <- c("CD3D","CCR7","SELL","CD27","CD4","CD40LG","CD8A","GZMH","IL2RA","FOXP3","IKZF2","TRGV9","TRDV2","TRAV1-2","SLC4A10","MKI67","NCR1","NCAM1","FXYD7","FCGR3A","CD14","C1QA","CLEC4C","IL3RA","AXL","SIGLEC6","CLEC9A","FCER1A","FCER2","CXCR5","CD19","CCR6","IGHD","MS4A1","TNFRSF13B","ENTPD1","adt_AB-KIT","adt_AB-CD34","KIT","CD34","PPBP","PF4","HBB")
DotPlot(cv_subset,features = gexOnlyList,group.by = "sct_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "dataset")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Immune_All_High")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.0.5
clustMarkers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clustMarkers[order(clustMarkers$pct.2-clustMarkers$pct.1),]
FeaturePlot(cv_subset,dims = c(1,2),reduction = "umapAfterHarmony_RNA",features = c("HBB","IL3RA","CLEC4C","MKI67"))
foo <- FetchData(cv_subset,c("TPSAB1","CPA3","TPSB2","C1QC","EPX","PRG2"))
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(TPSAB1+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(CPA3+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(TPSB2+1))) + geom_point() + theme_classic()
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(C1QC+1))) + geom_point() + theme_classic()
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$TPSAB1)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$CPA3)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$TPSB2)
dplot(x = foo$UMAP_1,y = foo$UMAP_2 ,z = foo$C1QC)
Idents(cv_subset) <- paste(cv_subset@meta.data$rna_snn_res.0.5)
#Idents(cv_subset) <- paste(cv_subset@meta.data$sct_snn_res.0.5)
clust10Markers <- FindMarkers(object = cv_subset,ident.1 = "10",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust1Markers <- FindMarkers(object = cv_subset,ident.1 = "1",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust6Markers <- FindMarkers(object = cv_subset,ident.1 = "6",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust4Markers <- FindMarkers(object = cv_subset,ident.1 = "4",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("TPSAB1","CPA3","KIT","PRSS57","HPGDS","GATA2","TNFSF10","TRIM63","IGHA1","IGHA2","FCER1A"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("SEPP1","AMICA1","GNLY","KIAA1598","IL8","FTL","ALAS2","PTPLAD2","MS4A7","APOE"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("SPINK2","MTND1P23","AL450405.1","BEX3","HBG2","GUCY1A1","AL513365.1","SMIM24","LAPTM4B","CRHBP"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("HDC","MS4A2","PRG2","MS4A3","TPSAB1","TPSB2","EPX","CLC"),cluster.idents = T) + RotatedAxis()
#DotPlot(cv,features = c("HDC","MS4A2","PRG2","MS4A3","TPSAB1"),group.by = "rna_snn_res.32",cluster.idents = T) + RotatedAxis()
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision <- "Cycling"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("7","12","13")] <- "Doublets"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("9","1")] <- "Plasma cells"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("8")] <- "Plasmablasts"
DotPlot(cv_subset,features = gexOnlyList,group.by = "cell_annot_revision",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$cell_annot_revision))+2))
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_cycling_annotMeta.rds")
```
```{r subset plasmablasts,eval = FALSE}
# Subset plasmablasts to separate isotypes
resolutions <- c(0.5,4)
cv_subset <- subset(cv_subset,cells=rownames(cv_subset@meta.data[cv_subset@meta.data$cell_annot_revision%in%c("Plasma cells","Plasmablasts"),]))
cv_subset@meta.data$type_sample <- gsub("(..).*","\\1",cv_subset@meta.data$patient_id)
gc()
cv_subset <- multiModal_processing(object=cv_subset,gex=T,adt=T,sct=T,gexAdtWnn=T,sctAdtWnn=T,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=T,doFreshSct=T)
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_RNA",graph.name="rna_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_ADT",graph.name="adt_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_SCT",graph.name="sct_snn")
cv_subset <- FindClusters(cv_subset, graph.name = "rna_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "adt_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "sct_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("6")] <- "Doublets"
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_plasmas_annotMeta.rds")
cv_subset <- subset(cv_subset,cells=rownames(cv_subset@meta.data[cv_subset@meta.data$rna_snn_res.0.5!="6",])) # These are doublets
cv_subset <- multiModal_processing(object=cv_subset,gex=T,adt=T,sct=T,gexAdtWnn=T,sctAdtWnn=T,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=T,doFreshSct=T)
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_RNA",graph.name="rna_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_ADT",graph.name="adt_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_SCT",graph.name="sct_snn")
cv_subset <- FindClusters(cv_subset, graph.name = "rna_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "adt_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "sct_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
#write_rds(cv_subset,file="farm/cov_oldNewMerged_filtered_badAdtRem.subset_plasmas.rds",compress="gz")
DefaultAssay(cv_subset) <- "RNA"
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.0.5
clust05Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.4
clust4Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust05Markers <- clust05Markers[order(clust05Markers$pct.2-clust05Markers$pct.1),]
clust4Markers <- clust4Markers[order(clust4Markers$pct.2-clust4Markers$pct.1),]
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,group.by = "projected_annot_rik",cols = myColsForCellTypes)
DimPlot(cv_subset,reduction="umapAfterHarmony_ADT",shuffle = T,raster = F,group.by = "projected_annot_rik",cols = myColsForCellTypes)
DimPlot(cv_subset,reduction="wnn.umap_rnaAdt",shuffle = T,raster = F,group.by = "projected_annot_rik",cols = myColsForCellTypes)
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "dataset")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "projected_annot_rik",cols = myColsForCellTypes)
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Immune_All_Low")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Immune_Blood_Low")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "orig.ident") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "patient_id") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
markerList <- c("NKG7",'NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D',"HLA-DRA","S100A4","S100A6","CCL4","CCL5","GZMH","GZMB","GZMK","IL32","IFNG","IFI6","IRF7","IFIT3","PRF1","FCGR3A","adt_AB-FCGR3A","FCER1G","rnaTr","adt_AB-CD8A","CD8A","CD8B","adt_AB-CD4","NCR1","adt_AB-NCR1","PPBP","PF4","KLRB1","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9","IL7R","KLRC3","GNLY","CD27","adt_AB-CD27","SELL","TIGIT","CXCR4","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-2","adt_AB-PTPRC-3")
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = c("IFNG","TBX21","TNF","GATA3","IL4","IL5","RORC","IL17A","IL17F","IL21","CCL5","PHLDA1","LYAR","ODF2L","IL7R","PDE4D"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("IFNG","TBX21","TNF","GATA3","IL4","IL5","RORC","IL17A","IL17F","IL21","CCL5","PHLDA1","LYAR","ODF2L","IL7R","PDE4D"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
# Not really any clustering of isotypes
```
```{r annot tnk subcluster,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_TNK.rds")
DefaultAssay(object = cv_subset) <- "RNA"
DimPlot(cv_subset,reduction="wnn.umap_sctAdt",shuffle = T,raster = F,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_ADT",shuffle = T,raster = F,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="wnn.umap_rnaAdt",shuffle = T,raster = F,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "projected_annot_rik")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Immune_All_High")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "adt_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "adt_snn_res.0.5")
gexOnlyList <- c("CD3D","CCR7","SELL","CD27","CD4","CD40LG","CD8A","GZMH","IL2RA","FOXP3","IKZF2","TRGV9","TRDV2","TRAV1-2","SLC4A10","MKI67","NCR1","NCAM1","FXYD7","FCGR3A","CD14","C1QA","CLEC4C","IL3RA","AXL","SIGLEC6","CLEC9A","FCER1A","FCER2","CXCR5","CD19","CCR6","IGHD","MS4A1","TNFRSF13B","ENTPD1","KIT","CD34","PPBP","PF4","HBB")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "dataset") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "projected_annot_rik",cols = myColsForCellTypes) + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Immune_All_Low") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.32") + NoLegend()
#markerList <- c("CD8A","CD8B","CCR7","GZMB",paste0("adt_AB-",c("PTPRC-2","PTPRC-3","CD4","CD8A")))
markerList <- c('NCR1','NCAM1','FCGR3A','FCER1G','adt_AB-CD8A')
markerList <- c('NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D')
markerList <- c('KLRB1','CD3G','FGFBP2')
markerList <- c("FOXP3","IL2RA","CTLA4")
markerList <- c('GNLY','NKG7','GZMK')
#markerList <- c('GNLY','NKG7','CD3D')
# g/d t cells and mait
markerList <- c("SLC4A10", "TRAV1-2", "TRBV6-2","adt_AB-TRAV7")
markerList <- c("TRDV1","TRDV2","TRGV9","TRDC","TRGC1","TRGC2",paste0("adt_AB-",c("TRAV24","TRAV7","TRBV13","TRGV9","TRDV2")))
DotPlot(cv_subset,features = markerList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5") + NoLegend()
DotPlot(cv_subset,features = markerList,group.by = "rna_snn_res.4",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4") + NoLegend()
foo <- FetchData(cv_subset,markerList)
foo$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
for (i in markerList) {print(ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(get(i)+1))) + geom_point(cex=.1) + theme_classic() + ggtitle(i))}
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(rnaTr+1))) + geom_point(cex=.1) + theme_classic() + ggtitle("TR RNA")
#CTLS
markerList <- c("CD8A","CD8B","CCR7","GZMB","GZMH",paste0("adt_AB-",c("PTPRC-2","PTPRC-3","CD4","CD8A")))
#DotPlot(cv_subset,features = markerList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
#DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5") + NoLegend()
DotPlot(cv_subset,features = markerList,group.by = "rna_snn_res.4",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4") + NoLegend()
foo <- FetchData(cv_subset,c(markerList,"nCount_RNA"))
foo$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
for (i in markerList) {print(ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(get(i)+1))) + geom_point(cex=.1) + theme_classic() + ggtitle(i))}
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(rnaTr+1))) + geom_point(cex=.1) + theme_classic() + ggtitle("TR RNA")
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=nCount_RNA)) + geom_point(cex=.1) + theme_classic() + ggtitle("nCount")
for (i in markerList) {print(dplot(x=foo$UMAP_1,y=foo$UMAP_2,z=foo[,i]));title(i)}
Idents(cv_subset) <- paste(cv_subset@meta.data$rna_snn_res.0.5)
clust12Markers <- FindMarkers(object = cv_subset,ident.1 = "1",ident.2 = "4",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F)
clust12MarkersAdt <- FindMarkers(object = cv_subset,ident.1 = "12",ident.2 = "3",assay = "ADT",logfc.threshold = .5,min.pct = .25,only.pos = F)
clust1Markers <- FindMarkers(object = cv_subset,ident.1 = "1",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("6")] <- "g/d T"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("1")] <- "CD4 CTL"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("8")] <- "MAIT"
#cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("1")] <- "AS-DC"
#cv_subset@meta.data$cell_annot_revision[is.na(cv_subset@meta.data$cell_annot_revision)] <- "Doublets"
```
```{r sub-subcluster the tnk cluster,eval = FALSE}
cv_subset@meta.data$subsubcluster <- "subT"
cv_subset@meta.data$subsubcluster[cv_subset@meta.data$rna_snn_res.0.5%in%c("4")] <- "subTNK"
cv_subset@meta.data$subsubcluster[cv_subset@meta.data$rna_snn_res.0.5%in%c("1","6")] <- "subNK"
cv <- cv_subset
for (i in unique(cv@meta.data$subsubcluster)) {
if (!file.exists(paste0("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_",i,".rds"))) {
cv_subset <- subset(cv,cells=rownames(cv@meta.data[cv@meta.data$subsubcluster==i,]))
gc()
cv_subset <- multiModal_processing(object=cv_subset,gex=T,adt=T,sct=T,gexAdtWnn=T,sctAdtWnn=T,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=T,doFreshSct=T)
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_RNA",graph.name="rna_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_ADT",graph.name="adt_snn")
cv_subset <- FindNeighbors(cv_subset, dims = 1:30,reduction = "harmony_SCT",graph.name="sct_snn")
cv_subset <- FindClusters(cv_subset, graph.name = "rna_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "adt_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
cv_subset <- FindClusters(cv_subset, graph.name = "sct_snn", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
#cv_subset <- FindClusters(cv_subset, graph.name = "wsnn_rnaAdt", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
#cv_subset <- FindClusters(cv_subset, graph.name = "wsnn_sctAdt", resolution = resolutions,algorithm = 4, method = "igraph", verbose = FALSE)
#cv_subset <- combineSmallWnnClusters(object=cv_subset,resolutions=resolutions,graphNames=c("wsnn_rnaAdt","wsnn_sctAdt"),minClusterSize=100)
write_rds(cv_subset,file=paste0("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_",i,".rds"),compress="gz")
}
}
```
```{r annot subsubcluster w NKs,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_subNK.rds")
cv_subset@meta.data$type_sample <- gsub("(..).*","\\1",cv_subset@meta.data$patient_id)
cv_subset@meta.data$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "sct_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "orig.ident") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "patient_id") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
# markerList <- c('NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D')
#
# foo <- FetchData(cv_subset,c(markerList,"nCount_RNA"))
# foo$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
# foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
# for (i in markerList) {print(ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(get(i)+1))) + geom_point(cex=.1) + theme_classic() + ggtitle(i))}
# ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(rnaTr+1))) + geom_point(cex=.1) + theme_classic() + ggtitle("TR RNA")
# ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=nCount_RNA)) + geom_point(cex=.1) + theme_classic() + ggtitle("nCount")
# for (i in markerList) {print(dplot(x=foo$UMAP_1,y=foo$UMAP_2,z=foo[,i]));title(i)}
# Idents(cv_subset) <- paste(cv_subset@meta.data$rna_snn_res.32)
# clust385Markers <- FindMarkers(object = cv_subset,ident.1 = "385",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F)
# Idents(cv_subset) <- paste(cv_subset@meta.data$rna_snn_res.0.5)
# clust2Markers <- FindMarkers(object = cv_subset,ident.1 = "2",ident.2 = "1",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) # S100A4/6 high FCGR3A high CCL4/5 high GZMH/B high
# Idents(cv_subset) <- paste(cv_subset@meta.data$sct_snn_res.4)
# clust20Markers <- FindMarkers(object = cv_subset,ident.1 = "20",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) # IFN activated
# clust45Markers <- FindMarkers(object = cv_subset,ident.1 = "45",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) # cycling
# clust29Markers <- FindMarkers(object = cv_subset,ident.1 = "29",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) # AP12 specific AP1-complex up
# clust41Markers <- FindMarkers(object = cv_subset,ident.1 = "41",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) # IL7R expressing
# clust38Markers <- FindMarkers(object = cv_subset,ident.1 = "38",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) # HLA-DR expressing
# clust9Markers <- FindMarkers(object = cv_subset,ident.1 = "9",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) #
# clust21Markers <- FindMarkers(object = cv_subset,ident.1 = "21",assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = F) #
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "sct_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "sct_snn_res.4")
markerList <- c("NKG7",'NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D',"HLA-DRA","S100A4","S100A6","CCL4","CCL5","GZMH","GZMB","IL32","IFNG","IFI6","IRF7","IFIT3","PRF1","FCGR3A","adt_AB-FCGR3A","FCER1G","rnaTr","adt_AB-CD8A","CD8A","CD8B","adt_AB-CD4","NCR1","adt_AB-NCR1","KLRD1","adt_AB-KLRD1","TRAV24","adt_AB-TRAV24")
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,group.by = "sct_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,group.by = "sct_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis()
foo <- FetchData(cv_subset,c(markerList,"nCount_RNA"))
foo$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
#foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_SCT@cell.embeddings)
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
for (i in markerList) {print(ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(get(i)+1))) + geom_point(cex=.1) + theme_classic() + ggtitle(i))}
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(rnaTr+1))) + geom_point(cex=.1) + theme_classic() + ggtitle("TR RNA")
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=nCount_RNA)) + geom_point(cex=.1) + theme_classic() + ggtitle("nCount")
for (i in markerList) {print(dplot(x=foo$UMAP_1,y=foo$UMAP_2,z=foo[,i]));title(i)}
```
``` {r annotate,eval = FALSE}
cv_subset_nk <- cv_subset
cv_subset_nk@meta.data$cell_annot_revision <- NA
cv_subset_nk@meta.data$cell_annot_revision <- "NK FCER1G+"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$rna_snn_res.0.5%in%c("2")] <- "NK"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$sct_snn_res.0.5%in%c("4")] <- "NK CD56 bright"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$sct_snn_res.4%in%c("33")] <- "NKT"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$rna_snn_res.4%in%c("36")] <- "cycling"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$sct_snn_res.4%in%c("38")] <- "NK HLA-DR+"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$sct_snn_res.4%in%c("20")] <- "NK IFN induced"
cv_subset_nk@meta.data$cell_annot_revision[cv_subset_nk@meta.data$sct_snn_res.4%in%c("41")] <- "ILC"
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_subNK_annotMeta.rds")
DimPlot(cv_subset_nk,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset_nk@meta.data$cell_annot_revision))+2))
DimPlot(cv_subset_nk,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset_nk@meta.data$cell_annot_revision))+2))
DotPlot(cv_subset_nk,group.by = "cell_annot_revision",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
```
```{r annot subsubcluster w TNKs,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_subTNK.rds")
cv_subset@meta.data$type_sample <- gsub("(..).*","\\1",cv_subset@meta.data$patient_id)
cv_subset@meta.data$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "sct_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "orig.ident") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "patient_id") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
markerList <- c("NKG7",'NCAM1','adt_AB-NCAM1',"CD3G",'CD3D','adt_AB-CD3D',"HLA-DRA","CCL4","CCL5","GZMH","GZMB","IL32","IFNG","IFI6","IRF7","IFI44L","PRF1","FCGR3A","adt_AB-FCGR3A","FCER1G","rnaTr","adt_AB-CD8A","CD8A","CD8B","adt_AB-CD4","NCR1","adt_AB-NCR1","KLRD1","adt_AB-KLRD1","TRAV24","adt_AB-TRAV24","TRDV2","TRGV9","IL7R","SELL","MX1")
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
#DotPlot(cv_subset,group.by = "sct_snn_res.0.5",features = markerList,cluster.idents = T,) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
#DotPlot(cv_subset,group.by = "sct_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(subset(cv_subset,cells=rownames(cv_subset@meta.data[!cv_subset@meta.data$rna_snn_res.4%in%c("3","6","8","9","19","1","30","7","27","20","22","26","34"),])),group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DimPlot(subset(cv_subset,cells=rownames(cv_subset@meta.data[!cv_subset@meta.data$rna_snn_res.4%in%c("3","6","8","9","19","1","30","7","27","20","22","26","34"),])),reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
foo <- FetchData(cv_subset,c(markerList,"nCount_RNA"))
foo$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
#foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_SCT@cell.embeddings)
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
for (i in markerList) {print(ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(get(i)+1))) + geom_point(cex=.1) + theme_classic() + ggtitle(i))}
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(rnaTr+1))) + geom_point(cex=.1) + theme_classic() + ggtitle("TR RNA")
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log10(nCount_RNA))) + geom_point(cex=.1) + theme_classic() + ggtitle("nCount")
for (i in markerList) {print(dplot(x=foo$UMAP_1,y=foo$UMAP_2,z=foo[,i]));title(i)}
cv_subset_tnk <- cv_subset
```
```{r annotate TNK,eval = FALSE}
DimPlot(cv_subset_tnk,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,group.by = "type_sample")
Idents(cv_subset_tnk) <- cv_subset_tnk@meta.data$rna_snn_res.4
clustMarkers <- FindAllMarkers(object = cv_subset_tnk,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clustMarkers[order(clustMarkers$pct.2-clustMarkers$pct.1),]
cv_subset_tnk@meta.data$cell_annot_revision <- NA
cv_subset_tnk@meta.data$cell_annot_revision <- as.character(cv_subset_tnk@meta.data$rna_snn_res.4)
cv_subset_tnk@meta.data$cell_annot_revision <- "T CD8 CTL"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("25","11","10","33")] <- "T CD8 EM"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("13")] <- "NK CD56 bright"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("23")] <- "NK IFN induced"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("32")] <- "T CD8 CTL IFN induced"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("5")] <- "T g/d"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("31")] <- "T CD4 CTL"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("1","9","3")] <- "NK FCER1G+"
cv_subset_tnk@meta.data$cell_annot_revision[cv_subset_tnk@meta.data$rna_snn_res.4%in%c("6","8","19","30","7","27","20","22","34")] <- "NK"
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_subTNK_annotMeta.rds")
DimPlot(cv_subset_tnk,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset_tnk@meta.data$cell_annot_revision))+2))
DimPlot(cv_subset_tnk,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset_tnk@meta.data$cell_annot_revision))+2))
DotPlot(cv_subset_tnk,group.by = "cell_annot_revision",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
```
```{r annot subsubcluster T,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_subT.rds")
cv_subset@meta.data$type_sample <- gsub("(..).*","\\1",cv_subset@meta.data$patient_id)
cv_subset@meta.data$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.0.5
clust05Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.4
clust4Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust05Markers[order(clust05Markers$pct.2-clust05Markers$pct.1),]
clust4Markers[order(clust4Markers$pct.2-clust4Markers$pct.1),]
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "orig.ident") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "patient_id") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
markerList <- c("NKG7",'NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D',"HLA-DRA","S100A4","S100A6","CCL4","CCL5","GZMH","GZMB","GZMK","IL32","IFNG","IFI6","IRF7","IFIT3","PRF1","FCGR3A","adt_AB-FCGR3A","FCER1G","rnaTr","adt_AB-CD8A","CD8A","CD8B","adt_AB-CD4","NCR1","adt_AB-NCR1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9","IL7R","KLRC3","GNLY","CD27","adt_AB-CD27","SELL","TIGIT","CXCR4","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-2","adt_AB-PTPRC-3","SLC4A10", "NCR3", "KLRB1")
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
foo <- FetchData(cv_subset,c(markerList,"nCount_RNA"))
foo$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
#foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_SCT@cell.embeddings)
foo[,c("UMAP_1","UMAP_2")] <- as.data.frame(cv_subset@reductions$umapAfterHarmony_RNA@cell.embeddings)
for (i in markerList) {print(ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(get(i)+1))) + geom_point(cex=.1) + theme_classic() + ggtitle(i))}
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=log2(rnaTr+1))) + geom_point(cex=.1) + theme_classic() + ggtitle("TR RNA")
ggplot(foo[sample(1:nrow(foo)),],aes(UMAP_1,UMAP_2,col=nCount_RNA)) + geom_point(cex=.1) + theme_classic() + ggtitle("nCount")
for (i in markerList) {print(dplot(x=foo$UMAP_1,y=foo$UMAP_2,z=foo[,i]));title(i)}
cv_subset_t <- cv_subset
#tem: CCR7lo SELLlo CX3CR1hi CD27lo IL7Rhi CD27- CD45RA- "30","1",
#tcm: CCR7hi SELLhi CX3CR1lo CD27hi IL7Rhi CD27+ CD45RA- "35","36","17","7","15","25","19"
#temra: CCR7- IL7Rlo CD27- CD45RA+ "5","3","24",
```
```{r annotate T cells,eval = FALSE}
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision <- "T CD8 CTL"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("30")] <- "T CD8 em"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("1")] <- "MAIT"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("35","36","17","7","15","25","19","6")] <- "T CD8 cm"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("5","3","24")] <- "T CD8 emra"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("4","11")] <- "T g/d"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("34","8","9")] <- "T CD4 CTL"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("33")] <- "Doublets"
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_subT_annotMaitMeta.rds")
DotPlot(cv_subset,group.by = "cell_annot_revision",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","GZMH","PRF1","SLC4A10", "NCR3", "KLRB1"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","GZMH","PRF1","SLC4A10", "NCR3", "KLRB1"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
```
``` {r annot T compartment (naive etc),eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_T.rds")
cv_subset@meta.data$type_sample <- gsub("(..).*","\\1",cv_subset@meta.data$patient_id)
cv_subset@meta.data$rnaTr <- rowSums(FetchData(cv_subset,rownames(cv_subset[["RNA"]])[grep("^TR.*V[0-9].*",rownames(cv_subset[["RNA"]]))]))
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.0.5
clust05Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.4
clust4Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.32
clust32Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust05Markers[order(clust05Markers$pct.2-clust05Markers$pct.1),]
clust4Markers[order(clust4Markers$pct.2-clust4Markers$pct.1),]
clust32Markers[order(clust32Markers$pct.2-clust32Markers$pct.1),]
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "orig.ident") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "patient_id") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
markerList <- c("NKG7",'NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D',"HLA-DRA","S100A4","S100A6","CCL4","CCL5","GZMH","GZMB","GZMK","IL32","IFNG","IFI6","IRF7","IFIT3","PRF1","FCGR3A","adt_AB-FCGR3A","FCER1G","rnaTr","adt_AB-CD8A","CD8A","CD8B","adt_AB-CD4","NCR1","adt_AB-NCR1","PPBP","PF4","KLRB1","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9","IL7R","KLRC3","GNLY","CD27","adt_AB-CD27","SELL","TIGIT","CXCR4","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-2","adt_AB-PTPRC-3")
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = c("IFNG","TBX21","TNF","GATA3","IL4","IL5","RORC","IL17A","IL17F","IL21","CCL5","PHLDA1","LYAR","ODF2L","IL7R","PDE4D"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("IFNG","TBX21","TNF","GATA3","IL4","IL5","RORC","IL17A","IL17F","IL21","CCL5","PHLDA1","LYAR","ODF2L","IL7R","PDE4D"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
#Fairly shallow
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("4")] <- "T low quality"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("5")] <- "T regulatory"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("6")] <- "T CD4 IFN induced"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("1")] <- "T CD4 Naive"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("2")] <- "T CD4 Helper"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("3")] <- "T CD8 Naive"
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_T_annotMeta.rds")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
```
```{r look at monoDC compartment,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_MonoDCs.rds")
cv_subset@meta.data$type_sample <- gsub("(..).*","\\1",cv_subset@meta.data$patient_id)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.0.5
clust05Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
Idents(cv_subset) <- cv_subset@meta.data$rna_snn_res.4
clust4Markers <- FindAllMarkers(object = cv_subset,assay = "RNA",logfc.threshold = .5,min.pct = .25,only.pos = T)
clust05Markers <- clust05Markers[order(clust05Markers$pct.2-clust05Markers$pct.1),]
clust4Markers <- clust4Markers[order(clust4Markers$pct.2-clust4Markers$pct.1),]
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "orig.ident") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "patient_id") + NoLegend()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "type_sample",cols = randomcoloR::distinctColorPalette(length(unique(cv_subset@meta.data$type_sample))+2))
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DotPlot(cv_subset, features = c('CD14','FCGR3A','C1QA','CLEC9A','CLEC10A','CD1C','PTPRC','PPBP','PF4','MS4A1','NEAT1',"IFI6","IRF7","IFI44L"), group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset, features = c('CD14','FCGR3A','C1QA','CLEC9A','CLEC10A','CD1C','PTPRC','PPBP','PF4','MS4A1','NEAT1',"IFI6","IRF7","IFI44L"), group.by = "rna_snn_res.4",cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
markerList <- c("NKG7",'NCAM1','adt_AB-NCAM1','CD3D','adt_AB-CD3D',"HLA-DRA","S100A4","S100A6","CCL4","CCL5","GZMH","GZMB","GZMK","IL32","IFNG","IFI6","IRF7","IFIT3","PRF1","FCGR3A","adt_AB-FCGR3A","FCER1G","rnaTr","adt_AB-CD8A","CD8A","CD8B","adt_AB-CD4","NCR1","adt_AB-NCR1","PPBP","PF4","KLRB1","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9","IL7R","KLRC3","GNLY","CD27","adt_AB-CD27","SELL","TIGIT","CXCR4","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-2","adt_AB-PTPRC-3")
gexOnlyList <- c("CD3D","CCR7","SELL","CD27","CD4","CD40LG","CD8A","GZMH","IL2RA","FOXP3","IKZF2","TRGV9","TRDV2","TRAV1-2","SLC4A10","MKI67","NCR1","NCAM1","FXYD7","FCGR3A","CD14","C1QA","CLEC4C","IL3RA","AXL","SIGLEC6","CLEC9A","FCER1A","FCER2","CXCR5","CD19","CCR6","IGHD","MS4A1","TNFRSF13B","ENTPD1","KIT","CD34","PPBP","PF4","HBB","HDC","MS4A2","PRG2","MS4A3","TPSAB1","TPSB2","EPX","CLC","IFI6","IRF7","IFI44L")
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = markerList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = gexOnlyList,cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("adt_AB-PTPRC-2","adt_AB-CD4","adt_AB-CD8A","IL7R","CD27","CCR7","SELL","CX3CR1","adt_AB-CX3CR1","adt_AB-PTPRC-1","adt_AB-PTPRC-3","GZMH","PRF1","PPBP","PF4","TRDV2","adt_AB-TRDV2","TRGV9","adt_AB-TRGV9"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.0.5",features = c("IL6","GPBAR1","CXCL10","IFNG","TBX21","TNF","GATA3","IL4","IL5","RORC","IL17A","IL17F","IL21","CCL5","PHLDA1","LYAR","ODF2L","IL7R","PDE4D"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset,group.by = "rna_snn_res.4",features = c("IL6","GPBAR1","CXCL10","IFNG","TBX21","TNF","GATA3","IL4","IL5","RORC","IL17A","IL17F","IL21","CCL5","PHLDA1","LYAR","ODF2L","IL7R","PDE4D"),cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision <- "Monocyte CD14"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("36","24","16","5")] <- "Monocyte CD14 IFN-induced"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("39")] <- "Monocyte CD14 IL6+"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("41","1","47","45","6","26")] <- "Monocyte CD16"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("32")] <- "Monocyte CD16 IFN-induced"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("30")] <- "Monocyte CD16+C1"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("48","22","43","38")] <- "doublets"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("49")] <- "cDC1"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.4%in%c("3")] <- "cDC2"
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.4")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
DotPlot(cv_subset, features = c("IL6","GPBAR1","CXCL10",'CD14','FCGR3A','C1QA','CLEC9A','CLEC10A','CD1C','PTPRC','PPBP','PF4','MS4A1','NEAT1',"IFI6","IRF7","IFI44L"), group.by = "rna_snn_res.4",cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
DotPlot(cv_subset, features = c("IL6","GPBAR1","CXCL10",'CD14','FCGR3A','C1QA','CLEC9A','CLEC10A','CD1C','PTPRC','PPBP','PF4','MS4A1','NEAT1',"IFI6","IRF7","IFI44L"), group.by = "cell_annot_revision",cluster.idents = T) + RotatedAxis() + theme(axis.text=element_text(size=7))
#write_rds(cv_subset@meta.data,file = "/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_MonoDCs_annotMeta.rds")
cv_subset <- subset(cv_subset,cells=rownames(cv_subset@meta.data)[grepl("Monocyte",cv_subset@meta.data$cell_annot_revision)])
cv_subset <- multiModal_processing(object=cv_subset,gex=T,adt=F,sct=F,gexAdtWnn=F,sctAdtWnn=F,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=F,doFreshSct=F)
cv_subset <- multiModal_processing(object=cv_subset,gex=F,adt=F,sct=T,gexAdtWnn=F,sctAdtWnn=F,doHarmony=T,npca=30,regress_cellcycle_gex=F,makeFinalWnnUmap=F,doFreshSct=T)
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
DimPlot(cv_subset,reduction="umapAfterHarmony_SCT",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")
```
```{r look at platelet compartment,eval = FALSE}
cv_subset <- read_rds("/mnt/projects/RL003_allCitePbmcsTheta/dataRevision/cov_oldNewMerged_filtered_badAdtRem.subset_platelets.rds")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = c("log_prob_doublet","log_prob_singleton"),group.by = "sct_snn_res.0.5")
DotPlot(cv_subset,features = c("pANN","log_prob_doublet","log_prob_singleton"),group.by = "sct_snn_res.0.5")
gexOnlyList <- c("CD3D","CCR7","SELL","CD27","CD4","CD40LG","CD8A","GZMH","IL2RA","FOXP3","IKZF2","TRGV9","TRDV2","TRAV1-2","SLC4A10","MKI67","NCR1","NCAM1","FXYD7","FCGR3A","CD14","C1QA","CLEC4C","IL3RA","AXL","SIGLEC6","CLEC9A","FCER1A","FCER2","CXCR5","CD19","CCR6","IGHD","MS4A1","TNFRSF13B","ENTPD1","adt_AB-KIT","adt_AB-CD34","KIT","CD34","PPBP","PF4","HBB")
DotPlot(cv_subset,features = gexOnlyList,group.by = "sct_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "dataset")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "Phase")
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "rna_snn_res.0.5")
DotPlot(cv_subset,features = gexOnlyList,group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("TPSAB1","CPA3","KIT","PRSS57","HPGDS","GATA2","TNFSF10","TRIM63","IGHA1","IGHA2","FCER1A"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("SEPP1","AMICA1","GNLY","KIAA1598","IL8","FTL","ALAS2","PTPLAD2","MS4A7","APOE"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("SPINK2","MTND1P23","AL450405.1","BEX3","HBG2","GUCY1A1","AL513365.1","SMIM24","LAPTM4B","CRHBP"),group.by = "rna_snn_res.0.5",cluster.idents = T) + RotatedAxis()
DotPlot(cv_subset,features = c("HDC","MS4A2","PRG2","MS4A3","TPSAB1","TPSB2","EPX","CLC"),cluster.idents = T) + RotatedAxis()
#DotPlot(cv,features = c("HDC","MS4A2","PRG2","MS4A3","TPSAB1"),group.by = "rna_snn_res.32",cluster.idents = T) + RotatedAxis()
cv_subset@meta.data$cell_annot_revision <- NA
cv_subset@meta.data$cell_annot_revision <- "Doublets"
cv_subset@meta.data$cell_annot_revision[cv_subset@meta.data$rna_snn_res.0.5%in%c("7","1","3")] <- "Platelets"
DotPlot(cv_subset,features = gexOnlyList,group.by = "cell_annot_revision",cluster.idents = T) + RotatedAxis()
DimPlot(cv_subset,reduction="umapAfterHarmony_RNA",shuffle = T,raster = F,label=T,group.by = "cell_annot_revision")