library(knitr)
library(tidyverse)
library(edgeR)
library(googledrive)
library(tidyverse)
library(fgsea)
library(data.table)
Perform differential gene expression using edgeR on pre-immunization baseline whole-blood samples from the KSPZV1 malaria vaccine trial. Revised analysis includes adjustments for:
JCI reviewer requested using FDR 5% as a cut-off for GSEA
#from google drive
#local path "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/Tuan PfSPZ/KenyaPfSPZ/Final Data Files/KSPZV1 SeqExpressionSet Baseline Reduced Phenodat 35716x487.rds"
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1Q4VXYxdl9CqtzhcvHA9pn8q06_YOZgi8"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
myTimepoint <- 0
xgroup <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(xgroup)){
xgroup[[i]] <- x[, which(x$treat == i)]
xgroup[[i]] <- xgroup[[i]][,xgroup[[i]]$Timepoint == myTimepoint]
xgroup[[i]]$treat <- droplevels(xgroup[[i]]$treat)
#impute only missing CSP Ab value as median CSP A of all baseline samples (only affects one subject in 9.0x10^6 PfSPZ dose group)
xgroup[[i]]$CSPAb <- ifelse(is.na(xgroup[[i]]$CSPAb), median(xgroup[[i]]$CSPAb, na.rm = TRUE), xgroup[[i]]$CSPAb)
print(i)
print(table(Outcome = xgroup[[i]]$Outcome, Timepoint = xgroup[[i]]$Timepoint))
print(table(Outcome = xgroup[[i]]$Outcome, Dosegroup = xgroup[[i]]$treat))
print(dim(xgroup[[i]]))
}
## [1] "Placebo"
## Timepoint
## Outcome 0
## 0 26
## 1 39
## Dosegroup
## Outcome Placebo
## 0 26
## 1 39
## Features Samples
## 35716 65
## [1] "4.5 x 10^5 PfSPZ"
## Timepoint
## Outcome 0
## 0 29
## 1 29
## Dosegroup
## Outcome 4.5 x 10^5 PfSPZ
## 0 29
## 1 29
## Features Samples
## 35716 58
## [1] "9.0 x 10^5 PfSPZ"
## Timepoint
## Outcome 0
## 0 29
## 1 29
## Dosegroup
## Outcome 9.0 x 10^5 PfSPZ
## 0 29
## 1 29
## Features Samples
## 35716 58
## [1] "1.8 x 10^6 PfSPZ"
## Timepoint
## Outcome 0
## 0 38
## 1 25
## Dosegroup
## Outcome 1.8 x 10^6 PfSPZ
## 0 38
## 1 25
## Features Samples
## 35716 63
#Define group by dose, timepoint and outcome
ygroup <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
ygroup[[i]] <- DGEList(counts=counts(xgroup[[i]]), genes=fData(xgroup[[i]]), group= factor(paste(xgroup[[i]]$treat, xgroup[[i]]$Timepoint, xgroup[[i]]$Outcome, sep = "_")), remove.zeros=T)
ygroup[[i]]$samples$PATID <- gsub("_.*", "", rownames(ygroup[[i]]$samples))
}
for(i in names(ygroup)){
keep <- filterByExpr(ygroup[[i]])
ygroup[[i]] <- ygroup[[i]][keep, , keep.lib.sizes=FALSE]
ygroup[[i]]$samples$lib.size <- colSums(ygroup[[i]]$counts)
ygroup[[i]] <- calcNormFactors(ygroup[[i]]) #Normalization
}
degtab <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
Subject <- factor(xgroup[[i]]$PATID)
Outcome <- factor(xgroup[[i]]$Outcome, levels = c(1,0), labels = c("NotProtected","Protected"))
Timepoint <- factor(xgroup[[i]]$Timepoint, levels = c(0,25), labels = c("baseline","postvax"))
Sex <- factor(xgroup[[i]]$SEX)
Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
Site <- as.factor(xgroup[[i]]$site)
CSPAb <- as.integer(xgroup[[i]]$CSPAb)
CSPAb <- log10(CSPAb+1) #log transform CSP Ab data to make linear
Pf_VAX1 <- as.factor(xgroup[[i]]$mal.vax.1)
#This is the revised Timepoint 0 analysis.
design <- model.matrix(~Batch + Sex + Site + CSPAb + Pf_VAX1 + Outcome)
print(i)
print(colnames(design))
rownames(design) <- ygroup[[i]]$samples$PATID
ygroup[[i]] <- estimateDisp(ygroup[[i]],design, robust = TRUE)
fit <- glmQLFit(ygroup[[i]], design, robust = TRUE)
qlf <- glmQLFTest(fit, contrast = c(rep(0, ncol(design)-1),1))
degtab[[i]] <- topTags(qlf, n = nrow(ygroup[[i]]))$table
}
## [1] "Placebo"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
## [1] "4.5 x 10^5 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
## [1] "9.0 x 10^5 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
## [1] "1.8 x 10^6 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
temp <- degtab
for(i in names(degtab)){
temp[[i]] <- degtab[[i]] %>%
dplyr::select(7,8,10:14)
}
Placebo
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000198502 | HLA-DRB5 | major histocompatibility complex, class II, DR beta 5 [Source:HGNC Symbol;Acc:HGNC:4953] | -0.3204301 | 4.8632239 | 53.34851 | 0.0000000 | 0.0000137 |
| ENSG00000269102 | CTD-2525I3.5 | novel transcript, antisense to ZNF766 and ZNF480 | -0.7566281 | 0.1228600 | 13.62446 | 0.0004258 | 0.9998239 |
| ENSG00000170509 | HSD17B13 | hydroxysteroid 17-beta dehydrogenase 13 [Source:HGNC Symbol;Acc:HGNC:18685] | 0.9911967 | 0.1771091 | 13.33673 | 0.0004661 | 0.9998239 |
| ENSG00000135919 | SERPINE2 | serpin family E member 2 [Source:HGNC Symbol;Acc:HGNC:8951] | 0.5546532 | 3.2966072 | 13.55441 | 0.0004822 | 0.9998239 |
| ENSG00000239474 | KLHL41 | kelch like family member 41 [Source:HGNC Symbol;Acc:HGNC:16905] | 1.3120210 | 0.8788559 | 13.25895 | 0.0004823 | 0.9998239 |
| ENSG00000267277 | CTD-2342J14.6 | novel transcript, antisense to SWSAP1 | -0.9393928 | 0.1562798 | 13.12189 | 0.0005179 | 0.9998239 |
| ENSG00000145975 | FAM217A | family with sequence similarity 217 member A [Source:HGNC Symbol;Acc:HGNC:21362] | 1.0110628 | -0.2813040 | 12.78545 | 0.0005569 | 0.9998239 |
| ENSG00000270641 | TSIX | TSIX transcript, XIST antisense RNA [Source:HGNC Symbol;Acc:HGNC:12377] | -2.2321348 | 10.0609278 | 12.22884 | 0.0008394 | 0.9998239 |
| ENSG00000232838 | PET117 | PET117 cytochrome c oxidase chaperone [Source:HGNC Symbol;Acc:HGNC:40045] | -0.3037263 | 3.0022032 | 12.03755 | 0.0009454 | 0.9998239 |
| ENSG00000105971 | CAV2 | caveolin 2 [Source:HGNC Symbol;Acc:HGNC:1528] | 1.1153769 | 0.2471148 | 10.96021 | 0.0013792 | 0.9998239 |
| ENSG00000154734 | ADAMTS1 | ADAM metallopeptidase with thrombospondin type 1 motif 1 [Source:HGNC Symbol;Acc:HGNC:217] | 1.3905202 | -0.0870266 | 10.84868 | 0.0013976 | 0.9998239 |
| ENSG00000260156 | RP11-394B2.6 | novel transcript, antisense to VAC14 | -0.6498158 | 0.0826368 | 10.72329 | 0.0016174 | 0.9998239 |
| ENSG00000267001 | AC006538.4 | novel protein | -0.3783996 | 1.7898322 | 10.78385 | 0.0016845 | 0.9998239 |
| ENSG00000169247 | SH3TC2 | SH3 domain and tetratricopeptide repeats 2 [Source:HGNC Symbol;Acc:HGNC:29427] | 1.3065830 | 0.3351870 | 10.38112 | 0.0018273 | 0.9998239 |
| ENSG00000196872 | CRACDL | CRACD like [Source:HGNC Symbol;Acc:HGNC:33454] | 1.2735306 | 0.5037540 | 10.24530 | 0.0019460 | 0.9998239 |
4.5 x 10^5 PfSPZ
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000174358 | SLC6A19 | solute carrier family 6 member 19 [Source:HGNC Symbol;Acc:HGNC:27960] | 0.6464728 | 3.4725610 | 48.20614 | 0.0000000 | 0.0000939 |
| ENSG00000228146 | CASP16P | caspase 16, pseudogene [Source:HGNC Symbol;Acc:HGNC:27290] | 1.0120463 | 0.6917596 | 15.64360 | 0.0002077 | 0.9706245 |
| ENSG00000131042 | LILRB2 | leukocyte immunoglobulin like receptor B2 [Source:HGNC Symbol;Acc:HGNC:6606] | -2.2324223 | 1.2356883 | 15.31299 | 0.0002296 | 0.9706245 |
| ENSG00000232860 | SMG7-AS1 | SMG7 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:24518] | 1.0780769 | 0.1308753 | 13.91497 | 0.0003940 | 0.9706245 |
| ENSG00000184208 | C22orf46 | chromosome 22 putative open reading frame 46 [Source:HGNC Symbol;Acc:HGNC:26294] | 0.4903798 | 2.5285402 | 12.76209 | 0.0007396 | 0.9706245 |
| ENSG00000116957 | TBCE | NA | -0.2732737 | 4.0904363 | 12.70067 | 0.0007568 | 0.9706245 |
| ENSG00000232131 | NCOA7-AS1 | NCOA7 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:40954] | -0.5923958 | 0.0008323 | 12.45038 | 0.0007635 | 0.9706245 |
| ENSG00000257298 | RP3-405J10.3 | novel transcript, sense intronic to LIMA1 | -1.1495151 | 0.1915984 | 12.30623 | 0.0008043 | 0.9706245 |
| ENSG00000261404 | PSMD7-DT | PSMD7 divergent transcript [Source:HGNC Symbol;Acc:HGNC:53056] | 1.0781941 | 0.1621347 | 12.28842 | 0.0008247 | 0.9706245 |
| ENSG00000260488 | RP11-166B2.7 | novel transcript, antisense to RUNDC2A | 0.8667144 | -0.0193516 | 12.15854 | 0.0008376 | 0.9706245 |
| ENSG00000179743 | FLJ37453 | uncharacterized LOC729614 [Source:NCBI gene (formerly Entrezgene);Acc:729614] | 0.3321373 | 3.5788304 | 11.33642 | 0.0013815 | 0.9706245 |
| ENSG00000171680 | PLEKHG5 | pleckstrin homology and RhoGEF domain containing G5 [Source:HGNC Symbol;Acc:HGNC:29105] | 1.2192565 | 0.5782173 | 10.78084 | 0.0015145 | 0.9706245 |
| ENSG00000215045 | GRID2IP | Grid2 interacting protein [Source:HGNC Symbol;Acc:HGNC:18464] | 0.9388905 | 0.9956381 | 11.06766 | 0.0015373 | 0.9706245 |
| ENSG00000279428 | RP11-258F1.2 | TEC | 0.4806882 | 2.2930463 | 11.04463 | 0.0015765 | 0.9706245 |
| ENSG00000276704 | RP11-128N14.4 | novel transcript, sense intronic to RAP2A | -0.6930811 | -0.2636300 | 10.59938 | 0.0016988 | 0.9706245 |
9.0 x 10^5 PfSPZ
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000229807 | XIST | X inactive specific transcript [Source:HGNC Symbol;Acc:HGNC:12810] | -3.9328609 | 11.7024516 | 24.65698 | 0.0000059 | 0.0883134 |
| ENSG00000270641 | TSIX | TSIX transcript, XIST antisense RNA [Source:HGNC Symbol;Acc:HGNC:12377] | -3.2429393 | 9.8875476 | 22.21131 | 0.0000143 | 0.0883134 |
| ENSG00000160062 | ZBTB8A | zinc finger and BTB domain containing 8A [Source:HGNC Symbol;Acc:HGNC:24172] | -1.4630630 | 0.1854980 | 21.18984 | 0.0000153 | 0.0883134 |
| ENSG00000134198 | TSPAN2 | tetraspanin 2 [Source:HGNC Symbol;Acc:HGNC:20659] | 0.5752927 | 4.8254668 | 21.66800 | 0.0000200 | 0.0883134 |
| ENSG00000204277 | LINC01993 | long intergenic non-protein coding RNA 1993 [Source:HGNC Symbol;Acc:HGNC:52826] | 1.3027314 | 0.5064032 | 18.32967 | 0.0000641 | 0.2201121 |
| ENSG00000223855 | FLJ44511 | heart tissue-associated transcript 92 [Source:NCBI gene (formerly Entrezgene);Acc:441307] | 1.9135299 | 0.8521485 | 17.29490 | 0.0000794 | 0.2201121 |
| ENSG00000124140 | SLC12A5 | solute carrier family 12 member 5 [Source:HGNC Symbol;Acc:HGNC:13818] | 1.6465180 | 1.0937933 | 17.58976 | 0.0000874 | 0.2201121 |
| ENSG00000185933 | CALHM1 | calcium homeostasis modulator 1 [Source:HGNC Symbol;Acc:HGNC:23494] | 1.3662165 | 0.3817580 | 14.98899 | 0.0002274 | 0.3607218 |
| ENSG00000174705 | SH3PXD2B | SH3 and PX domains 2B [Source:HGNC Symbol;Acc:HGNC:29242] | 1.5523185 | 0.8624536 | 14.87031 | 0.0002416 | 0.3607218 |
| ENSG00000255198 | SNHG9 | small nucleolar RNA host gene 9 [Source:HGNC Symbol;Acc:HGNC:33102] | 1.0527682 | -0.0686977 | 14.80654 | 0.0002468 | 0.3607218 |
| ENSG00000092051 | JPH4 | junctophilin 4 [Source:HGNC Symbol;Acc:HGNC:20156] | 1.5852542 | 0.6588844 | 14.70184 | 0.0002565 | 0.3607218 |
| ENSG00000279259 | RP11-334C17.3 | TEC | 0.9321266 | 0.7208732 | 14.96871 | 0.0002721 | 0.3607218 |
| ENSG00000270885 | RASL10B | RAS like family 10 member B [Source:HGNC Symbol;Acc:HGNC:30295] | 1.6005796 | 0.5601429 | 14.50795 | 0.0002738 | 0.3607218 |
| ENSG00000261394 | RP11-165M1.3 | novel transcript, antisense to a novel protein | -0.8331614 | 0.5787782 | 14.81925 | 0.0002863 | 0.3607218 |
| ENSG00000168159 | RNF187 | ring finger protein 187 [Source:HGNC Symbol;Acc:HGNC:27146] | -0.4259981 | 5.3871479 | 14.69177 | 0.0003202 | 0.3765719 |
1.8 x 10^6 PfSPZ
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000171723 | GPHN | gephyrin [Source:HGNC Symbol;Acc:HGNC:15465] | -0.4446555 | 3.1381660 | 16.32377 | 0.0001515 | 0.8923642 |
| ENSG00000224093 | BCAR3-AS1 | BCAR3 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:40093] | -1.1018234 | -0.1733145 | 15.13474 | 0.0001920 | 0.8923642 |
| ENSG00000151090 | THRB | thyroid hormone receptor beta [Source:HGNC Symbol;Acc:HGNC:11799] | -1.6602567 | -0.2683385 | 14.67727 | 0.0002219 | 0.8923642 |
| ENSG00000120500 | ARR3 | arrestin 3 [Source:HGNC Symbol;Acc:HGNC:710] | -0.8623949 | -0.0435407 | 14.70570 | 0.0002492 | 0.8923642 |
| ENSG00000155368 | DBI | diazepam binding inhibitor, acyl-CoA binding protein [Source:HGNC Symbol;Acc:HGNC:2690] | 0.4042811 | 4.8208190 | 14.14063 | 0.0003809 | 0.8923642 |
| ENSG00000279107 | RP11-823P9.4 | TEC | -1.6550035 | 1.4440390 | 13.51962 | 0.0004237 | 0.8923642 |
| ENSG00000180574 | EIF2S3B | eukaryotic translation initiation factor 2 subunit gamma B [Source:HGNC Symbol;Acc:HGNC:43863] | 1.2286486 | 0.9493500 | 13.05148 | 0.0005565 | 0.8923642 |
| ENSG00000102390 | PBDC1 | polysaccharide biosynthesis domain containing 1 [Source:HGNC Symbol;Acc:HGNC:28790] | 0.3625400 | 3.1652506 | 13.09394 | 0.0006020 | 0.8923642 |
| ENSG00000145649 | GZMA | granzyme A [Source:HGNC Symbol;Acc:HGNC:4708] | 0.8392956 | 5.4447857 | 12.99217 | 0.0006282 | 0.8923642 |
| ENSG00000170743 | SYT9 | synaptotagmin 9 [Source:HGNC Symbol;Acc:HGNC:19265] | -1.2239459 | -0.1511131 | 12.39982 | 0.0006671 | 0.8923642 |
| ENSG00000125870 | SNRPB2 | small nuclear ribonucleoprotein polypeptide B2 [Source:HGNC Symbol;Acc:HGNC:11155] | 0.2715634 | 5.4846867 | 12.78421 | 0.0006885 | 0.8923642 |
| ENSG00000215529 | EFCAB8 | EF-hand calcium binding domain 8 [Source:HGNC Symbol;Acc:HGNC:34532] | -1.1103715 | 0.4727191 | 12.45192 | 0.0007035 | 0.8923642 |
| ENSG00000095637 | SORBS1 | sorbin and SH3 domain containing 1 [Source:HGNC Symbol;Acc:HGNC:14565] | -1.2426460 | 1.4754030 | 12.61575 | 0.0007397 | 0.8923642 |
| ENSG00000269054 | ZNF497-AS1 | ZNF497 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:55277] | 0.8907559 | -0.3178188 | 12.12753 | 0.0007703 | 0.8923642 |
| ENSG00000158301 | GPRASP2 | G protein-coupled receptor associated sorting protein 2 [Source:HGNC Symbol;Acc:HGNC:25169] | -0.8991203 | 1.0094728 | 12.44045 | 0.0007870 | 0.8923642 |
Rank genes by -log10(PValue)*sign(logFC). Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function.
set.seed(23)
ranks <- degtab
for(i in names(degtab)){
ranks[[i]] <- degtab[[i]] %>%
mutate(rankmetric = -log10(.$PValue)*sign(.$logFC)) %>%
dplyr::select(GeneSymbol,rankmetric) %>%
na.omit() %>%
distinct() %>%
group_by(GeneSymbol) %>%
summarize(rankmetric = mean(rankmetric)) %>%
arrange(desc(rankmetric)) %>%
deframe()
}
#Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEAtab <- ranks
for(i in names(ranks)){
GSEAtab[[i]] <- NamedGeneRankList2GseaTable(rankedgenes = ranks[[i]],
geneset = "all",
output_directory = tempdir(),
filename_prefix = "GSEA",
sampleSize = 101,
minSize = 20,
maxSize = Inf,
scoreType = "std") %>%
as_tibble() %>%
arrange(desc(NES)) %>%
dplyr::select(module_type, pathway, ES, NES, size, leadingEdge, pval, padj) %>%
mutate(leadingEdge = gsub("^c\\(|\\)$", "", leadingEdge)) %>%
mutate(leadingEdge = gsub('"', "", leadingEdge)) %>%
arrange(padj)
}
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
# These are the genes with duplicated rankmetrics...can ignore warning about ties b/c it's trivial (only involves two genes)
# 1 RP11-521C22.3 -0.0303
# 2 AC069547.1 -0.0303
Filter based on padj < 0.05.
plotDat <- GSEAtab %>%
bind_rows(., .id = "dosegroup") %>%
mutate(dosegroup = factor(dosegroup, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"),
labels = c("Placebo", "4.5x105 PfSPZ", "9.0x105 PfSPZ", "1.8x106 PfSPZ"))) %>%
filter(padj < 0.05) %>%
filter(module_type %in% c("highBTMs", "lowBTMs", "MonacoModules", "BloodGen3Module", "MSigDB_Hallmark_v7.4")) %>%
filter(!grepl("TBA", pathway)) %>%
dplyr::select(dosegroup, module_type, pathway, leadingEdge, size, NES, padj) %>%
mutate(neglogpadj = -log10(padj)) %>%
dplyr::rename("BH-adj p value (FDR)" = "padj") %>%
mutate(pathway = gsub("gd", "γδ", pathway)) %>%
mutate(pathway = gsub("Vd2", "Vδ2", pathway)) %>%
mutate(pathway = gsub("Vg", "Vγ", pathway)) %>%
mutate(pathway = gsub("HALLMARK_", "", pathway)) %>%
mutate(pathway = gsub("_", " ", pathway)) %>%
mutate(pathway = sub(".*?\\_", "", pathway)) %>%
group_by(module_type, dosegroup) %>%
mutate(pathway = fct_reorder(pathway, NES, .desc = TRUE)) %>%
ungroup() %>%
mutate(dosegroup = fct_rev(dosegroup)) %>%
filter(!grepl("TBD", pathway)) %>%
mutate(module_type = factor(module_type, levels = c("highBTMs", "MonacoModules", "lowBTMs", "MSigDB_Hallmark_v7.4", "BloodGen3Module"))) %>%
arrange(desc(neglogpadj)) %>%
droplevels()
#plotting options
basetextsize <- 7.5
myfont <- "Helvetica"
bubble_max_size <- 7.5
TopPlot <- plotDat %>%
filter(module_type %in% c("highBTMs", "MonacoModules")) %>%
ggplot(., aes(x = pathway, y = dosegroup)) +
geom_point(aes(size=neglogpadj, fill = NES), alpha = 0.65, shape=21, stroke = 0.25) +
scale_size_area(name = expression(-log[10]~adjp), max_size = bubble_max_size, breaks = c(1.3, 5, 10, 20, 40), limits = c(1,40)) +
scale_fill_gradient2(low = "blue",
mid = "white",
high = "red") +
hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.background = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, colour = "black"),
axis.text.y = element_text(color = "black")) +
facet_wrap(~module_type, scales = "free_x", nrow= 2)
MidPlot <- plotDat %>%
filter(module_type %in% c("lowBTMs", "MSigDB_Hallmark_v7.4")) %>%
ggplot(., aes(x = pathway, y = dosegroup)) +
geom_point(aes(size=neglogpadj, fill = NES), alpha = 0.65, shape=21, stroke = 0.25) +
scale_size_area(name = expression(-log[10]~adjp), max_size = bubble_max_size, breaks = c(1.3, 5, 10, 20, 40), limits = c(1,40)) +
scale_fill_gradient2(low = "blue",
mid = "white",
high = "red") +
hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.background = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, colour = "black"),
axis.text.y = element_text(color = "black")) +
facet_wrap(~module_type, scales = "free_x")
BottomPlot <- plotDat %>%
filter(module_type %in% c("BloodGen3Module")) %>%
ggplot(., aes(x = pathway, y = dosegroup)) +
geom_point(aes(size=neglogpadj, fill = NES), alpha = 0.65, shape=21, stroke = 0.25) +
scale_size_area(name = expression(-log[10]~adjp), max_size = bubble_max_size, breaks = c(1.3, 5, 10, 20, 40), limits = c(1,40)) +
scale_fill_gradient2(low = "blue",
mid = "white",
high = "red") +
hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.background = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, colour = "black"),
axis.text.y = element_text(color = "black")) +
facet_wrap(~module_type, scales = "free_x")
Red: enriched in protected/uninfected through 3 months post-vax surveillance Blue: enriched in not protected/infected through 3 months post-vax surveillance
This is not included in the manuscript but provided here as it was requested during prior reviews.
i <- "highBTMs"
url <- paste0("https://github.com/TranLab/ModuleLists/blob/main/", i, ".rds?raw=true")
highBTM_list <- readRDS(url(url, method="libcurl"))
# c2.indices <- ids2indices(highBTM_list, ygroup$`1.8 x 10^6 PfSPZ`$genes$GeneSymbol)
# camera(ygroup$`1.8 x 10^6 PfSPZ`, c2.indices, design)
# c2.indices <- ids2indices(highBTM_list, ygroup$Placebo$genes$GeneSymbol)
# camera(ygroup$Placebo, c2.indices, design)
cameratab <- highBTM.indices <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
Subject <- factor(xgroup[[i]]$PATID)
Outcome <- factor(xgroup[[i]]$Outcome, levels = c(1,0), labels = c("NotProtected","Protected"))
Timepoint <- factor(xgroup[[i]]$Timepoint, levels = c(0,25), labels = c("baseline","postvax"))
Sex <- factor(xgroup[[i]]$SEX)
Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
Site <- as.factor(xgroup[[i]]$site)
CSPAb <- as.integer(xgroup[[i]]$CSPAb)
CSPAb <- log10(CSPAb+1) #log transform CSP Ab data to make linear
Pf_VAX1 <- as.factor(xgroup[[i]]$mal.vax.1)
#This is the revised Timepoint 0 analysis.
design <- model.matrix(~Batch + Sex + Site + CSPAb + Pf_VAX1 + Outcome)
print(i)
print(colnames(design))
rownames(design) <- ygroup[[i]]$samples$PATID
ygroup[[i]] <- estimateDisp(ygroup[[i]],design, robust = TRUE)
fit <- glmQLFit(ygroup[[i]], design, robust = TRUE)
qlf <- glmQLFTest(fit, contrast = c(rep(0, ncol(design)-1),1))
highBTM.indices[[i]] <- ids2indices(highBTM_list, ygroup[[i]]$genes$GeneSymbol)
cameratab[[i]] <- camera(ygroup[[i]], highBTM.indices[[i]], design, contrast = c(rep(0, ncol(design)-1),1), inter.gene.cor=0.01)
}
## [1] "Placebo"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
## [1] "4.5 x 10^5 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
## [1] "9.0 x 10^5 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
## [1] "1.8 x 10^6 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM" "SiteWagai"
## [5] "CSPAb" "Pf_VAX11" "OutcomeProtected"
for(i in names(ygroup)){
cameratab[[i]] <- cameratab[[i]] %>%
rownames_to_column(var = "module")
}
cameratab_bind <- dplyr::bind_rows(cameratab, .id = "dosegroup")
Code for Camera for high-dose PfSPZ baseline, all module types
highdosegroup <- xgroup$`1.8 x 10^6 PfSPZ`
Subject <- factor(highdosegroup$PATID)
Outcome <- factor(highdosegroup$Outcome, levels = c(1,0), labels = c("NotProtected","Protected"))
Timepoint <- factor(highdosegroup$Timepoint, levels = c(0,25), labels = c("baseline","postvax"))
Sex <- factor(highdosegroup$SEX)
Batch <- factor(highdosegroup$SEQBATCH, levels = c("Aug2019","Nov2019"))
Site <- as.factor(highdosegroup$site)
CSPAb <- as.integer(highdosegroup$CSPAb)
CSPAb <- log10(CSPAb+1) #log transform CSP Ab data to make linear
Pf_VAX1 <- as.factor(highdosegroup$mal.vax.1)
#This is the revised Timepoint 0 analysis.
design <- model.matrix(~Batch + Sex + Site + CSPAb + Pf_VAX1 + Outcome)
print(colnames(design))
rownames(design) <- ygroup$`1.8 x 10^6 PfSPZ`$samples$PATID
ygroup$`1.8 x 10^6 PfSPZ` <- estimateDisp(ygroup$`1.8 x 10^6 PfSPZ`,design, robust = TRUE)
fit <- glmQLFit(ygroup$`1.8 x 10^6 PfSPZ`, design, robust = TRUE)
qlf <- glmQLFTest(fit, contrast = c(rep(0, ncol(design)-1),1))
cameratab2 <- setNames(as.list(c(1:5)), c("highBTMs", "lowBTMs", "MonacoModules", "MSigDB_Hallmark_v7.4", "BloodGen3Module"))
for(i in names(cameratab2)){
print(i)
url <- paste0("https://github.com/TranLab/ModuleLists/blob/main/", i, ".rds?raw=true")
module_list <- readRDS(url(url, method="libcurl"))
module.indices <- ids2indices(module_list, ygroup$`1.8 x 10^6 PfSPZ`$genes$GeneSymbol)
cameratab2[[i]] <- camera(ygroup$`1.8 x 10^6 PfSPZ`, module.indices, design, contrast = c(rep(0, ncol(design)-1),1), inter.gene.cor=0.01)
cameratab2[[i]] <- cameratab2[[i]] %>%
rownames_to_column(var = "module")
}
myCameraPlotDat_KSPZV1 <- dplyr::bind_rows(cameratab2, .id = "module_type") %>%
mutate(study = "KSPZV1") %>%
dplyr::select(study, module_type, module, NGenes, Direction, FDR) %>%
mutate(neglogpadj = -log(FDR)) %>%
mutate(fdr_direction = ifelse(Direction == "Down", -1*neglogpadj, neglogpadj))
myCameraPlotDat_KSPZV1 %>%
write_rds(. , "/Users/tuantran/Library/CloudStorage/OneDrive-IndianaUniversity/Manuscripts/KSPZV1 Manuscript/JCI Resubmission April 2023/Prelim Results JCI resubmission/KSPZV1_Camera_PvNP.rds")
#plotting options
basetextsize <- 12
myfont <- "Helvetica"
bubble_max_size <- 15
myCameraPlotDat <- cameratab_bind %>%
mutate(dosegroup = factor(dosegroup, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))) %>%
filter(FDR < 0.05) %>%
#filter(module_type %in% c("highBTMs", "lowBTMs", "MonacoModules", "BloodGen3Module", "MSigDB_Hallmark_v7.4")) %>%
#filter(!grepl("TBA", module)) %>%
dplyr::select(dosegroup, module, NGenes, Direction, FDR) %>%
mutate(neglogpadj = -log10(FDR)) %>%
mutate(fdr_direction = ifelse(Direction == "Up", neglogpadj*1, neglogpadj*-1)) %>%
mutate(module = gsub("gd", "γδ", module)) %>%
mutate(module = gsub("Vd2", "Vδ2", module)) %>%
mutate(module = gsub("Vg", "Vγ", module)) %>%
mutate(module = gsub("HALLMARK_", "", module)) %>%
mutate(module = gsub("_", " ", module)) %>%
mutate(module = sub(".*?\\_", "", module)) %>%
group_by(dosegroup) %>%
mutate(module = fct_reorder(module, fdr_direction, .desc = TRUE)) %>%
ungroup() %>%
mutate(dosegroup = fct_rev(dosegroup)) %>%
filter(!grepl("TBD", module)) %>%
#mutate(module_type = factor(module_type, levels = c("highBTMs", "MonacoModules", "lowBTMs", "MSigDB_Hallmark_v7.4", "BloodGen3Module"))) %>%
arrange(desc(neglogpadj)) %>%
droplevels()
myCameraPlot <- myCameraPlotDat %>%
#filter(module_type %in% c("BloodGen3Module")) %>%
ggplot(., aes(x = module, y = dosegroup)) +
geom_point(aes(size=neglogpadj, fill = Direction), alpha = 0.65, shape=21, stroke = 0.25) +
scale_size_area(name = expression(-log[10]~adj.~p~value), max_size = bubble_max_size, breaks = c(1.3, 5, 10, 20), limits = c(1,20)) +
scale_fill_manual(values = c("blue","red")) +
hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
strip.background = element_blank(),
legend.position = "bottom",
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))
myCameraPlot