library(knitr)
library(tidyverse)
library(edgeR)
library(googledrive)
library(fgsea)
library(data.table)
library(ggplot2)
library(ggpubr)
Perform differential gene expression using edgeR on pre-immunization baseline whole-blood samples from the KSPZV1 malaria vaccine trial with the binary outcomes for comparison being dichotomized CSP IgG levels at 2 weeks post-vax.
Updated to filter at FDR<5%.
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1Q4VXYxdl9CqtzhcvHA9pn8q06_YOZgi8"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
set.seed(23)
plotDat <- pData(x) %>%
filter(!is.na(log2FC_CSPAb)) %>% #one subject in 9.0x10^6 PfSPZ group missing CSP log2 fold change data
mutate(log10CSPAb = log10(CSPAb + 1)) %>%
mutate(Outcome = factor(Outcome, levels = c(1,0), labels = c("infected (not protected)", "uninfected (protected)"))) %>%
group_by(Outcome, treat, Timepoint) %>%
dplyr::mutate(pos = 1:n(), label = ifelse(pos==1, n(), "")) %>%
dplyr::select(-pos) %>%
ungroup() %>%
filter(Timepoint == 0) %>%
pivot_longer(., cols = c(log10CSPAb, log2FC_CSPAb), names_to = "variable", values_to = "value")
myPlot <- plotDat %>%
ggplot(., aes(x = treat, y = value, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_violin(alpha = 0.4, color = "grey30", draw_quantiles = c(0.5)) +
stat_compare_means(aes(label = ..p.signif.., group = Outcome), method = "wilcox.test", label.x.npc = "center", vjust = 1, show.legend = F) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("log10 CSP IgG at baseline") +
geom_text(position = position_dodge(width=1), aes(label=label), vjust = -0.25, color = "black") +
theme_bw(base_family = "Arial", base_size = 10) +
theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust=1),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="top"
) +
facet_wrap(~variable, scales = "free_y", nrow = 2,
strip.position = "left",
labeller = as_labeller(c(log10CSPAb = "log10 CSP-specific IgG at baseline",
log2FC_CSPAb = "log2 (post-vax/baseline) CSP-specific IgG response"))) +
ylab(NULL) +
theme(strip.background = element_blank(),
strip.placement = "outside")
Violin plots showing CSP-specific IgG antibodies at baseline and as fold-change (2-weeks post-vaccination/baseline) by treatment and outcome at 3 months.
Based on this data, decided on log2FC > 12.5 is good Ab response.
pData(x) <- pData(x) %>%
mutate(CSP_response_cat = factor(ifelse(log2FC_CSPAb > 12.5, "high_CSP_LFC", "low_CSP_LFC"),
levels = c("low_CSP_LFC", "high_CSP_LFC")))
myGroups <- factor(c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"),
levels = c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
myTimepoint <- 0
x <- x[,x$treat %in% myGroups]
x <- x[,!is.na(x$CSP_response_cat)]
xgroup <- setNames(as.list(c(1:3)), c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(xgroup)){
xgroup[[i]] <- x[, which(x$treat == i)]
xgroup[[i]] <- xgroup[[i]][,xgroup[[i]]$Timepoint == myTimepoint]
xgroup[[i]]$treat <- droplevels(xgroup[[i]]$treat)
print(i)
print(table(Outcome = xgroup[[i]]$Outcome, Timepoint = xgroup[[i]]$Timepoint))
print(table(Outcome = xgroup[[i]]$Outcome, Dosegroup = xgroup[[i]]$treat))
print(dim(xgroup[[i]]))
}
## [1] "4.5 x 10^5 PfSPZ"
## Timepoint
## Outcome 0
## 0 29
## 1 29
## Dosegroup
## Outcome 4.5 x 10^5 PfSPZ
## 0 29
## 1 29
## Features Samples
## 35716 58
## [1] "9.0 x 10^5 PfSPZ"
## Timepoint
## Outcome 0
## 0 28
## 1 29
## Dosegroup
## Outcome 9.0 x 10^5 PfSPZ
## 0 28
## 1 29
## Features Samples
## 35716 57
## [1] "1.8 x 10^6 PfSPZ"
## Timepoint
## Outcome 0
## 0 38
## 1 25
## Dosegroup
## Outcome 1.8 x 10^6 PfSPZ
## 0 38
## 1 25
## Features Samples
## 35716 63
#Define group by dose, timepoint and CSP response
ygroup <- setNames(as.list(c(1:3)), c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
ygroup[[i]] <- DGEList(counts=counts(xgroup[[i]]), genes=fData(xgroup[[i]]), group= factor(paste(xgroup[[i]]$treat, xgroup[[i]]$Timepoint, xgroup[[i]]$CSP_response_cat, sep = "_")), remove.zeros=T)
ygroup[[i]]$samples$PATID <- gsub("_.*", "", rownames(ygroup[[i]]$samples))
}
for(i in names(ygroup)){
keep <- filterByExpr(ygroup[[i]])
ygroup[[i]] <- ygroup[[i]][keep, , keep.lib.sizes=FALSE]
ygroup[[i]]$samples$lib.size <- colSums(ygroup[[i]]$counts)
ygroup[[i]] <- calcNormFactors(ygroup[[i]]) #Normalization
}
degtab <- design <- setNames(as.list(c(1:3)), c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
Subject <- factor(xgroup[[i]]$PATID)
CSPResponse <- factor(xgroup[[i]]$CSP_response_cat)
Protection <- factor(xgroup[[i]]$Outcome, levels = c(0,1), labels = c("protected", "notprotected"))
Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
design[[i]] <- model.matrix(~Batch + Protection + CSPResponse)
print(i)
print(colnames(design[[i]]))
rownames(design[[i]]) <- ygroup[[i]]$samples$PATID
ygroup[[i]] <- estimateDisp(ygroup[[i]],design[[i]], robust = TRUE)
fit <- glmQLFit(ygroup[[i]], design[[i]], robust = TRUE)
print("glmQLFit done.")
qlf <- glmQLFTest(fit, coef = "CSPResponsehigh_CSP_LFC")
print("glmQLFTest done.")
degtab[[i]] <- topTags(qlf, n = Inf)
}
## [1] "4.5 x 10^5 PfSPZ"
## [1] "(Intercept)" "BatchNov2019"
## [3] "Protectionnotprotected" "CSPResponsehigh_CSP_LFC"
## [1] "glmQLFit done."
## [1] "glmQLFTest done."
## [1] "9.0 x 10^5 PfSPZ"
## [1] "(Intercept)" "BatchNov2019"
## [3] "Protectionnotprotected" "CSPResponsehigh_CSP_LFC"
## [1] "glmQLFit done."
## [1] "glmQLFTest done."
## [1] "1.8 x 10^6 PfSPZ"
## [1] "(Intercept)" "BatchNov2019"
## [3] "Protectionnotprotected" "CSPResponsehigh_CSP_LFC"
## [1] "glmQLFit done."
## [1] "glmQLFTest done."
temp <- degtab
for(i in names(degtab)){
temp[[i]] <- degtab[[i]]$table %>%
dplyr::select(7,9,8,10:14)
}
4.5 x 10^5 PfSPZ
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000047634 | SCML1 | Scm polycomb group protein like 1 [Source:HGNC Symbol;Acc:HGNC:10580] | 0.5470575 | 5.3168687 | 18.68467 | 0.0000604 | 0.7604745 |
| ENSG00000259700 | RP11-485O10.3 | novel transcript, antisense to CEP152 | 1.2998836 | 0.1252416 | 15.26323 | 0.0002441 | 0.7604745 |
| ENSG00000112210 | RAB23 | RAB23, member RAS oncogene family [Source:HGNC Symbol;Acc:HGNC:14263] | 0.6336624 | 3.0708922 | 13.71097 | 0.0004724 | 0.7604745 |
| ENSG00000074803 | SLC12A1 | solute carrier family 12 member 1 [Source:HGNC Symbol;Acc:HGNC:10910] | -5.3017053 | 1.9479520 | 13.02670 | 0.0006573 | 0.7604745 |
| ENSG00000118200 | CAMSAP2 | calmodulin regulated spectrin associated protein family member 2 [Source:HGNC Symbol;Acc:HGNC:29188] | 0.7953159 | 3.8022678 | 12.81446 | 0.0006977 | 0.7604745 |
| ENSG00000134352 | IL6ST | interleukin 6 cytokine family signal transducer [Source:HGNC Symbol;Acc:HGNC:6021] | 0.4187322 | 8.9090141 | 12.54189 | 0.0007861 | 0.7604745 |
| ENSG00000269929 | MIRLET7A1HG | miRlet-7a-1/let-7f-1/let-7d cluster host gene [Source:HGNC Symbol;Acc:HGNC:53970] | 0.3846854 | 4.6529223 | 12.44118 | 0.0008218 | 0.7604745 |
| ENSG00000181690 | PLAG1 | PLAG1 zinc finger [Source:HGNC Symbol;Acc:HGNC:9045] | 0.6756168 | 4.6523236 | 12.36994 | 0.0008497 | 0.7604745 |
| ENSG00000189410 | SH2D5 | SH2 domain containing 5 [Source:HGNC Symbol;Acc:HGNC:28819] | 2.6739843 | -0.2590413 | 12.22180 | 0.0009054 | 0.7604745 |
| ENSG00000179909 | ZNF154 | zinc finger protein 154 [Source:HGNC Symbol;Acc:HGNC:12939] | 0.3612848 | 5.9008784 | 11.85377 | 0.0010664 | 0.7604745 |
| ENSG00000168958 | MFF | mitochondrial fission factor [Source:HGNC Symbol;Acc:HGNC:24858] | 0.2293740 | 6.3754001 | 11.76019 | 0.0011119 | 0.7604745 |
| ENSG00000270082 | RP11-178L8.8 | novel transcript, antisense to FBXO31 and C16orf95 readthrough | 1.2503497 | 0.0555534 | 11.75165 | 0.0011161 | 0.7604745 |
| ENSG00000163519 | TRAT1 | T cell receptor associated transmembrane adaptor 1 [Source:HGNC Symbol;Acc:HGNC:30698] | 0.4159588 | 7.3288393 | 11.75120 | 0.0011164 | 0.7604745 |
| ENSG00000246859 | STARD4-AS1 | STARD4 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:44117] | 0.4316312 | 4.4170770 | 11.72838 | 0.0011278 | 0.7604745 |
| ENSG00000124766 | SOX4 | SRY-box transcription factor 4 [Source:HGNC Symbol;Acc:HGNC:11200] | 0.3820120 | 5.2251979 | 11.70632 | 0.0011390 | 0.7604745 |
9.0 x 10^5 PfSPZ
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000197721 | CR1L | complement C3b/C4b receptor 1 like [Source:HGNC Symbol;Acc:HGNC:2335] | -1.1552611 | 3.616811 | 17.79293 | 0.0000897 | 0.5460265 |
| ENSG00000112077 | RHAG | Rh associated glycoprotein [Source:HGNC Symbol;Acc:HGNC:10006] | -2.0824491 | 1.798151 | 17.45938 | 0.0001025 | 0.5460265 |
| ENSG00000205707 | ETFRF1 | electron transfer flavoprotein regulatory factor 1 [Source:HGNC Symbol;Acc:HGNC:27052] | -0.2704120 | 5.532598 | 16.92032 | 0.0001241 | 0.5460265 |
| ENSG00000164211 | STARD4 | StAR related lipid transfer domain containing 4 [Source:HGNC Symbol;Acc:HGNC:18058] | -0.3933441 | 5.674460 | 15.91757 | 0.0001873 | 0.5460265 |
| ENSG00000133742 | CA1 | carbonic anhydrase 1 [Source:HGNC Symbol;Acc:HGNC:1368] | -2.3893423 | 7.167204 | 16.08673 | 0.0001889 | 0.5460265 |
| ENSG00000047597 | XK | X-linked Kx blood group [Source:HGNC Symbol;Acc:HGNC:12811] | -1.5088612 | 5.765518 | 15.60687 | 0.0002258 | 0.5460265 |
| ENSG00000273419 | RP4-751H13.7 | novel transcript, antisense to ZNF862 | 0.4097277 | 2.958471 | 14.90319 | 0.0002862 | 0.5460265 |
| ENSG00000172331 | BPGM | bisphosphoglycerate mutase [Source:HGNC Symbol;Acc:HGNC:1093] | -1.4829271 | 7.633568 | 14.83528 | 0.0003116 | 0.5460265 |
| ENSG00000106244 | PDAP1 | PDGFA associated protein 1 [Source:HGNC Symbol;Acc:HGNC:14634] | 0.1859462 | 6.410596 | 14.45848 | 0.0003455 | 0.5460265 |
| ENSG00000213185 | FAM24B | family with sequence similarity 24 member B [Source:HGNC Symbol;Acc:HGNC:23475] | 0.4974613 | 1.892171 | 14.37741 | 0.0003576 | 0.5460265 |
| ENSG00000140521 | POLG | DNA polymerase gamma, catalytic subunit [Source:HGNC Symbol;Acc:HGNC:9179] | 0.2341676 | 5.524481 | 13.88530 | 0.0004412 | 0.5460265 |
| ENSG00000023445 | BIRC3 | baculoviral IAP repeat containing 3 [Source:HGNC Symbol;Acc:HGNC:591] | -0.2944914 | 9.002853 | 13.58902 | 0.0005012 | 0.5460265 |
| ENSG00000143727 | ACP1 | acid phosphatase 1 [Source:HGNC Symbol;Acc:HGNC:122] | -0.2850187 | 6.222189 | 13.44334 | 0.0005337 | 0.5460265 |
| ENSG00000141504 | SAT2 | spermidine/spermine N1-acetyltransferase family member 2 [Source:HGNC Symbol;Acc:HGNC:23160] | 0.4016152 | 2.730215 | 13.35983 | 0.0005533 | 0.5460265 |
| ENSG00000169255 | B3GALNT1 | beta-1,3-N-acetylgalactosaminyltransferase 1 (globoside blood group) [Source:HGNC Symbol;Acc:HGNC:918] | -1.2146516 | 1.929669 | 13.34896 | 0.0005577 | 0.5460265 |
1.8 x 10^6 PfSPZ
| GeneSymbol | descripton_new | logFC | logCPM | F | PValue | FDR | |
|---|---|---|---|---|---|---|---|
| ENSG00000233296 | TMEM18-DT | TMEM18 divergent transcript [Source:HGNC Symbol;Acc:HGNC:54481] | -1.5611539 | 1.5600112 | 20.06615 | 0.0000313 | 0.5646507 |
| ENSG00000170448 | NFXL1 | nuclear transcription factor, X-box binding like 1 [Source:HGNC Symbol;Acc:HGNC:18726] | 0.3785918 | 4.6382758 | 14.32345 | 0.0003401 | 0.9999456 |
| ENSG00000121940 | CLCC1 | chloride channel CLIC like 1 [Source:HGNC Symbol;Acc:HGNC:29675] | 0.2198391 | 5.3863704 | 13.39698 | 0.0005100 | 0.9999456 |
| ENSG00000179523 | EIF3J-DT | EIF3J divergent transcript [Source:HGNC Symbol;Acc:HGNC:48616] | -0.2858687 | 3.7289935 | 12.75960 | 0.0006765 | 0.9999456 |
| ENSG00000232295 | RP11-154D6.1 | novel transcript, antisense OGFRL1 | 0.3675811 | 5.9261183 | 12.27374 | 0.0008408 | 0.9999456 |
| ENSG00000226419 | SLC16A1-AS1 | SLC16A1 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:49445] | -0.3105055 | 4.0076945 | 11.74874 | 0.0010657 | 0.9999456 |
| ENSG00000083838 | ZNF446 | zinc finger protein 446 [Source:HGNC Symbol;Acc:HGNC:21036] | -0.3175324 | 4.3694778 | 11.71155 | 0.0010838 | 0.9999456 |
| ENSG00000254929 | RP11-144G6.12 | novel transcript | -0.5057131 | 3.6901989 | 11.49215 | 0.0011976 | 0.9999456 |
| ENSG00000131238 | PPT1 | palmitoyl-protein thioesterase 1 [Source:HGNC Symbol;Acc:HGNC:9325] | 0.3423849 | 7.3928490 | 11.40124 | 0.0012483 | 0.9999456 |
| ENSG00000178695 | KCTD12 | potassium channel tetramerization domain containing 12 [Source:HGNC Symbol;Acc:HGNC:14678] | 0.3757330 | 7.3336015 | 11.14757 | 0.0014020 | 0.9999456 |
| ENSG00000184508 | HDDC3 | HD domain containing 3 [Source:HGNC Symbol;Acc:HGNC:30522] | -0.4789895 | 2.4408352 | 11.06803 | 0.0014541 | 0.9999456 |
| ENSG00000149269 | PAK1 | p21 (RAC1) activated kinase 1 [Source:HGNC Symbol;Acc:HGNC:8590] | 0.3132222 | 6.5699951 | 11.06715 | 0.0014547 | 0.9999456 |
| ENSG00000213468 | FIRRE | firre intergenic repeating RNA element [Source:HGNC Symbol;Acc:HGNC:49627] | -0.7422046 | 1.6776547 | 11.00778 | 0.0014950 | 0.9999456 |
| ENSG00000268093 | AC022154.7 | novel transcript, antisense to SPHK2 | -0.5212837 | 0.9940586 | 10.96250 | 0.0015264 | 0.9999456 |
| ENSG00000105514 | RAB3D | RAB3D, member RAS oncogene family [Source:HGNC Symbol;Acc:HGNC:9779] | 0.5043009 | 6.1775479 | 10.74900 | 0.0016963 | 0.9999456 |
Rank genes by -log10(PValue)*sign(logFC). Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function.
set.seed(23)
ranks <- degtab
for(i in names(degtab)){
ranks[[i]] <- degtab[[i]]$table %>%
mutate(rankmetric = -log10(.$PValue)*sign(.$logFC)) %>%
dplyr::select(GeneSymbol,rankmetric) %>%
na.omit() %>%
distinct() %>%
group_by(GeneSymbol) %>%
summarize(rankmetric = mean(rankmetric)) %>%
arrange(desc(rankmetric)) %>%
deframe()
}
#Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEAtab <- ranks
for(i in names(ranks)){
GSEAtab[[i]] <- NamedGeneRankList2GseaTable(rankedgenes = ranks[[i]],
geneset = "all",
output_directory = tempdir(),
filename_prefix = "GSEA",
sampleSize = 101,
minSize = 20,
maxSize = Inf,
scoreType = "std") %>%
as_tibble() %>%
arrange(desc(NES)) %>%
dplyr::select(module_type, pathway, ES, NES, size, leadingEdge, pval, padj) %>%
mutate(leadingEdge = gsub("^c\\(|\\)$", "", leadingEdge)) %>%
mutate(leadingEdge = gsub('"', "", leadingEdge)) %>%
arrange(padj)
}
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
Filter based on padj < 0.05.
plotDat <- GSEAtab %>%
bind_rows(., .id = "dosegroup") %>%
mutate(dosegroup = factor(dosegroup, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))) %>%
filter(padj < 0.05) %>%
filter(module_type %in% c("highBTMs", "lowBTMs", "MonacoModules", "BloodGen3Module", "MSigDB_Hallmark_v7.4")) %>%
filter(!grepl("TBA", pathway)) %>%
dplyr::select(dosegroup, module_type, pathway, leadingEdge, size, NES, padj) %>%
mutate(neglogpadj = -log10(padj)) %>%
mutate(pathway = gsub("gd", "γδ", pathway)) %>%
mutate(pathway = gsub("Vd2", "Vδ2", pathway)) %>%
mutate(pathway = gsub("Vg", "Vγ", pathway)) %>%
mutate(pathway = gsub("HALLMARK_", "", pathway)) %>%
mutate(pathway = gsub("_", " ", pathway)) %>%
mutate(pathway = sub(".*?\\_", "", pathway)) %>%
group_by(module_type, dosegroup) %>%
#mutate(pathway = fct_reorder(pathway, NES, .desc = TRUE)) %>%
ungroup() %>%
mutate(dosegroup = fct_rev(dosegroup)) %>%
filter(!grepl("TBD", pathway)) %>%
mutate(module_type = factor(module_type, levels = c("highBTMs", "MonacoModules", "lowBTMs", "MSigDB_Hallmark_v7.4", "BloodGen3Module"))) %>%
arrange(desc(neglogpadj)) %>%
droplevels()
#plotting options
basetextsize <- 8
myfont <- "sans"
bubble_max_size <- 8
myPlot <- plotDat %>%
filter(dosegroup == "1.8 x 10^6 PfSPZ") %>%
filter(module_type %in% c("highBTMs", "MonacoModules")) %>%
ggplot(., aes(x = dosegroup, y = factor(pathway,
levels = rev(levels(factor(pathway)))))) +
geom_point(aes(size=neglogpadj, fill = NES), alpha = 0.65, shape=21, stroke = 0.25) +
scale_size_area(name = expression(-log[10]~adj.~p~value), max_size = bubble_max_size, breaks = c(1.3, 5, 10, 20, 40, 80), limits = c(1,90)) +
scale_fill_gradient2(low = "blue",
mid = "white",
high = "red") +
hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x.bottom = element_blank(),
strip.background = element_blank(),
legend.position = "left",
legend.direction = "vertical",
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
facet_wrap(~module_type, scales = "free_y", nrow = 2, strip.position = "right")
GSEA using genes ranked by direction and significance of differential gene expression (DGE) at baseline between infants immunized with 1.8x106 PfSPZ who subsequently developed either a high or low CSP IgG response post-vaccination. Only modules with a Benjamini-Hochberg-adjusted p<0.05 are shown. NES = normalized enrichment score.
Red: enriched in protected/uninfected through 3 months post-vax surveillance Blue: enriched in not protected/infected through 3 months post-vax surveillance
addSmallLegend <- function(myPlot, pointSize = 4, textSize = 6, spaceLegend = 0.6) {
myPlot +
guides(shape = guide_legend(override.aes = list(size = pointSize)),
color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
print(addSmallLegend(myPlot))