library(knitr)
library(tidyverse)
library(edgeR)
library(googledrive)
library(fgsea)
library(data.table)
library(ggplot2)
library(ggpubr)

Objective

Perform differential gene expression using edgeR on pre-immunization baseline whole-blood samples from the KSPZV1 malaria vaccine trial with the binary outcomes for comparison being dichotomized CSP IgG levels at 2 weeks post-vax.

Updated to filter at FDR<5%.

Load data

#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1Q4VXYxdl9CqtzhcvHA9pn8q06_YOZgi8"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))

CSP IgG reactivity at baseline and post-vax by treatment and outcome (Fig 3D)

set.seed(23)
plotDat <- pData(x) %>%
  filter(!is.na(log2FC_CSPAb)) %>% #one subject in 9.0x10^6 PfSPZ group missing CSP log2 fold change data
  mutate(log10CSPAb = log10(CSPAb + 1)) %>%
  mutate(Outcome = factor(Outcome, levels = c(1,0), labels = c("infected (not protected)", "uninfected (protected)"))) %>%
  group_by(Outcome, treat, Timepoint) %>%
  dplyr::mutate(pos = 1:n(), label = ifelse(pos==1, n(), "")) %>%
  dplyr::select(-pos) %>%
  ungroup() %>%
  filter(Timepoint == 0) %>%
  pivot_longer(., cols = c(log10CSPAb, log2FC_CSPAb), names_to = "variable", values_to = "value")


myPlot <- plotDat %>%
  ggplot(., aes(x = treat, y = value, fill = Outcome, color = Outcome)) +
  geom_point(position = position_jitterdodge()) +
  geom_violin(alpha = 0.4, color = "grey30", draw_quantiles = c(0.5)) +
  stat_compare_means(aes(label = ..p.signif.., group = Outcome), method = "wilcox.test", label.x.npc = "center", vjust = 1, show.legend = F) +
  scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
  scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
  ylab("log10 CSP IgG at baseline") +
  geom_text(position = position_dodge(width=1), aes(label=label), vjust = -0.25, color = "black") +
  theme_bw(base_family = "Arial", base_size = 10) +
  theme(axis.text.x=element_text(angle = 45, vjust = 1, hjust=1),
          axis.ticks.x=element_blank(),
          axis.title.x=element_blank(),
          strip.text.x = element_blank(),
          strip.background = element_blank(),
          legend.position="top"
          ) +
  facet_wrap(~variable, scales = "free_y", nrow = 2,
             strip.position = "left", 
             labeller = as_labeller(c(log10CSPAb = "log10 CSP-specific IgG at baseline",
                                      log2FC_CSPAb = "log2 (post-vax/baseline) CSP-specific IgG response"))) +
     ylab(NULL) +
     theme(strip.background = element_blank(),
           strip.placement = "outside")

Violin plots showing CSP-specific IgG antibodies at baseline and as fold-change (2-weeks post-vaccination/baseline) by treatment and outcome at 3 months.

Rationale for choosing cut-off for binary response

Based on this data, decided on log2FC > 12.5 is good Ab response.

pData(x) <- pData(x) %>%
  mutate(CSP_response_cat = factor(ifelse(log2FC_CSPAb > 12.5, "high_CSP_LFC", "low_CSP_LFC"),
                                   levels = c("low_CSP_LFC", "high_CSP_LFC")))

Set options

myGroups <- factor(c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"),
                   levels = c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
myTimepoint <- 0

Reduce samples

x <- x[,x$treat %in% myGroups]
x <- x[,!is.na(x$CSP_response_cat)]
xgroup <- setNames(as.list(c(1:3)), c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(xgroup)){
  xgroup[[i]] <- x[, which(x$treat == i)]
  xgroup[[i]] <- xgroup[[i]][,xgroup[[i]]$Timepoint == myTimepoint]
  xgroup[[i]]$treat <- droplevels(xgroup[[i]]$treat)
  print(i)
  print(table(Outcome = xgroup[[i]]$Outcome, Timepoint = xgroup[[i]]$Timepoint))
  print(table(Outcome = xgroup[[i]]$Outcome, Dosegroup = xgroup[[i]]$treat))
  print(dim(xgroup[[i]]))
}
## [1] "4.5 x 10^5 PfSPZ"
##        Timepoint
## Outcome  0
##       0 29
##       1 29
##        Dosegroup
## Outcome 4.5 x 10^5 PfSPZ
##       0               29
##       1               29
## Features  Samples 
##    35716       58 
## [1] "9.0 x 10^5 PfSPZ"
##        Timepoint
## Outcome  0
##       0 28
##       1 29
##        Dosegroup
## Outcome 9.0 x 10^5 PfSPZ
##       0               28
##       1               29
## Features  Samples 
##    35716       57 
## [1] "1.8 x 10^6 PfSPZ"
##        Timepoint
## Outcome  0
##       0 38
##       1 25
##        Dosegroup
## Outcome 1.8 x 10^6 PfSPZ
##       0               38
##       1               25
## Features  Samples 
##    35716       63

Build DGEList Object

#Define group by dose, timepoint and CSP response
ygroup <- setNames(as.list(c(1:3)), c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
  ygroup[[i]]  <- DGEList(counts=counts(xgroup[[i]]), genes=fData(xgroup[[i]]), group= factor(paste(xgroup[[i]]$treat, xgroup[[i]]$Timepoint, xgroup[[i]]$CSP_response_cat, sep = "_")), remove.zeros=T)
  ygroup[[i]]$samples$PATID <- gsub("_.*", "", rownames(ygroup[[i]]$samples))
}

Filter out low expression features

for(i in names(ygroup)){
  keep <- filterByExpr(ygroup[[i]])
  ygroup[[i]] <- ygroup[[i]][keep, , keep.lib.sizes=FALSE]
  ygroup[[i]]$samples$lib.size <- colSums(ygroup[[i]]$counts)
  ygroup[[i]] <- calcNormFactors(ygroup[[i]])   #Normalization
}

Analysis between high CSP LFC and low CSP LFC within a Dose Group

degtab <- design <- setNames(as.list(c(1:3)), c("4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
  Subject <- factor(xgroup[[i]]$PATID)
  CSPResponse <- factor(xgroup[[i]]$CSP_response_cat)
  Protection <- factor(xgroup[[i]]$Outcome, levels = c(0,1), labels = c("protected", "notprotected"))
  Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
  design[[i]] <- model.matrix(~Batch + Protection + CSPResponse)
  print(i)
  print(colnames(design[[i]]))
  rownames(design[[i]]) <- ygroup[[i]]$samples$PATID
  ygroup[[i]]     <- estimateDisp(ygroup[[i]],design[[i]], robust = TRUE)
  fit   <- glmQLFit(ygroup[[i]], design[[i]], robust = TRUE)
  print("glmQLFit done.")
  qlf <- glmQLFTest(fit, coef = "CSPResponsehigh_CSP_LFC")
  print("glmQLFTest done.")
  degtab[[i]] <- topTags(qlf, n = Inf)
}
## [1] "4.5 x 10^5 PfSPZ"
## [1] "(Intercept)"             "BatchNov2019"           
## [3] "Protectionnotprotected"  "CSPResponsehigh_CSP_LFC"
## [1] "glmQLFit done."
## [1] "glmQLFTest done."
## [1] "9.0 x 10^5 PfSPZ"
## [1] "(Intercept)"             "BatchNov2019"           
## [3] "Protectionnotprotected"  "CSPResponsehigh_CSP_LFC"
## [1] "glmQLFit done."
## [1] "glmQLFTest done."
## [1] "1.8 x 10^6 PfSPZ"
## [1] "(Intercept)"             "BatchNov2019"           
## [3] "Protectionnotprotected"  "CSPResponsehigh_CSP_LFC"
## [1] "glmQLFit done."
## [1] "glmQLFTest done."
temp <- degtab
for(i in names(degtab)){
  temp[[i]] <- degtab[[i]]$table %>%
    dplyr::select(7,9,8,10:14)
  }

Examine top 15 DEGs (high CSP response vs low CSP response)

4.5 x 10^5 PfSPZ

GeneSymbol descripton_new logFC logCPM F PValue FDR
ENSG00000047634 SCML1 Scm polycomb group protein like 1 [Source:HGNC Symbol;Acc:HGNC:10580] 0.5470575 5.3168687 18.68467 0.0000604 0.7604745
ENSG00000259700 RP11-485O10.3 novel transcript, antisense to CEP152 1.2998836 0.1252416 15.26323 0.0002441 0.7604745
ENSG00000112210 RAB23 RAB23, member RAS oncogene family [Source:HGNC Symbol;Acc:HGNC:14263] 0.6336624 3.0708922 13.71097 0.0004724 0.7604745
ENSG00000074803 SLC12A1 solute carrier family 12 member 1 [Source:HGNC Symbol;Acc:HGNC:10910] -5.3017053 1.9479520 13.02670 0.0006573 0.7604745
ENSG00000118200 CAMSAP2 calmodulin regulated spectrin associated protein family member 2 [Source:HGNC Symbol;Acc:HGNC:29188] 0.7953159 3.8022678 12.81446 0.0006977 0.7604745
ENSG00000134352 IL6ST interleukin 6 cytokine family signal transducer [Source:HGNC Symbol;Acc:HGNC:6021] 0.4187322 8.9090141 12.54189 0.0007861 0.7604745
ENSG00000269929 MIRLET7A1HG miRlet-7a-1/let-7f-1/let-7d cluster host gene [Source:HGNC Symbol;Acc:HGNC:53970] 0.3846854 4.6529223 12.44118 0.0008218 0.7604745
ENSG00000181690 PLAG1 PLAG1 zinc finger [Source:HGNC Symbol;Acc:HGNC:9045] 0.6756168 4.6523236 12.36994 0.0008497 0.7604745
ENSG00000189410 SH2D5 SH2 domain containing 5 [Source:HGNC Symbol;Acc:HGNC:28819] 2.6739843 -0.2590413 12.22180 0.0009054 0.7604745
ENSG00000179909 ZNF154 zinc finger protein 154 [Source:HGNC Symbol;Acc:HGNC:12939] 0.3612848 5.9008784 11.85377 0.0010664 0.7604745
ENSG00000168958 MFF mitochondrial fission factor [Source:HGNC Symbol;Acc:HGNC:24858] 0.2293740 6.3754001 11.76019 0.0011119 0.7604745
ENSG00000270082 RP11-178L8.8 novel transcript, antisense to FBXO31 and C16orf95 readthrough 1.2503497 0.0555534 11.75165 0.0011161 0.7604745
ENSG00000163519 TRAT1 T cell receptor associated transmembrane adaptor 1 [Source:HGNC Symbol;Acc:HGNC:30698] 0.4159588 7.3288393 11.75120 0.0011164 0.7604745
ENSG00000246859 STARD4-AS1 STARD4 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:44117] 0.4316312 4.4170770 11.72838 0.0011278 0.7604745
ENSG00000124766 SOX4 SRY-box transcription factor 4 [Source:HGNC Symbol;Acc:HGNC:11200] 0.3820120 5.2251979 11.70632 0.0011390 0.7604745

9.0 x 10^5 PfSPZ

GeneSymbol descripton_new logFC logCPM F PValue FDR
ENSG00000197721 CR1L complement C3b/C4b receptor 1 like [Source:HGNC Symbol;Acc:HGNC:2335] -1.1552611 3.616811 17.79293 0.0000897 0.5460265
ENSG00000112077 RHAG Rh associated glycoprotein [Source:HGNC Symbol;Acc:HGNC:10006] -2.0824491 1.798151 17.45938 0.0001025 0.5460265
ENSG00000205707 ETFRF1 electron transfer flavoprotein regulatory factor 1 [Source:HGNC Symbol;Acc:HGNC:27052] -0.2704120 5.532598 16.92032 0.0001241 0.5460265
ENSG00000164211 STARD4 StAR related lipid transfer domain containing 4 [Source:HGNC Symbol;Acc:HGNC:18058] -0.3933441 5.674460 15.91757 0.0001873 0.5460265
ENSG00000133742 CA1 carbonic anhydrase 1 [Source:HGNC Symbol;Acc:HGNC:1368] -2.3893423 7.167204 16.08673 0.0001889 0.5460265
ENSG00000047597 XK X-linked Kx blood group [Source:HGNC Symbol;Acc:HGNC:12811] -1.5088612 5.765518 15.60687 0.0002258 0.5460265
ENSG00000273419 RP4-751H13.7 novel transcript, antisense to ZNF862 0.4097277 2.958471 14.90319 0.0002862 0.5460265
ENSG00000172331 BPGM bisphosphoglycerate mutase [Source:HGNC Symbol;Acc:HGNC:1093] -1.4829271 7.633568 14.83528 0.0003116 0.5460265
ENSG00000106244 PDAP1 PDGFA associated protein 1 [Source:HGNC Symbol;Acc:HGNC:14634] 0.1859462 6.410596 14.45848 0.0003455 0.5460265
ENSG00000213185 FAM24B family with sequence similarity 24 member B [Source:HGNC Symbol;Acc:HGNC:23475] 0.4974613 1.892171 14.37741 0.0003576 0.5460265
ENSG00000140521 POLG DNA polymerase gamma, catalytic subunit [Source:HGNC Symbol;Acc:HGNC:9179] 0.2341676 5.524481 13.88530 0.0004412 0.5460265
ENSG00000023445 BIRC3 baculoviral IAP repeat containing 3 [Source:HGNC Symbol;Acc:HGNC:591] -0.2944914 9.002853 13.58902 0.0005012 0.5460265
ENSG00000143727 ACP1 acid phosphatase 1 [Source:HGNC Symbol;Acc:HGNC:122] -0.2850187 6.222189 13.44334 0.0005337 0.5460265
ENSG00000141504 SAT2 spermidine/spermine N1-acetyltransferase family member 2 [Source:HGNC Symbol;Acc:HGNC:23160] 0.4016152 2.730215 13.35983 0.0005533 0.5460265
ENSG00000169255 B3GALNT1 beta-1,3-N-acetylgalactosaminyltransferase 1 (globoside blood group) [Source:HGNC Symbol;Acc:HGNC:918] -1.2146516 1.929669 13.34896 0.0005577 0.5460265

1.8 x 10^6 PfSPZ

GeneSymbol descripton_new logFC logCPM F PValue FDR
ENSG00000233296 TMEM18-DT TMEM18 divergent transcript [Source:HGNC Symbol;Acc:HGNC:54481] -1.5611539 1.5600112 20.06615 0.0000313 0.5646507
ENSG00000170448 NFXL1 nuclear transcription factor, X-box binding like 1 [Source:HGNC Symbol;Acc:HGNC:18726] 0.3785918 4.6382758 14.32345 0.0003401 0.9999456
ENSG00000121940 CLCC1 chloride channel CLIC like 1 [Source:HGNC Symbol;Acc:HGNC:29675] 0.2198391 5.3863704 13.39698 0.0005100 0.9999456
ENSG00000179523 EIF3J-DT EIF3J divergent transcript [Source:HGNC Symbol;Acc:HGNC:48616] -0.2858687 3.7289935 12.75960 0.0006765 0.9999456
ENSG00000232295 RP11-154D6.1 novel transcript, antisense OGFRL1 0.3675811 5.9261183 12.27374 0.0008408 0.9999456
ENSG00000226419 SLC16A1-AS1 SLC16A1 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:49445] -0.3105055 4.0076945 11.74874 0.0010657 0.9999456
ENSG00000083838 ZNF446 zinc finger protein 446 [Source:HGNC Symbol;Acc:HGNC:21036] -0.3175324 4.3694778 11.71155 0.0010838 0.9999456
ENSG00000254929 RP11-144G6.12 novel transcript -0.5057131 3.6901989 11.49215 0.0011976 0.9999456
ENSG00000131238 PPT1 palmitoyl-protein thioesterase 1 [Source:HGNC Symbol;Acc:HGNC:9325] 0.3423849 7.3928490 11.40124 0.0012483 0.9999456
ENSG00000178695 KCTD12 potassium channel tetramerization domain containing 12 [Source:HGNC Symbol;Acc:HGNC:14678] 0.3757330 7.3336015 11.14757 0.0014020 0.9999456
ENSG00000184508 HDDC3 HD domain containing 3 [Source:HGNC Symbol;Acc:HGNC:30522] -0.4789895 2.4408352 11.06803 0.0014541 0.9999456
ENSG00000149269 PAK1 p21 (RAC1) activated kinase 1 [Source:HGNC Symbol;Acc:HGNC:8590] 0.3132222 6.5699951 11.06715 0.0014547 0.9999456
ENSG00000213468 FIRRE firre intergenic repeating RNA element [Source:HGNC Symbol;Acc:HGNC:49627] -0.7422046 1.6776547 11.00778 0.0014950 0.9999456
ENSG00000268093 AC022154.7 novel transcript, antisense to SPHK2 -0.5212837 0.9940586 10.96250 0.0015264 0.9999456
ENSG00000105514 RAB3D RAB3D, member RAS oncogene family [Source:HGNC Symbol;Acc:HGNC:9779] 0.5043009 6.1775479 10.74900 0.0016963 0.9999456

Apply GSEA

Rank genes by -log10(PValue)*sign(logFC). Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function.

set.seed(23)
ranks <- degtab
for(i in names(degtab)){
  ranks[[i]] <- degtab[[i]]$table %>% 
  mutate(rankmetric = -log10(.$PValue)*sign(.$logFC)) %>%
  dplyr::select(GeneSymbol,rankmetric) %>% 
  na.omit() %>% 
  distinct() %>% 
  group_by(GeneSymbol) %>%  
  summarize(rankmetric = mean(rankmetric)) %>%
  arrange(desc(rankmetric)) %>%
  deframe()
}
#Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEAtab <- ranks
for(i in names(ranks)){
  GSEAtab[[i]] <- NamedGeneRankList2GseaTable(rankedgenes = ranks[[i]],
                                         geneset = "all",
                                         output_directory = tempdir(),
                                         filename_prefix = "GSEA",
                                         sampleSize = 101,
                                         minSize = 20,
                                         maxSize = Inf,
                                         scoreType = "std") %>%
    as_tibble() %>%
    arrange(desc(NES)) %>% 
    dplyr::select(module_type, pathway, ES, NES, size, leadingEdge, pval, padj) %>% 
    mutate(leadingEdge = gsub("^c\\(|\\)$", "", leadingEdge)) %>%
    mutate(leadingEdge = gsub('"', "", leadingEdge)) %>%
    arrange(padj)
}
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"

Visualize GSEA data as bubble plot for 1.8x10^6 PfSPZ dose group.

Filter based on padj < 0.05.

plotDat <- GSEAtab %>%
  bind_rows(., .id = "dosegroup") %>%
  mutate(dosegroup = factor(dosegroup, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))) %>%
  filter(padj < 0.05) %>%
  filter(module_type %in% c("highBTMs", "lowBTMs", "MonacoModules", "BloodGen3Module", "MSigDB_Hallmark_v7.4"))  %>%
  filter(!grepl("TBA", pathway)) %>%
  dplyr::select(dosegroup, module_type, pathway, leadingEdge, size, NES, padj) %>%
  mutate(neglogpadj = -log10(padj)) %>%
  mutate(pathway = gsub("gd", "γδ", pathway)) %>%
  mutate(pathway = gsub("Vd2", "Vδ2", pathway)) %>%
  mutate(pathway = gsub("Vg", "Vγ", pathway)) %>%
  mutate(pathway = gsub("HALLMARK_", "", pathway)) %>%
  mutate(pathway = gsub("_", " ", pathway)) %>%
  mutate(pathway = sub(".*?\\_", "", pathway)) %>%
  group_by(module_type, dosegroup) %>%
  #mutate(pathway = fct_reorder(pathway, NES, .desc = TRUE)) %>%
  ungroup() %>%
  mutate(dosegroup = fct_rev(dosegroup)) %>%
  filter(!grepl("TBD", pathway)) %>%
  mutate(module_type = factor(module_type, levels = c("highBTMs", "MonacoModules", "lowBTMs", "MSigDB_Hallmark_v7.4", "BloodGen3Module"))) %>%
  arrange(desc(neglogpadj)) %>% 
  droplevels()

#plotting options
basetextsize <- 8  
myfont <- "sans"
bubble_max_size <- 8

myPlot <- plotDat %>%
  filter(dosegroup == "1.8 x 10^6 PfSPZ") %>%
  filter(module_type %in% c("highBTMs", "MonacoModules"))  %>%
  ggplot(., aes(x = dosegroup, y = factor(pathway, 
       levels = rev(levels(factor(pathway)))))) +
  geom_point(aes(size=neglogpadj, fill = NES), alpha = 0.65, shape=21, stroke = 0.25) +
  scale_size_area(name = expression(-log[10]~adj.~p~value), max_size = bubble_max_size, breaks = c(1.3, 5, 10, 20, 40, 80), limits = c(1,90)) +
      scale_fill_gradient2(low = "blue",
                           mid = "white",
                           high = "red") +
  hrbrthemes::theme_ipsum_es(base_family = myfont, base_size = basetextsize) +
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x.bottom = element_blank(),
        strip.background = element_blank(),
        legend.position = "left",
        legend.direction = "vertical",
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
facet_wrap(~module_type, scales = "free_y", nrow = 2, strip.position = "right")

Plot GSEA bubbleplots (Figure 3E of revised manuscript)

GSEA using genes ranked by direction and significance of differential gene expression (DGE) at baseline between infants immunized with 1.8x106 PfSPZ who subsequently developed either a high or low CSP IgG response post-vaccination. Only modules with a Benjamini-Hochberg-adjusted p<0.05 are shown. NES = normalized enrichment score.

Red: enriched in protected/uninfected through 3 months post-vax surveillance Blue: enriched in not protected/infected through 3 months post-vax surveillance

addSmallLegend <- function(myPlot, pointSize = 4, textSize = 6, spaceLegend = 0.6) {
    myPlot +
        guides(shape = guide_legend(override.aes = list(size = pointSize)),
               color = guide_legend(override.aes = list(size = pointSize))) +
        theme(legend.title = element_text(size = textSize), 
              legend.text  = element_text(size = textSize),
              legend.key.size = unit(spaceLegend, "lines"))
}
print(addSmallLegend(myPlot))