library(knitr)
library(tidyverse)
library(googledrive)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(fgsea)
library(data.table)
library(Biobase)
#set parallel backend.  
library(parallel)
library(parallelMap)
parallelStartSocket(cpus = detectCores())

Objective

Plot correlations between adaptive features (both baseline and post-vax) and time to first Pf infection through 6 months of surveillance post-vaccination.

Load data

#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1O0hONM1be04wxcTZ2qUyn1tIBo1qw0CH"), path = temp, overwrite = TRUE)
FACS_TTE_longdat <- readRDS(file = dl$local_path)
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1P21cG9h4KJyZiJtExIMTXcxVQwfWkMxQ"), path = temp, overwrite = TRUE)
all_data_btm_for_tte6_correlations <- readRDS(file = dl$local_path)

temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1P4t1gpEhGt2dYx13jofcTjyAgx0I3dic"), path = temp, overwrite = TRUE)
three_month_outcome <- readRDS(file = dl$local_path)

pheno_dat <- all_data_btm_for_tte6_correlations  %>%
  full_join(., three_month_outcome, by = "PATID") %>%
  dplyr::select(PATID, treat, site, outcome, mal.atp.3, tte.mal.atp.6, mal.vax.1)
                
CSP_Ab_dat <- all_data_btm_for_tte6_correlations %>%
  dplyr::select(PATID, pfcsp_pre, pfcsp_post, log2FC_CSPAb) %>%
  dplyr::rename(CSPspecific_IgG_baseline = "pfcsp_pre",
                CSPspecific_IgG_postvax = "pfcsp_post",
                CSPspecific_IgG_delta = "log2FC_CSPAb") %>%
  pivot_longer(., cols = contains("CSP"), names_to = "feature", values_to = "value") %>%
  mutate(timepoint = gsub(".*IgG\\_", "", feature)) %>%
  mutate(feature = sub("_[^_]+$", "", feature)) %>%
  dplyr::select(PATID, feature, timepoint, value)

FACS_dat <- all_data_btm_for_tte6_correlations %>%
  dplyr::select(PATID, contains("FACS")) %>%
  pivot_longer(., cols = contains("FACS"), names_to = "feature", values_to = "value") %>%
  mutate(timepoint = gsub("(.*_\\s*(.*$))", "\\2", feature))  %>%
  mutate(feature = sub("_[^_]+$", "", feature)) %>%
  mutate(timepoint = ifelse(timepoint == 0, "baseline", "postvax")) %>%
  pivot_wider(names_from = timepoint, values_from = value) %>%
  mutate(delta = log2((postvax+1e-06)/(baseline+1e-06))) %>%
  pivot_longer(., cols = c(baseline, postvax, delta), names_to = "timepoint", values_to = "value")

all_cor_dat <- pheno_dat %>%
  full_join(., bind_rows(CSP_Ab_dat, FACS_dat),
            by = "PATID")
followuptime <- 6
myCorMeth <- "spearman"

summarized_correlations <- all_cor_dat %>%
  group_by(feature, timepoint) %>%
  summarise(spearman_r = stats::cor.test(tte.mal.atp.6, value, method = myCorMeth)$estimate,
            spearman_p = stats::cor.test(tte.mal.atp.6, value, method = myCorMeth)$p.val) %>%
  ungroup() %>%
  group_by(timepoint) %>%
  mutate(spearman_fdr = p.adjust(spearman_p, method = "BH")) %>%
  ungroup() 
## `summarise()` has grouped output by 'feature'. You can override using the
## `.groups` argument.
corr_plot_dat <- summarized_correlations %>%
  mutate(feature = gsub("FACS_", "", feature)) %>% #clean up names
  mutate(feature = gsub("CSPspecific", "CSP-specific", feature)) %>%
  mutate(feature = gsub("_of_live_leukocytes","", feature)) %>%
  mutate(feature = gsub("_", " ", feature)) %>%
  mutate(neglogpval = -log10(spearman_p)) %>%
  mutate(neglogfdr = -log10(spearman_fdr)) %>%
  mutate(is_significant = ifelse(spearman_fdr <0.05, "significant", "ns")) %>%
  mutate(label = ifelse(spearman_p < 0.05, feature, "")) %>%
  mutate(timepoint = factor(timepoint, levels = c("baseline", "postvax", "delta"), labels = c("baseline","2 weeks post-vax","log2 fold-change"))) %>%
  mutate(color = ifelse(timepoint=="baseline", "blue", "tbd")) %>%
  mutate(color = ifelse(timepoint=="2 weeks post-vax", "red", color)) %>%
  mutate(color = ifelse(timepoint=="log2 fold-change", "purple", color)) %>%
  mutate(color = ifelse(is_significant=="ns", "gray", color)) %>%
  mutate(timepoint2 = ifelse(is_significant=="ns", "not significant", as.character(timepoint))) %>%
  mutate(timepoint2 = factor(timepoint2, levels = c("baseline", "2 weeks post-vax", "log2 fold-change","not significant"), labels = c("baseline","2 weeks post-vax","log2 fold-change", "not significant"))) 

Correlation between CSP IgG reactivity and flow cytometry features with time-to-first parasitemia up to 6 months (Fig 3A of revised manuscript)

cols <- c("baseline" = "blue", "2 weeks post-vax" = "red", "log2 fold-change" = "orange")
cols2 <- c("baseline" = "blue", "2 weeks post-vax" = "red", "log2 fold-change" = "orange", "not significant" = "gray")
myPlot <- ggplot(corr_plot_dat, aes(x = spearman_r, y = neglogfdr, fill = timepoint2, color = timepoint, label = label)) +
  geom_point(shape = 21, alpha = 0.75, size = 2) +
  geom_hline(yintercept = 1.3, linetype = "dotted", color = "red", alpha = 0.7) +
  xlim(c(-0.4,0.4)) +
  ggrepel::geom_text_repel(aes(label=label),hjust=0, vjust=0, color = "black", force = 5, force_pull = 10, size = 3.25, segment.size = 0.2) +
  scale_color_manual(values = cols, aesthetics = "colour") +
  scale_fill_manual(values = cols2, aesthetics = "fill") +
  theme_bw(base_family = "sans", base_size = 14) +
  ylab("-log10(FDR)") +
  xlab("Spearman's rho") +
  theme(legend.position = "top", legend.box="vertical", legend.margin=margin()) +
  guides(colour = guide_legend(nrow = 1),
         fill = guide_legend(nrow = 1)) +
  theme(legend.title=element_blank(),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(color = "black"))

Volcano plot of CSP-specific IgG and flow cytometry features at each timepoint or calculated as fold-change post-vaccination over baseline. Red dashed line indicates P = 0.05.

Table of significant correlations

summarized_correlations_select <- summarized_correlations %>%
  dplyr::select(feature, timepoint, spearman_r, spearman_p, spearman_fdr) %>%
  filter(spearman_fdr<0.05) %>%
  arrange(desc(abs(spearman_r)))

summarized_correlations_select %>%
  knitr::kable()
feature timepoint spearman_r spearman_p spearman_fdr
FACS_CSP-specific_memory_B_cells_of_live_leukocytes postvax 0.2402278 0.0022150 0.0454077
CSPspecific_IgG postvax 0.2256228 0.0003821 0.0156657

Show correlation between CSP-specific memory B cells and Vδ2 γδ T cells with time-to-first parasitemia up to 6 months as scatter plot (Fig 3B)

Correlation between CSP-specific memory B cells and time-to-first parasitemia up to 6 months (n=183) by presence (not protected, NP) or absence (protected, P) of parasitemia through 3 months of follow up

#local path: "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/Tuan PfSPZ/KenyaPfSPZ/Final Data Files/KSPZV1 PhenoData TTE 6 months Long Format.rds"
myCorMeth <- "spearman"
wrap_length <- 25

CSP_MBC_postvax_cordat <- all_cor_dat %>%
  filter(feature == "FACS_CSP-specific_memory_B_cells_of_live_leukocytes" & timepoint == "postvax") %>%
  filter(!is.na(tte.mal.atp.6)) %>%
  filter(!is.na(value))

CSP_IgG_postvax_cordat <- all_cor_dat %>%
  filter(feature == "CSPspecific_IgG" & timepoint == "postvax") %>%
  filter(!is.na(tte.mal.atp.6)) %>%
  filter(!is.na(value))

CSP_MBC_TTE_plot <- ggscatter(CSP_MBC_postvax_cordat, x = "tte.mal.atp.6", y = "value",
            conf.int = TRUE , add = "reg.line", size = 1,
            cor.coef.size = 1.5,
            add.params = list(color = "black", fill = "lightgray", alpha = 0.4, size = 1),
            fill = "outcome", color = "outcome") +
  stat_cor(method = myCorMeth,  r.accuracy = 0.001) +
  scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
  scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
  xlab("days to first parasitemia") +
  ylab(str_wrap("CSP-specific memory B cells of live PBMCs (post-vax)",wrap_length)) +
  theme_bw(base_family = "sans", base_size = 9) +
  theme(legend.position="top")  +
  theme(legend.title=element_blank(),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(color = "black"))

CSP_IgG_TTE_plot <- CSP_IgG_postvax_cordat %>%
  mutate(log10_CSP = log10(value+1)) %>%
  ggscatter(., x = "tte.mal.atp.6", y = "log10_CSP",
            conf.int = TRUE , add = "reg.line", size = 2,
            cor.coef.size = 1.5,
            add.params = list(color = "black", fill = "lightgray", alpha = 0.4, size = 1),
            fill = "outcome", color = "outcome") +
  stat_cor(method = myCorMeth,  r.accuracy = 0.001) +
  scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
  scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
  xlab("days to first parasitemia") +
  ylab(str_wrap("log10 CSP-specific IgG (post-vax)",wrap_length)) +
  theme_bw(base_family = "sans", base_size = 9) +
  theme(legend.position="top")  +
  theme(legend.title=element_blank(),
        axis.text = element_text(colour = "black"),
        axis.title = element_text(color = "black"))

Correlate baseline gene expression with CSP-specific memory B cells

temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1togaBlNIxiDj16FyXTC-r7Qw0A9cG2az"), path = temp, overwrite = TRUE)
x  <- readRDS(file = dl$local_path)
myCorMeth <- "spearman"

csp.memoryB.dat <- CSP_MBC_postvax_cordat %>%
  dplyr::select(PATID, treat, outcome, feature, timepoint, value) %>%
  dplyr::filter(feature == "FACS_CSP-specific_memory_B_cells_of_live_leukocytes" & timepoint == "postvax")

cpm.dat <- exprs(x) %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column(var = "SAMPLEID") %>%
  dplyr::filter(grepl("_0", SAMPLEID)) %>%
  mutate(PATID = gsub("_0", "", SAMPLEID)) %>%
  dplyr::select(PATID, everything()) %>%
  dplyr::select(-c(SAMPLEID)) %>%
  pivot_longer(cols = 2:ncol(.), names_to = "EnsemblID", values_to = "CPM") %>%
  left_join(., fData(x) %>%
               dplyr::select(EnsemblID, GeneSymbol),
             by = "EnsemblID")

cpm.dat.csp.mbc <- csp.memoryB.dat %>%
  left_join(., cpm.dat,
            by = "PATID")

summarized_cpm_feature <- cpm.dat.csp.mbc %>%
  dplyr::select(PATID, treat, outcome, feature, GeneSymbol, EnsemblID, CPM, value) %>%
  drop_na(CPM, value) %>%
  arrange(PATID, feature, GeneSymbol, EnsemblID) %>%
  group_by(GeneSymbol, EnsemblID) %>%
  summarise(baselineCPM_spearman_r = stats::cor.test(CPM, value, method = myCorMeth)$estimate,
            baselineCPM_spearman_p = stats::cor.test(CPM, value, method = myCorMeth)$p.value,
            n = n()) %>%
  ungroup() %>%
  mutate(baselineCPM_spearman_fdr = p.adjust(baselineCPM_spearman_p, method = "BH")) %>%
  mutate(rankMetric_CSP_MBC = sign(baselineCPM_spearman_r)*-(log10(baselineCPM_spearman_p)))

Apply GSEA to all

ranklist <- summarized_cpm_feature %>%
  dplyr::select(GeneSymbol, rankMetric_CSP_MBC) %>%
  na.omit() %>% 
  distinct() %>% 
  group_by(GeneSymbol) %>% 
  dplyr::summarize(rankMetric_CSP_MBC = mean(rankMetric_CSP_MBC)) %>%
  arrange(desc(rankMetric_CSP_MBC)) %>%
  deframe()

devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEA_CPM_ranked_CSP_MBC_all <- NamedGeneRankList2GseaTable(rankedgenes = ranklist, geneset = "all", output_directory = tempdir(), filename_prefix =
                                                 "GSEA_baseline_CPM_CSP_MBC_all_n150", minSize = 20, fixed_seed = TRUE)
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"

Plot GSEA results as bar plot (Fig 3C)

Gene set enrichment analysis (GSEA) using genes ranked by magnitude and significance of correlation between baseline expression and % CSP-specific memory B cells at 2-weeks post-vaccination. Only showing “MonacoModules” and “lowBTMs” with FDR<1%.

## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.
addSmallLegend <- function(myPlot, pointSize = 6, textSize = 6, spaceLegend = 1) {
    myPlot +
        guides(shape = guide_legend(override.aes = list(size = pointSize)),
               color = guide_legend(override.aes = list(size = pointSize))) +
        theme(legend.title = element_text(size = textSize), 
              legend.text  = element_text(size = textSize),
              legend.key.size = unit(spaceLegend, "lines"))
}
print(addSmallLegend(myGSEAClusterPlot))