library(knitr)
library(tidyverse)
library(googledrive)
library(ggplot2)
library(ggpubr)
library(ggrepel)
library(fgsea)
library(data.table)
library(Biobase)
#set parallel backend.
library(parallel)
library(parallelMap)
parallelStartSocket(cpus = detectCores())
Plot correlations between adaptive features (both baseline and post-vax) and time to first Pf infection through 6 months of surveillance post-vaccination.
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1O0hONM1be04wxcTZ2qUyn1tIBo1qw0CH"), path = temp, overwrite = TRUE)
FACS_TTE_longdat <- readRDS(file = dl$local_path)
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1P21cG9h4KJyZiJtExIMTXcxVQwfWkMxQ"), path = temp, overwrite = TRUE)
all_data_btm_for_tte6_correlations <- readRDS(file = dl$local_path)
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1P4t1gpEhGt2dYx13jofcTjyAgx0I3dic"), path = temp, overwrite = TRUE)
three_month_outcome <- readRDS(file = dl$local_path)
pheno_dat <- all_data_btm_for_tte6_correlations %>%
full_join(., three_month_outcome, by = "PATID") %>%
dplyr::select(PATID, treat, site, outcome, mal.atp.3, tte.mal.atp.6, mal.vax.1)
CSP_Ab_dat <- all_data_btm_for_tte6_correlations %>%
dplyr::select(PATID, pfcsp_pre, pfcsp_post, log2FC_CSPAb) %>%
dplyr::rename(CSPspecific_IgG_baseline = "pfcsp_pre",
CSPspecific_IgG_postvax = "pfcsp_post",
CSPspecific_IgG_delta = "log2FC_CSPAb") %>%
pivot_longer(., cols = contains("CSP"), names_to = "feature", values_to = "value") %>%
mutate(timepoint = gsub(".*IgG\\_", "", feature)) %>%
mutate(feature = sub("_[^_]+$", "", feature)) %>%
dplyr::select(PATID, feature, timepoint, value)
FACS_dat <- all_data_btm_for_tte6_correlations %>%
dplyr::select(PATID, contains("FACS")) %>%
pivot_longer(., cols = contains("FACS"), names_to = "feature", values_to = "value") %>%
mutate(timepoint = gsub("(.*_\\s*(.*$))", "\\2", feature)) %>%
mutate(feature = sub("_[^_]+$", "", feature)) %>%
mutate(timepoint = ifelse(timepoint == 0, "baseline", "postvax")) %>%
pivot_wider(names_from = timepoint, values_from = value) %>%
mutate(delta = log2((postvax+1e-06)/(baseline+1e-06))) %>%
pivot_longer(., cols = c(baseline, postvax, delta), names_to = "timepoint", values_to = "value")
all_cor_dat <- pheno_dat %>%
full_join(., bind_rows(CSP_Ab_dat, FACS_dat),
by = "PATID")
followuptime <- 6
myCorMeth <- "spearman"
summarized_correlations <- all_cor_dat %>%
group_by(feature, timepoint) %>%
summarise(spearman_r = stats::cor.test(tte.mal.atp.6, value, method = myCorMeth)$estimate,
spearman_p = stats::cor.test(tte.mal.atp.6, value, method = myCorMeth)$p.val) %>%
ungroup() %>%
group_by(timepoint) %>%
mutate(spearman_fdr = p.adjust(spearman_p, method = "BH")) %>%
ungroup()
## `summarise()` has grouped output by 'feature'. You can override using the
## `.groups` argument.
corr_plot_dat <- summarized_correlations %>%
mutate(feature = gsub("FACS_", "", feature)) %>% #clean up names
mutate(feature = gsub("CSPspecific", "CSP-specific", feature)) %>%
mutate(feature = gsub("_of_live_leukocytes","", feature)) %>%
mutate(feature = gsub("_", " ", feature)) %>%
mutate(neglogpval = -log10(spearman_p)) %>%
mutate(neglogfdr = -log10(spearman_fdr)) %>%
mutate(is_significant = ifelse(spearman_fdr <0.05, "significant", "ns")) %>%
mutate(label = ifelse(spearman_p < 0.05, feature, "")) %>%
mutate(timepoint = factor(timepoint, levels = c("baseline", "postvax", "delta"), labels = c("baseline","2 weeks post-vax","log2 fold-change"))) %>%
mutate(color = ifelse(timepoint=="baseline", "blue", "tbd")) %>%
mutate(color = ifelse(timepoint=="2 weeks post-vax", "red", color)) %>%
mutate(color = ifelse(timepoint=="log2 fold-change", "purple", color)) %>%
mutate(color = ifelse(is_significant=="ns", "gray", color)) %>%
mutate(timepoint2 = ifelse(is_significant=="ns", "not significant", as.character(timepoint))) %>%
mutate(timepoint2 = factor(timepoint2, levels = c("baseline", "2 weeks post-vax", "log2 fold-change","not significant"), labels = c("baseline","2 weeks post-vax","log2 fold-change", "not significant")))
cols <- c("baseline" = "blue", "2 weeks post-vax" = "red", "log2 fold-change" = "orange")
cols2 <- c("baseline" = "blue", "2 weeks post-vax" = "red", "log2 fold-change" = "orange", "not significant" = "gray")
myPlot <- ggplot(corr_plot_dat, aes(x = spearman_r, y = neglogfdr, fill = timepoint2, color = timepoint, label = label)) +
geom_point(shape = 21, alpha = 0.75, size = 2) +
geom_hline(yintercept = 1.3, linetype = "dotted", color = "red", alpha = 0.7) +
xlim(c(-0.4,0.4)) +
ggrepel::geom_text_repel(aes(label=label),hjust=0, vjust=0, color = "black", force = 5, force_pull = 10, size = 3.25, segment.size = 0.2) +
scale_color_manual(values = cols, aesthetics = "colour") +
scale_fill_manual(values = cols2, aesthetics = "fill") +
theme_bw(base_family = "sans", base_size = 14) +
ylab("-log10(FDR)") +
xlab("Spearman's rho") +
theme(legend.position = "top", legend.box="vertical", legend.margin=margin()) +
guides(colour = guide_legend(nrow = 1),
fill = guide_legend(nrow = 1)) +
theme(legend.title=element_blank(),
axis.text = element_text(colour = "black"),
axis.title = element_text(color = "black"))
Volcano plot of CSP-specific IgG and flow cytometry features at each timepoint or calculated as fold-change post-vaccination over baseline. Red dashed line indicates P = 0.05.
Table of significant correlations
summarized_correlations_select <- summarized_correlations %>%
dplyr::select(feature, timepoint, spearman_r, spearman_p, spearman_fdr) %>%
filter(spearman_fdr<0.05) %>%
arrange(desc(abs(spearman_r)))
summarized_correlations_select %>%
knitr::kable()
| feature | timepoint | spearman_r | spearman_p | spearman_fdr |
|---|---|---|---|---|
| FACS_CSP-specific_memory_B_cells_of_live_leukocytes | postvax | 0.2402278 | 0.0022150 | 0.0454077 |
| CSPspecific_IgG | postvax | 0.2256228 | 0.0003821 | 0.0156657 |
Correlation between CSP-specific memory B cells and time-to-first parasitemia up to 6 months (n=183) by presence (not protected, NP) or absence (protected, P) of parasitemia through 3 months of follow up
#local path: "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/Tuan PfSPZ/KenyaPfSPZ/Final Data Files/KSPZV1 PhenoData TTE 6 months Long Format.rds"
myCorMeth <- "spearman"
wrap_length <- 25
CSP_MBC_postvax_cordat <- all_cor_dat %>%
filter(feature == "FACS_CSP-specific_memory_B_cells_of_live_leukocytes" & timepoint == "postvax") %>%
filter(!is.na(tte.mal.atp.6)) %>%
filter(!is.na(value))
CSP_IgG_postvax_cordat <- all_cor_dat %>%
filter(feature == "CSPspecific_IgG" & timepoint == "postvax") %>%
filter(!is.na(tte.mal.atp.6)) %>%
filter(!is.na(value))
CSP_MBC_TTE_plot <- ggscatter(CSP_MBC_postvax_cordat, x = "tte.mal.atp.6", y = "value",
conf.int = TRUE , add = "reg.line", size = 1,
cor.coef.size = 1.5,
add.params = list(color = "black", fill = "lightgray", alpha = 0.4, size = 1),
fill = "outcome", color = "outcome") +
stat_cor(method = myCorMeth, r.accuracy = 0.001) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
xlab("days to first parasitemia") +
ylab(str_wrap("CSP-specific memory B cells of live PBMCs (post-vax)",wrap_length)) +
theme_bw(base_family = "sans", base_size = 9) +
theme(legend.position="top") +
theme(legend.title=element_blank(),
axis.text = element_text(colour = "black"),
axis.title = element_text(color = "black"))
CSP_IgG_TTE_plot <- CSP_IgG_postvax_cordat %>%
mutate(log10_CSP = log10(value+1)) %>%
ggscatter(., x = "tte.mal.atp.6", y = "log10_CSP",
conf.int = TRUE , add = "reg.line", size = 2,
cor.coef.size = 1.5,
add.params = list(color = "black", fill = "lightgray", alpha = 0.4, size = 1),
fill = "outcome", color = "outcome") +
stat_cor(method = myCorMeth, r.accuracy = 0.001) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
xlab("days to first parasitemia") +
ylab(str_wrap("log10 CSP-specific IgG (post-vax)",wrap_length)) +
theme_bw(base_family = "sans", base_size = 9) +
theme(legend.position="top") +
theme(legend.title=element_blank(),
axis.text = element_text(colour = "black"),
axis.title = element_text(color = "black"))
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1togaBlNIxiDj16FyXTC-r7Qw0A9cG2az"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
myCorMeth <- "spearman"
csp.memoryB.dat <- CSP_MBC_postvax_cordat %>%
dplyr::select(PATID, treat, outcome, feature, timepoint, value) %>%
dplyr::filter(feature == "FACS_CSP-specific_memory_B_cells_of_live_leukocytes" & timepoint == "postvax")
cpm.dat <- exprs(x) %>%
t() %>%
as.data.frame() %>%
rownames_to_column(var = "SAMPLEID") %>%
dplyr::filter(grepl("_0", SAMPLEID)) %>%
mutate(PATID = gsub("_0", "", SAMPLEID)) %>%
dplyr::select(PATID, everything()) %>%
dplyr::select(-c(SAMPLEID)) %>%
pivot_longer(cols = 2:ncol(.), names_to = "EnsemblID", values_to = "CPM") %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol),
by = "EnsemblID")
cpm.dat.csp.mbc <- csp.memoryB.dat %>%
left_join(., cpm.dat,
by = "PATID")
summarized_cpm_feature <- cpm.dat.csp.mbc %>%
dplyr::select(PATID, treat, outcome, feature, GeneSymbol, EnsemblID, CPM, value) %>%
drop_na(CPM, value) %>%
arrange(PATID, feature, GeneSymbol, EnsemblID) %>%
group_by(GeneSymbol, EnsemblID) %>%
summarise(baselineCPM_spearman_r = stats::cor.test(CPM, value, method = myCorMeth)$estimate,
baselineCPM_spearman_p = stats::cor.test(CPM, value, method = myCorMeth)$p.value,
n = n()) %>%
ungroup() %>%
mutate(baselineCPM_spearman_fdr = p.adjust(baselineCPM_spearman_p, method = "BH")) %>%
mutate(rankMetric_CSP_MBC = sign(baselineCPM_spearman_r)*-(log10(baselineCPM_spearman_p)))
ranklist <- summarized_cpm_feature %>%
dplyr::select(GeneSymbol, rankMetric_CSP_MBC) %>%
na.omit() %>%
distinct() %>%
group_by(GeneSymbol) %>%
dplyr::summarize(rankMetric_CSP_MBC = mean(rankMetric_CSP_MBC)) %>%
arrange(desc(rankMetric_CSP_MBC)) %>%
deframe()
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEA_CPM_ranked_CSP_MBC_all <- NamedGeneRankList2GseaTable(rankedgenes = ranklist, geneset = "all", output_directory = tempdir(), filename_prefix =
"GSEA_baseline_CPM_CSP_MBC_all_n150", minSize = 20, fixed_seed = TRUE)
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
Gene set enrichment analysis (GSEA) using genes ranked by magnitude and significance of correlation between baseline expression and % CSP-specific memory B cells at 2-weeks post-vaccination. Only showing “MonacoModules” and “lowBTMs” with FDR<1%.
## Warning: Vectorized input to `element_text()` is not officially supported.
## ℹ Results may be unexpected or may change in future versions of ggplot2.
addSmallLegend <- function(myPlot, pointSize = 6, textSize = 6, spaceLegend = 1) {
myPlot +
guides(shape = guide_legend(override.aes = list(size = pointSize)),
color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
print(addSmallLegend(myGSEAClusterPlot))