The following code generates the unsupervised hierarchical clustering heatmap for the KSPZV1 dataset using the ComplexHeatmap package. Clustering analysis is performed to test for statistical differences between Sample Clusters (SC) for outcomes as well as relevant variables (specifically SC2 and SC1,3,4). In addition, row clustering generates Gene Clusters (GC), and GSEA using blood transcription modules is then applied to genes within GC4 ranked by variance.
Final options for delta: allgroups filter pre-seqmonk < 7.5M MADS = 25% clustering: ward.D2 and euclidean, 4 clusters for columns, 5 clusters for genes
This script reproduces Figures 4B, S5A, 4D in the revised manuscript. It includes the following changes:
For revision, address Reviewer’s concerns that generation of clusters in heatmap is random by generating heatmap using random seed to show that the clustering is deterministic. (Note for the delta heatmap, Ward D2, a hierarchical agglomerative method, was used.)
library(ComplexHeatmap)
library(miscTools)
library(edgeR)
library(Biobase)
library(tidyverse)
library(googledrive)
library(data.table)
library(ggpubr)
library(fgsea)
library(devtools)
library(knitr)
figdir <- "/Volumes/GoogleDrive/My Drive/Manuscripts/KSPZV1 Manuscript/Med Revised Submission 2022/Figures/Revised Figure Panels/"
myBatch <- "both"
myGroup <- "allGroups"
myTimepoint <- "delta"
ClusterCols <- TRUE
col.hclust.method <- "ward.D2"
col.dist.method <- "euclidean"
row.hclust.method <- "ward.D2"
row.dist.method <- "euclidean"
PCT <- 25
filter.type <- paste("mads",PCT,"pct",sep="")
noColClust <- 4
noRowClust <- 5
#local path: "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/Tuan PfSPZ/KenyaPfSPZ/Final Data Files/KSPZV1 logCPM expression sets for visualization/PfSPZ_cpm_ExpressionSet_230x23768_allGroups_bothBatches_delta_rmBatchFX_06082021_TMT.rds"
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("17xx0YaggLiyWdJc9rqA1nkPSE7dj05p0"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
#CBC data
#local path: "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/Tuan PfSPZ/KenyaPfSPZ/CBC baseline with days before VAX1.xlsx"
temp <- tempfile(fileext = ".xlsx")
dl <- drive_download(
as_id("1df0pHJAlFteRIcQuJql3U2IZFe5SpZdB"), path = temp, overwrite = TRUE)
CBCdat <- readxl::read_excel(dl$local_path) %>%
mutate(DaysPriorVax1 = as.integer(gsub(" \\(Dose 1\\)", "", .$`Days Post Vaccination`))*-1,
Hemoglobin = as.numeric(Hemoglobin)) %>%
dplyr::select(PATID, DaysPriorVax1, Hemoglobin, Platelets, WBC, Neutrophils, Percent.Neutrophils)
pData(x) <- pData(x) %>%
left_join(., CBCdat, by = "PATID")
## weight data
#local path: "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/CDC PfSPZ Trial Results/KSPZV1_anthro_2021-04-12.csv"
temp <- tempfile(fileext = ".csv")
dl <- drive_download(
as_id("1hvjTuBKWIYKmDrOdiogGqcHPJUUPZM4P"), path = temp, overwrite = TRUE)
WeightDat <- read_csv(dl$local_path) %>%
dplyr::select(PATID, zwei, zwfl, zbmi) #for definitions: https://cran.r-project.org/web/packages/anthro/anthro.pdf
pData(x) <- pData(x) %>%
left_join(., WeightDat, by = "PATID")
#add pre-vax cluster information
#local path: "/Volumes/GoogleDrive/My Drive/Tran Lab Shared/Projects/Doris Duke PfSPZ Kenya/Tuan PfSPZ/KenyaPfSPZ/Final Data Files/PfSPZ_pData_cluster_preSEQMonk75e5cpm_mads100pct_pam_euclidean_Clust_4_Cols_zscored_0_allGroups_244x24197.csv"
temp <- tempfile(fileext = ".csv")
dl <- drive_download(
as_id("1LtD4PeZZ34dCnghGBikY9wTs9CtsebKo"), path = temp, overwrite = TRUE)
pData(x) <- read_csv(dl$local_path) %>%
dplyr::select(PATID, Cluster) %>%
dplyr::rename(PrevaxCluster = Cluster) %>%
left_join(pData(x), ., by = "PATID")
rownames(pData(x)) <- pData(x)$PATID
all.pDat <- pData(x)
## Filtering features
#for delta, MADS 25% was used
dim(x)
## Features Samples
## 23768 230
if(PCT < 100){
madsno <- as.integer(nrow(x)*(PCT/100))
mads <- apply(exprs(x), 1, mad) #mad filtering
x <- x[mads>sort(mads, decr=TRUE)[madsno],]
}
dim(x)
## Features Samples
## 5941 230
For details on how to make annotations, refer to: https://jokergoo.github.io/ComplexHeatmap-reference/book/
#full
annot.df <- as.data.frame(cbind(
"Site" = as.character(x$site),
"Gender" = as.character(factor(x$SEX, levels = c("M","F"), labels = c("male","female"))),
"Age" = as.character(x$age.vax1),
"Pf at first vax" = as.character(factor(x$mal.vax.1, levels = c(0,1), labels = c("neg","pos"))),
"# Pf episodes during vax" = as.character(x$mal.dvax.tot),
"CSP IgG pre-vax" = as.character(log10(x$pfcsp_pre+1)),
"CSP IgG response (log2 fold change)" = as.character(x$log2FC_CSPAb),
"Days to first parasitemia post-vax" = as.character(x$tte.mal.atp.6),
"Outcome" = as.character(factor(x$mal.atp.3, levels = c(0,1), labels = c("protected","unprotected"))),
"Dose" = as.character(x$treat)
))
annot.df <- rev(annot.df)
for(i in c(1:ncol(annot.df))){
annot.df[,i] <- as.character(annot.df[,i])
}
annot.df$Age <- as.numeric(annot.df$Age)
annot.df$`Days to first parasitemia post-vax` <- as.numeric(annot.df$`Days to first parasitemia post-vax`)
annot.df$`# Pf episodes during vax` <- as.numeric(annot.df$`# Pf episodes during vax`)
annot.df$`CSP IgG response (log2 fold change)` <- as.numeric(annot.df$`CSP IgG response (log2 fold change)`)
annot.df$`CSP IgG pre-vax` <- as.numeric(annot.df$`CSP IgG pre-vax`)
rownames(annot.df) <- colnames(x)
clab <- list(
"Site" = c("Wagai" = "#B2DF8A", "Siaya" = "#33A02C"),
"Gender" = c("male" = "#E5E5E5", "female" = "#191919"),
"Age" = circlize::colorRamp2(c(min(all.pDat$age.vax1, na.rm=T), max(all.pDat$age.vax1, na.rm=T)), c("#E5E5E5", "#333333")),
"Pf at first vax" = c("neg" = "#D1D3D3", "pos" = "#b2182b"),
"# Pf episodes during vax" = circlize::colorRamp2(c(min(all.pDat$mal.dvax.tot, na.rm=T), max(all.pDat$mal.dvax.tot, na.rm=T)),
c("#fee5d9", "#a50f15")),
"CSP IgG pre-vax" = circlize::colorRamp2(c(min(log10(all.pDat$pfcsp_pre+1), na.rm=T), max(log10(all.pDat$pfcsp_pre+1), na.rm=T)), c("#f2f0f7", "#54278f")),
"CSP IgG response (log2 fold change)" = circlize::colorRamp2(c(-max(abs(all.pDat$log2FC_CSPAb), na.rm=T), 0, max(abs(all.pDat$log2FC_CSPAb), na.rm=T)), c("#656566", "white", "#54278f")),
"Days to first parasitemia post-vax" = circlize::colorRamp2(c(min(all.pDat$tte.mal.atp.6, na.rm=T), max(all.pDat$tte.mal.atp.6, na.rm=T)), c("#d4defc", "#112663")),
"Outcome" = c("protected" = "#1F78B4", "unprotected" = "#A6CEE3"),
"Dose" = c("Placebo" = "#808080", "4.5 x 10^5 PfSPZ" = "#fdcc8a", "9.0 x 10^5 PfSPZ" = "#fc8d59", "1.8 x 10^6 PfSPZ" = "#d7301f")
)
clab <- rev(clab)
ha <- HeatmapAnnotation(df = annot.df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
simple_anno_size = unit(0.08, "inches"), #this adjusts the height of the column annotations
annotation_name_side = "left")
colnames(x) <- colnames(exprs(x))
unscaled_logCPM <- x
ScaleData <- FALSE
if(myTimepoint == "delta"){
myScaleData <- "logFC"
myHeatlegend.title <- "log2 fold-change over pre-vax"
}
This chunk takes a while if you run the full gene set so be patient.
#https://jokergoo.github.io/ComplexHeatmap-reference/book/a-single-heatmap.html
#See options for variables
#set parallel backend.
library(parallel)
library(parallelMap)
parallelStartSocket(cpus = detectCores())
#use random seed to access stability of clustering motivated by reviewer comment
myseed <- sample(1:5000,1) #sample random integer
set.seed(myseed) #set random seed
hclust.col <- cutree(stats::hclust(dist(t(exprs(x)), method = col.dist.method), method = col.hclust.method), k = noColClust)
hclust.row <- cutree(stats::hclust(dist(exprs(x), method = row.dist.method), method = row.hclust.method), k = noRowClust)
x$treat <- droplevels(x$treat)
x$dummytreatlevels <- factor(x$treat, levels = c("Placebo",
"4.5 x 10^5 PfSPZ",
"9.0 x 10^5 PfSPZ",
"1.8 x 10^6 PfSPZ"),
labels = c(1,2,3,4))
hm <- Heatmap(exprs(x),
name = "mat",
column_title = "subjects",
column_title_side = "bottom",
show_row_dend = FALSE,
row_title = paste(filter.type, nrow(x), "genes"),
row_title_side = "left",
row_names_side = "left",
row_names_gp = gpar(fontsize = 8),
row_names_max_width = unit(9, "cm"),
cluster_columns = TRUE,
cluster_rows = TRUE,
column_split = paste(x$dummytreatlevels, hclust.col, sep = "_"),
row_split = paste("gene cluster ",hclust.row, sep = "_"),
show_row_names = FALSE,
top_annotation = ha,
heatmap_legend_param = list(title = myHeatlegend.title, color_bar = "continuous", legend_direction = "horizontal"),
show_column_names=FALSE,
cluster_column_slices = FALSE,
cluster_row_slices = FALSE,
height = unit(3, "inches"),
width = unit(10, "inches")
)
see https://github.com/jokergoo/ComplexHeatmap/issues/136
Note: may have to run this chunk directly in console if you get the
following error: Error: The width or height of the raster image is zero,
maybe you forget to turn off the previous graphic device or it was
corrupted. Run dev.off() to close it.
hm <- draw(hm)
c.dend <- column_dend(hm) #Extract col dendrogram
ccl.list <- column_order(hm) #Extract clusters (output is a list)
#lapply(ccl.list, function(x) length(x)) #check/confirm size clusters
#loop to extract samples for each cluster.
for (i in 1:length(column_order(hm))){
if (i == 1) {
clu <- t(t(colnames(exprs(x)[,column_order(hm)[[i]], drop = FALSE]))) #one cluster had a single column, so have to add drop = FALSE argument to prevent the matrix from becoming a vector
out <- cbind(clu, paste("cluster", i, sep=""))
colnames(out) <- c("SampleID", "Cluster")
} else {
clu <- t(t(colnames(exprs(x)[,column_order(hm)[[i]], drop = FALSE])))
clu <- cbind(clu, paste("cluster", i, sep=""))
out <- rbind(out, clu)
}
}
## Merge with pData(x)
pData.cluster <- pData(x) %>%
rownames_to_column(var = "SampleID") %>%
left_join(., as_tibble(out), by = "SampleID") %>%
column_to_rownames(var = "SampleID") %>%
mutate(Cluster = gsub("cluster", "SC", .$Cluster)) %>%
mutate(Cluster.pam4.cl16 = Cluster) %>%
mutate(Cluster = gsub("6", "2", .$Cluster)) %>%
mutate(Cluster = gsub("10", "2", .$Cluster)) %>%
mutate(Cluster = gsub("14", "2", .$Cluster)) %>%
mutate(Cluster = ifelse(.$Cluster != "SC2", "SC1,3,4", .$Cluster)) %>%
filter(Cluster == "SC2" | Cluster == "SC1,3,4")
pData.cluster <- pData.cluster %>%
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC13", "SC1", "TBD")) %>% #clusters 13-16 are SC1-4 in 1.8x10^6 clusters on Figure S5A
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC14", "SC2", Cluster2)) %>%
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC15", "SC3", Cluster2)) %>%
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC16", "SC4", Cluster2)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC1", "SC1.1", "TBD")) %>% #clusters 1-4 are SC1-4 (Placebo) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC2", "SC2.1", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC3", "SC3.1", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC4", "SC4.1", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC5", "SC1.2", Cluster3)) %>% #clusters 5-8 are SC1-4 (4.5x10^5) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC6", "SC2.2", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC7", "SC3.2", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC8", "SC4.2", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC9", "SC1.3", Cluster3)) %>% #clusters 9-12 are SC1-4 (9.0x10^5) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC10", "SC2.3", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC11", "SC3.3", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC12", "SC4.3", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC13", "SC1.4", Cluster3)) %>% #clusters 13-16 are SC1-4 (1.8x10^6) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC14", "SC2.4", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC15", "SC3.4", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC16", "SC4.4", Cluster3)) %>%
mutate(Cluster3 = factor(Cluster3,
levels = c("SC1.1", "SC2.1", "SC3.1", "SC4.1",
"SC1.2", "SC2.2", "SC3.2", "SC4.2",
"SC1.3", "SC2.3", "SC3.3", "SC4.3",
"SC1.4", "SC2.4", "SC3.4", "SC4.4"))) %>%
droplevels()
#sanity checks
table(pData.cluster$Cluster2,pData.cluster$treat)
##
## Placebo 4.5 x 10^5 PfSPZ 9.0 x 10^5 PfSPZ 1.8 x 10^6 PfSPZ
## SC1 0 0 0 28
## SC2 0 0 0 13
## SC3 0 0 0 17
## SC4 0 0 0 3
## TBD 61 55 53 0
table(pData.cluster$Cluster3,pData.cluster$treat)
##
## Placebo 4.5 x 10^5 PfSPZ 9.0 x 10^5 PfSPZ 1.8 x 10^6 PfSPZ
## SC1.1 35 0 0 0
## SC2.1 10 0 0 0
## SC3.1 14 0 0 0
## SC4.1 2 0 0 0
## SC1.2 0 36 0 0
## SC2.2 0 1 0 0
## SC3.2 0 12 0 0
## SC4.2 0 6 0 0
## SC1.3 0 0 25 0
## SC2.3 0 0 7 0
## SC3.3 0 0 17 0
## SC4.3 0 0 4 0
## SC1.4 0 0 0 28
## SC2.4 0 0 0 13
## SC3.4 0 0 0 17
## SC4.4 0 0 0 3
#setdiff(colnames(x), rownames(pData.cluster))
#setdiff(rownames(pData.cluster), colnames(x))
#sanity checks
table(pData.cluster$Cluster,pData.cluster$treat)
##
## Placebo 4.5 x 10^5 PfSPZ 9.0 x 10^5 PfSPZ 1.8 x 10^6 PfSPZ
## SC1,3,4 51 54 46 48
## SC2 10 1 7 13
table(pData.cluster$Cluster.pam4.cl16,pData.cluster$treat)
##
## Placebo 4.5 x 10^5 PfSPZ 9.0 x 10^5 PfSPZ 1.8 x 10^6 PfSPZ
## SC1 35 0 0 0
## SC10 0 0 7 0
## SC11 0 0 17 0
## SC12 0 0 4 0
## SC13 0 0 0 28
## SC14 0 0 0 13
## SC15 0 0 0 17
## SC16 0 0 0 3
## SC2 10 0 0 0
## SC3 14 0 0 0
## SC4 2 0 0 0
## SC5 0 36 0 0
## SC6 0 1 0 0
## SC7 0 12 0 0
## SC8 0 6 0 0
## SC9 0 0 25 0
placebo_highdose_SC2 <- pData.cluster %>%
filter(treat == "1.8 x 10^6 PfSPZ" | treat == "Placebo") %>%
filter(Cluster == "SC2") %>%
mutate(Cluster3 = gsub(".1","_placebo", Cluster3)) %>%
mutate(Cluster3 = gsub(".4","_highdose_PfSPZ", Cluster3)) %>%
mutate(Cluster3 = factor(Cluster3, levels = c("SC2_placebo", "SC2_highdose_PfSPZ"))) %>%
mutate(outcome = ifelse(mal.atp.3 == 1, "NP", "P")) %>% #mal.atp.3 == 1 means malaria infected at least once during 3 months (according to protocol)
droplevels()
#original Fisher's exact test between SC2 and SC1,2,4
my_tab <- table(placebo_highdose_SC2$outcome, placebo_highdose_SC2$Cluster3)
my_tab
##
## SC2_placebo SC2_highdose_PfSPZ
## NP 7 2
## P 3 11
fisher.test(my_tab)
##
## Fisher's Exact Test for Count Data
##
## data: my_tab
## p-value = 0.01306
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.27226 167.87678
## sample estimates:
## odds ratio
## 11.11222
#reviewer suggested chi-square between SC1, SC2, SC3, SC4 and placebo vs high-dose
placebo_highdose_allSCs <- pData.cluster %>%
filter(treat == "1.8 x 10^6 PfSPZ" | treat == "Placebo") %>%
mutate(Cluster3 = gsub("\\.1","_placebo", Cluster3)) %>%
mutate(Cluster3 = gsub("\\.4","_highdose_PfSPZ", Cluster3)) %>%
mutate(Cluster3 = factor(Cluster3, levels = c("SC1_placebo", "SC2_placebo", "SC3_placebo", "SC4_placebo",
"SC1_highdose_PfSPZ", "SC2_highdose_PfSPZ", "SC3_highdose_PfSPZ", "SC4_highdose_PfSPZ"))) %>%
mutate(outcome = ifelse(mal.atp.3 == 1, "NP", "P")) %>% #mal.atp.3 == 1 means malaria infected at least once during 3 months (according to protocol)
droplevels()
my_tab <- table(placebo_highdose_allSCs$outcome, placebo_highdose_allSCs$Cluster3)
my_tab
##
## SC1_placebo SC2_placebo SC3_placebo SC4_placebo SC1_highdose_PfSPZ
## NP 16 7 11 2 13
## P 19 3 3 0 15
##
## SC2_highdose_PfSPZ SC3_highdose_PfSPZ SC4_highdose_PfSPZ
## NP 2 8 1
## P 11 9 2
chisq.test(my_tab)
##
## Pearson's Chi-squared test
##
## data: my_tab
## X-squared = 15.166, df = 7, p-value = 0.03393
Results remain significant.
#perform chi-square for P vs NP distribution across ALL 16 clusters
#not requested by reviewer but did just for fun
foo <- pData.cluster %>%
mutate(outcome = ifelse(mal.atp.3 == 1, "NP", "P")) %>% #mal.atp.3 == 1 means malaria infected at least once during 3 months (according to protocol)
droplevels()
my_tab <- table(foo$outcome, foo$Cluster3)
my_tab
##
## SC1.1 SC2.1 SC3.1 SC4.1 SC1.2 SC2.2 SC3.2 SC4.2 SC1.3 SC2.3 SC3.3 SC4.3
## NP 16 7 11 2 20 1 7 0 10 2 13 0
## P 19 3 3 0 16 0 5 6 15 5 4 4
##
## SC1.4 SC2.4 SC3.4 SC4.4
## NP 13 2 8 1
## P 15 11 9 2
chisq.test(my_tab)
##
## Pearson's Chi-squared test
##
## data: my_tab
## X-squared = 33.963, df = 15, p-value = 0.003446
Results are significant.
library(tidyverse)
basefont <- "sans"
basefontsize <- 9
pvalfontsize <- 7
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("****", "***", "**", "*", "trend",""))
pval_label <- "p.format"
adjust_p <- "adjp"
#OUTCOME
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Outcome" = pData.cluster.temp$mal.atp.3))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat),
levels = levels(pData.cluster$treat)),
label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)), x = 1.5, y = 25,
Outcome = c("not protected (infected)", "protected (uninfected)"))
Outcome.counts <- pData.cluster %>%
mutate(Cluster = factor(Cluster)) %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
ggplot(., aes(x=Cluster, fill = Outcome)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Outcome (count)") +
facet_wrap(.~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.background = element_rect(fill="white"),
legend.position="none"
) +
geom_text(
data = dat_text,
mapping = aes(x = x, y = y, label = label))
#Days to first parasitemia
TTE.boxplot.dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(tte.mal.atp.6) | is.na(Outcome)))
TTE.boxplot.stats <- TTE.boxplot.dat %>%
filter(!(treat == "4.5 x 10^5 PfSPZ" & Cluster == "SC2")) %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(tte.mal.atp.6~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH")) %>%
mutate_if(is.numeric, signif, 2)
TTE.boxplot <- TTE.boxplot.dat %>%
ggplot(., aes(Cluster, tte.mal.atp.6, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label,
method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Days to first parasitemia") +
ylim(c(0,180)) +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.background = element_rect(fill="white"),
legend.position="none"
)
#CSP-specific IgG at pre-vax
CSPbl.boxplot.dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(pfcsp_pre) | is.na(Outcome)))
CSPbl.boxplot.stats <- CSPbl.boxplot.dat %>%
filter(!(treat == "4.5 x 10^5 PfSPZ" & Cluster == "SC2")) %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(pfcsp_pre~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH")) %>%
mutate_if(is.numeric, signif, 2)
CSPbl.boxplot <- CSPbl.boxplot.dat %>%
ggplot(., aes(Cluster, log10(pfcsp_pre+1), fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label,
method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("CSP-specific IgG at pre-vax") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
#CSP-specific IgG log2FC
CSPlogfc.boxplot.dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(log2FC_CSPAb) | is.na(Outcome)))
CSPlogfc.boxplot.stats <- CSPlogfc.boxplot.dat %>%
filter(!(treat == "4.5 x 10^5 PfSPZ" & Cluster == "SC2")) %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(log2FC_CSPAb~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH")) %>%
mutate_if(is.numeric, signif, 2)
CSPlogfc.boxplot <- CSPlogfc.boxplot.dat %>%
ggplot(., aes(x = Cluster, y = log2FC_CSPAb, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label,
method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("CSP-specific IgG (log fold change)") +
ylim(c(-12.5,25)) +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
#Pf at first vax
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
pData.cluster <- pData.cluster %>%
mutate(Cluster = factor(Cluster)) %>%
mutate(Pf = factor(mal.vax.1, levels = c(1,0), labels = c("pos", "neg")))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Pf at Vax1" = pData.cluster.temp$Pf))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat), levels = levels(pData.cluster$treat)), label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)),
x = 1.5, y = 45,
Pf = c("pos","neg"))
Pf.counts <- pData.cluster %>%
ggplot(., aes(x=Cluster, fill = Pf)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#b2182b", "#D1D3D3")) +
ylab("Pf at first vax (counts)") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
) +
geom_text(data = dat_text,mapping = aes(x = x, y = y, label = label))
#Pf episodes during vax period
mal.dvax.tot.boxplot.dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)")))
mal.dvax.tot.boxplot.stats <- mal.dvax.tot.boxplot.dat %>%
filter(!(treat == "4.5 x 10^5 PfSPZ" & Cluster == "SC2")) %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(mal.dvax.tot~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH")) %>%
mutate_if(is.numeric, signif, 2)
mal.dvax.tot.boxplot <- mal.dvax.tot.boxplot.dat %>%
ggplot(., aes(Cluster, mal.dvax.tot, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label,
method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("# Pf episodes during vax") +
ylim(c(0,6.5)) +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
pData.cluster <- pData.cluster %>%
mutate(Site = factor(site))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Site" = pData.cluster.temp$Site))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat), levels = levels(pData.cluster$treat)), label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)),
x = 1.5, y = 25,
Site = c("Siaya","Wagai"))
Site.counts <- pData.cluster %>%
dplyr::filter(!(is.na(Cluster) | is.na(Site))) %>%
ggplot(., aes(x= Cluster, fill = Site)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#33A02C","#B2DF8A")) +
ylab("Site (counts)") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
) +
geom_text(data = dat_text,mapping = aes(x = x, y = y, label = label, size = basefontsize))
Note that the boxplots here show unadjusted p values, whereas the count plots show adjusted p values. Figure S5 in the manuscript shows BH-adjusted p values for all. BH-adjusted p values for the boxplots can be accessed from the objects with the “.stats” suffix.
print(ggarrange(
Outcome.counts,
TTE.boxplot,
CSPbl.boxplot,
CSPlogfc.boxplot,
Pf.counts,
mal.dvax.tot.boxplot,
Site.counts,
ncol = 2,
nrow = 4)
)
pval_label <- "p.format"
adjust_p <- "adjp"
sc2_subset <- pData.cluster %>%
filter(Cluster == "SC2") %>%
filter(treat == "Placebo" | treat == "1.8 x 10^6 PfSPZ") %>%
droplevels() %>%
mutate(treat = factor(treat)) %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)")))
#OUTCOME SC2 Placebo vs SC2 1.8x10^6 PfSPZ
foo <- signif(fisher.test(table("Dosegroup" = sc2_subset$treat, "Outcome" = sc2_subset$Outcome))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo_text <- paste0("p=",p.adjust(foo, method = "BH"))
}
if(pval_label == "p.signif" ){
foo_text <- ifelse(foo >= 0.05, "",
ifelse(foo >= 0.01 & foo < 0.05, "*",
ifelse(foo >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
dat_text <- data.frame(label = foo_text, x = 1.5, y = 9, Outcome = c("not protected (infected)", "protected (uninfected)"))
Outcome.counts <- sc2_subset %>%
ggplot(., aes(x=treat, fill = Outcome)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Outcome (count)") +
theme_classic(base_family = "Arial", base_size = 10) +
theme(#axis.text.x=element_text(angle = 45, vjust = -1, hjust = 0),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.background = element_rect(fill="white"),
legend.position="none"
) +
geom_text(data = dat_text,
mapping = aes(x = x, y = y, label = label))
Outcome.counts