The following code generates the unsupervised hierarchical clustering heatmap for the KSPZV1 dataset using the ComplexHeatmap package. Clustering analysis is performed to test for statistical differences between Sample Clusters (SC) for outcomes as well as relevant variables (specifically SC1 and SC2-4). Gene Clusters (GC) are removed.
This script reproduces Figures 1C, S2, 1D, 1E in the revised manuscript. It includes the following changes:
For revision, address Reviewer’s concerns that generation of clusters in heatmap is random by generating heatmap using random seed to show that the clustering is deterministic in our case.
library(tidyverse)
library(ComplexHeatmap)
library(miscTools)
library(edgeR)
library(Biobase)
library(googledrive)
library(data.table)
library(ggpubr)
library(fgsea)
library(devtools)
library(Cairo)
Final options for Timepoint 0 (baseline): allgroups filter pre-seqmonk < 7.5M MADS = 100% (no filter, take all genes) clustering: pam and euclidean, 4 clusters for columns, 5 clusters for genes
myBatch <- "both" # "Aug2019" "Nov2019"
myGroup <- "allGroups" #"allGroups" #Placebo #4.5 x 10^5 PfSPZ #9.0 x 10^5 PfSPZ #1.8 x 10^6 PfSPZ
myTimepoint <- 0 #"delta" #0 25 #delta
ClusterCols <- TRUE
myFilter <- "preSEQMonk" #filter to remove based on library sizes prior to SEQMonk correction
CPMFILTER <- 7.5e6 #filter any samples with mapped library sizes less than 7.5 million reads
myCPMFILTER <- gsub("00000","e5", paste0(myFilter, CPMFILTER, "cpm"))
col.hclust.method <- "pam"
col.dist.method <- "euclidean"
row.hclust.method <- "pam"
row.dist.method <- "euclidean"
PCT <- 100
filter.type <- paste("mads",PCT,"pct",sep="")
noColClust <- 4
noRowClust <- 5
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1togaBlNIxiDj16FyXTC-r7Qw0A9cG2az"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
#CBC data
#local file "CBC baseline with days before VAX1.xlsx"
temp <- tempfile(fileext = ".xlsx")
dl <- drive_download(
as_id("1df0pHJAlFteRIcQuJql3U2IZFe5SpZdB"), path = temp, overwrite = TRUE)
CBCdat <- readxl::read_excel(dl$local_path) %>%
mutate(DaysPriorVax1 = as.integer(gsub(" \\(Dose 1\\)", "", .$`Days Post Vaccination`))*-1,
Hemoglobin = as.numeric(Hemoglobin)) %>%
dplyr::select(PATID, DaysPriorVax1, Hemoglobin, Platelets, WBC, Neutrophils, Percent.Neutrophils)
pData(x) <- pData(x) %>%
left_join(., CBCdat, by = "PATID")
## weight data
#loca file "KSPZV1_anthro_2021-04-12.csv""
temp <- tempfile(fileext = ".csv")
dl <- drive_download(
as_id("1hvjTuBKWIYKmDrOdiogGqcHPJUUPZM4P"), path = temp, overwrite = TRUE)
WeightDat <- read_csv(dl$local_path) %>%
dplyr::select(PATID, zwei, zwfl, zbmi) #for definitions: https://cran.r-project.org/web/packages/anthro/anthro.pdf
pData(x) <- pData(x) %>%
left_join(., WeightDat, by = "PATID")
rownames(pData(x)) <- pData(x)$SAMPLEID
all.pDat <- pData(x)
For details on how to make annotations, refer to: https://jokergoo.github.io/ComplexHeatmap-reference/book/
annot.df <- as.data.frame(cbind(
"Site" = as.character(x$site),
"Weight-for-age" = as.character(x$zwei),
"Gender" = as.character(factor(x$SEX, levels = c("M","F"), labels = c("male","female"))),
"Age" = as.character(x$age.vax1),
"Pf at first vax" = as.character(factor(x$mal.vax.1, levels = c(0,1), labels = c("neg","pos"))),
"CSP IgG pre-vax" = as.character(log10(x$pfcsp_pre+1)),
"CSP IgG response (log2 fold change)" = as.character(x$log2FC_CSPAb),
"Outcome" = as.character(factor(x$mal.atp.3, levels = c(0,1), labels = c("uninfected (protected)","infected (not protected)"))),
"Dose" = as.character(x$treat)))
annot.df <- rev(annot.df)
for(i in c(1:ncol(annot.df))){
annot.df[,i] <- as.character(annot.df[,i])
}
annot.df$`Weight-for-age` <- as.numeric(annot.df$`Weight-for-age`)
annot.df$Age <- as.numeric(annot.df$Age)
annot.df$`CSP IgG response (log2 fold change)` <- as.numeric(annot.df$`CSP IgG response (log2 fold change)`)
annot.df$`CSP IgG pre-vax` <- as.numeric(annot.df$`CSP IgG pre-vax`)
rownames(annot.df) <- colnames(x)
clab <- list(
"Site" = c("Wagai" = "#B2DF8A", "Siaya" = "#33A02C"),
"Weight-for-age" = circlize::colorRamp2(c(-max(abs(all.pDat$zwei), na.rm = TRUE), 0, max(abs(all.pDat$zwei), na.rm = TRUE)), c("blue", "white", "red")),
"Gender" = c("male" = "#E5E5E5", "female" = "#191919"),
"Age" = circlize::colorRamp2(c(min(all.pDat$age.vax1, na.rm=T), max(all.pDat$age.vax1, na.rm=T)), c("#E5E5E5", "#333333")),
"Pf at first vax" = c("neg" = "#D1D3D3", "pos" = "#b2182b"),
"CSP IgG pre-vax" = circlize::colorRamp2(c(min(log10(all.pDat$pfcsp_pre+1), na.rm=T), max(log10(all.pDat$pfcsp_pre+1), na.rm=T)), c("#f2f0f7", "#54278f")),
"CSP IgG response (log2 fold change)" = circlize::colorRamp2(c(-max(abs(all.pDat$log2FC_CSPAb), na.rm=T), 0, max(abs(all.pDat$log2FC_CSPAb), na.rm=T)), c("#656566", "white", "#54278f")),
"Outcome" = c("uninfected (protected)" = "#1F78B4", "infected (not protected)" = "#A6CEE3"),
"Dose" = c("Placebo" = "#808080", "4.5 x 10^5 PfSPZ" = "#fdcc8a", "9.0 x 10^5 PfSPZ" = "#fc8d59", "1.8 x 10^6 PfSPZ" = "#d7301f")
)
clab <- rev(clab)
ha <- HeatmapAnnotation(df = annot.df, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
simple_anno_size = unit(0.08, "inches"), #this adjusts the height of the column annotations
annotation_name_side = "left",
annotation_legend_param = list(ncol = 4, by_row = TRUE))
colnames(x) <- colnames(exprs(x))
unscaled_logCPM <- x
ScaleData <- TRUE
if(ScaleData == TRUE){
myScaleData <- "zscored"
exprs(x) <- t(scale(t(exprs(x)), center = TRUE, scale = TRUE)) #scale is generic function whose default method centers and/or scales the columns of a numeric matrix, so you must transpose to scale the genes
myHeatlegend.title <- "z-score"
}else{
myScaleData <- "cpm"
myHeatlegend.title <- "cpm"
}
Note: This chunk takes a while if you run the full gene set.
#https://jokergoo.github.io/ComplexHeatmap-reference/book/a-single-heatmap.html
#See options for variables
#set.seed(12345)
#set parallel backend.
library(parallel)
library(parallelMap)
parallelStartSocket(cpus = detectCores())
#use random seed to access stability of clustering motivated by reviewer comment
myseed <- sample(1:5000,1) #sample random integer
set.seed(myseed) #set random seed
if(col.hclust.method == "pam"){
pa.col <- cluster::pam(t(exprs(x)), k = noColClust, metric = col.dist.method)
col.dist.method <- "euclidean"
}
if(col.hclust.method == "kmeans"){
km.col = amap::Kmeans(t(exprs(x)), centers = noColClust, nstart = 25, method = col.dist.method)$cluster
}
if(col.hclust.method != "kmeans" & col.hclust.method != "pam"){
hclust.col <- cutree(stats::hclust(dist(t(exprs(x)), method = col.dist.method), method = col.hclust.method), k = noColClust)
}
#row.hclust.method <- "pam" #pam #kmeans
if(row.hclust.method == "pam"){
pa.row <- cluster::pam(exprs(x), k = noRowClust, metric = row.dist.method)
}
if(row.hclust.method == "kmeans"){
km.row = amap::Kmeans(exprs(x), centers = noRowClust, nstart = 25, method = row.dist.method)$cluster
}
if(row.hclust.method != "kmeans" & row.hclust.method != "pam"){
hclust.row <- cutree(stats::hclust(dist(exprs(x), method = row.dist.method), method = row.hclust.method), k = noRowClust)
}
For manuscript, used dummy levels for ordering by treat correctly AFTER pam cluster. For columns, used pam k=4 clusters for pre-vax For rows, used pam k=5 cluster for pre-vax
hm <- draw(hm, heatmap_legend_side="top", annotation_legend_side="right",
legend_grouping = "original")
see https://github.com/jokergoo/ComplexHeatmap/issues/136
Note: may have to run this chunk directly in console if you get the
following error: Error: The width or height of the raster image is zero,
maybe you forget to turn off the previous graphic device or it was
corrupted. Run dev.off() to close it.
c.dend <- column_dend(hm) #Extract col dendrogram
ccl.list <- column_order(hm) #Extract clusters (output is a list)
#lapply(ccl.list, function(x) length(x)) #check/confirm size clusters
# loop to extract samples for each cluster.
for (i in 1:length(column_order(hm))){
if (i == 1) {
clu <- t(t(colnames(exprs(x)[,column_order(hm)[[i]], drop = FALSE]))) #add drop = FALSE argument to prevent the matrix from becoming a vector for single column clusters
out <- cbind(clu, paste("cluster", i, sep=""))
colnames(out) <- c("SampleID", "Cluster")
} else {
clu <- t(t(colnames(exprs(x)[,column_order(hm)[[i]], drop = FALSE])))
clu <- cbind(clu, paste("cluster", i, sep=""))
out <- rbind(out, clu)
}
}
## Merge with pData(x)
if(myGroup == "allGroups"){
if(noColClust == 4){
if(myTimepoint != "delta"){
pData.cluster <- pData(x) %>%
rownames_to_column(var = "SampleID") %>%
left_join(., as_tibble(out), by = "SampleID") %>%
column_to_rownames(var = "SampleID") %>%
mutate(Cluster = gsub("cluster", "SC", .$Cluster)) %>%
mutate(Cluster.pam4.cl16 = Cluster) %>%
mutate(Cluster = gsub("5", "1", .$Cluster)) %>%
mutate(Cluster = gsub("9", "1", .$Cluster)) %>%
mutate(Cluster = gsub("13", "1", .$Cluster)) %>%
mutate(Cluster = ifelse(.$Cluster != "SC1", "SC2-4", .$Cluster)) %>% #regroup SC2, SC3, SC4 as single group SC2-4
filter(Cluster == "SC1" | Cluster == "SC2-4")
}
}
}
pData.cluster <- pData.cluster %>%
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC13", "SC1", "TBD")) %>% #clusters 13-16 are SC1-4 in 1.8x10^6 clusters on Figure S2A
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC14", "SC2", Cluster2)) %>%
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC15", "SC3", Cluster2)) %>%
mutate(Cluster2 = ifelse(Cluster.pam4.cl16 == "SC16", "SC4", Cluster2)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC1", "SC1.1", "TBD")) %>% #clusters 1-4 are SC1-4 (Placebo) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC2", "SC2.1", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC3", "SC3.1", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC4", "SC4.1", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC5", "SC1.2", Cluster3)) %>% #clusters 5-8 are SC1-4 (4.5x10^5) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC6", "SC2.2", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC7", "SC3.2", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC8", "SC4.2", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC9", "SC1.3", Cluster3)) %>% #clusters 9-12 are SC1-4 (9.0x10^5) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC10", "SC2.3", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC11", "SC3.3", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC12", "SC4.3", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC13", "SC1.4", Cluster3)) %>% #clusters 13-16 are SC1-4 (1.8x10^6) on Figure S2A
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC14", "SC2.4", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC15", "SC3.4", Cluster3)) %>%
mutate(Cluster3 = ifelse(Cluster.pam4.cl16 == "SC16", "SC4.4", Cluster3)) %>%
mutate(Cluster3 = factor(Cluster3,
levels = c("SC1.1", "SC2.1", "SC3.1", "SC4.1",
"SC1.2", "SC2.2", "SC3.2", "SC4.2",
"SC1.3", "SC2.3", "SC3.3", "SC4.3",
"SC1.4", "SC2.4", "SC3.4", "SC4.4"))) %>%
droplevels()
#sanity checks
table(pData.cluster$Cluster2,pData.cluster$treat)
##
## Placebo 4.5 x 10^5 PfSPZ 9.0 x 10^5 PfSPZ 1.8 x 10^6 PfSPZ
## SC1 0 0 0 23
## SC2 0 0 0 22
## SC3 0 0 0 12
## SC4 0 0 0 6
## TBD 65 58 58 0
table(pData.cluster$Cluster3,pData.cluster$treat)
##
## Placebo 4.5 x 10^5 PfSPZ 9.0 x 10^5 PfSPZ 1.8 x 10^6 PfSPZ
## SC1.1 18 0 0 0
## SC2.1 18 0 0 0
## SC3.1 21 0 0 0
## SC4.1 8 0 0 0
## SC1.2 0 12 0 0
## SC2.2 0 27 0 0
## SC3.2 0 13 0 0
## SC4.2 0 6 0 0
## SC1.3 0 0 14 0
## SC2.3 0 0 18 0
## SC3.3 0 0 17 0
## SC4.3 0 0 9 0
## SC1.4 0 0 0 23
## SC2.4 0 0 0 22
## SC3.4 0 0 0 12
## SC4.4 0 0 0 6
#setdiff(colnames(x), rownames(pData.cluster))
#setdiff(rownames(pData.cluster), colnames(x))
high_dose_SC <- pData.cluster %>%
filter(treat == "1.8 x 10^6 PfSPZ") %>%
mutate(outcome = ifelse(mal.atp.3 == 1, "NP", "P")) %>% #mal.atp.3 == 1 means malaria infected at least once during 3 months (according to protocol)
droplevels()
#original Fisher's exact test between SC1 and SC2-4
my_tab <- table(high_dose_SC$outcome, high_dose_SC$Cluster)
my_tab
##
## SC1 SC2-4
## NP 4 21
## P 19 19
fisher.test(my_tab)
##
## Fisher's Exact Test for Count Data
##
## data: my_tab
## p-value = 0.007686
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.04093891 0.73404658
## sample estimates:
## odds ratio
## 0.1955618
#reviewer suggested chi-square between SC1, SC2, SC3, SC4
my_tab <- table(high_dose_SC$outcome, high_dose_SC$Cluster2)
my_tab
##
## SC1 SC2 SC3 SC4
## NP 4 12 5 4
## P 19 10 7 2
chisq.test(my_tab)
##
## Pearson's Chi-squared test
##
## data: my_tab
## X-squared = 8.6502, df = 3, p-value = 0.03432
Results remain significant.
#perform chi-square for P vs NP distribution across ALL 16 clusters
#not requested by reviewer but did just for fun
foo <- pData.cluster %>%
mutate(outcome = ifelse(mal.atp.3 == 1, "NP", "P")) %>% #mal.atp.3 == 1 means malaria infected at least once during 3 months (according to protocol)
droplevels()
my_tab <- table(foo$outcome, foo$Cluster3)
my_tab
##
## SC1.1 SC2.1 SC3.1 SC4.1 SC1.2 SC2.2 SC3.2 SC4.2 SC1.3 SC2.3 SC3.3 SC4.3
## NP 10 12 15 2 7 15 5 2 7 12 9 1
## P 8 6 6 6 5 12 8 4 7 6 8 8
##
## SC1.4 SC2.4 SC3.4 SC4.4
## NP 4 12 5 4
## P 19 10 7 2
chisq.test(my_tab)
##
## Pearson's Chi-squared test
##
## data: my_tab
## X-squared = 28.573, df = 15, p-value = 0.01825
Results remain significant.
library(tidyverse)
basefont <- "sans"
basefontsize <- 14
pvalfontsize <- 6
symnum.args <- list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", ""))
pval_label <- "p.format"
adjust_p <- "adjp"
if(myGroup == "allGroups"){
#OUTCOME
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Outcome" = pData.cluster.temp$mal.atp.3))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat),
levels = levels(pData.cluster$treat)),
label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)), x = 1.5, y = 25,
Outcome = c("not protected (infected)", "protected (uninfected)"))
Outcome.counts <- pData.cluster %>%
mutate(Cluster = factor(Cluster)) %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(Cluster) | is.na(Outcome))) %>%
ggplot(., aes(x=Cluster, fill = Outcome)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Outcome (count)") +
facet_wrap(.~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.background = element_rect(fill="white"),
legend.position="none"
) +
geom_text(
data = dat_text,
mapping = aes(x = x, y = y, label = label, size = basefontsize))
tte_dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(tte.mal.atp.6) | is.na(Outcome)))
tte_dat_stats <- tte_dat %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(tte.mal.atp.6~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
tte_dat_boxplot <- tte_dat %>%
ggplot(., aes(Cluster, tte.mal.atp.6, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Days to first parasitemia") +
ylim(c(0,180)) +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.background = element_rect(fill="white"),
legend.position="none"
)
#CSP Ab logFC
csp_LFC_dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(log2FC_CSPAb) | is.na(Outcome)))
csp_LFC_dat_stats <- csp_LFC_dat %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(log2FC_CSPAb~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
csp_LFC_dat_boxplot <- csp_LFC_dat %>%
ggplot(., aes(x = Cluster, y = log2FC_CSPAb, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("CSP-specific IgG (log fold change)") +
ylim(c(-12.5,25)) +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
csp_baseline_dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(pfcsp_pre) | is.na(Outcome)))
csp_baseline_dat_stats <- csp_baseline_dat %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(pfcsp_pre~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
csp_baseline_dat_boxplot <- csp_baseline_dat %>%
ggplot(., aes(Cluster, log10(pfcsp_pre+1), fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("CSP-specific IgG at pre-vax") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
pData.cluster <- pData.cluster %>%
mutate(Cluster = factor(Cluster)) %>%
mutate(Pf.any.vax = factor(ifelse(mal.dvax.tot == 0, 0, 1), levels = c(1,0), labels = c("pos", "neg")))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Pf during vax period" = pData.cluster.temp$Pf.any.vax))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat), levels = levels(pData.cluster$treat)), label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)),
x = 1.5, y = 25, Pf.any.vax = c("pos", "neg"))
Pf.any.vax <- pData.cluster %>%
dplyr::filter(!(is.na(Pf.any.vax) | is.na(Cluster))) %>%
ggplot(., aes(x=Cluster, fill = Pf.any.vax)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#b2182b", "#D1D3D3")) +
facet_wrap(~treat, ncol = 4) +
ylab("Pf during vax period (counts)") +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
) +
geom_text(
data = dat_text,
mapping = aes(x = x, y = y, label = label, size = basefontsize))
mal_dvax_tot <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(mal.dvax.tot) | is.na(Outcome)))
mal_dvax_tot_stats <- mal_dvax_tot %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(mal.dvax.tot~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
mal_dvax_tot_plot <- mal_dvax_tot %>%
ggplot(., aes(Cluster, mal.dvax.tot, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("# Pf episodes during vax") +
ylim(c(0,6.5)) +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
pData.cluster <- pData.cluster %>%
mutate(Cluster = factor(Cluster)) %>%
mutate(Pf = factor(mal.vax.1, levels = c(1,0), labels = c("pos", "neg")))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Pf at Vax1" = pData.cluster.temp$Pf))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat), levels = levels(pData.cluster$treat)), label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)),
x = 1.5, y = 45,
Pf = c("pos","neg"))
Pf.counts <- pData.cluster %>%
dplyr::filter(!(is.na(Cluster) | is.na(Pf))) %>%
ggplot(., aes(x=Cluster, fill = Pf)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#b2182b", "#D1D3D3")) +
ylab("Pf at first vax (counts)") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
) +
geom_text(data = dat_text,mapping = aes(x = x, y = y, label = label, size = basefontsize))
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
pData.cluster <- pData.cluster %>%
mutate(Site = factor(site))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Site" = pData.cluster.temp$Site))$p.value,2)
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat), levels = levels(pData.cluster$treat)), label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)),
x = 1.5, y = 25,
Site = c("Siaya","Wagai"))
Site.counts <- pData.cluster %>%
dplyr::filter(!(is.na(Cluster) | is.na(Site))) %>%
ggplot(., aes(x= Cluster, fill = Site)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#33A02C","#B2DF8A")) +
ylab("Site (counts)") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
) +
geom_text(data = dat_text,mapping = aes(x = x, y = y, label = label, size = basefontsize))
age_dat <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(Outcome) | is.na(age.vax1)))
age_dat_stats <- age_dat %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(age.vax1~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
age_dat_boxplot <- age_dat %>%
ggplot(., aes(Cluster, age.vax1, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Age at first vax (months)") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
foo1 <- c(1,2,3,4)
names(foo1) <- names(summary(pData.cluster$treat))
pData.cluster <- pData.cluster %>%
mutate(Gender = factor(SEX))
for(i in names(summary(pData.cluster$treat))){
pData.cluster.temp <- pData.cluster %>%
filter(treat == i)
foo1[i] <- signif(fisher.test(table("Cluster" = pData.cluster.temp$Cluster, "Gender" = pData.cluster.temp$Gender))$p.value,2)
if(pval_label == "p.format" & adjust_p == "adjp"){
foo1 <- p.adjust(foo1, method = "BH")
}
if(pval_label == "p.signif" ){
foo1[i] <- ifelse(foo1[i] >= 0.05, "",
ifelse(foo1[i] >= 0.01 & foo1[i] < 0.05, "*",
ifelse(foo1[i] >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
}
}
dat_text <- data.frame(treat = factor(levels(pData.cluster$treat), levels = levels(pData.cluster$treat)), label = foo1, Cluster = levels(factor(pData.cluster.temp$Cluster)),
x = 1.5, y = 27.5,
Gender = c("F","M"))
Gender.counts <- pData.cluster %>%
dplyr::filter(!(is.na(Cluster) | is.na(Gender))) %>%
ggplot(., aes(x=Cluster, fill = Gender)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#191919", "#E5E5E5")) +
ylab("Gender (counts)") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank(),
axis.title.x = element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
) +
geom_text(data = dat_text,mapping = aes(x = x, y = y, label = label, size = basefontsize))
DaysPreVax <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(Cluster) | is.na(DaysPriorVax1)))
DaysPreVax_stats <- DaysPreVax %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(DaysPriorVax1~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
DaysPreVax_plot <- DaysPreVax %>%
ggplot(., aes(Cluster, DaysPriorVax1, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Days before first dose") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
Weight4Age <- pData.cluster %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)"))) %>%
dplyr::filter(!(is.na(Outcome) | is.na(zwei)))
Weight4Age_stats <- Weight4Age %>%
group_by(treat, Cluster) %>%
do(w = wilcox.test(zwei~Outcome, data=., paired=FALSE, exact = FALSE)) %>%
summarize(treat, Cluster, Wilcox_pval = w$p.value) %>%
mutate(padj = p.adjust(Wilcox_pval, method= "BH"))
Weight4Age_plot <- Weight4Age %>%
ggplot(., aes(Cluster, zwei, fill = Outcome, color = Outcome)) +
geom_point(position = position_jitterdodge()) +
geom_boxplot(alpha = 0.4, color = "grey30", outlier.shape = NA) +
stat_compare_means(aes(group = Outcome), label = pval_label, method = "wilcox.test", label.x.npc = "center", vjust = 1, symnum.args = symnum.args) +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
scale_color_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("weight-for-age z-score") +
facet_wrap(~treat, ncol = 4) +
theme_classic(base_family = basefont, base_size = basefontsize) +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.text.x = element_blank(),
strip.background = element_blank(),
legend.position="none"
)
}
print(ggarrange(
Outcome.counts,
tte_dat_boxplot,
csp_baseline_dat_boxplot,
csp_LFC_dat_boxplot,
Weight4Age_plot,
age_dat_boxplot,
Gender.counts,
Pf.counts,
Site.counts,
labels = "AUTO",
ncol = 2,
nrow = 5)
)
sc1_subset <- pData.cluster %>%
filter(Cluster == "SC1") %>%
filter(treat == "Placebo" | treat == "1.8 x 10^6 PfSPZ") %>%
droplevels() %>%
mutate(treat = factor(treat)) %>%
mutate(Outcome = factor(mal.atp.3, levels = c(1,0), labels = c("not protected (infected)", "protected (uninfected)")))
#OUTCOME SC1 Placebo vs SC1 1.8x10^6 PfSPZ
foo <- signif(fisher.test(table("Dosegroup" = sc1_subset$treat, "Outcome" = sc1_subset$Outcome))$p.value,2)
foo_text <- ifelse(foo >= 0.05, "",
ifelse(foo >= 0.01 & foo < 0.05, "*",
ifelse(foo >= 0.001 & foo1[i] < 0.01, "**",
ifelse(foo1[i] >= 0.0001 & foo1[i] < 0.001, "***", "****"))))
dat_text <- data.frame(label = foo_text, x = 1.5, y = 9, Outcome = c("not protected (infected)", "protected (uninfected)"))
Outcome.counts <- sc1_subset %>%
ggplot(., aes(x=treat, fill = Outcome)) +
geom_bar(position = "dodge") +
scale_fill_manual(values = c("#A6CEE3", "#1F78B4")) +
ylab("Outcome (count)") +
theme_classic(base_family = "Arial", base_size = 10) +
theme(axis.text.x=element_text(angle = 45, vjust = 0, hjust = 0.25),
axis.ticks.x=element_blank(),
axis.title.x=element_blank(),
strip.background = element_rect(fill="white"),
legend.position="none"
) +
geom_text(data = dat_text,
mapping = aes(x = x, y = y, label = label))