Objective

Perform differential gene expression using limma voom to assess differences between protected and not protected infants in the KSPZV1 malaria vaccine trial. This analysis specifically evaluates differences in post-vax with adjustment for baseline.

This analysis also includes adjustments for:

  1. Batch
  2. Sex
  3. Study site
  4. log10(CSP Ab + 1)
  5. number of malaria infections during vaccination period
library(knitr)
library(tidyverse)
library(limma)
library(edgeR)
library(googledrive)
library(tidyverse)
library(fgsea)
library(data.table)
library(EDASeq)
library(gtools)

Load ExpressionSet

#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("17P7RwDaPCwDXcrD82N4fYd3vGOcueFEH"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))

Set options

myGroups <- unique(x$treat)
## [1] "1.8 x 10^6 PfSPZ"
##        Timepoint
## Outcome  0 25
##       0 38 38
##       1 25 27
##        Dosegroup
## Outcome 1.8 x 10^6 PfSPZ
##       0               76
##       1               52
## Features  Samples 
##    35716      128

Build DGEList Object and make Expression Sets

Build DGEList Object

#Define group by dose, timepoint and outcome #if running for all dose groups
#ygroup <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
ygroup <- setNames(as.list(c(1:1)), c("1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
  ygroup[[i]]  <- DGEList(counts=counts(xgroup[[i]]), genes=fData(xgroup[[i]]), group= factor(paste(xgroup[[i]]$treat, xgroup[[i]]$Timepoint,
                                                                                                    xgroup[[i]]$Outcome, sep = "_")),
                          remove.zeros=T)
  ygroup[[i]]$samples$PATID <- gsub("_.*", "", rownames(ygroup[[i]]$samples))
  }

Filter out low expression features

for(i in names(ygroup)){
  keep <- filterByExpr(ygroup[[i]])
  ygroup[[i]] <- ygroup[[i]][keep, , keep.lib.sizes=FALSE]
  ygroup[[i]]$samples$lib.size <- colSums(ygroup[[i]]$counts)
  ygroup[[i]] <- calcNormFactors(ygroup[[i]])   #Normalization
}

Remove unpaired samples and then arrange by patient and timepoint

This is necessary for paired analyses

for(i in names(ygroup)){
  ygroup[[i]] <- ygroup[[i]][,!is.na(ygroup[[i]]$samples$group)]
  ygroup[[i]] <- ygroup[[i]][,duplicated(ygroup[[i]]$samples$PATID) | duplicated(ygroup[[i]]$samples$PATID, fromLast = TRUE)]
  ygroup[[i]] <- ygroup[[i]][,order(ygroup[[i]]$samples$PATID, ygroup[[i]]$samples$group)]
  print(i)
  print(ifelse(all(gsub('_.*', '', colnames(ygroup[[i]][,grepl("_0", colnames(ygroup[[i]]))])) ==
                     gsub('_.*', '', colnames(ygroup[[i]][,grepl("_25", colnames(ygroup[[i]]))]))) &
                 all(ygroup[[i]]$samples$Timepoint == rep(c(0,25),length(ygroup[[i]]$samples$Timepoint)/2)), 
               "Samples are paired. All is good.","Stop and check order of samples."))
}
## [1] "1.8 x 10^6 PfSPZ"
## [1] "Samples are paired. All is good."

Remake ExpressionSet after sample and feature (gene) filtering steps.

Design Matrix - Compare Protected vs Not Protected

Analysis between Protected and Not Protected within a Dose Group. Subject as random effect using duplicate correlation. Batch, sex, study site, CSP-specific IgG pre-immunization, and malaria during vaccination period as fixed effects. Limma voom can handle this. See following:

https://bioconductor.riken.jp/packages/3.9/bioc/vignettes/variancePartition/inst/doc/dream.html

DeltaDelta <- DeltaNotProtected <- DeltaProtected <- fit <- design <- xgroup
for(i in names(xgroup)){
  Subject <- factor(xgroup[[i]]$PATID)
  Outcome <- factor(xgroup[[i]]$Outcome, levels = c(1,0), labels = c("NotProtected","Protected"))
  Timepoint <- factor(xgroup[[i]]$Timepoint, levels = c(0,25), labels = c("baseline","postvax"))
  Age <- xgroup[[i]]$age.vax1
  Sex <- as.factor(xgroup[[i]]$SEX)
  Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
  Site <- factor(xgroup[[i]]$site)
  MALdVax <- factor(xgroup[[i]]$mal.dvax, levels = c(0,1), labels = c("noMALdVAX","yesMALdVAX"))
  MALdVaxTotal <- as.numeric(ifelse(is.na(xgroup[[i]]$mal.dvax.tot), 0, xgroup[[i]]$mal.dvax.tot))
  CSPAb_baseline <- as.numeric(xgroup[[i]]$pfcsp_pre)
  CSPAb_baseline[is.na(CSPAb_baseline)] <- median(CSPAb_baseline, na.rm = TRUE) #Impute 2 missing values with median of all samples
  #Define protective outcome-specific PfSPZ Vaccination effects and append them to the design matrix
  Protected.Postvax <- Outcome == "Protected" & Timepoint=="postvax"
  NotProtected.Postvax <- Outcome =="NotProtected" & Timepoint=="postvax"
  design[[i]] <- model.matrix(~Batch+Sex+Site+CSPAb_baseline+MALdVaxTotal)
  design[[i]] <- cbind(design[[i]], Protected.Postvax, NotProtected.Postvax) #make design matrix according to edgeR 3.5 (p42)
  #https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
  rownames(design[[i]]) <- xgroup[[i]]$PATID
  print(paste0("running model fit for ", i))
  print(colnames(design[[i]])) #check colnames

  #Use voom() to convert the read counts to log2-cpm, with associated weights, ready for linear modeling
  v <- voom(ygroup[[i]], design[[i]])
  cor <- duplicateCorrelation(v, design[[i]], block = Subject)
  print(cor$consensus)
  v <- voom(ygroup[[i]], design[[i]], plot = TRUE, block = Subject, correlation = cor$consensus)
  cor <- duplicateCorrelation(v, design[[i]], block = Subject)
  print(cor$consensus)
}
## [1] "running model fit for 1.8 x 10^6 PfSPZ"
## [1] "(Intercept)"          "BatchNov2019"         "SexM"                
## [4] "SiteWagai"            "CSPAb_baseline"       "MALdVaxTotal"        
## [7] "Protected.Postvax"    "NotProtected.Postvax"
## [1] -0.01283081

## [1] -0.01282869
for(i in names(xgroup)){
  fit <- lmFit(v, design[[i]], block = Subject, correlation = cor$consensus)
  fit <- eBayes(fit)

  DeltaProtected[[i]] <- topTable(fit,n=Inf, coef="Protected.Postvax")
  DeltaNotProtected[[i]] <-  topTable(fit,n=Inf, coef="NotProtected.Postvax")
  fit2 <- contrasts.fit(fit, contrasts = c(rep(0,(ncol(design[[i]])-2)),1,-1))
  fit2 <- eBayes(fit2)
  DeltaDelta[[i]] <- topTable(fit2,n=Inf)
}

Examine top 15 DEGs in 1.8x10^6 PfSPZ dose group

Δ Protected

GeneSymbol EnsemblID descripton_new logFC AveExpr t P.Value adj.P.Val B
MTMR4 ENSG00000108389 myotubularin related protein 4 [Source:HGNC Symbol;Acc:HGNC:7452] -0.206 5.91 -4.68 7.50e-06 0.0817 3.510
RTF1 ENSG00000137815 RTF1 homolog, Paf1/RNA polymerase II complex component [Source:HGNC Symbol;Acc:HGNC:28996] -0.245 5.65 -4.66 8.30e-06 0.0817 3.420
SLC4A1AP ENSG00000163798 solute carrier family 4 member 1 adaptor protein [Source:HGNC Symbol;Acc:HGNC:13813] -0.240 4.82 -4.55 1.29e-05 0.0844 3.010
ZDHHC14 ENSG00000175048 zinc finger DHHC-type palmitoyltransferase 14 [Source:HGNC Symbol;Acc:HGNC:20341] 0.381 4.03 4.48 1.72e-05 0.0844 2.730
XRN2 ENSG00000088930 5’-3’ exoribonuclease 2 [Source:HGNC Symbol;Acc:HGNC:12836] -0.229 6.66 -4.39 2.49e-05 0.0912 2.400
MASP2 ENSG00000009724 MBL associated serine protease 2 [Source:HGNC Symbol;Acc:HGNC:6902] 0.319 3.84 4.32 3.25e-05 0.0912 2.150
YARS1 ENSG00000134684 tyrosyl-tRNA synthetase 1 [Source:HGNC Symbol;Acc:HGNC:12840] -0.220 6.13 -4.00 1.09e-04 0.2670 1.060
YWHAG ENSG00000170027 tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein gamma [Source:HGNC Symbol;Acc:HGNC:12852] -0.239 6.47 -3.95 1.35e-04 0.2830 0.851
ZRSR2 ENSG00000169249 zinc finger CCCH-type, RNA binding motif and serine/arginine rich 2 [Source:HGNC Symbol;Acc:HGNC:23019] 0.288 3.85 3.88 1.74e-04 0.2830 0.680
SH3KBP1 ENSG00000147010 SH3 domain containing kinase binding protein 1 [Source:HGNC Symbol;Acc:HGNC:13867] -0.225 7.56 -3.86 1.88e-04 0.2830 0.522
NUDT19 ENSG00000213965 nudix hydrolase 19 [Source:HGNC Symbol;Acc:HGNC:32036] -0.324 4.04 -3.82 2.16e-04 0.2830 0.491
COPG1 ENSG00000181789 COPI coat complex subunit gamma 1 [Source:HGNC Symbol;Acc:HGNC:2236] -0.291 5.90 -3.83 2.07e-04 0.2830 0.477
ZBTB39 ENSG00000166860 zinc finger and BTB domain containing 39 [Source:HGNC Symbol;Acc:HGNC:29014] 0.216 5.13 3.80 2.28e-04 0.2830 0.418
NCL ENSG00000115053 nucleolin [Source:HGNC Symbol;Acc:HGNC:7667] -0.247 7.31 -3.81 2.21e-04 0.2830 0.381
TXNRD1 ENSG00000198431 thioredoxin reductase 1 [Source:HGNC Symbol;Acc:HGNC:12437] -0.232 5.96 -3.80 2.30e-04 0.2830 0.380

Δ Not Protected

GeneSymbol EnsemblID descripton_new logFC AveExpr t P.Value adj.P.Val B
TMPO-AS1 ENSG00000257167 TMPO antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:44158] 0.351 5.11 4.16 6.05e-05 0.474 1.60000
PPM1N ENSG00000213889 protein phosphatase, Mg2+/Mn2+ dependent 1N (putative) [Source:HGNC Symbol;Acc:HGNC:26845] 0.723 2.11 4.31 3.33e-05 0.474 1.33000
PSMG3 ENSG00000157778 proteasome assembly chaperone 3 [Source:HGNC Symbol;Acc:HGNC:22420] 0.435 2.71 3.85 1.91e-04 0.532 0.25200
STK4 ENSG00000101109 serine/threonine kinase 4 [Source:HGNC Symbol;Acc:HGNC:11408] -0.176 9.36 -3.71 3.18e-04 0.532 0.12300
MXRA7 ENSG00000182534 matrix remodeling associated 7 [Source:HGNC Symbol;Acc:HGNC:7541] 0.728 3.30 3.70 3.26e-04 0.532 -0.00767
RP11-536K7.3 ENSG00000232807 novel transcript, antisense to FBXO18 -0.265 3.57 -3.68 3.52e-04 0.532 -0.06060
AC007283.5 ENSG00000234431 novel transcript -0.403 3.94 -3.60 4.59e-04 0.584 -0.23000
ALDH6A1 ENSG00000119711 aldehyde dehydrogenase 6 family member A1 [Source:HGNC Symbol;Acc:HGNC:7179] -0.359 5.02 -3.55 5.47e-04 0.597 -0.31600
BATF ENSG00000156127 basic leucine zipper ATF-like transcription factor [Source:HGNC Symbol;Acc:HGNC:958] 0.720 1.97 3.73 2.99e-04 0.532 -0.34800
C1QBP ENSG00000108561 complement C1q binding protein [Source:HGNC Symbol;Acc:HGNC:1243] 0.305 6.01 3.51 6.35e-04 0.635 -0.45900
RNF38 ENSG00000137075 ring finger protein 38 [Source:HGNC Symbol;Acc:HGNC:18052] -0.197 7.32 -3.47 7.36e-04 0.635 -0.60400
RPS19BP1 ENSG00000187051 ribosomal protein S19 binding protein 1 [Source:HGNC Symbol;Acc:HGNC:28749] 0.305 4.04 3.43 8.35e-04 0.635 -0.69400
HIKESHI ENSG00000149196 heat shock protein nuclear import factor hikeshi [Source:HGNC Symbol;Acc:HGNC:26938] 0.233 4.30 3.41 8.77e-04 0.635 -0.72600
MPDU1 ENSG00000129255 mannose-P-dolichol utilization defect 1 [Source:HGNC Symbol;Acc:HGNC:7207] 0.289 5.05 3.41 9.01e-04 0.635 -0.74600
CKS2 ENSG00000123975 CDC28 protein kinase regulatory subunit 2 [Source:HGNC Symbol;Acc:HGNC:2000] 0.684 1.86 3.57 5.24e-04 0.597 -0.75500

Δ Protected vs. Δ Not Protected

GeneSymbol EnsemblID descripton_new logFC AveExpr t P.Value adj.P.Val B
RNF144A ENSG00000151692 ring finger protein 144A [Source:HGNC Symbol;Acc:HGNC:20457] 0.320 6.29 3.52 0.000605 0.954 -1.76
MED28 ENSG00000118579 mediator complex subunit 28 [Source:HGNC Symbol;Acc:HGNC:24628] 0.177 7.74 3.41 0.000884 0.954 -1.79
RP11-440L14.1 ENSG00000249592 novel transcript, antisense to PCGF3 0.273 4.53 3.78 0.000246 0.954 -1.97
KAT7 ENSG00000136504 lysine acetyltransferase 7 [Source:HGNC Symbol;Acc:HGNC:17016] 0.140 7.25 3.11 0.002330 0.954 -2.34
ANXA4 ENSG00000196975 annexin A4 [Source:HGNC Symbol;Acc:HGNC:542] -0.338 5.70 -3.22 0.001670 0.954 -2.35
RFX5 ENSG00000143390 regulatory factor X5 [Source:HGNC Symbol;Acc:HGNC:9986] -0.153 6.22 -3.15 0.002080 0.954 -2.38
PSMC2 ENSG00000161057 proteasome 26S subunit, ATPase 2 [Source:HGNC Symbol;Acc:HGNC:9548] -0.261 5.40 -3.22 0.001650 0.954 -2.42
C1QBP ENSG00000108561 complement C1q binding protein [Source:HGNC Symbol;Acc:HGNC:1243] -0.302 6.01 -3.12 0.002290 0.954 -2.45
LDHA ENSG00000134333 lactate dehydrogenase A [Source:HGNC Symbol;Acc:HGNC:6535] -0.244 7.21 -3.04 0.002890 0.954 -2.45
VDAC1 ENSG00000213585 voltage dependent anion channel 1 [Source:HGNC Symbol;Acc:HGNC:12669] -0.253 5.75 -3.15 0.002090 0.954 -2.45
SLBP ENSG00000163950 stem-loop binding protein [Source:HGNC Symbol;Acc:HGNC:10904] -0.271 5.14 -3.23 0.001580 0.954 -2.47
SEPTIN2 ENSG00000168385 septin 2 [Source:HGNC Symbol;Acc:HGNC:7729] -0.183 7.37 -2.99 0.003370 0.954 -2.52
MAML2 ENSG00000184384 mastermind like transcriptional coactivator 2 [Source:HGNC Symbol;Acc:HGNC:16259] 0.240 7.32 2.99 0.003430 0.954 -2.54
H2AZ1 ENSG00000164032 H2A.Z variant histone 1 [Source:HGNC Symbol;Acc:HGNC:4741] -0.336 6.02 -3.05 0.002830 0.954 -2.55
CELF2-AS1 ENSG00000181800 CELF2 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:23515] 0.272 6.62 3.00 0.003320 0.954 -2.57

Apply GSEA

Rank genes by -log10(P.Value)*sign(logFC). Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function.

set.seed(23)
#restructure dataframes
degtabs <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
    degtabs[[i]] <- setNames(as.list(c(1:3)), c("DeltaDelta", "DeltaP", "DeltaNP"))
    degtabs[[i]]$DeltaDelta <- DeltaDelta[[i]]
    degtabs[[i]]$DeltaP <- DeltaProtected[[i]]
    degtabs[[i]]$DeltaNP <- DeltaNotProtected[[i]]
}
ranks <- degtabs
for(i in names(degtabs)){
  for(j in names(degtabs[[i]])){
    ranks[[i]][[j]] <-  degtabs[[i]][[j]] %>%
      mutate(rankmetric = -log10(.$P.Value)*sign(.$logFC)) %>%
      dplyr::select(GeneSymbol,rankmetric) %>%
      na.omit() %>% 
      distinct() %>% 
      group_by(GeneSymbol) %>%
      summarize(rankmetric = mean(rankmetric)) %>%
      arrange(desc(rankmetric))
  }
}
#Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEAtab <- ranks
for(i in names(ranks)){
  for(j in names(ranks[[i]])){
    print(paste0("starting ", j, " in ", i))
    GSEAtab[[i]][[j]] <- NamedGeneRankList2GseaTable(rankedgenes = deframe(ranks[[i]][[j]]),
                                                     geneset = "all",
                                                     output_directory = tempdir(),
                                                     filename_prefix = "GSEA",
                                                     sampleSize = 101,
                                                     minSize = 20,
                                                     maxSize = Inf,
                                                     scoreType = "std") %>%
      as_tibble() %>%
      arrange(desc(NES)) %>% 
      dplyr::select(module_type, pathway, ES, NES, size, leadingEdge, pval, padj) %>% 
      mutate(leadingEdge = gsub("^c\\(|\\)$", "", leadingEdge)) %>%
      mutate(leadingEdge = gsub('"', "", leadingEdge)) %>%
      arrange(padj)
    #closeAllConnections() #this line prevents using up all url connections
    }
}
## [1] "starting DeltaDelta in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "starting DeltaP in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "starting DeltaNP in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"

Visualize GSEA data as bubble plot

Filter based on FDR<5%.

Plot GSEA bubbleplots (Figure 4D)

Red: goes up post-vax relative to baseline Blue: goes down post-vax relative to baseline

Gene-level analysis

  1. Identify genes that were differentially induced in uninfected and infected children receiving 1.8x10^6 PfSPZ Vaccine

  2. Merge ΔP vs ΔNP, ΔP, and ΔNP tables.

  3. Criteria: ΔP vs ΔNP P val < 0.005 ΔP logFC > 0 and ΔP P val < 0.05 ΔNP logFC < 0 and ΔNP P val < 0.10

AllDegs1 <- bind_rows(DeltaDelta, .id = "Treatment") %>%
  mutate(Comparison = "DeltaDelta")
AllDegs2 <- bind_rows(DeltaProtected, .id = "Treatment") %>%
  mutate(Comparison = "DeltaProtected")
AllDegs3 <- bind_rows(DeltaNotProtected, .id = "Treatment") %>%
  mutate(Comparison = "DeltaNotProtected")
AllDegs <- bind_rows(AllDegs1, AllDegs2, AllDegs3) %>%
  dplyr::select(Treatment, Comparison, EnsemblID, GeneSymbol, descripton_new, logFC, AveExpr, "t", P.Value, adj.P.Val) %>%
  dplyr::rename(PValue = P.Value) %>%
  dplyr::rename(FDR = adj.P.Val) %>%
  dplyr::rename(Description = "descripton_new")

gois <- AllDegs %>%
  filter(Treatment == "1.8 x 10^6 PfSPZ") %>%
  droplevels() %>%
  dplyr::select(Treatment, Comparison, EnsemblID, GeneSymbol, Description, logFC, PValue, FDR) %>%
  pivot_wider(., names_from = Comparison, names_glue = "{Comparison}_{.value}", values_from = c(logFC, PValue, FDR)) %>%
  filter((DeltaDelta_PValue < 0.005 & DeltaDelta_logFC > 2) &
           (DeltaProtected_logFC > 0 & DeltaProtected_PValue < 0.05)) %>%
  arrange(DeltaDelta_PValue)

gois_ms <- gois %>%
  dplyr::select(GeneSymbol, DeltaDelta_logFC, DeltaDelta_PValue, DeltaProtected_logFC, DeltaProtected_PValue, DeltaNotProtected_logFC, DeltaNotProtected_PValue)

Figure 4E (table)

gois_ms %>%
  mutate_if(is.numeric, signif,3) %>%
  knitr::kable()
GeneSymbol DeltaDelta_logFC DeltaDelta_PValue DeltaProtected_logFC DeltaProtected_PValue DeltaNotProtected_logFC DeltaNotProtected_PValue
FSTL4 2.09 0.00136 0.878 0.0409 -1.21 0.0529

Reshape and merge for survival analysis

library(gtools)
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("17xx0YaggLiyWdJc9rqA1nkPSE7dj05p0"), path = temp, overwrite = TRUE)
cpm_delta <- readRDS(file = dl$local_path)
cpm_delta <- cpm_delta[fData(cpm_delta)$GeneSymbol %in% gois_ms$GeneSymbol, ]
cpm_delta <- cpm_delta[,cpm_delta$treat != "Placebo"] #remove placebo---we want to just look at all PfSPZ doses
pData(cpm_delta) <- droplevels(pData(cpm_delta))
survdat <- Biobase::exprs(cpm_delta) %>%
  as.data.frame() %>%
  rownames_to_column(var = "EnsemblID") %>%
  left_join(., fData(cpm_delta) %>%
              dplyr::select(EnsemblID, GeneSymbol),
            by = "EnsemblID") %>%
  dplyr::select(-c(EnsemblID)) %>%
  dplyr::select(GeneSymbol, everything()) %>%
  pivot_longer(2:ncol(.), names_to = "PATID", values_to = "expression") %>%
  group_by(GeneSymbol) %>%
  mutate(exprs_up_down = factor(ifelse(expression>log2(1), "induced", "not induced"), levels = c("not induced", "induced"))) %>%
  mutate(exprs_terciles = factor(quantcut(expression, q=3, na.rm=TRUE), labels = c("bottom_tercile", "middle_tercile", "top_tercile"))) %>%
  mutate(exprs_median = factor(quantcut(expression, q=2, na.rm=TRUE), labels = c("bottom", "top"))) %>%
  left_join(., fData(cpm_delta) %>%
              dplyr::select(c(GeneSymbol, description)), by = "GeneSymbol") %>%
  left_join(., pData(cpm_delta)%>%
              dplyr::select(c(PATID, Timepoint, treat, site, SEX, pfcsp_pre, age.vax1, mal.vax.1, mal.dvax, mal.dvax.tot, contains("atp"))), by = "PATID") %>%
  droplevels()
survdat$tte.mal.atp.6 <- survdat$tte.mal.atp.6+1
survdat[is.na(survdat$pfcsp_pre),]$pfcsp_pre <- median(survdat$pfcsp_pre, na.rm = TRUE) #Impute 2 missing values with median of all samples

Kaplan-Meier plots

library(survival)
library(survminer)
myFollowup <- 6 #3 or 6
N <- length(unique(survdat$PATID))
survdat.filtered <- fit <- mySurvPlot <- c()
for(i in gois_ms$GeneSymbol){
  survdat.filtered[[i]] <- survdat %>%
    filter(GeneSymbol == i)
  fit[[i]] <- survfit(Surv(tte.mal.atp.6, mal.atp.6) ~ exprs_up_down, data= survdat.filtered[[i]])
  mySurvPlot[[i]] <- ggsurvplot(fit[[i]], risk.table = TRUE, pval = TRUE,
                                break.time.by = 30, title = unique(survdat.filtered[[i]]$GeneSymbol),
                                xlab = "Days since vaccination",
                                ylab = "% free of parasitemia",
                                censor = TRUE,
                                palette = "lancet",
                                conf.int = TRUE,
                                conf.int.alpha = 0.1,
                                font.family = "Arial")
}

Kaplan-Meier plot for FSTL4 (Figure 4F)

print(mySurvPlot$FSTL4, newpage = FALSE)

Cox Regression (Table S4)

cox.res <- c()
for(i in names(survdat.filtered)){
  survdat.cp <- survdat.filtered[[i]]
  survdat.cp$tte.mal.atp.6 <- survdat.cp$tte.mal.atp.6 + 1 #add one day to each value given zero values
  survdat.cp$start      <-      0
  survdat.cp$stop       <-      survdat.cp$tte.mal.atp.6
  subj.pcrp <- rep(survdat.cp$start, as.vector(table(survdat.cp$PATID)))
  survdat.cp$start1 <- survdat.cp$start-subj.pcrp
  survdat.cp$stop1 <- survdat.cp$stop-subj.pcrp
  survdat.cp <- survdat.cp %>%
    droplevels()
  fit <- coxph(Surv(start1, stop1, mal.atp.6) ~ exprs_up_down + factor(SEX) + factor(site) + log10(pfcsp_pre+1) + mal.dvax.tot + factor(treat), data = survdat.cp)
  #vfit <- cox.zph(fit)
  #par(mfrow=c(3,2))
  #plot(vfit)
  #cox.zph(fit)
  cox.res[[i]] <- as.data.frame(cbind(fit$n, fit$nevent, summary(fit)$conf.int[,c(1,3:4)], summary(fit)$coefficient[,5]))
  colnames(cox.res[[i]]) <- c("n","number of events","HR", "LCI", "UCI", "P value")
  cox.res[[i]] <- cox.res[[i]] %>%
    as.matrix() %>%
    signif(., digits = 3) %>%
    as.data.frame()
  cox.res[[i]]$Gene <- i
  rownames(cox.res[[i]]) <- c("upregulated post-vax (ref:downregulated post-vax)",
                              "gender (ref:female)",
                              "Wagai (ref:Siaya)",
                              "CSP-specific IgG baseline",
                              "number of Pf infections during vaccination period",
                              "9.0x10^5 PfSPZ (ref:4.5x10^5 PfSPZ)",
                              "1.8x10^6 PfSPZ (ref:4.5x10^5 PfSPZ)")
  cox.res[[i]] <- cox.res[[i]] %>%
    rownames_to_column(var = "Covariate") %>%
    dplyr::select(Gene, everything())
  }
  cox.res.bound <- bind_rows(cox.res, .id = "Gene") %>%
    mutate(Significant = ifelse(`P value`<0.001, "***",
                              ifelse(`P value`<0.01, "**",
                                     ifelse(`P value`<0.05, "*", ""))))

Only display FSTL4 here.

cox.res.bound %>%
  filter(Gene == "FSTL4") %>%
  knitr::kable()
Gene Covariate n number of events HR LCI UCI P value Significant
FSTL4 upregulated post-vax (ref:downregulated post-vax) 169 107 0.643 0.433 0.954 0.02820 *
FSTL4 gender (ref:female) 169 107 0.920 0.618 1.370 0.67900
FSTL4 Wagai (ref:Siaya) 169 107 0.629 0.421 0.939 0.02340 *
FSTL4 CSP-specific IgG baseline 169 107 1.000 0.795 1.260 0.99900
FSTL4 number of Pf infections during vaccination period 169 107 1.250 1.070 1.460 0.00465 **
FSTL4 9.0x10^5 PfSPZ (ref:4.5x10^5 PfSPZ) 169 107 1.100 0.693 1.760 0.67700
FSTL4 1.8x10^6 PfSPZ (ref:4.5x10^5 PfSPZ) 169 107 0.899 0.551 1.470 0.66900