KSPZV1 DGE Δ (postvax/baseline) limma voom

Objective

Perform differential gene expression using limma voom to assess differences between protected and not protected infants in the KSPZV1 malaria vaccine trial. This analysis specifically evaluates differences in post-vax with adjustment for baseline.

This analysis also includes adjustments for:

Batch
Sex
Study site
log10(CSP Ab + 1)
number of malaria infections during vaccination period

library(knitr)
library(tidyverse)
library(limma)
library(edgeR)
library(googledrive)
library(tidyverse)
library(fgsea)
library(data.table)
library(EDASeq)
library(gtools)

Load ExpressionSet

#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("17P7RwDaPCwDXcrD82N4fYd3vGOcueFEH"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))

Set options

myGroups <- unique(x$treat)

## [1] "1.8 x 10^6 PfSPZ"
##        Timepoint
## Outcome  0 25
##       0 38 38
##       1 25 27
##        Dosegroup
## Outcome 1.8 x 10^6 PfSPZ
##       0               76
##       1               52
## Features  Samples 
##    35716      128

Build DGEList Object and make Expression Sets

Build DGEList Object

#Define group by dose, timepoint and outcome #if running for all dose groups
#ygroup <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
ygroup <- setNames(as.list(c(1:1)), c("1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
  ygroup[[i]]  <- DGEList(counts=counts(xgroup[[i]]), genes=fData(xgroup[[i]]), group= factor(paste(xgroup[[i]]$treat, xgroup[[i]]$Timepoint,
                                                                                                    xgroup[[i]]$Outcome, sep = "_")),
                          remove.zeros=T)
  ygroup[[i]]$samples$PATID <- gsub("_.*", "", rownames(ygroup[[i]]$samples))
  }

Filter out low expression features

for(i in names(ygroup)){
  keep <- filterByExpr(ygroup[[i]])
  ygroup[[i]] <- ygroup[[i]][keep, , keep.lib.sizes=FALSE]
  ygroup[[i]]$samples$lib.size <- colSums(ygroup[[i]]$counts)
  ygroup[[i]] <- calcNormFactors(ygroup[[i]])   #Normalization
}

Remove unpaired samples and then arrange by patient and timepoint

This is necessary for paired analyses

for(i in names(ygroup)){
  ygroup[[i]] <- ygroup[[i]][,!is.na(ygroup[[i]]$samples$group)]
  ygroup[[i]] <- ygroup[[i]][,duplicated(ygroup[[i]]$samples$PATID) | duplicated(ygroup[[i]]$samples$PATID, fromLast = TRUE)]
  ygroup[[i]] <- ygroup[[i]][,order(ygroup[[i]]$samples$PATID, ygroup[[i]]$samples$group)]
  print(i)
  print(ifelse(all(gsub('_.*', '', colnames(ygroup[[i]][,grepl("_0", colnames(ygroup[[i]]))])) ==
                     gsub('_.*', '', colnames(ygroup[[i]][,grepl("_25", colnames(ygroup[[i]]))]))) &
                 all(ygroup[[i]]$samples$Timepoint == rep(c(0,25),length(ygroup[[i]]$samples$Timepoint)/2)), 
               "Samples are paired. All is good.","Stop and check order of samples."))
}

## [1] "1.8 x 10^6 PfSPZ"
## [1] "Samples are paired. All is good."

Remake ExpressionSet after sample and feature (gene) filtering steps.

Design Matrix - Compare Protected vs Not Protected

Analysis between Protected and Not Protected within a Dose Group. Subject as random effect using duplicate correlation. Batch, sex, study site, CSP-specific IgG pre-immunization, and malaria during vaccination period as fixed effects. Limma voom can handle this. See following:

https://bioconductor.riken.jp/packages/3.9/bioc/vignettes/variancePartition/inst/doc/dream.html

DeltaDelta <- DeltaNotProtected <- DeltaProtected <- fit <- design <- xgroup
for(i in names(xgroup)){
  Subject <- factor(xgroup[[i]]$PATID)
  Outcome <- factor(xgroup[[i]]$Outcome, levels = c(1,0), labels = c("NotProtected","Protected"))
  Timepoint <- factor(xgroup[[i]]$Timepoint, levels = c(0,25), labels = c("baseline","postvax"))
  Age <- xgroup[[i]]$age.vax1
  Sex <- as.factor(xgroup[[i]]$SEX)
  Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
  Site <- factor(xgroup[[i]]$site)
  MALdVax <- factor(xgroup[[i]]$mal.dvax, levels = c(0,1), labels = c("noMALdVAX","yesMALdVAX"))
  MALdVaxTotal <- as.numeric(ifelse(is.na(xgroup[[i]]$mal.dvax.tot), 0, xgroup[[i]]$mal.dvax.tot))
  CSPAb_baseline <- as.numeric(xgroup[[i]]$pfcsp_pre)
  CSPAb_baseline[is.na(CSPAb_baseline)] <- median(CSPAb_baseline, na.rm = TRUE) #Impute 2 missing values with median of all samples
  #Define protective outcome-specific PfSPZ Vaccination effects and append them to the design matrix
  Protected.Postvax <- Outcome == "Protected" & Timepoint=="postvax"
  NotProtected.Postvax <- Outcome =="NotProtected" & Timepoint=="postvax"
  design[[i]] <- model.matrix(~Batch+Sex+Site+CSPAb_baseline+MALdVaxTotal)
  design[[i]] <- cbind(design[[i]], Protected.Postvax, NotProtected.Postvax) #make design matrix according to edgeR 3.5 (p42)
  #https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
  rownames(design[[i]]) <- xgroup[[i]]$PATID
  print(paste0("running model fit for ", i))
  print(colnames(design[[i]])) #check colnames

  #Use voom() to convert the read counts to log2-cpm, with associated weights, ready for linear modeling
  v <- voom(ygroup[[i]], design[[i]])
  cor <- duplicateCorrelation(v, design[[i]], block = Subject)
  print(cor$consensus)
  v <- voom(ygroup[[i]], design[[i]], plot = TRUE, block = Subject, correlation = cor$consensus)
  cor <- duplicateCorrelation(v, design[[i]], block = Subject)
  print(cor$consensus)
}

## [1] "running model fit for 1.8 x 10^6 PfSPZ"
## [1] "(Intercept)"          "BatchNov2019"         "SexM"                
## [4] "SiteWagai"            "CSPAb_baseline"       "MALdVaxTotal"        
## [7] "Protected.Postvax"    "NotProtected.Postvax"
## [1] -0.01283081

## [1] -0.01282869

for(i in names(xgroup)){
  fit <- lmFit(v, design[[i]], block = Subject, correlation = cor$consensus)
  fit <- eBayes(fit)

  DeltaProtected[[i]] <- topTable(fit,n=Inf, coef="Protected.Postvax")
  DeltaNotProtected[[i]] <-  topTable(fit,n=Inf, coef="NotProtected.Postvax")
  fit2 <- contrasts.fit(fit, contrasts = c(rep(0,(ncol(design[[i]])-2)),1,-1))
  fit2 <- eBayes(fit2)
  DeltaDelta[[i]] <- topTable(fit2,n=Inf)
}

Examine top 15 DEGs in 1.8x10^6 PfSPZ dose group

Δ Protected

GeneSymbol	EnsemblID	descripton_new	logFC	AveExpr	t	P.Value	adj.P.Val	B
MTMR4	ENSG00000108389	myotubularin related protein 4 [Source:HGNC Symbol;Acc:HGNC:7452]	-0.206	5.91	-4.68	7.50e-06	0.0817	3.510
RTF1	ENSG00000137815	RTF1 homolog, Paf1/RNA polymerase II complex component [Source:HGNC Symbol;Acc:HGNC:28996]	-0.245	5.65	-4.66	8.30e-06	0.0817	3.420
SLC4A1AP	ENSG00000163798	solute carrier family 4 member 1 adaptor protein [Source:HGNC Symbol;Acc:HGNC:13813]	-0.240	4.82	-4.55	1.29e-05	0.0844	3.010
ZDHHC14	ENSG00000175048	zinc finger DHHC-type palmitoyltransferase 14 [Source:HGNC Symbol;Acc:HGNC:20341]	0.381	4.03	4.48	1.72e-05	0.0844	2.730
XRN2	ENSG00000088930	5’-3’ exoribonuclease 2 [Source:HGNC Symbol;Acc:HGNC:12836]	-0.229	6.66	-4.39	2.49e-05	0.0912	2.400
MASP2	ENSG00000009724	MBL associated serine protease 2 [Source:HGNC Symbol;Acc:HGNC:6902]	0.319	3.84	4.32	3.25e-05	0.0912	2.150
YARS1	ENSG00000134684	tyrosyl-tRNA synthetase 1 [Source:HGNC Symbol;Acc:HGNC:12840]	-0.220	6.13	-4.00	1.09e-04	0.2670	1.060
YWHAG	ENSG00000170027	tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein gamma [Source:HGNC Symbol;Acc:HGNC:12852]	-0.239	6.47	-3.95	1.35e-04	0.2830	0.851
ZRSR2	ENSG00000169249	zinc finger CCCH-type, RNA binding motif and serine/arginine rich 2 [Source:HGNC Symbol;Acc:HGNC:23019]	0.288	3.85	3.88	1.74e-04	0.2830	0.680
SH3KBP1	ENSG00000147010	SH3 domain containing kinase binding protein 1 [Source:HGNC Symbol;Acc:HGNC:13867]	-0.225	7.56	-3.86	1.88e-04	0.2830	0.522
NUDT19	ENSG00000213965	nudix hydrolase 19 [Source:HGNC Symbol;Acc:HGNC:32036]	-0.324	4.04	-3.82	2.16e-04	0.2830	0.491
COPG1	ENSG00000181789	COPI coat complex subunit gamma 1 [Source:HGNC Symbol;Acc:HGNC:2236]	-0.291	5.90	-3.83	2.07e-04	0.2830	0.477
ZBTB39	ENSG00000166860	zinc finger and BTB domain containing 39 [Source:HGNC Symbol;Acc:HGNC:29014]	0.216	5.13	3.80	2.28e-04	0.2830	0.418
NCL	ENSG00000115053	nucleolin [Source:HGNC Symbol;Acc:HGNC:7667]	-0.247	7.31	-3.81	2.21e-04	0.2830	0.381
TXNRD1	ENSG00000198431	thioredoxin reductase 1 [Source:HGNC Symbol;Acc:HGNC:12437]	-0.232	5.96	-3.80	2.30e-04	0.2830	0.380

Δ Not Protected

GeneSymbol	EnsemblID	descripton_new	logFC	AveExpr	t	P.Value	adj.P.Val	B
TMPO-AS1	ENSG00000257167	TMPO antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:44158]	0.351	5.11	4.16	6.05e-05	0.474	1.60000
PPM1N	ENSG00000213889	protein phosphatase, Mg2+/Mn2+ dependent 1N (putative) [Source:HGNC Symbol;Acc:HGNC:26845]	0.723	2.11	4.31	3.33e-05	0.474	1.33000
PSMG3	ENSG00000157778	proteasome assembly chaperone 3 [Source:HGNC Symbol;Acc:HGNC:22420]	0.435	2.71	3.85	1.91e-04	0.532	0.25200
STK4	ENSG00000101109	serine/threonine kinase 4 [Source:HGNC Symbol;Acc:HGNC:11408]	-0.176	9.36	-3.71	3.18e-04	0.532	0.12300
MXRA7	ENSG00000182534	matrix remodeling associated 7 [Source:HGNC Symbol;Acc:HGNC:7541]	0.728	3.30	3.70	3.26e-04	0.532	-0.00767
RP11-536K7.3	ENSG00000232807	novel transcript, antisense to FBXO18	-0.265	3.57	-3.68	3.52e-04	0.532	-0.06060
AC007283.5	ENSG00000234431	novel transcript	-0.403	3.94	-3.60	4.59e-04	0.584	-0.23000
ALDH6A1	ENSG00000119711	aldehyde dehydrogenase 6 family member A1 [Source:HGNC Symbol;Acc:HGNC:7179]	-0.359	5.02	-3.55	5.47e-04	0.597	-0.31600
BATF	ENSG00000156127	basic leucine zipper ATF-like transcription factor [Source:HGNC Symbol;Acc:HGNC:958]	0.720	1.97	3.73	2.99e-04	0.532	-0.34800
C1QBP	ENSG00000108561	complement C1q binding protein [Source:HGNC Symbol;Acc:HGNC:1243]	0.305	6.01	3.51	6.35e-04	0.635	-0.45900
RNF38	ENSG00000137075	ring finger protein 38 [Source:HGNC Symbol;Acc:HGNC:18052]	-0.197	7.32	-3.47	7.36e-04	0.635	-0.60400
RPS19BP1	ENSG00000187051	ribosomal protein S19 binding protein 1 [Source:HGNC Symbol;Acc:HGNC:28749]	0.305	4.04	3.43	8.35e-04	0.635	-0.69400
HIKESHI	ENSG00000149196	heat shock protein nuclear import factor hikeshi [Source:HGNC Symbol;Acc:HGNC:26938]	0.233	4.30	3.41	8.77e-04	0.635	-0.72600
MPDU1	ENSG00000129255	mannose-P-dolichol utilization defect 1 [Source:HGNC Symbol;Acc:HGNC:7207]	0.289	5.05	3.41	9.01e-04	0.635	-0.74600
CKS2	ENSG00000123975	CDC28 protein kinase regulatory subunit 2 [Source:HGNC Symbol;Acc:HGNC:2000]	0.684	1.86	3.57	5.24e-04	0.597	-0.75500

Δ Protected vs. Δ Not Protected

GeneSymbol	EnsemblID	descripton_new	logFC	AveExpr	t	P.Value	adj.P.Val	B
RNF144A	ENSG00000151692	ring finger protein 144A [Source:HGNC Symbol;Acc:HGNC:20457]	0.320	6.29	3.52	0.000605	0.954	-1.76
MED28	ENSG00000118579	mediator complex subunit 28 [Source:HGNC Symbol;Acc:HGNC:24628]	0.177	7.74	3.41	0.000884	0.954	-1.79
RP11-440L14.1	ENSG00000249592	novel transcript, antisense to PCGF3	0.273	4.53	3.78	0.000246	0.954	-1.97
KAT7	ENSG00000136504	lysine acetyltransferase 7 [Source:HGNC Symbol;Acc:HGNC:17016]	0.140	7.25	3.11	0.002330	0.954	-2.34
ANXA4	ENSG00000196975	annexin A4 [Source:HGNC Symbol;Acc:HGNC:542]	-0.338	5.70	-3.22	0.001670	0.954	-2.35
RFX5	ENSG00000143390	regulatory factor X5 [Source:HGNC Symbol;Acc:HGNC:9986]	-0.153	6.22	-3.15	0.002080	0.954	-2.38
PSMC2	ENSG00000161057	proteasome 26S subunit, ATPase 2 [Source:HGNC Symbol;Acc:HGNC:9548]	-0.261	5.40	-3.22	0.001650	0.954	-2.42
C1QBP	ENSG00000108561	complement C1q binding protein [Source:HGNC Symbol;Acc:HGNC:1243]	-0.302	6.01	-3.12	0.002290	0.954	-2.45
LDHA	ENSG00000134333	lactate dehydrogenase A [Source:HGNC Symbol;Acc:HGNC:6535]	-0.244	7.21	-3.04	0.002890	0.954	-2.45
VDAC1	ENSG00000213585	voltage dependent anion channel 1 [Source:HGNC Symbol;Acc:HGNC:12669]	-0.253	5.75	-3.15	0.002090	0.954	-2.45
SLBP	ENSG00000163950	stem-loop binding protein [Source:HGNC Symbol;Acc:HGNC:10904]	-0.271	5.14	-3.23	0.001580	0.954	-2.47
SEPTIN2	ENSG00000168385	septin 2 [Source:HGNC Symbol;Acc:HGNC:7729]	-0.183	7.37	-2.99	0.003370	0.954	-2.52
MAML2	ENSG00000184384	mastermind like transcriptional coactivator 2 [Source:HGNC Symbol;Acc:HGNC:16259]	0.240	7.32	2.99	0.003430	0.954	-2.54
H2AZ1	ENSG00000164032	H2A.Z variant histone 1 [Source:HGNC Symbol;Acc:HGNC:4741]	-0.336	6.02	-3.05	0.002830	0.954	-2.55
CELF2-AS1	ENSG00000181800	CELF2 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:23515]	0.272	6.62	3.00	0.003320	0.954	-2.57

Apply GSEA

Rank genes by -log10(P.Value)*sign(logFC). Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function.

set.seed(23)
#restructure dataframes
degtabs <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
    degtabs[[i]] <- setNames(as.list(c(1:3)), c("DeltaDelta", "DeltaP", "DeltaNP"))
    degtabs[[i]]$DeltaDelta <- DeltaDelta[[i]]
    degtabs[[i]]$DeltaP <- DeltaProtected[[i]]
    degtabs[[i]]$DeltaNP <- DeltaNotProtected[[i]]
}
ranks <- degtabs
for(i in names(degtabs)){
  for(j in names(degtabs[[i]])){
    ranks[[i]][[j]] <-  degtabs[[i]][[j]] %>%
      mutate(rankmetric = -log10(.$P.Value)*sign(.$logFC)) %>%
      dplyr::select(GeneSymbol,rankmetric) %>%
      na.omit() %>% 
      distinct() %>% 
      group_by(GeneSymbol) %>%
      summarize(rankmetric = mean(rankmetric)) %>%
      arrange(desc(rankmetric))
  }
}
#Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEAtab <- ranks
for(i in names(ranks)){
  for(j in names(ranks[[i]])){
    print(paste0("starting ", j, " in ", i))
    GSEAtab[[i]][[j]] <- NamedGeneRankList2GseaTable(rankedgenes = deframe(ranks[[i]][[j]]),
                                                     geneset = "all",
                                                     output_directory = tempdir(),
                                                     filename_prefix = "GSEA",
                                                     sampleSize = 101,
                                                     minSize = 20,
                                                     maxSize = Inf,
                                                     scoreType = "std") %>%
      as_tibble() %>%
      arrange(desc(NES)) %>% 
      dplyr::select(module_type, pathway, ES, NES, size, leadingEdge, pval, padj) %>% 
      mutate(leadingEdge = gsub("^c\\(|\\)$", "", leadingEdge)) %>%
      mutate(leadingEdge = gsub('"', "", leadingEdge)) %>%
      arrange(padj)
    #closeAllConnections() #this line prevents using up all url connections
    }
}

## [1] "starting DeltaDelta in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "starting DeltaP in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "starting DeltaNP in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"

Visualize GSEA data as bubble plot

Filter based on FDR<5%.

Plot GSEA bubbleplots (Figure 4D)

Red: goes up post-vax relative to baseline Blue: goes down post-vax relative to baseline

Gene-level analysis

Identify genes that were differentially induced in uninfected and infected children receiving 1.8x10^6 PfSPZ Vaccine
Merge ΔP vs ΔNP, ΔP, and ΔNP tables.
Criteria: ΔP vs ΔNP P val < 0.005 ΔP logFC > 0 and ΔP P val < 0.05 ΔNP logFC < 0 and ΔNP P val < 0.10

AllDegs1 <- bind_rows(DeltaDelta, .id = "Treatment") %>%
  mutate(Comparison = "DeltaDelta")
AllDegs2 <- bind_rows(DeltaProtected, .id = "Treatment") %>%
  mutate(Comparison = "DeltaProtected")
AllDegs3 <- bind_rows(DeltaNotProtected, .id = "Treatment") %>%
  mutate(Comparison = "DeltaNotProtected")
AllDegs <- bind_rows(AllDegs1, AllDegs2, AllDegs3) %>%
  dplyr::select(Treatment, Comparison, EnsemblID, GeneSymbol, descripton_new, logFC, AveExpr, "t", P.Value, adj.P.Val) %>%
  dplyr::rename(PValue = P.Value) %>%
  dplyr::rename(FDR = adj.P.Val) %>%
  dplyr::rename(Description = "descripton_new")

gois <- AllDegs %>%
  filter(Treatment == "1.8 x 10^6 PfSPZ") %>%
  droplevels() %>%
  dplyr::select(Treatment, Comparison, EnsemblID, GeneSymbol, Description, logFC, PValue, FDR) %>%
  pivot_wider(., names_from = Comparison, names_glue = "{Comparison}_{.value}", values_from = c(logFC, PValue, FDR)) %>%
  filter((DeltaDelta_PValue < 0.005 & DeltaDelta_logFC > 2) &
           (DeltaProtected_logFC > 0 & DeltaProtected_PValue < 0.05)) %>%
  arrange(DeltaDelta_PValue)

gois_ms <- gois %>%
  dplyr::select(GeneSymbol, DeltaDelta_logFC, DeltaDelta_PValue, DeltaProtected_logFC, DeltaProtected_PValue, DeltaNotProtected_logFC, DeltaNotProtected_PValue)

Figure 4E (table)

gois_ms %>%
  mutate_if(is.numeric, signif,3) %>%
  knitr::kable()

GeneSymbol	DeltaDelta_logFC	DeltaDelta_PValue	DeltaProtected_logFC	DeltaProtected_PValue	DeltaNotProtected_logFC	DeltaNotProtected_PValue
FSTL4	2.09	0.00136	0.878	0.0409	-1.21	0.0529

Reshape and merge for survival analysis

library(gtools)
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("17xx0YaggLiyWdJc9rqA1nkPSE7dj05p0"), path = temp, overwrite = TRUE)
cpm_delta <- readRDS(file = dl$local_path)
cpm_delta <- cpm_delta[fData(cpm_delta)$GeneSymbol %in% gois_ms$GeneSymbol, ]
cpm_delta <- cpm_delta[,cpm_delta$treat != "Placebo"] #remove placebo---we want to just look at all PfSPZ doses
pData(cpm_delta) <- droplevels(pData(cpm_delta))
survdat <- Biobase::exprs(cpm_delta) %>%
  as.data.frame() %>%
  rownames_to_column(var = "EnsemblID") %>%
  left_join(., fData(cpm_delta) %>%
              dplyr::select(EnsemblID, GeneSymbol),
            by = "EnsemblID") %>%
  dplyr::select(-c(EnsemblID)) %>%
  dplyr::select(GeneSymbol, everything()) %>%
  pivot_longer(2:ncol(.), names_to = "PATID", values_to = "expression") %>%
  group_by(GeneSymbol) %>%
  mutate(exprs_up_down = factor(ifelse(expression>log2(1), "induced", "not induced"), levels = c("not induced", "induced"))) %>%
  mutate(exprs_terciles = factor(quantcut(expression, q=3, na.rm=TRUE), labels = c("bottom_tercile", "middle_tercile", "top_tercile"))) %>%
  mutate(exprs_median = factor(quantcut(expression, q=2, na.rm=TRUE), labels = c("bottom", "top"))) %>%
  left_join(., fData(cpm_delta) %>%
              dplyr::select(c(GeneSymbol, description)), by = "GeneSymbol") %>%
  left_join(., pData(cpm_delta)%>%
              dplyr::select(c(PATID, Timepoint, treat, site, SEX, pfcsp_pre, age.vax1, mal.vax.1, mal.dvax, mal.dvax.tot, contains("atp"))), by = "PATID") %>%
  droplevels()
survdat$tte.mal.atp.6 <- survdat$tte.mal.atp.6+1
survdat[is.na(survdat$pfcsp_pre),]$pfcsp_pre <- median(survdat$pfcsp_pre, na.rm = TRUE) #Impute 2 missing values with median of all samples

Kaplan-Meier plots

library(survival)
library(survminer)
myFollowup <- 6 #3 or 6
N <- length(unique(survdat$PATID))
survdat.filtered <- fit <- mySurvPlot <- c()
for(i in gois_ms$GeneSymbol){
  survdat.filtered[[i]] <- survdat %>%
    filter(GeneSymbol == i)
  fit[[i]] <- survfit(Surv(tte.mal.atp.6, mal.atp.6) ~ exprs_up_down, data= survdat.filtered[[i]])
  mySurvPlot[[i]] <- ggsurvplot(fit[[i]], risk.table = TRUE, pval = TRUE,
                                break.time.by = 30, title = unique(survdat.filtered[[i]]$GeneSymbol),
                                xlab = "Days since vaccination",
                                ylab = "% free of parasitemia",
                                censor = TRUE,
                                palette = "lancet",
                                conf.int = TRUE,
                                conf.int.alpha = 0.1,
                                font.family = "Arial")
}

Kaplan-Meier plot for FSTL4 (Figure 4F)

print(mySurvPlot$FSTL4, newpage = FALSE)

Cox Regression (Table S4)

cox.res <- c()
for(i in names(survdat.filtered)){
  survdat.cp <- survdat.filtered[[i]]
  survdat.cp$tte.mal.atp.6 <- survdat.cp$tte.mal.atp.6 + 1 #add one day to each value given zero values
  survdat.cp$start      <-      0
  survdat.cp$stop       <-      survdat.cp$tte.mal.atp.6
  subj.pcrp <- rep(survdat.cp$start, as.vector(table(survdat.cp$PATID)))
  survdat.cp$start1 <- survdat.cp$start-subj.pcrp
  survdat.cp$stop1 <- survdat.cp$stop-subj.pcrp
  survdat.cp <- survdat.cp %>%
    droplevels()
  fit <- coxph(Surv(start1, stop1, mal.atp.6) ~ exprs_up_down + factor(SEX) + factor(site) + log10(pfcsp_pre+1) + mal.dvax.tot + factor(treat), data = survdat.cp)
  #vfit <- cox.zph(fit)
  #par(mfrow=c(3,2))
  #plot(vfit)
  #cox.zph(fit)
  cox.res[[i]] <- as.data.frame(cbind(fit$n, fit$nevent, summary(fit)$conf.int[,c(1,3:4)], summary(fit)$coefficient[,5]))
  colnames(cox.res[[i]]) <- c("n","number of events","HR", "LCI", "UCI", "P value")
  cox.res[[i]] <- cox.res[[i]] %>%
    as.matrix() %>%
    signif(., digits = 3) %>%
    as.data.frame()
  cox.res[[i]]$Gene <- i
  rownames(cox.res[[i]]) <- c("upregulated post-vax (ref:downregulated post-vax)",
                              "gender (ref:female)",
                              "Wagai (ref:Siaya)",
                              "CSP-specific IgG baseline",
                              "number of Pf infections during vaccination period",
                              "9.0x10^5 PfSPZ (ref:4.5x10^5 PfSPZ)",
                              "1.8x10^6 PfSPZ (ref:4.5x10^5 PfSPZ)")
  cox.res[[i]] <- cox.res[[i]] %>%
    rownames_to_column(var = "Covariate") %>%
    dplyr::select(Gene, everything())
  }
  cox.res.bound <- bind_rows(cox.res, .id = "Gene") %>%
    mutate(Significant = ifelse(`P value`<0.001, "***",
                              ifelse(`P value`<0.01, "**",
                                     ifelse(`P value`<0.05, "*", ""))))

Only display FSTL4 here.

cox.res.bound %>%
  filter(Gene == "FSTL4") %>%
  knitr::kable()

Gene	Covariate	n	number of events	HR	LCI	UCI	P value	Significant
FSTL4	upregulated post-vax (ref:downregulated post-vax)	169	107	0.643	0.433	0.954	0.02820	*
FSTL4	gender (ref:female)	169	107	0.920	0.618	1.370	0.67900
FSTL4	Wagai (ref:Siaya)	169	107	0.629	0.421	0.939	0.02340	*
FSTL4	CSP-specific IgG baseline	169	107	1.000	0.795	1.260	0.99900
FSTL4	number of Pf infections during vaccination period	169	107	1.250	1.070	1.460	0.00465	**
FSTL4	9.0x10^5 PfSPZ (ref:4.5x10^5 PfSPZ)	169	107	1.100	0.693	1.760	0.67700
FSTL4	1.8x10^6 PfSPZ (ref:4.5x10^5 PfSPZ)	169	107	0.899	0.551	1.470	0.66900

KSPZV1 DGE Δ (postvax/baseline) limma voom - JCI revision

Tuan M. Tran

15 December, 2023