Perform differential gene expression using limma voom to assess differences between protected and not protected infants in the KSPZV1 malaria vaccine trial. This analysis specifically evaluates differences in post-vax with adjustment for baseline.
This analysis also includes adjustments for:
library(knitr)
library(tidyverse)
library(limma)
library(edgeR)
library(googledrive)
library(tidyverse)
library(fgsea)
library(data.table)
library(EDASeq)
library(gtools)
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("17P7RwDaPCwDXcrD82N4fYd3vGOcueFEH"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
x$treat <- factor(x$treat, levels = c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
myGroups <- unique(x$treat)
## [1] "1.8 x 10^6 PfSPZ"
## Timepoint
## Outcome 0 25
## 0 38 38
## 1 25 27
## Dosegroup
## Outcome 1.8 x 10^6 PfSPZ
## 0 76
## 1 52
## Features Samples
## 35716 128
#Define group by dose, timepoint and outcome #if running for all dose groups
#ygroup <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
ygroup <- setNames(as.list(c(1:1)), c("1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
ygroup[[i]] <- DGEList(counts=counts(xgroup[[i]]), genes=fData(xgroup[[i]]), group= factor(paste(xgroup[[i]]$treat, xgroup[[i]]$Timepoint,
xgroup[[i]]$Outcome, sep = "_")),
remove.zeros=T)
ygroup[[i]]$samples$PATID <- gsub("_.*", "", rownames(ygroup[[i]]$samples))
}
for(i in names(ygroup)){
keep <- filterByExpr(ygroup[[i]])
ygroup[[i]] <- ygroup[[i]][keep, , keep.lib.sizes=FALSE]
ygroup[[i]]$samples$lib.size <- colSums(ygroup[[i]]$counts)
ygroup[[i]] <- calcNormFactors(ygroup[[i]]) #Normalization
}
This is necessary for paired analyses
for(i in names(ygroup)){
ygroup[[i]] <- ygroup[[i]][,!is.na(ygroup[[i]]$samples$group)]
ygroup[[i]] <- ygroup[[i]][,duplicated(ygroup[[i]]$samples$PATID) | duplicated(ygroup[[i]]$samples$PATID, fromLast = TRUE)]
ygroup[[i]] <- ygroup[[i]][,order(ygroup[[i]]$samples$PATID, ygroup[[i]]$samples$group)]
print(i)
print(ifelse(all(gsub('_.*', '', colnames(ygroup[[i]][,grepl("_0", colnames(ygroup[[i]]))])) ==
gsub('_.*', '', colnames(ygroup[[i]][,grepl("_25", colnames(ygroup[[i]]))]))) &
all(ygroup[[i]]$samples$Timepoint == rep(c(0,25),length(ygroup[[i]]$samples$Timepoint)/2)),
"Samples are paired. All is good.","Stop and check order of samples."))
}
## [1] "1.8 x 10^6 PfSPZ"
## [1] "Samples are paired. All is good."
Analysis between Protected and Not Protected within a Dose Group. Subject as random effect using duplicate correlation. Batch, sex, study site, CSP-specific IgG pre-immunization, and malaria during vaccination period as fixed effects. Limma voom can handle this. See following:
https://bioconductor.riken.jp/packages/3.9/bioc/vignettes/variancePartition/inst/doc/dream.html
DeltaDelta <- DeltaNotProtected <- DeltaProtected <- fit <- design <- xgroup
for(i in names(xgroup)){
Subject <- factor(xgroup[[i]]$PATID)
Outcome <- factor(xgroup[[i]]$Outcome, levels = c(1,0), labels = c("NotProtected","Protected"))
Timepoint <- factor(xgroup[[i]]$Timepoint, levels = c(0,25), labels = c("baseline","postvax"))
Age <- xgroup[[i]]$age.vax1
Sex <- as.factor(xgroup[[i]]$SEX)
Batch <- factor(xgroup[[i]]$SEQBATCH, levels = c("Aug2019","Nov2019"))
Site <- factor(xgroup[[i]]$site)
MALdVax <- factor(xgroup[[i]]$mal.dvax, levels = c(0,1), labels = c("noMALdVAX","yesMALdVAX"))
MALdVaxTotal <- as.numeric(ifelse(is.na(xgroup[[i]]$mal.dvax.tot), 0, xgroup[[i]]$mal.dvax.tot))
CSPAb_baseline <- as.numeric(xgroup[[i]]$pfcsp_pre)
CSPAb_baseline[is.na(CSPAb_baseline)] <- median(CSPAb_baseline, na.rm = TRUE) #Impute 2 missing values with median of all samples
#Define protective outcome-specific PfSPZ Vaccination effects and append them to the design matrix
Protected.Postvax <- Outcome == "Protected" & Timepoint=="postvax"
NotProtected.Postvax <- Outcome =="NotProtected" & Timepoint=="postvax"
design[[i]] <- model.matrix(~Batch+Sex+Site+CSPAb_baseline+MALdVaxTotal)
design[[i]] <- cbind(design[[i]], Protected.Postvax, NotProtected.Postvax) #make design matrix according to edgeR 3.5 (p42)
#https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
rownames(design[[i]]) <- xgroup[[i]]$PATID
print(paste0("running model fit for ", i))
print(colnames(design[[i]])) #check colnames
#Use voom() to convert the read counts to log2-cpm, with associated weights, ready for linear modeling
v <- voom(ygroup[[i]], design[[i]])
cor <- duplicateCorrelation(v, design[[i]], block = Subject)
print(cor$consensus)
v <- voom(ygroup[[i]], design[[i]], plot = TRUE, block = Subject, correlation = cor$consensus)
cor <- duplicateCorrelation(v, design[[i]], block = Subject)
print(cor$consensus)
}
## [1] "running model fit for 1.8 x 10^6 PfSPZ"
## [1] "(Intercept)" "BatchNov2019" "SexM"
## [4] "SiteWagai" "CSPAb_baseline" "MALdVaxTotal"
## [7] "Protected.Postvax" "NotProtected.Postvax"
## [1] -0.01283081
## [1] -0.01282869
for(i in names(xgroup)){
fit <- lmFit(v, design[[i]], block = Subject, correlation = cor$consensus)
fit <- eBayes(fit)
DeltaProtected[[i]] <- topTable(fit,n=Inf, coef="Protected.Postvax")
DeltaNotProtected[[i]] <- topTable(fit,n=Inf, coef="NotProtected.Postvax")
fit2 <- contrasts.fit(fit, contrasts = c(rep(0,(ncol(design[[i]])-2)),1,-1))
fit2 <- eBayes(fit2)
DeltaDelta[[i]] <- topTable(fit2,n=Inf)
}
Δ Protected
| GeneSymbol | EnsemblID | descripton_new | logFC | AveExpr | t | P.Value | adj.P.Val | B |
|---|---|---|---|---|---|---|---|---|
| MTMR4 | ENSG00000108389 | myotubularin related protein 4 [Source:HGNC Symbol;Acc:HGNC:7452] | -0.206 | 5.91 | -4.68 | 7.50e-06 | 0.0817 | 3.510 |
| RTF1 | ENSG00000137815 | RTF1 homolog, Paf1/RNA polymerase II complex component [Source:HGNC Symbol;Acc:HGNC:28996] | -0.245 | 5.65 | -4.66 | 8.30e-06 | 0.0817 | 3.420 |
| SLC4A1AP | ENSG00000163798 | solute carrier family 4 member 1 adaptor protein [Source:HGNC Symbol;Acc:HGNC:13813] | -0.240 | 4.82 | -4.55 | 1.29e-05 | 0.0844 | 3.010 |
| ZDHHC14 | ENSG00000175048 | zinc finger DHHC-type palmitoyltransferase 14 [Source:HGNC Symbol;Acc:HGNC:20341] | 0.381 | 4.03 | 4.48 | 1.72e-05 | 0.0844 | 2.730 |
| XRN2 | ENSG00000088930 | 5’-3’ exoribonuclease 2 [Source:HGNC Symbol;Acc:HGNC:12836] | -0.229 | 6.66 | -4.39 | 2.49e-05 | 0.0912 | 2.400 |
| MASP2 | ENSG00000009724 | MBL associated serine protease 2 [Source:HGNC Symbol;Acc:HGNC:6902] | 0.319 | 3.84 | 4.32 | 3.25e-05 | 0.0912 | 2.150 |
| YARS1 | ENSG00000134684 | tyrosyl-tRNA synthetase 1 [Source:HGNC Symbol;Acc:HGNC:12840] | -0.220 | 6.13 | -4.00 | 1.09e-04 | 0.2670 | 1.060 |
| YWHAG | ENSG00000170027 | tyrosine 3-monooxygenase/tryptophan 5-monooxygenase activation protein gamma [Source:HGNC Symbol;Acc:HGNC:12852] | -0.239 | 6.47 | -3.95 | 1.35e-04 | 0.2830 | 0.851 |
| ZRSR2 | ENSG00000169249 | zinc finger CCCH-type, RNA binding motif and serine/arginine rich 2 [Source:HGNC Symbol;Acc:HGNC:23019] | 0.288 | 3.85 | 3.88 | 1.74e-04 | 0.2830 | 0.680 |
| SH3KBP1 | ENSG00000147010 | SH3 domain containing kinase binding protein 1 [Source:HGNC Symbol;Acc:HGNC:13867] | -0.225 | 7.56 | -3.86 | 1.88e-04 | 0.2830 | 0.522 |
| NUDT19 | ENSG00000213965 | nudix hydrolase 19 [Source:HGNC Symbol;Acc:HGNC:32036] | -0.324 | 4.04 | -3.82 | 2.16e-04 | 0.2830 | 0.491 |
| COPG1 | ENSG00000181789 | COPI coat complex subunit gamma 1 [Source:HGNC Symbol;Acc:HGNC:2236] | -0.291 | 5.90 | -3.83 | 2.07e-04 | 0.2830 | 0.477 |
| ZBTB39 | ENSG00000166860 | zinc finger and BTB domain containing 39 [Source:HGNC Symbol;Acc:HGNC:29014] | 0.216 | 5.13 | 3.80 | 2.28e-04 | 0.2830 | 0.418 |
| NCL | ENSG00000115053 | nucleolin [Source:HGNC Symbol;Acc:HGNC:7667] | -0.247 | 7.31 | -3.81 | 2.21e-04 | 0.2830 | 0.381 |
| TXNRD1 | ENSG00000198431 | thioredoxin reductase 1 [Source:HGNC Symbol;Acc:HGNC:12437] | -0.232 | 5.96 | -3.80 | 2.30e-04 | 0.2830 | 0.380 |
Δ Not Protected
| GeneSymbol | EnsemblID | descripton_new | logFC | AveExpr | t | P.Value | adj.P.Val | B |
|---|---|---|---|---|---|---|---|---|
| TMPO-AS1 | ENSG00000257167 | TMPO antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:44158] | 0.351 | 5.11 | 4.16 | 6.05e-05 | 0.474 | 1.60000 |
| PPM1N | ENSG00000213889 | protein phosphatase, Mg2+/Mn2+ dependent 1N (putative) [Source:HGNC Symbol;Acc:HGNC:26845] | 0.723 | 2.11 | 4.31 | 3.33e-05 | 0.474 | 1.33000 |
| PSMG3 | ENSG00000157778 | proteasome assembly chaperone 3 [Source:HGNC Symbol;Acc:HGNC:22420] | 0.435 | 2.71 | 3.85 | 1.91e-04 | 0.532 | 0.25200 |
| STK4 | ENSG00000101109 | serine/threonine kinase 4 [Source:HGNC Symbol;Acc:HGNC:11408] | -0.176 | 9.36 | -3.71 | 3.18e-04 | 0.532 | 0.12300 |
| MXRA7 | ENSG00000182534 | matrix remodeling associated 7 [Source:HGNC Symbol;Acc:HGNC:7541] | 0.728 | 3.30 | 3.70 | 3.26e-04 | 0.532 | -0.00767 |
| RP11-536K7.3 | ENSG00000232807 | novel transcript, antisense to FBXO18 | -0.265 | 3.57 | -3.68 | 3.52e-04 | 0.532 | -0.06060 |
| AC007283.5 | ENSG00000234431 | novel transcript | -0.403 | 3.94 | -3.60 | 4.59e-04 | 0.584 | -0.23000 |
| ALDH6A1 | ENSG00000119711 | aldehyde dehydrogenase 6 family member A1 [Source:HGNC Symbol;Acc:HGNC:7179] | -0.359 | 5.02 | -3.55 | 5.47e-04 | 0.597 | -0.31600 |
| BATF | ENSG00000156127 | basic leucine zipper ATF-like transcription factor [Source:HGNC Symbol;Acc:HGNC:958] | 0.720 | 1.97 | 3.73 | 2.99e-04 | 0.532 | -0.34800 |
| C1QBP | ENSG00000108561 | complement C1q binding protein [Source:HGNC Symbol;Acc:HGNC:1243] | 0.305 | 6.01 | 3.51 | 6.35e-04 | 0.635 | -0.45900 |
| RNF38 | ENSG00000137075 | ring finger protein 38 [Source:HGNC Symbol;Acc:HGNC:18052] | -0.197 | 7.32 | -3.47 | 7.36e-04 | 0.635 | -0.60400 |
| RPS19BP1 | ENSG00000187051 | ribosomal protein S19 binding protein 1 [Source:HGNC Symbol;Acc:HGNC:28749] | 0.305 | 4.04 | 3.43 | 8.35e-04 | 0.635 | -0.69400 |
| HIKESHI | ENSG00000149196 | heat shock protein nuclear import factor hikeshi [Source:HGNC Symbol;Acc:HGNC:26938] | 0.233 | 4.30 | 3.41 | 8.77e-04 | 0.635 | -0.72600 |
| MPDU1 | ENSG00000129255 | mannose-P-dolichol utilization defect 1 [Source:HGNC Symbol;Acc:HGNC:7207] | 0.289 | 5.05 | 3.41 | 9.01e-04 | 0.635 | -0.74600 |
| CKS2 | ENSG00000123975 | CDC28 protein kinase regulatory subunit 2 [Source:HGNC Symbol;Acc:HGNC:2000] | 0.684 | 1.86 | 3.57 | 5.24e-04 | 0.597 | -0.75500 |
Δ Protected vs. Δ Not Protected
| GeneSymbol | EnsemblID | descripton_new | logFC | AveExpr | t | P.Value | adj.P.Val | B |
|---|---|---|---|---|---|---|---|---|
| RNF144A | ENSG00000151692 | ring finger protein 144A [Source:HGNC Symbol;Acc:HGNC:20457] | 0.320 | 6.29 | 3.52 | 0.000605 | 0.954 | -1.76 |
| MED28 | ENSG00000118579 | mediator complex subunit 28 [Source:HGNC Symbol;Acc:HGNC:24628] | 0.177 | 7.74 | 3.41 | 0.000884 | 0.954 | -1.79 |
| RP11-440L14.1 | ENSG00000249592 | novel transcript, antisense to PCGF3 | 0.273 | 4.53 | 3.78 | 0.000246 | 0.954 | -1.97 |
| KAT7 | ENSG00000136504 | lysine acetyltransferase 7 [Source:HGNC Symbol;Acc:HGNC:17016] | 0.140 | 7.25 | 3.11 | 0.002330 | 0.954 | -2.34 |
| ANXA4 | ENSG00000196975 | annexin A4 [Source:HGNC Symbol;Acc:HGNC:542] | -0.338 | 5.70 | -3.22 | 0.001670 | 0.954 | -2.35 |
| RFX5 | ENSG00000143390 | regulatory factor X5 [Source:HGNC Symbol;Acc:HGNC:9986] | -0.153 | 6.22 | -3.15 | 0.002080 | 0.954 | -2.38 |
| PSMC2 | ENSG00000161057 | proteasome 26S subunit, ATPase 2 [Source:HGNC Symbol;Acc:HGNC:9548] | -0.261 | 5.40 | -3.22 | 0.001650 | 0.954 | -2.42 |
| C1QBP | ENSG00000108561 | complement C1q binding protein [Source:HGNC Symbol;Acc:HGNC:1243] | -0.302 | 6.01 | -3.12 | 0.002290 | 0.954 | -2.45 |
| LDHA | ENSG00000134333 | lactate dehydrogenase A [Source:HGNC Symbol;Acc:HGNC:6535] | -0.244 | 7.21 | -3.04 | 0.002890 | 0.954 | -2.45 |
| VDAC1 | ENSG00000213585 | voltage dependent anion channel 1 [Source:HGNC Symbol;Acc:HGNC:12669] | -0.253 | 5.75 | -3.15 | 0.002090 | 0.954 | -2.45 |
| SLBP | ENSG00000163950 | stem-loop binding protein [Source:HGNC Symbol;Acc:HGNC:10904] | -0.271 | 5.14 | -3.23 | 0.001580 | 0.954 | -2.47 |
| SEPTIN2 | ENSG00000168385 | septin 2 [Source:HGNC Symbol;Acc:HGNC:7729] | -0.183 | 7.37 | -2.99 | 0.003370 | 0.954 | -2.52 |
| MAML2 | ENSG00000184384 | mastermind like transcriptional coactivator 2 [Source:HGNC Symbol;Acc:HGNC:16259] | 0.240 | 7.32 | 2.99 | 0.003430 | 0.954 | -2.54 |
| H2AZ1 | ENSG00000164032 | H2A.Z variant histone 1 [Source:HGNC Symbol;Acc:HGNC:4741] | -0.336 | 6.02 | -3.05 | 0.002830 | 0.954 | -2.55 |
| CELF2-AS1 | ENSG00000181800 | CELF2 antisense RNA 1 [Source:HGNC Symbol;Acc:HGNC:23515] | 0.272 | 6.62 | 3.00 | 0.003320 | 0.954 | -2.57 |
Rank genes by -log10(P.Value)*sign(logFC). Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function.
set.seed(23)
#restructure dataframes
degtabs <- setNames(as.list(c(1:4)), c("Placebo", "4.5 x 10^5 PfSPZ", "9.0 x 10^5 PfSPZ", "1.8 x 10^6 PfSPZ"))
for(i in names(ygroup)){
degtabs[[i]] <- setNames(as.list(c(1:3)), c("DeltaDelta", "DeltaP", "DeltaNP"))
degtabs[[i]]$DeltaDelta <- DeltaDelta[[i]]
degtabs[[i]]$DeltaP <- DeltaProtected[[i]]
degtabs[[i]]$DeltaNP <- DeltaNotProtected[[i]]
}
ranks <- degtabs
for(i in names(degtabs)){
for(j in names(degtabs[[i]])){
ranks[[i]][[j]] <- degtabs[[i]][[j]] %>%
mutate(rankmetric = -log10(.$P.Value)*sign(.$logFC)) %>%
dplyr::select(GeneSymbol,rankmetric) %>%
na.omit() %>%
distinct() %>%
group_by(GeneSymbol) %>%
summarize(rankmetric = mean(rankmetric)) %>%
arrange(desc(rankmetric))
}
}
#Run fgsea from fgsea package using NamedGeneRankList2GseaTable helper function
devtools::source_url("https://github.com/TranLab/ModuleLists/blob/main/NamedGeneRankList2GseaTable.R?raw=TRUE")
GSEAtab <- ranks
for(i in names(ranks)){
for(j in names(ranks[[i]])){
print(paste0("starting ", j, " in ", i))
GSEAtab[[i]][[j]] <- NamedGeneRankList2GseaTable(rankedgenes = deframe(ranks[[i]][[j]]),
geneset = "all",
output_directory = tempdir(),
filename_prefix = "GSEA",
sampleSize = 101,
minSize = 20,
maxSize = Inf,
scoreType = "std") %>%
as_tibble() %>%
arrange(desc(NES)) %>%
dplyr::select(module_type, pathway, ES, NES, size, leadingEdge, pval, padj) %>%
mutate(leadingEdge = gsub("^c\\(|\\)$", "", leadingEdge)) %>%
mutate(leadingEdge = gsub('"', "", leadingEdge)) %>%
arrange(padj)
#closeAllConnections() #this line prevents using up all url connections
}
}
## [1] "starting DeltaDelta in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "starting DeltaP in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "starting DeltaNP in 1.8 x 10^6 PfSPZ"
## [1] "Running fgsea for MonacoModules signatures"
## [1] "Running fgsea for highBTMs signatures"
## [1] "Running fgsea for lowBTMs signatures"
## [1] "Running fgsea for BloodGen3Module signatures"
## [1] "Running fgsea for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C8_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C7_all_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running fgsea for MSigDB_C5_GO_mf_v7.4 signatures"
Filter based on FDR<5%.
Red: goes up post-vax relative to baseline Blue: goes down post-vax relative to baseline
Identify genes that were differentially induced in uninfected and infected children receiving 1.8x10^6 PfSPZ Vaccine
Merge ΔP vs ΔNP, ΔP, and ΔNP tables.
Criteria: ΔP vs ΔNP P val < 0.005 ΔP logFC > 0 and ΔP P val < 0.05 ΔNP logFC < 0 and ΔNP P val < 0.10
AllDegs1 <- bind_rows(DeltaDelta, .id = "Treatment") %>%
mutate(Comparison = "DeltaDelta")
AllDegs2 <- bind_rows(DeltaProtected, .id = "Treatment") %>%
mutate(Comparison = "DeltaProtected")
AllDegs3 <- bind_rows(DeltaNotProtected, .id = "Treatment") %>%
mutate(Comparison = "DeltaNotProtected")
AllDegs <- bind_rows(AllDegs1, AllDegs2, AllDegs3) %>%
dplyr::select(Treatment, Comparison, EnsemblID, GeneSymbol, descripton_new, logFC, AveExpr, "t", P.Value, adj.P.Val) %>%
dplyr::rename(PValue = P.Value) %>%
dplyr::rename(FDR = adj.P.Val) %>%
dplyr::rename(Description = "descripton_new")
gois <- AllDegs %>%
filter(Treatment == "1.8 x 10^6 PfSPZ") %>%
droplevels() %>%
dplyr::select(Treatment, Comparison, EnsemblID, GeneSymbol, Description, logFC, PValue, FDR) %>%
pivot_wider(., names_from = Comparison, names_glue = "{Comparison}_{.value}", values_from = c(logFC, PValue, FDR)) %>%
filter((DeltaDelta_PValue < 0.005 & DeltaDelta_logFC > 2) &
(DeltaProtected_logFC > 0 & DeltaProtected_PValue < 0.05)) %>%
arrange(DeltaDelta_PValue)
gois_ms <- gois %>%
dplyr::select(GeneSymbol, DeltaDelta_logFC, DeltaDelta_PValue, DeltaProtected_logFC, DeltaProtected_PValue, DeltaNotProtected_logFC, DeltaNotProtected_PValue)
gois_ms %>%
mutate_if(is.numeric, signif,3) %>%
knitr::kable()
| GeneSymbol | DeltaDelta_logFC | DeltaDelta_PValue | DeltaProtected_logFC | DeltaProtected_PValue | DeltaNotProtected_logFC | DeltaNotProtected_PValue |
|---|---|---|---|---|---|---|
| FSTL4 | 2.09 | 0.00136 | 0.878 | 0.0409 | -1.21 | 0.0529 |
library(gtools)
#from google drive
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("17xx0YaggLiyWdJc9rqA1nkPSE7dj05p0"), path = temp, overwrite = TRUE)
cpm_delta <- readRDS(file = dl$local_path)
cpm_delta <- cpm_delta[fData(cpm_delta)$GeneSymbol %in% gois_ms$GeneSymbol, ]
cpm_delta <- cpm_delta[,cpm_delta$treat != "Placebo"] #remove placebo---we want to just look at all PfSPZ doses
pData(cpm_delta) <- droplevels(pData(cpm_delta))
survdat <- Biobase::exprs(cpm_delta) %>%
as.data.frame() %>%
rownames_to_column(var = "EnsemblID") %>%
left_join(., fData(cpm_delta) %>%
dplyr::select(EnsemblID, GeneSymbol),
by = "EnsemblID") %>%
dplyr::select(-c(EnsemblID)) %>%
dplyr::select(GeneSymbol, everything()) %>%
pivot_longer(2:ncol(.), names_to = "PATID", values_to = "expression") %>%
group_by(GeneSymbol) %>%
mutate(exprs_up_down = factor(ifelse(expression>log2(1), "induced", "not induced"), levels = c("not induced", "induced"))) %>%
mutate(exprs_terciles = factor(quantcut(expression, q=3, na.rm=TRUE), labels = c("bottom_tercile", "middle_tercile", "top_tercile"))) %>%
mutate(exprs_median = factor(quantcut(expression, q=2, na.rm=TRUE), labels = c("bottom", "top"))) %>%
left_join(., fData(cpm_delta) %>%
dplyr::select(c(GeneSymbol, description)), by = "GeneSymbol") %>%
left_join(., pData(cpm_delta)%>%
dplyr::select(c(PATID, Timepoint, treat, site, SEX, pfcsp_pre, age.vax1, mal.vax.1, mal.dvax, mal.dvax.tot, contains("atp"))), by = "PATID") %>%
droplevels()
survdat$tte.mal.atp.6 <- survdat$tte.mal.atp.6+1
survdat[is.na(survdat$pfcsp_pre),]$pfcsp_pre <- median(survdat$pfcsp_pre, na.rm = TRUE) #Impute 2 missing values with median of all samples
library(survival)
library(survminer)
myFollowup <- 6 #3 or 6
N <- length(unique(survdat$PATID))
survdat.filtered <- fit <- mySurvPlot <- c()
for(i in gois_ms$GeneSymbol){
survdat.filtered[[i]] <- survdat %>%
filter(GeneSymbol == i)
fit[[i]] <- survfit(Surv(tte.mal.atp.6, mal.atp.6) ~ exprs_up_down, data= survdat.filtered[[i]])
mySurvPlot[[i]] <- ggsurvplot(fit[[i]], risk.table = TRUE, pval = TRUE,
break.time.by = 30, title = unique(survdat.filtered[[i]]$GeneSymbol),
xlab = "Days since vaccination",
ylab = "% free of parasitemia",
censor = TRUE,
palette = "lancet",
conf.int = TRUE,
conf.int.alpha = 0.1,
font.family = "Arial")
}
print(mySurvPlot$FSTL4, newpage = FALSE)
cox.res <- c()
for(i in names(survdat.filtered)){
survdat.cp <- survdat.filtered[[i]]
survdat.cp$tte.mal.atp.6 <- survdat.cp$tte.mal.atp.6 + 1 #add one day to each value given zero values
survdat.cp$start <- 0
survdat.cp$stop <- survdat.cp$tte.mal.atp.6
subj.pcrp <- rep(survdat.cp$start, as.vector(table(survdat.cp$PATID)))
survdat.cp$start1 <- survdat.cp$start-subj.pcrp
survdat.cp$stop1 <- survdat.cp$stop-subj.pcrp
survdat.cp <- survdat.cp %>%
droplevels()
fit <- coxph(Surv(start1, stop1, mal.atp.6) ~ exprs_up_down + factor(SEX) + factor(site) + log10(pfcsp_pre+1) + mal.dvax.tot + factor(treat), data = survdat.cp)
#vfit <- cox.zph(fit)
#par(mfrow=c(3,2))
#plot(vfit)
#cox.zph(fit)
cox.res[[i]] <- as.data.frame(cbind(fit$n, fit$nevent, summary(fit)$conf.int[,c(1,3:4)], summary(fit)$coefficient[,5]))
colnames(cox.res[[i]]) <- c("n","number of events","HR", "LCI", "UCI", "P value")
cox.res[[i]] <- cox.res[[i]] %>%
as.matrix() %>%
signif(., digits = 3) %>%
as.data.frame()
cox.res[[i]]$Gene <- i
rownames(cox.res[[i]]) <- c("upregulated post-vax (ref:downregulated post-vax)",
"gender (ref:female)",
"Wagai (ref:Siaya)",
"CSP-specific IgG baseline",
"number of Pf infections during vaccination period",
"9.0x10^5 PfSPZ (ref:4.5x10^5 PfSPZ)",
"1.8x10^6 PfSPZ (ref:4.5x10^5 PfSPZ)")
cox.res[[i]] <- cox.res[[i]] %>%
rownames_to_column(var = "Covariate") %>%
dplyr::select(Gene, everything())
}
cox.res.bound <- bind_rows(cox.res, .id = "Gene") %>%
mutate(Significant = ifelse(`P value`<0.001, "***",
ifelse(`P value`<0.01, "**",
ifelse(`P value`<0.05, "*", ""))))
Only display FSTL4 here.
cox.res.bound %>%
filter(Gene == "FSTL4") %>%
knitr::kable()
| Gene | Covariate | n | number of events | HR | LCI | UCI | P value | Significant |
|---|---|---|---|---|---|---|---|---|
| FSTL4 | upregulated post-vax (ref:downregulated post-vax) | 169 | 107 | 0.643 | 0.433 | 0.954 | 0.02820 | * |
| FSTL4 | gender (ref:female) | 169 | 107 | 0.920 | 0.618 | 1.370 | 0.67900 | |
| FSTL4 | Wagai (ref:Siaya) | 169 | 107 | 0.629 | 0.421 | 0.939 | 0.02340 | * |
| FSTL4 | CSP-specific IgG baseline | 169 | 107 | 1.000 | 0.795 | 1.260 | 0.99900 | |
| FSTL4 | number of Pf infections during vaccination period | 169 | 107 | 1.250 | 1.070 | 1.460 | 0.00465 | ** |
| FSTL4 | 9.0x10^5 PfSPZ (ref:4.5x10^5 PfSPZ) | 169 | 107 | 1.100 | 0.693 | 1.760 | 0.67700 | |
| FSTL4 | 1.8x10^6 PfSPZ (ref:4.5x10^5 PfSPZ) | 169 | 107 | 0.899 | 0.551 | 1.470 | 0.66900 |