Perform weighted gene correlation network analysis as originally described by Horvath et al. on pre-immunization baseline transcriptomic data from the KSPZV1 clinical trial.
References:
https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/ https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/JMiller/Tutorial%20document.pdf
Reviewer requested that we provide scatter plots for the correlations between modules and phenotypes. Review of the original code revealed that using Pearson’s or Spearman’s correlation to assess the relationship between modules and binary traits was not appropriate. Thus, we performed limma (empirical Bayes moderated t test) and presented differences as beeswarm plots with mean and 95%CI.
library(edgeR)
library(readxl)
library(EDASeq)
library(Biobase)
library(WGCNA)
library(ape)
library(Cairo)
library(CorLevelPlot)
library(tidyverse)
library(igraph)
library(remotes)
library(fgsea)
library(data.table)
library(ggplot2)
library(viridis)
library(ggpubr)
library(googledrive)
allowWGCNAThreads()
## Allowing multi-threading with up to 10 threads.
library(broom)
library(ComplexHeatmap)
myCor = "bicor"
power <- 12.5 #determined by evaluating previous plot using pickSoftThreshold
myMergeCutHeight <- 0.05
myDeepSplit <- 2
minModSize <- 20
enforceMMS <- FALSE
cor.pval <- 0.05
#local file: "PfSPZ_cpm_ExpressionSet_244x21815_AllGroups_bothBatches_0_rmBatchFX_06082021_TMT_logTRUE.rds"
temp <- tempfile(fileext = ".rds")
dl <- drive_download(
as_id("1togaBlNIxiDj16FyXTC-r7Qw0A9cG2az"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
dim(x)
## Features Samples
## 21815 244
WGCNA_matrix <- t(exprs(x)) #make correlations only with full eset
blockSize(ncol(WGCNA_matrix), rectangularBlocks = TRUE, maxMemoryAllocation = 4^31)
## [1] 21815
par(mfrow=c(1,1))
plotClusterTreeSamples(datExpr=WGCNA_matrix)
## NULL
#used bicor for Timepoint 0
powers <- seq(4,20,by=0.5)
sft <- pickSoftThreshold(WGCNA_matrix, powerVector = powers, corFnc = myCor, verbose = 5, networkType ="signed", blockSize = ncol(WGCNA_matrix))
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 21815 of 21815
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 4.0 0.0273 -0.729 0.904 1860.00 1850.00 2780.0
## 2 4.5 0.0749 -1.090 0.939 1400.00 1390.00 2220.0
## 3 5.0 0.1520 -1.450 0.955 1070.00 1050.00 1800.0
## 4 5.5 0.2150 -1.650 0.959 817.00 801.00 1470.0
## 5 6.0 0.3160 -1.980 0.964 629.00 611.00 1230.0
## 6 6.5 0.4060 -2.230 0.969 488.00 469.00 1050.0
## 7 7.0 0.4940 -2.410 0.974 380.00 362.00 894.0
## 8 7.5 0.5660 -2.500 0.979 299.00 280.00 770.0
## 9 8.0 0.6340 -2.590 0.982 236.00 218.00 667.0
## 10 8.5 0.6880 -2.590 0.987 187.00 171.00 582.0
## 11 9.0 0.7260 -2.590 0.988 150.00 134.00 511.0
## 12 9.5 0.7660 -2.570 0.991 121.00 106.00 450.0
## 13 10.0 0.7950 -2.540 0.993 97.70 84.00 399.0
## 14 10.5 0.8100 -2.560 0.990 79.60 66.70 357.0
## 15 11.0 0.8240 -2.550 0.989 65.20 53.40 321.0
## 16 11.5 0.8350 -2.530 0.989 53.80 42.80 290.0
## 17 12.0 0.8470 -2.500 0.991 44.60 34.50 262.0
## 18 12.5 0.8570 -2.460 0.991 37.10 27.80 238.0
## 19 13.0 0.8670 -2.420 0.994 31.10 22.50 216.0
## 20 13.5 0.8760 -2.390 0.996 26.20 18.30 197.0
## 21 14.0 0.8800 -2.360 0.997 22.20 14.90 180.0
## 22 14.5 0.8800 -2.350 0.995 18.90 12.10 165.0
## 23 15.0 0.8840 -2.320 0.996 16.10 9.89 151.0
## 24 15.5 0.8850 -2.290 0.994 13.90 8.08 139.0
## 25 16.0 0.8830 -2.270 0.992 11.90 6.63 128.0
## 26 16.5 0.8810 -2.260 0.989 10.30 5.45 118.0
## 27 17.0 0.8830 -2.220 0.989 8.99 4.50 109.0
## 28 17.5 0.8850 -2.200 0.988 7.84 3.71 101.0
## 29 18.0 0.8850 -2.170 0.986 6.87 3.07 93.1
## 30 18.5 0.8880 -2.140 0.987 6.04 2.55 86.3
## 31 19.0 0.8750 -2.140 0.980 5.32 2.11 80.0
## 32 19.5 0.8800 -2.110 0.982 4.71 1.75 74.3
## 33 20.0 0.8820 -2.070 0.981 4.18 1.47 69.1
sft$powerEstimate
## [1] 12.5
par(mfrow=c(1,1))
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R²',
type='n', main = paste('Scale independence'));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=1,col='red'); abline(h=0.90,col='red')
#use blockwiseModules given many genes
net <- blockwiseModules(WGCNA_matrix,
power=power,
deepSplit= myDeepSplit,
minModuleSize=minModSize,
TOMType="none",
mergeCutHeight=myMergeCutHeight,
TOMDenom="mean",
detectCutHeight=0.995,
corType=myCor,
networkType="signed",
pamStage=TRUE,
pamRespectsDendro=TRUE,
reassignThresh=0.05,
verbose=5,
saveTOMs=FALSE,
maxBlockSize=ncol(WGCNA_matrix),
nThreads = 0)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will use 10 parallel threads.
## Fraction of slow calculations: 0.000000
##
## ....clustering..
## ....detecting modules..
## ..done.
## ....calculating module eigengenes..
## moduleEigengenes : Working on ME for module 1
## moduleEigengenes : Working on ME for module 2
## moduleEigengenes : Working on ME for module 3
## moduleEigengenes : Working on ME for module 4
## moduleEigengenes : Working on ME for module 5
## moduleEigengenes : Working on ME for module 6
## moduleEigengenes : Working on ME for module 7
## moduleEigengenes : Working on ME for module 8
## moduleEigengenes : Working on ME for module 9
## moduleEigengenes : Working on ME for module 10
## moduleEigengenes : Working on ME for module 11
## moduleEigengenes : Working on ME for module 12
## moduleEigengenes : Working on ME for module 13
## moduleEigengenes : Working on ME for module 14
## moduleEigengenes : Working on ME for module 15
## moduleEigengenes : Working on ME for module 16
## moduleEigengenes : Working on ME for module 17
## moduleEigengenes : Working on ME for module 18
## moduleEigengenes : Working on ME for module 19
## moduleEigengenes : Working on ME for module 20
## moduleEigengenes : Working on ME for module 21
## moduleEigengenes : Working on ME for module 22
## moduleEigengenes : Working on ME for module 23
## moduleEigengenes : Working on ME for module 24
## moduleEigengenes : Working on ME for module 25
## moduleEigengenes : Working on ME for module 26
## moduleEigengenes : Working on ME for module 27
## moduleEigengenes : Working on ME for module 28
## moduleEigengenes : Working on ME for module 29
## moduleEigengenes : Working on ME for module 30
## moduleEigengenes : Working on ME for module 31
## moduleEigengenes : Working on ME for module 32
## moduleEigengenes : Working on ME for module 33
## moduleEigengenes : Working on ME for module 34
## moduleEigengenes : Working on ME for module 35
## moduleEigengenes : Working on ME for module 36
## moduleEigengenes : Working on ME for module 37
## moduleEigengenes : Working on ME for module 38
## moduleEigengenes : Working on ME for module 39
## moduleEigengenes : Working on ME for module 40
## moduleEigengenes : Working on ME for module 41
## moduleEigengenes : Working on ME for module 42
## moduleEigengenes : Working on ME for module 43
## moduleEigengenes : Working on ME for module 44
## moduleEigengenes : Working on ME for module 45
## moduleEigengenes : Working on ME for module 46
## moduleEigengenes : Working on ME for module 47
## moduleEigengenes : Working on ME for module 48
## moduleEigengenes : Working on ME for module 49
## moduleEigengenes : Working on ME for module 50
## moduleEigengenes : Working on ME for module 51
## moduleEigengenes : Working on ME for module 52
## moduleEigengenes : Working on ME for module 53
## moduleEigengenes : Working on ME for module 54
## moduleEigengenes : Working on ME for module 55
## moduleEigengenes : Working on ME for module 56
## moduleEigengenes : Working on ME for module 57
## moduleEigengenes : Working on ME for module 58
## moduleEigengenes : Working on ME for module 59
## moduleEigengenes : Working on ME for module 60
## moduleEigengenes : Working on ME for module 61
## moduleEigengenes : Working on ME for module 62
## moduleEigengenes : Working on ME for module 63
## moduleEigengenes : Working on ME for module 64
## moduleEigengenes : Working on ME for module 65
## moduleEigengenes : Working on ME for module 66
## moduleEigengenes : Working on ME for module 67
## moduleEigengenes : Working on ME for module 68
## moduleEigengenes : Working on ME for module 69
## moduleEigengenes : Working on ME for module 70
## moduleEigengenes : Working on ME for module 71
## moduleEigengenes : Working on ME for module 72
## moduleEigengenes : Working on ME for module 73
## moduleEigengenes : Working on ME for module 74
## moduleEigengenes : Working on ME for module 75
## moduleEigengenes : Working on ME for module 76
## moduleEigengenes : Working on ME for module 77
## moduleEigengenes : Working on ME for module 78
## moduleEigengenes : Working on ME for module 79
## moduleEigengenes : Working on ME for module 80
## moduleEigengenes : Working on ME for module 81
## moduleEigengenes : Working on ME for module 82
## moduleEigengenes : Working on ME for module 83
## moduleEigengenes : Working on ME for module 84
## moduleEigengenes : Working on ME for module 85
## moduleEigengenes : Working on ME for module 86
## moduleEigengenes : Working on ME for module 87
## moduleEigengenes : Working on ME for module 88
## moduleEigengenes : Working on ME for module 89
## moduleEigengenes : Working on ME for module 90
## moduleEigengenes : Working on ME for module 91
## moduleEigengenes : Working on ME for module 92
## moduleEigengenes : Working on ME for module 93
## moduleEigengenes : Working on ME for module 94
## moduleEigengenes : Working on ME for module 95
## moduleEigengenes : Working on ME for module 96
## moduleEigengenes : Working on ME for module 97
## moduleEigengenes : Working on ME for module 98
## moduleEigengenes : Working on ME for module 99
## moduleEigengenes : Working on ME for module 100
## moduleEigengenes : Working on ME for module 101
## moduleEigengenes : Working on ME for module 102
## moduleEigengenes : Working on ME for module 103
## moduleEigengenes : Working on ME for module 104
## moduleEigengenes : Working on ME for module 105
## moduleEigengenes : Working on ME for module 106
## moduleEigengenes : Working on ME for module 107
## moduleEigengenes : Working on ME for module 108
## moduleEigengenes : Working on ME for module 109
## moduleEigengenes : Working on ME for module 110
## moduleEigengenes : Working on ME for module 111
## moduleEigengenes : Working on ME for module 112
## moduleEigengenes : Working on ME for module 113
## moduleEigengenes : Working on ME for module 114
## moduleEigengenes : Working on ME for module 115
## moduleEigengenes : Working on ME for module 116
## ....checking kME in modules..
## ..removing 29 genes from module 1 because their KME is too low.
## ..removing 10 genes from module 2 because their KME is too low.
## ..removing 1 genes from module 8 because their KME is too low.
## ..removing 1 genes from module 12 because their KME is too low.
## ..removing 2 genes from module 13 because their KME is too low.
## ..removing 1 genes from module 27 because their KME is too low.
## ..removing 1 genes from module 37 because their KME is too low.
## ..removing 1 genes from module 49 because their KME is too low.
## ..removing 1 genes from module 79 because their KME is too low.
## ..deleting module 100: of 40 total genes in the module
## only 0 have the requisite high correlation with the eigengene.
## ..removing 2 genes from module 114 because their KME is too low.
## ..reassigning 230 genes from module 1 to modules with higher KME.
## ..reassigning 4 genes from module 2 to modules with higher KME.
## ..reassigning 99 genes from module 3 to modules with higher KME.
## ..reassigning 128 genes from module 4 to modules with higher KME.
## ..reassigning 208 genes from module 5 to modules with higher KME.
## ..reassigning 30 genes from module 6 to modules with higher KME.
## ..reassigning 102 genes from module 7 to modules with higher KME.
## ..reassigning 69 genes from module 8 to modules with higher KME.
## ..reassigning 47 genes from module 9 to modules with higher KME.
## ..reassigning 47 genes from module 10 to modules with higher KME.
## ..reassigning 20 genes from module 11 to modules with higher KME.
## ..reassigning 30 genes from module 12 to modules with higher KME.
## ..reassigning 17 genes from module 13 to modules with higher KME.
## ..reassigning 33 genes from module 14 to modules with higher KME.
## ..reassigning 35 genes from module 15 to modules with higher KME.
## ..reassigning 49 genes from module 16 to modules with higher KME.
## ..reassigning 46 genes from module 17 to modules with higher KME.
## ..reassigning 41 genes from module 18 to modules with higher KME.
## ..reassigning 26 genes from module 19 to modules with higher KME.
## ..reassigning 44 genes from module 20 to modules with higher KME.
## ..reassigning 47 genes from module 21 to modules with higher KME.
## ..reassigning 38 genes from module 22 to modules with higher KME.
## ..reassigning 48 genes from module 23 to modules with higher KME.
## ..reassigning 34 genes from module 24 to modules with higher KME.
## ..reassigning 51 genes from module 25 to modules with higher KME.
## ..reassigning 36 genes from module 26 to modules with higher KME.
## ..reassigning 54 genes from module 27 to modules with higher KME.
## ..reassigning 30 genes from module 28 to modules with higher KME.
## ..reassigning 20 genes from module 29 to modules with higher KME.
## ..reassigning 24 genes from module 30 to modules with higher KME.
## ..reassigning 17 genes from module 31 to modules with higher KME.
## ..reassigning 32 genes from module 32 to modules with higher KME.
## ..reassigning 20 genes from module 33 to modules with higher KME.
## ..reassigning 20 genes from module 34 to modules with higher KME.
## ..reassigning 45 genes from module 35 to modules with higher KME.
## ..reassigning 21 genes from module 36 to modules with higher KME.
## ..reassigning 17 genes from module 37 to modules with higher KME.
## ..reassigning 28 genes from module 38 to modules with higher KME.
## ..reassigning 20 genes from module 39 to modules with higher KME.
## ..reassigning 42 genes from module 40 to modules with higher KME.
## ..reassigning 32 genes from module 41 to modules with higher KME.
## ..reassigning 36 genes from module 42 to modules with higher KME.
## ..reassigning 19 genes from module 43 to modules with higher KME.
## ..reassigning 19 genes from module 44 to modules with higher KME.
## ..reassigning 28 genes from module 45 to modules with higher KME.
## ..reassigning 18 genes from module 46 to modules with higher KME.
## ..reassigning 16 genes from module 47 to modules with higher KME.
## ..reassigning 23 genes from module 48 to modules with higher KME.
## ..reassigning 15 genes from module 49 to modules with higher KME.
## ..reassigning 21 genes from module 50 to modules with higher KME.
## ..reassigning 16 genes from module 51 to modules with higher KME.
## ..reassigning 16 genes from module 52 to modules with higher KME.
## ..reassigning 32 genes from module 53 to modules with higher KME.
## ..reassigning 23 genes from module 54 to modules with higher KME.
## ..reassigning 15 genes from module 55 to modules with higher KME.
## ..reassigning 22 genes from module 56 to modules with higher KME.
## ..reassigning 19 genes from module 57 to modules with higher KME.
## ..reassigning 14 genes from module 58 to modules with higher KME.
## ..reassigning 27 genes from module 59 to modules with higher KME.
## ..reassigning 13 genes from module 60 to modules with higher KME.
## ..reassigning 12 genes from module 61 to modules with higher KME.
## ..reassigning 7 genes from module 62 to modules with higher KME.
## ..reassigning 25 genes from module 63 to modules with higher KME.
## ..reassigning 23 genes from module 64 to modules with higher KME.
## ..reassigning 9 genes from module 65 to modules with higher KME.
## ..reassigning 15 genes from module 66 to modules with higher KME.
## ..reassigning 19 genes from module 67 to modules with higher KME.
## ..reassigning 17 genes from module 68 to modules with higher KME.
## ..reassigning 8 genes from module 69 to modules with higher KME.
## ..reassigning 9 genes from module 70 to modules with higher KME.
## ..reassigning 19 genes from module 71 to modules with higher KME.
## ..reassigning 24 genes from module 72 to modules with higher KME.
## ..reassigning 27 genes from module 73 to modules with higher KME.
## ..reassigning 14 genes from module 74 to modules with higher KME.
## ..reassigning 8 genes from module 75 to modules with higher KME.
## ..reassigning 10 genes from module 76 to modules with higher KME.
## ..reassigning 7 genes from module 77 to modules with higher KME.
## ..reassigning 12 genes from module 78 to modules with higher KME.
## ..reassigning 7 genes from module 79 to modules with higher KME.
## ..reassigning 12 genes from module 80 to modules with higher KME.
## ..reassigning 9 genes from module 81 to modules with higher KME.
## ..reassigning 9 genes from module 82 to modules with higher KME.
## ..reassigning 7 genes from module 83 to modules with higher KME.
## ..reassigning 14 genes from module 84 to modules with higher KME.
## ..reassigning 2 genes from module 85 to modules with higher KME.
## ..reassigning 7 genes from module 86 to modules with higher KME.
## ..reassigning 4 genes from module 87 to modules with higher KME.
## ..reassigning 10 genes from module 88 to modules with higher KME.
## ..reassigning 6 genes from module 89 to modules with higher KME.
## ..reassigning 1 genes from module 90 to modules with higher KME.
## ..reassigning 6 genes from module 91 to modules with higher KME.
## ..reassigning 18 genes from module 92 to modules with higher KME.
## ..reassigning 13 genes from module 93 to modules with higher KME.
## ..reassigning 7 genes from module 94 to modules with higher KME.
## ..reassigning 7 genes from module 95 to modules with higher KME.
## ..reassigning 4 genes from module 96 to modules with higher KME.
## ..reassigning 8 genes from module 97 to modules with higher KME.
## ..reassigning 7 genes from module 98 to modules with higher KME.
## ..reassigning 10 genes from module 99 to modules with higher KME.
## ..reassigning 6 genes from module 100 to modules with higher KME.
## ..reassigning 4 genes from module 101 to modules with higher KME.
## ..reassigning 6 genes from module 102 to modules with higher KME.
## ..reassigning 6 genes from module 103 to modules with higher KME.
## ..reassigning 7 genes from module 104 to modules with higher KME.
## ..reassigning 2 genes from module 105 to modules with higher KME.
## ..reassigning 3 genes from module 106 to modules with higher KME.
## ..reassigning 4 genes from module 107 to modules with higher KME.
## ..reassigning 7 genes from module 108 to modules with higher KME.
## ..reassigning 5 genes from module 109 to modules with higher KME.
## ..reassigning 4 genes from module 110 to modules with higher KME.
## ..reassigning 1 genes from module 111 to modules with higher KME.
## ..reassigning 2 genes from module 112 to modules with higher KME.
## ..reassigning 2 genes from module 114 to modules with higher KME.
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.05
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 117 module eigengenes in given set.
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 115 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 115 module eigengenes in given set.
nModules <- length(table(net$colors))-1
modules <- cbind(colnames(as.matrix(table(net$colors))),table(net$colors))
orderedModules <- cbind(Mnum=paste("M",seq(1:nModules),sep=""),Color=labels2colors(c(1:nModules)))
modules <- modules[match(as.character(orderedModules[,2]),rownames(modules)),]
tmpMEs <- MEs <- net$MEs
colnames(tmpMEs) <- paste("ME",colnames(MEs),sep="")
kMEdat <- signedKME(WGCNA_matrix, tmpMEs, corFnc=myCor) #calculate (signed) eigengene-based connectivity, also known as module membership
WGCNA_dat <- cbind(fData(x)$GeneSymbol, colnames(WGCNA_matrix),net$colors,kMEdat) %>%
as.data.frame() %>%
dplyr::rename(GeneSymbol = "fData(x)$GeneSymbol") %>%
dplyr::rename(ENSEMBLID = "colnames(WGCNA_matrix)") %>%
dplyr::rename(ModuleColors = "net$colors")
# Define numbers of genes and samples
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)
datvar <- pData(x) %>%
dplyr::select(PATID, SEQBATCH, site, SEX, age.vax1, mal.vax.1, treat, mal.atp.3, tte.mal.atp.6, mal.dvax, mal.dvax.tot, pfcsp_pre, pfcsp_post, log2FC_CSPAb) %>%
mutate('Gender (female)' = factor(ifelse(SEX == "F", 1, 0))) %>%
mutate('Site (Siaya)' = factor(ifelse(site == "Siaya", 1, 0))) %>%
dplyr::rename('Pf infection at first vax' = "mal.vax.1") %>%
mutate(pfcsp_pre = ifelse(is.na(pfcsp_pre), median(pfcsp_pre, na.rm=TRUE), pfcsp_pre)) %>%
mutate(pfcsp_post = ifelse(is.na(pfcsp_post), median(pfcsp_post, na.rm=TRUE), pfcsp_post)) %>%
mutate('pre-vax anti-CSP IgG' = log10(pfcsp_pre+1)) %>%
mutate('post-vax anti-CSP IgG' = log10(pfcsp_post+1)) %>%
mutate('log2FC anti-CSP IgG' = log2((pfcsp_post+1)/(pfcsp_pre+1))) %>%
mutate('1.8 x 10^6 PfSPZ' = factor(ifelse(treat == "1.8 x 10^6 PfSPZ", 1, 0))) %>%
mutate('parasitemic events during vax period' = mal.dvax.tot) %>%
mutate('uninfected, 3 months' = factor(ifelse(mal.atp.3 == 0, 1, 0))) %>%
dplyr::rename('days to first parasitemia' = "tte.mal.atp.6") %>%
mutate(Age = age.vax1) %>%
dplyr::select(PATID, Age, site, SEX, treat,'pre-vax anti-CSP IgG', 'Pf infection at first vax', 'uninfected, 3 months', 'days to first parasitemia', 'log2FC anti-CSP IgG') %>%
as_tibble() %>%
column_to_rownames(var = "PATID")
modTraitCor <- cor(orderMEs(net$MEs), datvar, use = "pairwise.complete.obs", method = "pearson")
modTraitP <- corPvalueStudent(modTraitCor, nSamples)
#Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value: Will display correlations and their p-values
#Select out only modules that have P<0.05 in Protection
modTraitP.temp <- modTraitP %>%
as.data.frame() %>%
rownames_to_column(var = "Module") %>%
filter(.$'uninfected, 3 months' < cor.pval | .$'days to first parasitemia' < cor.pval)
modTraitCor.select <- modTraitCor[modTraitP.temp$Module,]
modTraitP.select <- modTraitP[modTraitP.temp$Module,]
textMatrix <- paste(signif(modTraitCor.select, 2), "\n(P=",
signif(modTraitP.select, 1), ")", sep = "")
dim(textMatrix) <- dim(modTraitCor.select)
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
diffME_dat <- orderMEs(net$MEs) %>%
rownames_to_column(var = "subject") %>%
mutate(subject = gsub("\\_0", "", subject)) %>%
right_join(., datvar %>%
dplyr::select(-c(Age, "days to first parasitemia")) %>%
mutate("Pf infection at first vax" = factor(.$"Pf infection at first vax", levels = c(0,1),
labels = c("Pf-","Pf+"))) %>%
mutate("uninfected, 3 months" = factor(.$"uninfected, 3 months", levels = c(0,1),
labels = c("infected","never infected"))) %>%
mutate("pre-vax anti-CSP IgG" = factor(ifelse(.$"pre-vax anti-CSP IgG" > 0, "present", "absent"),
levels = c("absent","present"))) %>%
mutate("log2FC anti-CSP IgG" = factor(ifelse(.$"log2FC anti-CSP IgG" > 12.5, "high responder", "low responder"),
levels = c("low responder","high responder"))) %>%
dplyr::rename("fold-change anti-CSP IgG" = "log2FC anti-CSP IgG") %>%
rownames_to_column(var = "subject"),
by = "subject") %>%
pivot_longer(cols = contains("ME"), names_to = "module", values_to = "MEs") %>%
pivot_longer(cols = c("pre-vax anti-CSP IgG":"fold-change anti-CSP IgG"), names_to = "trait", values_to = "trait_cat") %>%
arrange(module, site, treat, trait, trait_cat) %>%
drop_na(trait_cat)
diffME_dat_samplesize <- diffME_dat %>%
group_by(module, treat, trait, trait_cat) %>%
summarize(n=n()) %>%
ungroup()
## `summarise()` has grouped output by 'module', 'treat', 'trait'. You can
## override using the `.groups` argument.
pheno_dat <- diffME_dat %>%
dplyr::select(subject, site, treat, trait, trait_cat) %>%
distinct(subject, site, treat, trait, trait_cat) %>%
pivot_wider(names_from = trait, values_from = trait_cat) %>%
arrange(subject) %>%
column_to_rownames(var = "subject")
feature_dat <- diffME_dat %>%
dplyr::select(module, MEs) %>%
distinct(module) %>%
mutate(module_color = gsub("ME","", module)) %>%
left_join(., topmodules %>%
enframe() %>%
dplyr::rename(EnsemblID = "value",
module_color = "name") %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")) %>%
column_to_rownames(var = "module") %>%
dplyr::rename(hub_gene = "GeneSymbol")
## Joining with `by = join_by(module_color)`
exprs_dat <- orderMEs(net$MEs) %>%
rownames_to_column(var = "subject") %>%
mutate(subject = gsub("\\_0", "", subject)) %>%
arrange(subject) %>%
column_to_rownames(var = "subject") %>%
dplyr::select(rownames(feature_dat)) %>%
t()
#check
ifelse(all(colnames(exprs_dat) == rownames(pheno_dat)) &
all(rownames(exprs_dat) == rownames(feature_dat)),
"Good to go!",
"Check for matching names")
## [1] "Good to go!"
#make expression set
wgcna_eset <- ExpressionSet(exprs_dat, phenoData = AnnotatedDataFrame(pheno_dat), featureData = AnnotatedDataFrame(feature_dat))
Full dataset
#Pf_baseline
pf_first_vax <- factor(wgcna_eset$`Pf infection at first vax`) #encodes a vector as a factor
treatment <- wgcna_eset$treat
site <- factor(wgcna_eset$site)
design <- model.matrix(~0+pf_first_vax+site+treatment)
colnames(design)
## [1] "pf_first_vaxPf-" "pf_first_vaxPf+"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("uninfected_first_vax", "infected_first_vax","wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(infected_first_vax - uninfected_first_vax,
levels=design)
contrasts
## Contrasts
## Levels infected_first_vax - uninfected_first_vax
## uninfected_first_vax -1
## infected_first_vax 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
Pf_baseline <- topTable(fit2, coef="infected_first_vax - uninfected_first_vax", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "infected_vs_uninfected_baseline") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
Pf_baseline %>%
filter(P.Value<0.05) %>%
knitr::kable()
| comparison | module | module_color | hub_gene | logFC | P.Value | adj.P.Val |
|---|---|---|---|---|---|---|
| infected_vs_uninfected_baseline | MEskyblue1 | skyblue1 | RIOK3 | 0.0547711 | 0.0001150 | 0.0132298 |
| infected_vs_uninfected_baseline | MEthistle3 | thistle3 | SEC62 | 0.0474694 | 0.0008313 | 0.0340638 |
| infected_vs_uninfected_baseline | MEgreen | green | MPP1 | 0.0467892 | 0.0009985 | 0.0340638 |
| infected_vs_uninfected_baseline | MEmediumpurple1 | mediumpurple1 | CSDE1 | 0.0460783 | 0.0011848 | 0.0340638 |
| infected_vs_uninfected_baseline | MEmagenta4 | magenta4 | TALDO1 | 0.0419379 | 0.0031908 | 0.0733893 |
| infected_vs_uninfected_baseline | MElavenderblush3 | lavenderblush3 | MAF1 | 0.0346543 | 0.0147587 | 0.2828749 |
| infected_vs_uninfected_baseline | MElightsteelblue | lightsteelblue | CARMIL2 | -0.0327109 | 0.0214436 | 0.3522885 |
| infected_vs_uninfected_baseline | MElightyellow | lightyellow | KHSRP | -0.0288723 | 0.0420653 | 0.4463755 |
| infected_vs_uninfected_baseline | MEmediumpurple2 | mediumpurple2 | LAT | -0.0283895 | 0.0455961 | 0.4463755 |
| infected_vs_uninfected_baseline | MElightcyan | lightcyan | PPP1CA | -0.0283928 | 0.0457675 | 0.4463755 |
| infected_vs_uninfected_baseline | MEivory | ivory | BCL9L | -0.0281607 | 0.0474036 | 0.4463755 |
#prevax_antiCSPIgG
prevax_antiCSPIgG <- factor(wgcna_eset$`pre-vax anti-CSP IgG`)
design <- model.matrix(~0+prevax_antiCSPIgG+site+treatment)
colnames(design)
## [1] "prevax_antiCSPIgGabsent" "prevax_antiCSPIgGpresent"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("noCSPAb_first_vax", "CSPAb_first_vax","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(CSPAb_first_vax - noCSPAb_first_vax,
levels=design)
contrasts
## Contrasts
## Levels CSPAb_first_vax - noCSPAb_first_vax
## noCSPAb_first_vax -1
## CSPAb_first_vax 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
CSPAb_baseline <- topTable(fit2, coef="CSPAb_first_vax - noCSPAb_first_vax", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "present_vs_absent_baseline") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
CSPAb_baseline %>%
filter(P.Value<0.05) %>%
knitr::kable()
| comparison | module | module_color | hub_gene | logFC | P.Value | adj.P.Val |
|---|---|---|---|---|---|---|
| present_vs_absent_baseline | MEcoral2 | coral2 | RAB3D | -0.0255489 | 0.0033601 | 0.3399596 |
| present_vs_absent_baseline | MEfloralwhite | floralwhite | FBXL5 | -0.0217659 | 0.0124531 | 0.3399596 |
| present_vs_absent_baseline | MEskyblue3 | skyblue3 | ZDHHC18 | -0.0217125 | 0.0127404 | 0.3399596 |
| present_vs_absent_baseline | MElightblue4 | lightblue4 | KIAA1586 | 0.0206726 | 0.0175885 | 0.3399596 |
| present_vs_absent_baseline | MEantiquewhite2 | antiquewhite2 | CLC | -0.0204126 | 0.0192767 | 0.3399596 |
| present_vs_absent_baseline | MEmaroon | maroon | YWHAZ | -0.0200262 | 0.0214971 | 0.3399596 |
| present_vs_absent_baseline | MEplum1 | plum1 | TCF7 | -0.0199077 | 0.0224393 | 0.3399596 |
| present_vs_absent_baseline | MEpalevioletred1 | palevioletred1 | REL | -0.0193491 | 0.0267179 | 0.3399596 |
| present_vs_absent_baseline | MEcyan | cyan | TUBB1 | -0.0188312 | 0.0307620 | 0.3399596 |
| present_vs_absent_baseline | MEthistle4 | thistle4 | ZFX | -0.0183656 | 0.0348945 | 0.3399596 |
| present_vs_absent_baseline | MEroyalblue | royalblue | SLC7A6 | -0.0177337 | 0.0419063 | 0.3399596 |
| present_vs_absent_baseline | MEpink4 | pink4 | IKZF1 | -0.0175644 | 0.0436578 | 0.3399596 |
| present_vs_absent_baseline | MEdarkslateblue | darkslateblue | CAMK4 | -0.0175251 | 0.0440864 | 0.3399596 |
| present_vs_absent_baseline | MEplum4 | plum4 | KDM3B | -0.0172022 | 0.0484004 | 0.3399596 |
| present_vs_absent_baseline | MEorange | orange | SPI1 | -0.0171718 | 0.0488727 | 0.3399596 |
| present_vs_absent_baseline | MEskyblue1 | skyblue1 | RIOK3 | 0.0170896 | 0.0497238 | 0.3399596 |
#protection
outcome_3mos <- factor(wgcna_eset$`uninfected, 3 months`)
design <- model.matrix(~0+outcome_3mos+site+treatment)
colnames(design)
## [1] "outcome_3mosinfected" "outcome_3mosnever infected"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("not_protected","protected", "wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(protected - not_protected,
levels=design)
contrasts
## Contrasts
## Levels protected - not_protected
## not_protected -1
## protected 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
p_vs_np_3mos <- topTable(fit2, coef="protected - not_protected", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "protected_vs_not_protected") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
p_vs_np_3mos %>%
filter(P.Value<0.05) %>%
knitr::kable()
| comparison | module | module_color | hub_gene | logFC | P.Value | adj.P.Val |
|---|---|---|---|---|---|---|
| protected_vs_not_protected | MEthistle3 | thistle3 | SEC62 | -0.0189284 | 0.0233458 | 0.6663581 |
| protected_vs_not_protected | MEskyblue1 | skyblue1 | RIOK3 | -0.0186788 | 0.0252342 | 0.6663581 |
| protected_vs_not_protected | MEmediumpurple1 | mediumpurple1 | CSDE1 | -0.0185282 | 0.0265553 | 0.6663581 |
| protected_vs_not_protected | MElavenderblush2 | lavenderblush2 | EFHD2 | 0.0172669 | 0.0388438 | 0.6663581 |
#CSP Ab response
FC_CSPAb <- factor(wgcna_eset$`fold-change anti-CSP IgG`)
design <- model.matrix(~0+FC_CSPAb+site+treatment)
colnames(design)
## [1] "FC_CSPAblow responder" "FC_CSPAbhigh responder"
## [3] "siteWagai" "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"
colnames(design) <- c("low_responder","high_responder","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs_dat, design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(high_responder - low_responder,
levels=design)
contrasts
## Contrasts
## Levels high_responder - low_responder
## low_responder -1
## high_responder 1
## wagai 0
## lowdose 0
## mediumdose 0
## highdose 0
fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
hi_vs_low_CSP <- topTable(fit2, coef="high_responder - low_responder", number = Inf) %>%
rownames_to_column("module") %>%
right_join(., feature_dat %>%
rownames_to_column("module"),
by = "module") %>%
mutate(comparison = "hi_vs_low_CSP") %>%
dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
hi_vs_low_CSP %>%
filter(P.Value<0.05) %>%
knitr::kable()
| comparison | module | module_color | hub_gene | logFC | P.Value | adj.P.Val |
|---|---|---|---|---|---|---|
| hi_vs_low_CSP | MEtan | tan | FCGR2A | 0.0316844 | 0.0200955 | 0.8094339 |
| hi_vs_low_CSP | MEcoral2 | coral2 | RAB3D | 0.0306907 | 0.0243497 | 0.8094339 |
| hi_vs_low_CSP | MEcyan | cyan | TUBB1 | 0.0299536 | 0.0279575 | 0.8094339 |
| hi_vs_low_CSP | MEskyblue3 | skyblue3 | ZDHHC18 | 0.0298027 | 0.0287540 | 0.8094339 |
| hi_vs_low_CSP | MEmagenta3 | magenta3 | PCDHGA7 | -0.0277972 | 0.0413908 | 0.8094339 |
| hi_vs_low_CSP | MEorange | orange | SPI1 | 0.0274708 | 0.0438310 | 0.8094339 |
all_module_trait_dat <- rbind(Pf_baseline, CSPAb_baseline, hi_vs_low_CSP, p_vs_np_3mos) %>%
arrange(P.Value)
p_vs_np_3mos_select <- p_vs_np_3mos %>%
filter(P.Value<0.05)
all_module_trait_hm_dat <- all_module_trait_dat %>%
filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
dplyr::select(comparison, module_color, hub_gene, logFC) %>%
pivot_wider(names_from = comparison, values_from = logFC) %>%
column_to_rownames(var = "hub_gene")
all_module_trait_hm_annot_dat <- all_module_trait_hm_dat %>%
rownames_to_column(var= "hub_gene") %>%
dplyr::select(hub_gene)
all_module_trait_hm_mat <- all_module_trait_hm_dat %>%
dplyr::select(-module_color) %>%
as.matrix()
row_ha = rowAnnotation(module = all_module_trait_hm_dat$module_color)
clab <- list(
"hub_gene" = c("RIOK3" = "skyblue1", "SEC62" = "thistle3", CSDE1 = "mediumpurple1", EFHD2 = "lavenderblush2")
)
clab <- rev(clab)
row_ha <- rowAnnotation(df = all_module_trait_hm_annot_dat, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
simple_anno_size = unit(0.2, "inches"))
paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(all_module_trait_hm_mat)), 0, length.out=ceiling(paletteLength/2) + 1),
seq(max(all_module_trait_hm_mat)/paletteLength, max(all_module_trait_hm_mat), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("dodgerblue", "white", "#b22222"))(length(myBreaks))
hm <- Heatmap(all_module_trait_hm_mat,
col = circlize::colorRamp2(myBreaks, myColor),
color_space = "LAB",
row_names_side = "left",
row_dend_side = "right",
left_annotation = row_ha,
border = TRUE)
draw(hm)
myColors <- gsub("ME", "", p_vs_np_3mos_select$module_color)
mytopmodules <- topmodules[myColors] %>%
as.data.frame() %>%
dplyr::rename(EnsemblID = ".") %>%
rownames_to_column("module_label") %>%
as_tibble() %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
colors2plot = myColors,
kME.threshold = 0.5, adjacency.threshold = 0.1,
adj.power = power, verbose = T,
node.size = 1.5, frame.color = NA, node.color = scales::muted("red"),
edge.alpha = .7, edge.width = 0.5)
## using KME values to find genes with high module membership
## subsetting to modules: thistle3, skyblue1, mediumpurple1, lavenderblush2
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 169 nodes and 3154 edges
hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
options = arpack_defaults)
beeswarmplot_dat <- orderMEs(net$MEs) %>%
dplyr::select(rownames(modTraitCor.select)) %>%
rownames_to_column(var = "subject") %>%
mutate(subject = gsub("\\_0", "", subject)) %>%
right_join(., pData(wgcna_eset) %>%
dplyr::select(-c(site, treat)) %>%
rownames_to_column(var = "subject"),
by = "subject") %>%
pivot_longer(cols = contains("ME"), names_to = "module", values_to = "MEs") %>%
mutate(module = gsub("MEskyblue1", "RIOK3", module)) %>%
mutate(module = gsub("MEmediumpurple1", "CSDE1", module)) %>%
mutate(module = gsub("MEthistle3", "SEC62", module)) %>%
mutate(module = gsub("MElavenderblush2", "EFHD2", module))
library(tidyverse)
library(broom)
beeswarmplot_dat_factor <- beeswarmplot_dat %>%
pivot_longer(cols = c("Pf infection at first vax":"uninfected, 3 months"), names_to = "trait", values_to = "value") %>%
filter(module %in% p_vs_np_3mos_select$hub_gene) %>%
mutate(trait = factor(trait, levels = c("Pf infection at first vax",
"pre-vax anti-CSP IgG",
"uninfected, 3 months",
"fold-change anti-CSP IgG")))
beeswarmplot_dat_factor_norm_test <- beeswarmplot_dat_factor %>%
group_by(module, trait, value)%>%
do(tidy(shapiro.test(.$MEs))) %>%
ungroup() %>%
dplyr::select(-method) %>%
mutate(is_normal = ifelse(p.value<0.05, "no","yes"))
beeswarmplot_dat_factor_n <- beeswarmplot_dat_factor %>%
group_by(module, trait, value) %>%
summarise(n = n())
## `summarise()` has grouped output by 'module', 'trait'. You can override using
## the `.groups` argument.
data_summary <- function(x) {
m <- mean(x, na.rm=TRUE)
sem <-sd(x, na.rm=TRUE)/sqrt(sum(!is.na(x)))
ymin<-m-1.96*sem
ymax<-m+1.96*sem
return(c(y=m,ymin=ymin,ymax=ymax))
}
all_beeswarm_plot <- beeswarmplot_dat_factor %>%
ggplot(., aes(x=value, y=MEs)) +
ggbeeswarm::geom_beeswarm(alpha = 0.5, color = "lightblue") +
stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. - 0.1, yend=..y..), linewidth = 0.5)+
stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. + 0.1, yend=..y..), linewidth = 0.5) +
stat_summary(fun.data=data_summary, geom="errorbar", width=0.1) +
facet_grid(module~trait, scales = "free") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
ylab("module eigengene") +
xlab("") +
theme_bw() +
theme(axis.text = element_text(colour = "black"),
strip.background = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
plot.title = element_text(size=8))
signif_hub_genes_only <- all_module_trait_dat %>%
filter(hub_gene %in% p_vs_np_3mos_select$hub_gene)
all_beeswarm_plot
## Warning: The dot-dot notation (`..x..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#### Identify hub genes 2
myColors <- gsub("ME", "", rownames(modTraitCor.select))
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
mytopmodules <- topmodules[myColors] %>%
as.data.frame() %>%
dplyr::rename(EnsemblID = ".") %>%
rownames_to_column("module_label") %>%
as_tibble() %>%
left_join(., fData(x) %>%
dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
colors2plot = myColors,
kME.threshold = 0.5, adjacency.threshold = 0.1,
adj.power = power, verbose = T,
node.size = 1.5, frame.color = NA, node.color = scales::muted("red"),
edge.alpha = .7, edge.width = 0.5)
## using KME values to find genes with high module membership
## subsetting to modules: skyblue1, mediumpurple1, thistle3, lavenderblush2
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 169 nodes and 3154 edges
hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
options = arpack_defaults)
Network graphs of significant modules containing nodes (red dots) and edges (lines) meeting minimum thresholds. Correlations between nodes in different modules are shown as black edges.
WGCNA_dat_select <- c()
for(i in modTraitP.temp$Module){
module.color <- sub("ME","", i)
module.colname <- paste0("kME", i)
WGCNA_dat_select[[i]] <- WGCNA_dat %>%
filter(ModuleColors == module.color) %>%
dplyr::select(GeneSymbol, all_of(module.colname))
}
lapply(WGCNA_dat_select, nrow)
## $MEskyblue1
## [1] 75
##
## $MEmediumpurple1
## [1] 55
##
## $MEthistle3
## [1] 68
##
## $MElavenderblush2
## [1] 62
source("https://raw.githubusercontent.com/TranLab/kspzv1-systems/main/helper/ApplyORA2Genesets.R")
myuniverse <- unique(fData(x)[fData(x)$EnsemblID %in% colnames(WGCNA_matrix),]$GeneSymbol)
ORA_baseline_bound_df <- c()
minSize <- 15
#Combine similar modules skyblue1, mediumpurple1, thistle3 in to one
WGCNA_dat_select2 <- list()
WGCNA_dat_select2$EFHD2 <- WGCNA_dat_select$MElavenderblush2
WGCNA_dat_select2$RIOK3_CSDE1_SEC62 <- data.table::rbindlist(list(WGCNA_dat_select$MEskyblue1,
WGCNA_dat_select$MEmediumpurple1,
WGCNA_dat_select$MEthistle3)) %>%
as.data.frame() %>%
rename(kMEMEall = "kMEMEskyblue1")
for(k in names(WGCNA_dat_select2)){
WGCNA_dat_select2[[k]] <- WGCNA_dat_select2[[k]][order(WGCNA_dat_select2[[k]][,2], decreasing = TRUE),]
ORA_baseline_bound_df[[k]] <- ApplyORA2Genesets(genelist = WGCNA_dat_select2[[k]]$GeneSymbol,
geneset = "all",
universe = myuniverse,
output_directory = tempdir(),
filename_prefix = paste0("ORA_Mod_Corr_Protect_3_mos_", k,
"_minSize", minSize),
minSize = minSize)
}
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
RIOK3_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MEskyblue1, .id = "module_type") %>%
mutate(module = "skyblue1") %>%
mutate(module_size = nrow(WGCNA_dat_select$MEskyblue1)) %>%
mutate(hub_gene = "RIOK3")
CSDE1_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MEmediumpurple1, .id = "module_type") %>%
mutate(module = "mediumpurple1") %>%
mutate(module_size = nrow(WGCNA_dat_select$MEmediumpurple1)) %>%
mutate(hub_gene = "CSDE1")
SEC62_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MEthistle3, .id = "module_type") %>%
mutate(module = "thistle3") %>%
mutate(module_size = nrow(WGCNA_dat_select$MEthistle3)) %>%
mutate(hub_gene = "SEC62")
EFHD2_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MElavenderblush2, .id = "module_type") %>%
mutate(module = "lavenderblush2") %>%
mutate(module_size = nrow(WGCNA_dat_select$MElavenderblush2)) %>%
mutate(hub_gene = "EFHD2")
EFHD2_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$EFHD2, .id = "module_type") %>%
mutate(module = "lavenderblush2") %>%
mutate(module_size = nrow(WGCNA_dat_select$MElavenderblush2)) %>%
mutate(hub_gene = "EFHD2")
RIOK3_CSDE1_SEC62_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$RIOK3_CSDE1_SEC62, .id = "module_type") %>%
mutate(module = "the_three_amigos") %>%
mutate(module_size = nrow(WGCNA_dat_select$MElavenderblush2)) %>%
mutate(hub_gene = "RIOK3_CSDE1_SEC62")
ORA_baseline_allbound <- bind_rows(RIOK3_ORA_baseline_bound,
CSDE1_ORA_baseline_bound,
SEC62_ORA_baseline_bound,
EFHD2_ORA_baseline_bound,
RIOK3_CSDE1_SEC62_ORA_baseline_bound) %>%
mutate(neglogpadj = -log10(padj)) %>%
mutate(pct_overlap = 100*(overlap/module_size)) %>%
dplyr::select(module, module_size, hub_gene, module_type, pathway, overlap,
pct_overlap, size, overlapGenes, pval, padj, neglogpadj)
baseline analysis
myModuleTypes <- c("MSigDB_Hallmark_v7.4", "MSigDB_C2_kegg_v7.4", "highBTMs", "BloodGen3Module")
myORAClusterPlotDat <- ORA_baseline_allbound %>%
mutate(pathway = gsub("VS", "v", pathway)) %>%
mutate(pathway = gsub("Vd", "Vδ", pathway)) %>%
mutate(pathway = gsub("gd", "γδ", pathway)) %>%
mutate(pathway = sub(".*?\\_", "", pathway)) %>%
mutate(pathway = fct_reorder(pathway, neglogpadj)) %>%
arrange(desc(neglogpadj))%>%
mutate(TextLabelColor = ifelse(module_type == "BloodGen3Module", scales::muted("red"),
ifelse(module_type == "MSigDB_C2_kegg_v7.4", scales::muted("blue"),
ifelse(module_type == "MSigDB_Hallmark_v7.4", "black","gray")))) %>%
filter(padj < 0.01) %>%
filter(pct_overlap >= 5) %>%
filter(module_type %in% myModuleTypes) %>%
group_by(hub_gene) %>%
arrange(neglogpadj) %>%
slice_tail(n = 4) %>%
ungroup()
addSmallLegend <- function(myPlot, pointSize = 1.5, textSize = 3, spaceLegend = 0.3) {
myPlot +
guides(shape = guide_legend(override.aes = list(size = pointSize)),
color = guide_legend(override.aes = list(size = pointSize))) +
theme(legend.title = element_text(size = textSize),
legend.text = element_text(size = textSize),
legend.key.size = unit(spaceLegend, "lines"))
}
scale_begin <- floor(min(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))
scale_end <- ceiling(max(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))
myArrangedPlot <- myORAClusterPlotDat %>%
filter(hub_gene == "EFHD2" | hub_gene == "RIOK3_CSDE1_SEC62") %>%
mutate(pathway = fct_reorder(pathway, neglogpadj)) %>%
ggplot(., aes(x = neglogpadj, y = pathway, fill = pct_overlap)) +
geom_bar(stat = 'identity') +
geom_vline(xintercept = 2, color = "red", linetype = "dotted") +
scale_fill_distiller(direction = 1, breaks = c(0, 10, 20, 30, 40, 50), limits = c(0,50)) +
ylab("module") +
theme_classic(base_family = "sans", base_size = 14) +
theme(legend.position = "bottom",
plot.margin = unit(c(0,0.5,0,0.5), "cm")) +
facet_wrap(~hub_gene, scales ="free", nrow = 2) +
theme(strip.background = element_blank())