KSPZV1 WGCNA baseline

Objective

Perform weighted gene correlation network analysis as originally described by Horvath et al. on pre-immunization baseline transcriptomic data from the KSPZV1 clinical trial.

References:

https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/Tutorials/ https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/JMiller/Tutorial%20document.pdf

Reviewer requested that we provide scatter plots for the correlations between modules and phenotypes. Review of the original code revealed that using Pearson’s or Spearman’s correlation to assess the relationship between modules and binary traits was not appropriate. Thus, we performed limma (empirical Bayes moderated t test) and presented differences as beeswarm plots with mean and 95%CI.

Load packages

library(edgeR)
library(readxl)
library(EDASeq)
library(Biobase)
library(WGCNA)
library(ape)
library(Cairo)
library(CorLevelPlot)
library(tidyverse)
library(igraph)
library(remotes)
library(fgsea)
library(data.table)
library(ggplot2)
library(viridis)
library(ggpubr)
library(googledrive)
allowWGCNAThreads()

## Allowing multi-threading with up to 10 threads.

library(broom)
library(ComplexHeatmap)

Options and define variables

myCor = "bicor"
power <- 12.5 #determined by evaluating previous plot using pickSoftThreshold
myMergeCutHeight <- 0.05
myDeepSplit <- 2
minModSize <- 20
enforceMMS <- FALSE
cor.pval <- 0.05

Load ExpressionSet

#local file: "PfSPZ_cpm_ExpressionSet_244x21815_AllGroups_bothBatches_0_rmBatchFX_06082021_TMT_logTRUE.rds"

temp <- tempfile(fileext = ".rds")
dl <- drive_download(
  as_id("1togaBlNIxiDj16FyXTC-r7Qw0A9cG2az"), path = temp, overwrite = TRUE)
x <- readRDS(file = dl$local_path)
dim(x)

## Features  Samples 
##    21815      244

Make weighted gene correlation matrix based on full data set

WGCNA_matrix <- t(exprs(x)) #make correlations only with full eset
blockSize(ncol(WGCNA_matrix), rectangularBlocks = TRUE, maxMemoryAllocation = 4^31)

## [1] 21815

par(mfrow=c(1,1))
plotClusterTreeSamples(datExpr=WGCNA_matrix)

## NULL

#used bicor for Timepoint 0
powers <- seq(4,20,by=0.5)
sft <- pickSoftThreshold(WGCNA_matrix, powerVector = powers, corFnc = myCor, verbose = 5, networkType ="signed", blockSize = ncol(WGCNA_matrix))

##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 21815 of 21815
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1    4.0   0.0273 -0.729          0.904 1860.00   1850.00 2780.0
## 2    4.5   0.0749 -1.090          0.939 1400.00   1390.00 2220.0
## 3    5.0   0.1520 -1.450          0.955 1070.00   1050.00 1800.0
## 4    5.5   0.2150 -1.650          0.959  817.00    801.00 1470.0
## 5    6.0   0.3160 -1.980          0.964  629.00    611.00 1230.0
## 6    6.5   0.4060 -2.230          0.969  488.00    469.00 1050.0
## 7    7.0   0.4940 -2.410          0.974  380.00    362.00  894.0
## 8    7.5   0.5660 -2.500          0.979  299.00    280.00  770.0
## 9    8.0   0.6340 -2.590          0.982  236.00    218.00  667.0
## 10   8.5   0.6880 -2.590          0.987  187.00    171.00  582.0
## 11   9.0   0.7260 -2.590          0.988  150.00    134.00  511.0
## 12   9.5   0.7660 -2.570          0.991  121.00    106.00  450.0
## 13  10.0   0.7950 -2.540          0.993   97.70     84.00  399.0
## 14  10.5   0.8100 -2.560          0.990   79.60     66.70  357.0
## 15  11.0   0.8240 -2.550          0.989   65.20     53.40  321.0
## 16  11.5   0.8350 -2.530          0.989   53.80     42.80  290.0
## 17  12.0   0.8470 -2.500          0.991   44.60     34.50  262.0
## 18  12.5   0.8570 -2.460          0.991   37.10     27.80  238.0
## 19  13.0   0.8670 -2.420          0.994   31.10     22.50  216.0
## 20  13.5   0.8760 -2.390          0.996   26.20     18.30  197.0
## 21  14.0   0.8800 -2.360          0.997   22.20     14.90  180.0
## 22  14.5   0.8800 -2.350          0.995   18.90     12.10  165.0
## 23  15.0   0.8840 -2.320          0.996   16.10      9.89  151.0
## 24  15.5   0.8850 -2.290          0.994   13.90      8.08  139.0
## 25  16.0   0.8830 -2.270          0.992   11.90      6.63  128.0
## 26  16.5   0.8810 -2.260          0.989   10.30      5.45  118.0
## 27  17.0   0.8830 -2.220          0.989    8.99      4.50  109.0
## 28  17.5   0.8850 -2.200          0.988    7.84      3.71  101.0
## 29  18.0   0.8850 -2.170          0.986    6.87      3.07   93.1
## 30  18.5   0.8880 -2.140          0.987    6.04      2.55   86.3
## 31  19.0   0.8750 -2.140          0.980    5.32      2.11   80.0
## 32  19.5   0.8800 -2.110          0.982    4.71      1.75   74.3
## 33  20.0   0.8820 -2.070          0.981    4.18      1.47   69.1

sft$powerEstimate

## [1] 12.5

par(mfrow=c(1,1))
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab='Soft Threshold (power)',ylab='Scale Free Topology Model Fit,signed R²',
     type='n', main = paste('Scale independence'));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=1,col='red'); abline(h=0.90,col='red')

Run an automated network analysis

#use blockwiseModules given many genes
net <- blockwiseModules(WGCNA_matrix,
                        power=power,
                        deepSplit= myDeepSplit,
                        minModuleSize=minModSize,
                        TOMType="none", 
                        mergeCutHeight=myMergeCutHeight, 
                        TOMDenom="mean",
                        detectCutHeight=0.995,
                        corType=myCor,
                        networkType="signed",
                        pamStage=TRUE,
                        pamRespectsDendro=TRUE,
                        reassignThresh=0.05,
                        verbose=5,
                        saveTOMs=FALSE,
                        maxBlockSize=ncol(WGCNA_matrix), 
                        nThreads = 0)

##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will use 10 parallel threads.
##      Fraction of slow calculations: 0.000000
## 
##  ....clustering..
##  ....detecting modules..
##      ..done.
##  ....calculating module eigengenes..
##      moduleEigengenes : Working on ME for module 1
##      moduleEigengenes : Working on ME for module 2
##      moduleEigengenes : Working on ME for module 3
##      moduleEigengenes : Working on ME for module 4
##      moduleEigengenes : Working on ME for module 5
##      moduleEigengenes : Working on ME for module 6
##      moduleEigengenes : Working on ME for module 7
##      moduleEigengenes : Working on ME for module 8
##      moduleEigengenes : Working on ME for module 9
##      moduleEigengenes : Working on ME for module 10
##      moduleEigengenes : Working on ME for module 11
##      moduleEigengenes : Working on ME for module 12
##      moduleEigengenes : Working on ME for module 13
##      moduleEigengenes : Working on ME for module 14
##      moduleEigengenes : Working on ME for module 15
##      moduleEigengenes : Working on ME for module 16
##      moduleEigengenes : Working on ME for module 17
##      moduleEigengenes : Working on ME for module 18
##      moduleEigengenes : Working on ME for module 19
##      moduleEigengenes : Working on ME for module 20
##      moduleEigengenes : Working on ME for module 21
##      moduleEigengenes : Working on ME for module 22
##      moduleEigengenes : Working on ME for module 23
##      moduleEigengenes : Working on ME for module 24
##      moduleEigengenes : Working on ME for module 25
##      moduleEigengenes : Working on ME for module 26
##      moduleEigengenes : Working on ME for module 27
##      moduleEigengenes : Working on ME for module 28
##      moduleEigengenes : Working on ME for module 29
##      moduleEigengenes : Working on ME for module 30
##      moduleEigengenes : Working on ME for module 31
##      moduleEigengenes : Working on ME for module 32
##      moduleEigengenes : Working on ME for module 33
##      moduleEigengenes : Working on ME for module 34
##      moduleEigengenes : Working on ME for module 35
##      moduleEigengenes : Working on ME for module 36
##      moduleEigengenes : Working on ME for module 37
##      moduleEigengenes : Working on ME for module 38
##      moduleEigengenes : Working on ME for module 39
##      moduleEigengenes : Working on ME for module 40
##      moduleEigengenes : Working on ME for module 41
##      moduleEigengenes : Working on ME for module 42
##      moduleEigengenes : Working on ME for module 43
##      moduleEigengenes : Working on ME for module 44
##      moduleEigengenes : Working on ME for module 45
##      moduleEigengenes : Working on ME for module 46
##      moduleEigengenes : Working on ME for module 47
##      moduleEigengenes : Working on ME for module 48
##      moduleEigengenes : Working on ME for module 49
##      moduleEigengenes : Working on ME for module 50
##      moduleEigengenes : Working on ME for module 51
##      moduleEigengenes : Working on ME for module 52
##      moduleEigengenes : Working on ME for module 53
##      moduleEigengenes : Working on ME for module 54
##      moduleEigengenes : Working on ME for module 55
##      moduleEigengenes : Working on ME for module 56
##      moduleEigengenes : Working on ME for module 57
##      moduleEigengenes : Working on ME for module 58
##      moduleEigengenes : Working on ME for module 59
##      moduleEigengenes : Working on ME for module 60
##      moduleEigengenes : Working on ME for module 61
##      moduleEigengenes : Working on ME for module 62
##      moduleEigengenes : Working on ME for module 63
##      moduleEigengenes : Working on ME for module 64
##      moduleEigengenes : Working on ME for module 65
##      moduleEigengenes : Working on ME for module 66
##      moduleEigengenes : Working on ME for module 67
##      moduleEigengenes : Working on ME for module 68
##      moduleEigengenes : Working on ME for module 69
##      moduleEigengenes : Working on ME for module 70
##      moduleEigengenes : Working on ME for module 71
##      moduleEigengenes : Working on ME for module 72
##      moduleEigengenes : Working on ME for module 73
##      moduleEigengenes : Working on ME for module 74
##      moduleEigengenes : Working on ME for module 75
##      moduleEigengenes : Working on ME for module 76
##      moduleEigengenes : Working on ME for module 77
##      moduleEigengenes : Working on ME for module 78
##      moduleEigengenes : Working on ME for module 79
##      moduleEigengenes : Working on ME for module 80
##      moduleEigengenes : Working on ME for module 81
##      moduleEigengenes : Working on ME for module 82
##      moduleEigengenes : Working on ME for module 83
##      moduleEigengenes : Working on ME for module 84
##      moduleEigengenes : Working on ME for module 85
##      moduleEigengenes : Working on ME for module 86
##      moduleEigengenes : Working on ME for module 87
##      moduleEigengenes : Working on ME for module 88
##      moduleEigengenes : Working on ME for module 89
##      moduleEigengenes : Working on ME for module 90
##      moduleEigengenes : Working on ME for module 91
##      moduleEigengenes : Working on ME for module 92
##      moduleEigengenes : Working on ME for module 93
##      moduleEigengenes : Working on ME for module 94
##      moduleEigengenes : Working on ME for module 95
##      moduleEigengenes : Working on ME for module 96
##      moduleEigengenes : Working on ME for module 97
##      moduleEigengenes : Working on ME for module 98
##      moduleEigengenes : Working on ME for module 99
##      moduleEigengenes : Working on ME for module 100
##      moduleEigengenes : Working on ME for module 101
##      moduleEigengenes : Working on ME for module 102
##      moduleEigengenes : Working on ME for module 103
##      moduleEigengenes : Working on ME for module 104
##      moduleEigengenes : Working on ME for module 105
##      moduleEigengenes : Working on ME for module 106
##      moduleEigengenes : Working on ME for module 107
##      moduleEigengenes : Working on ME for module 108
##      moduleEigengenes : Working on ME for module 109
##      moduleEigengenes : Working on ME for module 110
##      moduleEigengenes : Working on ME for module 111
##      moduleEigengenes : Working on ME for module 112
##      moduleEigengenes : Working on ME for module 113
##      moduleEigengenes : Working on ME for module 114
##      moduleEigengenes : Working on ME for module 115
##      moduleEigengenes : Working on ME for module 116
##  ....checking kME in modules..
##      ..removing 29 genes from module 1 because their KME is too low.
##      ..removing 10 genes from module 2 because their KME is too low.
##      ..removing 1 genes from module 8 because their KME is too low.
##      ..removing 1 genes from module 12 because their KME is too low.
##      ..removing 2 genes from module 13 because their KME is too low.
##      ..removing 1 genes from module 27 because their KME is too low.
##      ..removing 1 genes from module 37 because their KME is too low.
##      ..removing 1 genes from module 49 because their KME is too low.
##      ..removing 1 genes from module 79 because their KME is too low.
##     ..deleting module 100: of 40 total genes in the module
##        only 0 have the requisite high correlation with the eigengene.
##      ..removing 2 genes from module 114 because their KME is too low.
##   ..reassigning 230 genes from module 1 to modules with higher KME.
##   ..reassigning 4 genes from module 2 to modules with higher KME.
##   ..reassigning 99 genes from module 3 to modules with higher KME.
##   ..reassigning 128 genes from module 4 to modules with higher KME.
##   ..reassigning 208 genes from module 5 to modules with higher KME.
##   ..reassigning 30 genes from module 6 to modules with higher KME.
##   ..reassigning 102 genes from module 7 to modules with higher KME.
##   ..reassigning 69 genes from module 8 to modules with higher KME.
##   ..reassigning 47 genes from module 9 to modules with higher KME.
##   ..reassigning 47 genes from module 10 to modules with higher KME.
##   ..reassigning 20 genes from module 11 to modules with higher KME.
##   ..reassigning 30 genes from module 12 to modules with higher KME.
##   ..reassigning 17 genes from module 13 to modules with higher KME.
##   ..reassigning 33 genes from module 14 to modules with higher KME.
##   ..reassigning 35 genes from module 15 to modules with higher KME.
##   ..reassigning 49 genes from module 16 to modules with higher KME.
##   ..reassigning 46 genes from module 17 to modules with higher KME.
##   ..reassigning 41 genes from module 18 to modules with higher KME.
##   ..reassigning 26 genes from module 19 to modules with higher KME.
##   ..reassigning 44 genes from module 20 to modules with higher KME.
##   ..reassigning 47 genes from module 21 to modules with higher KME.
##   ..reassigning 38 genes from module 22 to modules with higher KME.
##   ..reassigning 48 genes from module 23 to modules with higher KME.
##   ..reassigning 34 genes from module 24 to modules with higher KME.
##   ..reassigning 51 genes from module 25 to modules with higher KME.
##   ..reassigning 36 genes from module 26 to modules with higher KME.
##   ..reassigning 54 genes from module 27 to modules with higher KME.
##   ..reassigning 30 genes from module 28 to modules with higher KME.
##   ..reassigning 20 genes from module 29 to modules with higher KME.
##   ..reassigning 24 genes from module 30 to modules with higher KME.
##   ..reassigning 17 genes from module 31 to modules with higher KME.
##   ..reassigning 32 genes from module 32 to modules with higher KME.
##   ..reassigning 20 genes from module 33 to modules with higher KME.
##   ..reassigning 20 genes from module 34 to modules with higher KME.
##   ..reassigning 45 genes from module 35 to modules with higher KME.
##   ..reassigning 21 genes from module 36 to modules with higher KME.
##   ..reassigning 17 genes from module 37 to modules with higher KME.
##   ..reassigning 28 genes from module 38 to modules with higher KME.
##   ..reassigning 20 genes from module 39 to modules with higher KME.
##   ..reassigning 42 genes from module 40 to modules with higher KME.
##   ..reassigning 32 genes from module 41 to modules with higher KME.
##   ..reassigning 36 genes from module 42 to modules with higher KME.
##   ..reassigning 19 genes from module 43 to modules with higher KME.
##   ..reassigning 19 genes from module 44 to modules with higher KME.
##   ..reassigning 28 genes from module 45 to modules with higher KME.
##   ..reassigning 18 genes from module 46 to modules with higher KME.
##   ..reassigning 16 genes from module 47 to modules with higher KME.
##   ..reassigning 23 genes from module 48 to modules with higher KME.
##   ..reassigning 15 genes from module 49 to modules with higher KME.
##   ..reassigning 21 genes from module 50 to modules with higher KME.
##   ..reassigning 16 genes from module 51 to modules with higher KME.
##   ..reassigning 16 genes from module 52 to modules with higher KME.
##   ..reassigning 32 genes from module 53 to modules with higher KME.
##   ..reassigning 23 genes from module 54 to modules with higher KME.
##   ..reassigning 15 genes from module 55 to modules with higher KME.
##   ..reassigning 22 genes from module 56 to modules with higher KME.
##   ..reassigning 19 genes from module 57 to modules with higher KME.
##   ..reassigning 14 genes from module 58 to modules with higher KME.
##   ..reassigning 27 genes from module 59 to modules with higher KME.
##   ..reassigning 13 genes from module 60 to modules with higher KME.
##   ..reassigning 12 genes from module 61 to modules with higher KME.
##   ..reassigning 7 genes from module 62 to modules with higher KME.
##   ..reassigning 25 genes from module 63 to modules with higher KME.
##   ..reassigning 23 genes from module 64 to modules with higher KME.
##   ..reassigning 9 genes from module 65 to modules with higher KME.
##   ..reassigning 15 genes from module 66 to modules with higher KME.
##   ..reassigning 19 genes from module 67 to modules with higher KME.
##   ..reassigning 17 genes from module 68 to modules with higher KME.
##   ..reassigning 8 genes from module 69 to modules with higher KME.
##   ..reassigning 9 genes from module 70 to modules with higher KME.
##   ..reassigning 19 genes from module 71 to modules with higher KME.
##   ..reassigning 24 genes from module 72 to modules with higher KME.
##   ..reassigning 27 genes from module 73 to modules with higher KME.
##   ..reassigning 14 genes from module 74 to modules with higher KME.
##   ..reassigning 8 genes from module 75 to modules with higher KME.
##   ..reassigning 10 genes from module 76 to modules with higher KME.
##   ..reassigning 7 genes from module 77 to modules with higher KME.
##   ..reassigning 12 genes from module 78 to modules with higher KME.
##   ..reassigning 7 genes from module 79 to modules with higher KME.
##   ..reassigning 12 genes from module 80 to modules with higher KME.
##   ..reassigning 9 genes from module 81 to modules with higher KME.
##   ..reassigning 9 genes from module 82 to modules with higher KME.
##   ..reassigning 7 genes from module 83 to modules with higher KME.
##   ..reassigning 14 genes from module 84 to modules with higher KME.
##   ..reassigning 2 genes from module 85 to modules with higher KME.
##   ..reassigning 7 genes from module 86 to modules with higher KME.
##   ..reassigning 4 genes from module 87 to modules with higher KME.
##   ..reassigning 10 genes from module 88 to modules with higher KME.
##   ..reassigning 6 genes from module 89 to modules with higher KME.
##   ..reassigning 1 genes from module 90 to modules with higher KME.
##   ..reassigning 6 genes from module 91 to modules with higher KME.
##   ..reassigning 18 genes from module 92 to modules with higher KME.
##   ..reassigning 13 genes from module 93 to modules with higher KME.
##   ..reassigning 7 genes from module 94 to modules with higher KME.
##   ..reassigning 7 genes from module 95 to modules with higher KME.
##   ..reassigning 4 genes from module 96 to modules with higher KME.
##   ..reassigning 8 genes from module 97 to modules with higher KME.
##   ..reassigning 7 genes from module 98 to modules with higher KME.
##   ..reassigning 10 genes from module 99 to modules with higher KME.
##   ..reassigning 6 genes from module 100 to modules with higher KME.
##   ..reassigning 4 genes from module 101 to modules with higher KME.
##   ..reassigning 6 genes from module 102 to modules with higher KME.
##   ..reassigning 6 genes from module 103 to modules with higher KME.
##   ..reassigning 7 genes from module 104 to modules with higher KME.
##   ..reassigning 2 genes from module 105 to modules with higher KME.
##   ..reassigning 3 genes from module 106 to modules with higher KME.
##   ..reassigning 4 genes from module 107 to modules with higher KME.
##   ..reassigning 7 genes from module 108 to modules with higher KME.
##   ..reassigning 5 genes from module 109 to modules with higher KME.
##   ..reassigning 4 genes from module 110 to modules with higher KME.
##   ..reassigning 1 genes from module 111 to modules with higher KME.
##   ..reassigning 2 genes from module 112 to modules with higher KME.
##   ..reassigning 2 genes from module 114 to modules with higher KME.
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.05
##        multiSetMEs: Calculating module MEs.
##          Working on set 1 ...
##          moduleEigengenes: Calculating 117 module eigengenes in given set.
##        multiSetMEs: Calculating module MEs.
##          Working on set 1 ...
##          moduleEigengenes: Calculating 115 module eigengenes in given set.
##        Calculating new MEs...
##        multiSetMEs: Calculating module MEs.
##          Working on set 1 ...
##          moduleEigengenes: Calculating 115 module eigengenes in given set.

nModules <- length(table(net$colors))-1
modules <- cbind(colnames(as.matrix(table(net$colors))),table(net$colors))
orderedModules <- cbind(Mnum=paste("M",seq(1:nModules),sep=""),Color=labels2colors(c(1:nModules)))
modules <- modules[match(as.character(orderedModules[,2]),rownames(modules)),]
tmpMEs <- MEs <- net$MEs
colnames(tmpMEs) <- paste("ME",colnames(MEs),sep="")
kMEdat <- signedKME(WGCNA_matrix, tmpMEs, corFnc=myCor) #calculate (signed) eigengene-based connectivity, also known as module membership
WGCNA_dat <- cbind(fData(x)$GeneSymbol, colnames(WGCNA_matrix),net$colors,kMEdat) %>%
  as.data.frame() %>%
  dplyr::rename(GeneSymbol = "fData(x)$GeneSymbol") %>%
  dplyr::rename(ENSEMBLID = "colnames(WGCNA_matrix)") %>%
  dplyr::rename(ModuleColors = "net$colors")

Correlate modules with traits

# Define numbers of genes and samples
nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)

datvar <- pData(x) %>%
  dplyr::select(PATID, SEQBATCH, site, SEX, age.vax1, mal.vax.1, treat, mal.atp.3, tte.mal.atp.6, mal.dvax, mal.dvax.tot, pfcsp_pre, pfcsp_post, log2FC_CSPAb) %>%
  mutate('Gender (female)' = factor(ifelse(SEX == "F", 1, 0))) %>%
  mutate('Site (Siaya)' = factor(ifelse(site == "Siaya", 1, 0))) %>%
  dplyr::rename('Pf infection at first vax' = "mal.vax.1") %>%
  mutate(pfcsp_pre = ifelse(is.na(pfcsp_pre), median(pfcsp_pre, na.rm=TRUE), pfcsp_pre)) %>%
  mutate(pfcsp_post = ifelse(is.na(pfcsp_post), median(pfcsp_post, na.rm=TRUE), pfcsp_post)) %>%
  mutate('pre-vax anti-CSP IgG' = log10(pfcsp_pre+1)) %>%
  mutate('post-vax anti-CSP IgG' = log10(pfcsp_post+1)) %>%
  mutate('log2FC anti-CSP IgG' = log2((pfcsp_post+1)/(pfcsp_pre+1))) %>%
  mutate('1.8 x 10^6 PfSPZ' = factor(ifelse(treat == "1.8 x 10^6 PfSPZ", 1, 0))) %>%
  mutate('parasitemic events during vax period' = mal.dvax.tot) %>%
  mutate('uninfected, 3 months' = factor(ifelse(mal.atp.3 == 0, 1, 0))) %>%
  dplyr::rename('days to first parasitemia' = "tte.mal.atp.6") %>%
  mutate(Age = age.vax1) %>%
  dplyr::select(PATID, Age, site, SEX, treat,'pre-vax anti-CSP IgG', 'Pf infection at first vax', 'uninfected, 3 months', 'days to first parasitemia', 'log2FC anti-CSP IgG') %>% 
  as_tibble() %>%
  column_to_rownames(var = "PATID")

modTraitCor <- cor(orderMEs(net$MEs), datvar, use = "pairwise.complete.obs", method = "pearson")
modTraitP <- corPvalueStudent(modTraitCor, nSamples)
#Since we have a moderately large number of modules and traits, a suitable graphical representation will help in reading the table. We color code each association by the correlation value: Will display correlations and their p-values
#Select out only modules that have P<0.05 in Protection

modTraitP.temp <- modTraitP %>%
  as.data.frame() %>%
  rownames_to_column(var = "Module") %>%
  filter(.$'uninfected, 3 months' < cor.pval | .$'days to first parasitemia' < cor.pval)
modTraitCor.select <- modTraitCor[modTraitP.temp$Module,]
modTraitP.select <- modTraitP[modTraitP.temp$Module,]
textMatrix <- paste(signif(modTraitCor.select, 2), "\n(P=",
                   signif(modTraitP.select, 1), ")", sep = "")
dim(textMatrix) <- dim(modTraitCor.select)

topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")

Prepare data for differential module eigengene analysis

diffME_dat <- orderMEs(net$MEs) %>% 
  rownames_to_column(var = "subject") %>%
  mutate(subject = gsub("\\_0", "", subject)) %>%
  right_join(., datvar %>%
               dplyr::select(-c(Age, "days to first parasitemia")) %>%
               mutate("Pf infection at first vax" = factor(.$"Pf infection at first vax", levels = c(0,1),
                                                           labels = c("Pf-","Pf+"))) %>%
               mutate("uninfected, 3 months" = factor(.$"uninfected, 3 months", levels = c(0,1),
                                                      labels = c("infected","never infected"))) %>%
               mutate("pre-vax anti-CSP IgG" = factor(ifelse(.$"pre-vax anti-CSP IgG" > 0, "present", "absent"),
                                                      levels = c("absent","present"))) %>%
               mutate("log2FC anti-CSP IgG" = factor(ifelse(.$"log2FC anti-CSP IgG" > 12.5, "high responder", "low responder"),
                                                      levels = c("low responder","high responder"))) %>%
               dplyr::rename("fold-change anti-CSP IgG" = "log2FC anti-CSP IgG") %>%
               rownames_to_column(var = "subject"),
             by = "subject") %>%
  pivot_longer(cols = contains("ME"), names_to = "module", values_to = "MEs") %>%
  pivot_longer(cols = c("pre-vax anti-CSP IgG":"fold-change anti-CSP IgG"), names_to = "trait", values_to = "trait_cat") %>%
  arrange(module, site, treat, trait, trait_cat) %>%
  drop_na(trait_cat)

diffME_dat_samplesize <- diffME_dat %>%
  group_by(module, treat, trait, trait_cat) %>%
  summarize(n=n()) %>% 
  ungroup()

## `summarise()` has grouped output by 'module', 'treat', 'trait'. You can
## override using the `.groups` argument.

Limma method

pheno_dat <- diffME_dat %>%
  dplyr::select(subject, site, treat, trait, trait_cat) %>%
  distinct(subject, site, treat, trait, trait_cat) %>%
  pivot_wider(names_from = trait, values_from = trait_cat) %>%
  arrange(subject) %>%
  column_to_rownames(var = "subject") 

feature_dat <- diffME_dat %>%
  dplyr::select(module, MEs) %>%
  distinct(module) %>%
  mutate(module_color = gsub("ME","", module)) %>%
  left_join(., topmodules %>%
  enframe() %>%
  dplyr::rename(EnsemblID = "value",
                module_color = "name") %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")) %>%
  column_to_rownames(var = "module") %>%
  dplyr::rename(hub_gene = "GeneSymbol")

## Joining with `by = join_by(module_color)`

exprs_dat <- orderMEs(net$MEs) %>% 
  rownames_to_column(var = "subject") %>%
  mutate(subject = gsub("\\_0", "", subject)) %>%
  arrange(subject) %>%
  column_to_rownames(var = "subject") %>%
  dplyr::select(rownames(feature_dat)) %>%
  t() 
  
#check
ifelse(all(colnames(exprs_dat) == rownames(pheno_dat)) &
         all(rownames(exprs_dat) == rownames(feature_dat)),
       "Good to go!",
       "Check for matching names")

## [1] "Good to go!"

#make expression set
wgcna_eset <- ExpressionSet(exprs_dat, phenoData = AnnotatedDataFrame(pheno_dat), featureData = AnnotatedDataFrame(feature_dat))

Full dataset

#Pf_baseline
pf_first_vax <- factor(wgcna_eset$`Pf infection at first vax`) #encodes a vector as a factor
treatment <- wgcna_eset$treat
site <- factor(wgcna_eset$site)
design <- model.matrix(~0+pf_first_vax+site+treatment) 
colnames(design)

## [1] "pf_first_vaxPf-"           "pf_first_vaxPf+"          
## [3] "siteWagai"                 "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"

colnames(design) <- c("uninfected_first_vax", "infected_first_vax","wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(infected_first_vax - uninfected_first_vax,
                           levels=design) 
contrasts

##                       Contrasts
## Levels                 infected_first_vax - uninfected_first_vax
##   uninfected_first_vax                                        -1
##   infected_first_vax                                           1
##   wagai                                                        0
##   lowdose                                                      0
##   mediumdose                                                   0
##   highdose                                                     0

fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)

Pf_baseline <- topTable(fit2, coef="infected_first_vax - uninfected_first_vax", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "infected_vs_uninfected_baseline") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
Pf_baseline %>%
  filter(P.Value<0.05) %>%
  knitr::kable()

comparison	module	module_color	hub_gene	logFC	P.Value	adj.P.Val
infected_vs_uninfected_baseline	MEskyblue1	skyblue1	RIOK3	0.0547711	0.0001150	0.0132298
infected_vs_uninfected_baseline	MEthistle3	thistle3	SEC62	0.0474694	0.0008313	0.0340638
infected_vs_uninfected_baseline	MEgreen	green	MPP1	0.0467892	0.0009985	0.0340638
infected_vs_uninfected_baseline	MEmediumpurple1	mediumpurple1	CSDE1	0.0460783	0.0011848	0.0340638
infected_vs_uninfected_baseline	MEmagenta4	magenta4	TALDO1	0.0419379	0.0031908	0.0733893
infected_vs_uninfected_baseline	MElavenderblush3	lavenderblush3	MAF1	0.0346543	0.0147587	0.2828749
infected_vs_uninfected_baseline	MElightsteelblue	lightsteelblue	CARMIL2	-0.0327109	0.0214436	0.3522885
infected_vs_uninfected_baseline	MElightyellow	lightyellow	KHSRP	-0.0288723	0.0420653	0.4463755
infected_vs_uninfected_baseline	MEmediumpurple2	mediumpurple2	LAT	-0.0283895	0.0455961	0.4463755
infected_vs_uninfected_baseline	MElightcyan	lightcyan	PPP1CA	-0.0283928	0.0457675	0.4463755
infected_vs_uninfected_baseline	MEivory	ivory	BCL9L	-0.0281607	0.0474036	0.4463755

#prevax_antiCSPIgG
prevax_antiCSPIgG <- factor(wgcna_eset$`pre-vax anti-CSP IgG`)
design <- model.matrix(~0+prevax_antiCSPIgG+site+treatment)
colnames(design)

## [1] "prevax_antiCSPIgGabsent"   "prevax_antiCSPIgGpresent" 
## [3] "siteWagai"                 "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"

colnames(design) <- c("noCSPAb_first_vax", "CSPAb_first_vax","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(CSPAb_first_vax - noCSPAb_first_vax,
                           levels=design) 
contrasts

##                    Contrasts
## Levels              CSPAb_first_vax - noCSPAb_first_vax
##   noCSPAb_first_vax                                  -1
##   CSPAb_first_vax                                     1
##   wagai                                               0
##   lowdose                                             0
##   mediumdose                                          0
##   highdose                                            0

fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)

CSPAb_baseline <- topTable(fit2, coef="CSPAb_first_vax - noCSPAb_first_vax", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "present_vs_absent_baseline") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
CSPAb_baseline %>%
  filter(P.Value<0.05) %>%
  knitr::kable()

comparison	module	module_color	hub_gene	logFC	P.Value	adj.P.Val
present_vs_absent_baseline	MEcoral2	coral2	RAB3D	-0.0255489	0.0033601	0.3399596
present_vs_absent_baseline	MEfloralwhite	floralwhite	FBXL5	-0.0217659	0.0124531	0.3399596
present_vs_absent_baseline	MEskyblue3	skyblue3	ZDHHC18	-0.0217125	0.0127404	0.3399596
present_vs_absent_baseline	MElightblue4	lightblue4	KIAA1586	0.0206726	0.0175885	0.3399596
present_vs_absent_baseline	MEantiquewhite2	antiquewhite2	CLC	-0.0204126	0.0192767	0.3399596
present_vs_absent_baseline	MEmaroon	maroon	YWHAZ	-0.0200262	0.0214971	0.3399596
present_vs_absent_baseline	MEplum1	plum1	TCF7	-0.0199077	0.0224393	0.3399596
present_vs_absent_baseline	MEpalevioletred1	palevioletred1	REL	-0.0193491	0.0267179	0.3399596
present_vs_absent_baseline	MEcyan	cyan	TUBB1	-0.0188312	0.0307620	0.3399596
present_vs_absent_baseline	MEthistle4	thistle4	ZFX	-0.0183656	0.0348945	0.3399596
present_vs_absent_baseline	MEroyalblue	royalblue	SLC7A6	-0.0177337	0.0419063	0.3399596
present_vs_absent_baseline	MEpink4	pink4	IKZF1	-0.0175644	0.0436578	0.3399596
present_vs_absent_baseline	MEdarkslateblue	darkslateblue	CAMK4	-0.0175251	0.0440864	0.3399596
present_vs_absent_baseline	MEplum4	plum4	KDM3B	-0.0172022	0.0484004	0.3399596
present_vs_absent_baseline	MEorange	orange	SPI1	-0.0171718	0.0488727	0.3399596
present_vs_absent_baseline	MEskyblue1	skyblue1	RIOK3	0.0170896	0.0497238	0.3399596

#protection
outcome_3mos <- factor(wgcna_eset$`uninfected, 3 months`)
design <- model.matrix(~0+outcome_3mos+site+treatment)
colnames(design)

## [1] "outcome_3mosinfected"       "outcome_3mosnever infected"
## [3] "siteWagai"                  "treatment4.5 x 10^5 PfSPZ" 
## [5] "treatment9.0 x 10^5 PfSPZ"  "treatment1.8 x 10^6 PfSPZ"

colnames(design) <- c("not_protected","protected", "wagai", "lowdose","mediumdose","highdose")
fit <- lmFit(exprs(wgcna_eset), design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(protected - not_protected,
                           levels=design) 
contrasts

##                Contrasts
## Levels          protected - not_protected
##   not_protected                        -1
##   protected                             1
##   wagai                                 0
##   lowdose                               0
##   mediumdose                            0
##   highdose                              0

fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)

p_vs_np_3mos <- topTable(fit2, coef="protected - not_protected", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "protected_vs_not_protected") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
p_vs_np_3mos %>%
  filter(P.Value<0.05) %>%
  knitr::kable()

comparison	module	module_color	hub_gene	logFC	P.Value	adj.P.Val
protected_vs_not_protected	MEthistle3	thistle3	SEC62	-0.0189284	0.0233458	0.6663581
protected_vs_not_protected	MEskyblue1	skyblue1	RIOK3	-0.0186788	0.0252342	0.6663581
protected_vs_not_protected	MEmediumpurple1	mediumpurple1	CSDE1	-0.0185282	0.0265553	0.6663581
protected_vs_not_protected	MElavenderblush2	lavenderblush2	EFHD2	0.0172669	0.0388438	0.6663581

#CSP Ab response 
FC_CSPAb <- factor(wgcna_eset$`fold-change anti-CSP IgG`)
design <- model.matrix(~0+FC_CSPAb+site+treatment) 
colnames(design)

## [1] "FC_CSPAblow responder"     "FC_CSPAbhigh responder"   
## [3] "siteWagai"                 "treatment4.5 x 10^5 PfSPZ"
## [5] "treatment9.0 x 10^5 PfSPZ" "treatment1.8 x 10^6 PfSPZ"

colnames(design) <- c("low_responder","high_responder","wagai","lowdose","mediumdose","highdose")
fit <- lmFit(exprs_dat, design) # Fit linear models for each gene given a series of arrays.
contrasts <- makeContrasts(high_responder - low_responder,
                           levels=design) 
contrasts

##                 Contrasts
## Levels           high_responder - low_responder
##   low_responder                              -1
##   high_responder                              1
##   wagai                                       0
##   lowdose                                     0
##   mediumdose                                  0
##   highdose                                    0

fit2 <- contrasts.fit(fit, contrasts)
fit2 <- eBayes(fit2, trend=TRUE)
hi_vs_low_CSP <- topTable(fit2, coef="high_responder - low_responder", number = Inf) %>%
  rownames_to_column("module") %>%
  right_join(., feature_dat %>%
               rownames_to_column("module"),
             by = "module") %>%
  mutate(comparison = "hi_vs_low_CSP") %>%
  dplyr::select(comparison, module, module_color, hub_gene, logFC, P.Value, adj.P.Val)
hi_vs_low_CSP %>%
  filter(P.Value<0.05) %>%
  knitr::kable()

comparison	module	module_color	hub_gene	logFC	P.Value	adj.P.Val
hi_vs_low_CSP	MEtan	tan	FCGR2A	0.0316844	0.0200955	0.8094339
hi_vs_low_CSP	MEcoral2	coral2	RAB3D	0.0306907	0.0243497	0.8094339
hi_vs_low_CSP	MEcyan	cyan	TUBB1	0.0299536	0.0279575	0.8094339
hi_vs_low_CSP	MEskyblue3	skyblue3	ZDHHC18	0.0298027	0.0287540	0.8094339
hi_vs_low_CSP	MEmagenta3	magenta3	PCDHGA7	-0.0277972	0.0413908	0.8094339
hi_vs_low_CSP	MEorange	orange	SPI1	0.0274708	0.0438310	0.8094339

all_module_trait_dat <- rbind(Pf_baseline, CSPAb_baseline, hi_vs_low_CSP, p_vs_np_3mos) %>%
  arrange(P.Value)

p_vs_np_3mos_select <- p_vs_np_3mos %>%
  filter(P.Value<0.05)

all_module_trait_hm_dat <- all_module_trait_dat %>%
  filter(hub_gene %in% p_vs_np_3mos_select$hub_gene) %>%
  dplyr::select(comparison, module_color, hub_gene, logFC) %>%
  pivot_wider(names_from = comparison, values_from = logFC) %>%
  column_to_rownames(var = "hub_gene")

Heatamp for WGCNA Limma data

all_module_trait_hm_annot_dat <- all_module_trait_hm_dat %>%
  rownames_to_column(var= "hub_gene") %>%
  dplyr::select(hub_gene)

all_module_trait_hm_mat <- all_module_trait_hm_dat %>%
  dplyr::select(-module_color) %>%
  as.matrix()

row_ha = rowAnnotation(module = all_module_trait_hm_dat$module_color)

clab <- list(
  "hub_gene" = c("RIOK3" = "skyblue1", "SEC62" = "thistle3", CSDE1 = "mediumpurple1", EFHD2 = "lavenderblush2")
  )

clab <- rev(clab)
row_ha <- rowAnnotation(df = all_module_trait_hm_annot_dat, col = clab, na_col = "lavender", annotation_name_gp = gpar(fontsize = 7),
                        simple_anno_size = unit(0.2, "inches"))


paletteLength <- 50
# length(breaks) == length(paletteLength) + 1
# use floor and ceiling to deal with even/odd length pallettelengths
myBreaks <- c(seq(-1*max(abs(all_module_trait_hm_mat)), 0, length.out=ceiling(paletteLength/2) + 1), 
              seq(max(all_module_trait_hm_mat)/paletteLength, max(all_module_trait_hm_mat), length.out=floor(paletteLength/2)))
myColor <- colorRampPalette(c("dodgerblue", "white", "#b22222"))(length(myBreaks))

hm <- Heatmap(all_module_trait_hm_mat,
        col = circlize::colorRamp2(myBreaks, myColor),
        color_space = "LAB",
        row_names_side = "left",
        row_dend_side = "right",
        left_annotation = row_ha,
        border = TRUE)

draw(hm)

Identify hub genes from protection analysis

myColors <- gsub("ME", "", p_vs_np_3mos_select$module_color)
mytopmodules <- topmodules[myColors] %>%
  as.data.frame() %>%
  dplyr::rename(EnsemblID = ".") %>%
  rownames_to_column("module_label") %>%
  as_tibble() %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
                      colors2plot =  myColors,
                      kME.threshold = 0.5, adjacency.threshold = 0.1,
                      adj.power = power, verbose = T,
                      node.size = 1.5, frame.color = NA, node.color = scales::muted("red"),
                      edge.alpha = .7, edge.width = 0.5)

## using KME values to find genes with high module membership
## subsetting to modules: thistle3, skyblue1, mediumpurple1, lavenderblush2 
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 169 nodes and 3154 edges

hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
  options = arpack_defaults)

Plot individual beeswarmplot plots for significant module-trait relationships

beeswarmplot_dat <- orderMEs(net$MEs) %>% 
  dplyr::select(rownames(modTraitCor.select)) %>%
  rownames_to_column(var = "subject") %>%
  mutate(subject = gsub("\\_0", "", subject)) %>%
  right_join(., pData(wgcna_eset) %>%
               dplyr::select(-c(site, treat)) %>%
               rownames_to_column(var = "subject"),
             by = "subject") %>%
  pivot_longer(cols = contains("ME"), names_to = "module", values_to = "MEs") %>%
  mutate(module = gsub("MEskyblue1", "RIOK3", module)) %>%
  mutate(module = gsub("MEmediumpurple1", "CSDE1", module)) %>%
  mutate(module = gsub("MEthistle3", "SEC62", module)) %>%
  mutate(module = gsub("MElavenderblush2", "EFHD2", module))

library(tidyverse)
library(broom)
beeswarmplot_dat_factor <- beeswarmplot_dat %>%
  pivot_longer(cols = c("Pf infection at first vax":"uninfected, 3 months"), names_to = "trait", values_to = "value")  %>%
  filter(module %in% p_vs_np_3mos_select$hub_gene) %>%
  mutate(trait = factor(trait, levels = c("Pf infection at first vax",
                                          "pre-vax anti-CSP IgG",
                                          "uninfected, 3 months",
                                          "fold-change anti-CSP IgG")))

beeswarmplot_dat_factor_norm_test <- beeswarmplot_dat_factor %>%
  group_by(module, trait, value)%>% 
  do(tidy(shapiro.test(.$MEs))) %>% 
  ungroup() %>%
  dplyr::select(-method) %>%
  mutate(is_normal = ifelse(p.value<0.05, "no","yes"))
  

beeswarmplot_dat_factor_n <- beeswarmplot_dat_factor %>%
  group_by(module, trait, value) %>%
  summarise(n = n())

## `summarise()` has grouped output by 'module', 'trait'. You can override using
## the `.groups` argument.

data_summary <- function(x) {
  m <- mean(x, na.rm=TRUE)
  sem <-sd(x, na.rm=TRUE)/sqrt(sum(!is.na(x)))
  ymin<-m-1.96*sem
  ymax<-m+1.96*sem
  return(c(y=m,ymin=ymin,ymax=ymax))
}

all_beeswarm_plot <- beeswarmplot_dat_factor %>%
  ggplot(., aes(x=value, y=MEs)) +
  ggbeeswarm::geom_beeswarm(alpha = 0.5, color = "lightblue") +
  stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. - 0.1, yend=..y..), linewidth = 0.5)+
  stat_summary(fun="mean", geom="segment", mapping=aes(xend=..x.. + 0.1, yend=..y..), linewidth = 0.5) +
  stat_summary(fun.data=data_summary, geom="errorbar", width=0.1) +
  facet_grid(module~trait, scales = "free") + 
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  ylab("module eigengene") +
  xlab("") +
  theme_bw() +
  theme(axis.text = element_text(colour = "black"),
        strip.background = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
        plot.title = element_text(size=8))

signif_hub_genes_only <- all_module_trait_dat %>%
  filter(hub_gene %in% p_vs_np_3mos_select$hub_gene)

Figure S3A of revised manuscript

all_beeswarm_plot

## Warning: The dot-dot notation (`..x..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#### Identify hub genes 2
myColors <- gsub("ME", "", rownames(modTraitCor.select))
topmodules <- chooseTopHubInEachModule(WGCNA_matrix, net$colors, omitColors = "grey", power = 12.5, type ="signed")
mytopmodules <- topmodules[myColors] %>%
  as.data.frame() %>%
  dplyr::rename(EnsemblID = ".") %>%
  rownames_to_column("module_label") %>%
  as_tibble() %>%
  left_join(., fData(x) %>%
              dplyr::select(EnsemblID, GeneSymbol), by = "EnsemblID")
devtools::source_url("https://github.com/jtlovell/limmaDE2/blob/master/R/wgcna2igraph.R?raw=TRUE")
graph <- wgcna2igraph(net = net, WGCNA_matrix, modules2plot = myColors,
                      colors2plot =  myColors,
                      kME.threshold = 0.5, adjacency.threshold = 0.1,
                      adj.power = power, verbose = T,
                      node.size = 1.5, frame.color = NA, node.color = scales::muted("red"),
                      edge.alpha = .7, edge.width = 0.5)

## using KME values to find genes with high module membership
## subsetting to modules: skyblue1, mediumpurple1, thistle3, lavenderblush2 
## culling edges by adjacency
## removing unconnected nodes
## coverting to igraph format
## returning a network with 169 nodes and 3154 edges

hubscores <- hub_score(graph, scale = TRUE, weights = NULL,
  options = arpack_defaults)

Plot network graph of significant modules (Figure 2B of revised mansucript)

Network graphs of significant modules containing nodes (red dots) and edges (lines) meeting minimum thresholds. Correlations between nodes in different modules are shown as black edges.

Identify modules with < 0.05

WGCNA_dat_select <- c()
for(i in modTraitP.temp$Module){
  module.color <- sub("ME","", i)
  module.colname <- paste0("kME", i)
  WGCNA_dat_select[[i]] <- WGCNA_dat %>% 
    filter(ModuleColors == module.color) %>%
    dplyr::select(GeneSymbol, all_of(module.colname))
}
lapply(WGCNA_dat_select, nrow)

## $MEskyblue1
## [1] 75
## 
## $MEmediumpurple1
## [1] 55
## 
## $MEthistle3
## [1] 68
## 
## $MElavenderblush2
## [1] 62

Over-representation Analysis

source("https://raw.githubusercontent.com/TranLab/kspzv1-systems/main/helper/ApplyORA2Genesets.R")
myuniverse <- unique(fData(x)[fData(x)$EnsemblID %in% colnames(WGCNA_matrix),]$GeneSymbol)
ORA_baseline_bound_df <- c()
minSize <- 15

#Combine similar modules skyblue1, mediumpurple1, thistle3 in to one
WGCNA_dat_select2 <- list()
WGCNA_dat_select2$EFHD2 <- WGCNA_dat_select$MElavenderblush2
WGCNA_dat_select2$RIOK3_CSDE1_SEC62 <- data.table::rbindlist(list(WGCNA_dat_select$MEskyblue1,
                                                WGCNA_dat_select$MEmediumpurple1,
                                                WGCNA_dat_select$MEthistle3)) %>%
  as.data.frame() %>%
  rename(kMEMEall = "kMEMEskyblue1")
for(k in names(WGCNA_dat_select2)){
  WGCNA_dat_select2[[k]] <- WGCNA_dat_select2[[k]][order(WGCNA_dat_select2[[k]][,2], decreasing = TRUE),]
  ORA_baseline_bound_df[[k]] <- ApplyORA2Genesets(genelist = WGCNA_dat_select2[[k]]$GeneSymbol,
                                                  geneset = "all",
                                                  universe = myuniverse,
                                                  output_directory = tempdir(),
                                                  filename_prefix = paste0("ORA_Mod_Corr_Protect_3_mos_", k,
                                                                           "_minSize", minSize),
                                                  minSize = minSize)
  }

## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"
## [1] "Running ORA for MonacoModules signatures"
## [1] "Running ORA for highBTMs signatures"
## [1] "Running ORA for lowBTMs signatures"
## [1] "Running ORA for BloodGen3Module signatures"
## [1] "Running ORA for MSigDB_Hallmark_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_biocarta_v7.4 signatures"
## [1] "Running ORA for MSigDB_C2_kegg_v7.4 signatures"
## [1] "Running ORA for MSigDB_C8_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C7_all_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_bp_v7.4 signatures"
## [1] "Running ORA for MSigDB_C5_GO_mf_v7.4 signatures"

RIOK3_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MEskyblue1, .id = "module_type") %>%
  mutate(module = "skyblue1") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MEskyblue1)) %>%
  mutate(hub_gene = "RIOK3") 
CSDE1_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MEmediumpurple1, .id = "module_type") %>%
  mutate(module = "mediumpurple1") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MEmediumpurple1)) %>%
  mutate(hub_gene = "CSDE1")
SEC62_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MEthistle3, .id = "module_type") %>%
  mutate(module = "thistle3") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MEthistle3)) %>%
  mutate(hub_gene = "SEC62")
EFHD2_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$MElavenderblush2, .id = "module_type") %>%
  mutate(module = "lavenderblush2") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MElavenderblush2)) %>%
  mutate(hub_gene = "EFHD2")

EFHD2_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$EFHD2, .id = "module_type") %>%
  mutate(module = "lavenderblush2") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MElavenderblush2)) %>%
  mutate(hub_gene = "EFHD2")

RIOK3_CSDE1_SEC62_ORA_baseline_bound <- bind_rows(ORA_baseline_bound_df$RIOK3_CSDE1_SEC62, .id = "module_type") %>%
  mutate(module = "the_three_amigos") %>%
  mutate(module_size = nrow(WGCNA_dat_select$MElavenderblush2)) %>%
  mutate(hub_gene = "RIOK3_CSDE1_SEC62")


ORA_baseline_allbound <- bind_rows(RIOK3_ORA_baseline_bound,
                                  CSDE1_ORA_baseline_bound,
                                  SEC62_ORA_baseline_bound,
                                  EFHD2_ORA_baseline_bound,
                                  RIOK3_CSDE1_SEC62_ORA_baseline_bound) %>%
  mutate(neglogpadj = -log10(padj)) %>%
  mutate(pct_overlap = 100*(overlap/module_size)) %>%
  dplyr::select(module, module_size, hub_gene, module_type, pathway, overlap,
                pct_overlap, size, overlapGenes, pval, padj, neglogpadj)

Plot WGCNA Over-representation Results

baseline analysis

myModuleTypes <- c("MSigDB_Hallmark_v7.4", "MSigDB_C2_kegg_v7.4", "highBTMs", "BloodGen3Module")
myORAClusterPlotDat <- ORA_baseline_allbound %>%
  mutate(pathway = gsub("VS", "v", pathway)) %>%
  mutate(pathway = gsub("Vd", "Vδ", pathway)) %>%
  mutate(pathway = gsub("gd", "γδ", pathway)) %>%
  mutate(pathway = sub(".*?\\_", "", pathway)) %>%
  mutate(pathway = fct_reorder(pathway, neglogpadj))  %>%
  arrange(desc(neglogpadj))%>%
  mutate(TextLabelColor = ifelse(module_type == "BloodGen3Module", scales::muted("red"),
                                 ifelse(module_type == "MSigDB_C2_kegg_v7.4", scales::muted("blue"),
                                        ifelse(module_type == "MSigDB_Hallmark_v7.4", "black","gray")))) %>%
  filter(padj < 0.01) %>%
  filter(pct_overlap >= 5) %>%
  filter(module_type %in% myModuleTypes) %>%
  group_by(hub_gene) %>%
  arrange(neglogpadj) %>%
  slice_tail(n = 4) %>%
  ungroup()

Arrange plots

addSmallLegend <- function(myPlot, pointSize = 1.5, textSize = 3, spaceLegend = 0.3) {
    myPlot +
        guides(shape = guide_legend(override.aes = list(size = pointSize)),
               color = guide_legend(override.aes = list(size = pointSize))) +
        theme(legend.title = element_text(size = textSize), 
              legend.text  = element_text(size = textSize),
              legend.key.size = unit(spaceLegend, "lines"))
}

scale_begin <- floor(min(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))
scale_end <- ceiling(max(myORAClusterPlotDat$pct_overlap, na.rm = TRUE))

myArrangedPlot <- myORAClusterPlotDat %>%
  filter(hub_gene == "EFHD2" | hub_gene == "RIOK3_CSDE1_SEC62") %>%
  mutate(pathway = fct_reorder(pathway, neglogpadj))  %>%
  ggplot(., aes(x = neglogpadj, y = pathway, fill = pct_overlap)) +
  geom_bar(stat = 'identity') +
  geom_vline(xintercept = 2, color = "red", linetype = "dotted") +
  scale_fill_distiller(direction = 1, breaks = c(0, 10, 20, 30, 40, 50), limits = c(0,50)) +
  ylab("module") +
  theme_classic(base_family = "sans", base_size = 14) +
  theme(legend.position = "bottom",
        plot.margin = unit(c(0,0.5,0,0.5), "cm")) +
  facet_wrap(~hub_gene, scales ="free", nrow = 2) +
  theme(strip.background = element_blank())