Makes volcano plots from IPA tables
myFiles <- list.files(path = datadir, pattern='*263 pval25 with logCSP SEX.xls')
df.list <- lapply(paste0(datadir, myFiles), read_excel, skip = 1)
names(df.list) <- gsub(" UpReg LFC 263 pval25 with logCSP SEX.xls", "", myFiles)
UpRegPvS <- bind_rows(df.list, .id = "Comparison")
UpRegPvS %>%
mutate(Treatment = factor(Comparison, levels = c("Baseline Placebo", "Baseline Lowdose", "Baseline Meddose", "Baseline Highdose"),
labels = c("Placebo", "4.5x105", "9.0x105", "1.8x106"))) %>%
mutate(Timepoint = "Baseline") %>%
mutate(Comparison = "Protected vs Not Protected") %>%
# filter(`B-H corrected p-value` < 0.2) %>%
# filter(`Activation z-score` >= 2 | `Activation z-score` <= -2) %>%
select(Timepoint, Treatment, Comparison, everything()) %>%
writexl::write_xlsx(., paste0(datadir, "IPA Upstrem Regulator Baseline Protected vs NonProtected All Treatment.xlsx"))
temp <- tempfile(fileext = ".xlsx")
dl <- drive_download(
as_id("1Dl4Fv-mXxdRxMzPmyUeSkM5vDldR0bf2"), path = temp, overwrite = TRUE)
UpRegPvS <- readxl::read_excel(path = dl$local_path)
Baseline Upstream Regulator Volcano Plot Fig 2E Revised
significance.cutoff <- 0.2
zscore.cutoff <- 2
MoleculeTypes <- c("cytokine", "transcription regulator", "G-protein coupled receptor", "chemical - endogenous non-mammalian", "chemical - endogenous mammalian",
"group", "chemical toxicant", "microRNA", "enzyme", "translation regulator")
# MoleculeTypes <- c("cytokine", "transcription regulator", "G-protein coupled receptor", "chemical - endogenous non-mammalian", "chemical drug", "chemical - endogenous mammalian",
# "complex", "group", "chemical toxicant", "microRNA", "enzyme", "translation regulator")
# UpRegPvS_foo <- UpRegPvS %>%
# filter(`p-value of overlap`<0.05) %>%
# filter(Comparison == "Baseline Placebo")
UpRegPvS_foo <- UpRegPvS %>%
mutate(`Dose group` = factor(Treatment, levels = c("Placebo", "4.5x105", "9.0x105", "1.8x106"))) %>%
dplyr::select("Dose group", "Upstream Regulator", "Molecule Type", "Activation z-score", "p-value of overlap", "B-H corrected p-value", "Comparison") %>%
dplyr::rename(MoleculeType = "Molecule Type") %>%
mutate(`-log10(p-value of overlap)` = -log10(`p-value of overlap`)) %>%
mutate(`-log10(B-H adj. p value)` = -log10(`B-H corrected p-value`)) %>%
mutate(`Upstream Regulator` = gsub("\\(and other miRNAs.*", "", `Upstream Regulator`)) %>%
mutate(significant = ifelse(`B-H corrected p-value` < significance.cutoff & `Activation z-score` >= 2.2 |
`B-H corrected p-value`< significance.cutoff & `Activation z-score` <= - 2, TRUE, FALSE)) %>%
filter(`B-H corrected p-value`< significance.cutoff) %>%
mutate(color = ifelse(significant == TRUE & `Activation z-score`>0, "red", "grey50")) %>%
mutate(color = ifelse(significant == TRUE & `Activation z-score` < 0, "blue", color)) %>%
mutate(label = ifelse(significant == TRUE, `Upstream Regulator`, "")) %>%
filter(MoleculeType %in% MoleculeTypes) %>%
mutate(label = gsub("Salmonella enterica serotype abortus equi lipopolysaccharide", "Salmonella LPS", label)) %>%
mutate(label = gsub("salmonella minnesota R595 lipopolysaccharides", "Salmonella LPS", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("tetradecanoylphorbol acetate", "PMA", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("sphingosine-1-phosphate", "S1P", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("beta-estradiol", "β-estradiol", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("Interferon alpha", "IFNα", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("IL1", "IL1β", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("IFN Beta", "IFNβ", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("Ifnar", "IFNAR", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("Tgf beta", "TGFβ", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("G protein alphai", "Giα protein", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("Nfat \\(family\\)", "NFAT", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("Alpha ", "α-", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("LPSs", "LPS", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("lipopolysaccharide", "LPS", label)) %>% #replacing long names with shorter names/synonyms
mutate(label = gsub("medroxyprogesterone acetate", "MPA", label))
#identify upstream regulators that appear as significant in more than one comparison
foo <- UpRegPvS_foo %>%
group_by(label) %>%
summarize(n = n()) %>%
dplyr::filter(n > 1) %>%
dplyr::filter(label != "")
#https://datavizpyr.com/how-to-add-labels-to-select-points-with-ggrepel/
UpRegPvS_label_df <- UpRegPvS_foo %>%
dplyr::filter(label %in% foo$label | #limit labels due to space limitations
(`Activation z-score` < -2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "Placebo")|
(`Activation z-score` > 2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "Placebo")|
(`Activation z-score` < -2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "4.5x105")|
(`Activation z-score` > 2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "4.5x105")|
(`Activation z-score` < -2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "4.5x105")|
(`Activation z-score` > 2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "9.0x105")|
(`Activation z-score` < -2.3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "9.0x105")|
(`Activation z-score` > 2.5 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "1.8x106")|
(`Activation z-score` < -3 & `B-H corrected p-value` < significance.cutoff & `Dose group` == "1.8x106")|
grepl("lipopolysaccharide", .$`Upstream Regulator`) |
grepl("IL1", .$`Upstream Regulator`)) #limit label to upstream regulators that appear as significant in more than one comparison
myUpRegPlot <- UpRegPvS_foo %>%
ggplot(., aes(x = `Activation z-score`, y = `-log10(B-H adj. p value)`, label = label)) +
geom_point(color = as.character(UpRegPvS_foo$color), alpha = 0.6) +
geom_hline(yintercept = -log10(significance.cutoff), color = "blue", linetype = "dotted") +
geom_vline(xintercept = c((-1*zscore.cutoff), zscore.cutoff), color = "red", linetype = "dotted") +
ggrepel::geom_label_repel(data = UpRegPvS_label_df, force = 15, force_pull = 2, size = 3.6, max.overlaps = 20,
label.size = NA,
label.padding = 0.02,
box.padding = 0.02,
fill = alpha(c("white"),0.8)) +
theme_bw() +
xlim(-6,6)+
facet_wrap(~`Dose group`, nrow = 2, ncol = 2) +
theme(axis.text=element_text(size=16),
axis.title=element_text(size=16),
strip.text = element_text(size=16),
strip.background = element_blank())
myUpRegPlot