GSVA
导入示例数据
## 为正常和肿瘤的内皮细胞的基因表达矩阵 library(readxl) library(dplyr) dat <- read_excel("data_test.xlsx") dat <- dat %>% data.frame() row.names(dat) <- dat$gene dat <- dat[,-1] head(dat)
> head(dat) EC1_T EC2_T EC3_T EC0_N EC1_N EC3_N EC4_N RP11-34P13.3 0 0 0 0 0 0 0 FAM138A 0 0 0 0 0 0 0 OR4F5 0 0 0 0 0 0 0 RP11-34P13.7 0 0 0 0 0 0 0 RP11-34P13.8 0 0 0 0 0 0 0 RP11-34P13.14 0 0 0 0 0 0 0
参考基因集数据库
MSigDB数据库具体介绍详见:Q&A | 如何使用clusterProfiler对MSigDB数据库进行富集分析
这里我们使用:hallmark gene sets
开始分析
#BiocManager::install("GSEABase") BiocManager::install("GSVA", version = "3.14") # R 4.1.2 注意版本w library('GSEABase') library(GSVA) geneSets <- getGmt('h.all.v7.5.1.symbols.gmt') ###下载的基因集 GSVA_hall <- gsva(expr=as.matrix(dat), gset.idx.list=geneSets, mx.diff=T, # 数据为正态分布则T,双峰则F kcdf="Gaussian", #CPM, RPKM, TPM数据就用默认值"Gaussian", read count数据则为"Poisson", parallel.sz=4) # 并行线程数目 head(GSVA_hall)
> head(GSVA_hall) EC1_T EC2_T EC3_T EC0_N HALLMARK_TNFA_SIGNALING_VIA_NFKB -0.43490528 -0.36929145 -0.4694267 0.57790329 HALLMARK_HYPOXIA -0.19830871 -0.15040810 -0.1237437 0.31595670 HALLMARK_CHOLESTEROL_HOMEOSTASIS -0.15223695 -0.15160770 -0.0505465 0.24273366 HALLMARK_MITOTIC_SPINDLE -0.29757233 0.20140000 0.3185932 -0.09284047 HALLMARK_WNT_BETA_CATENIN_SIGNALING -0.08827878 0.03086390 0.2383694 0.24045204 HALLMARK_TGF_BETA_SIGNALING -0.26357054 -0.01068719 0.3041132 0.28761931 EC1_N EC3_N EC4_N HALLMARK_TNFA_SIGNALING_VIA_NFKB -0.1673606 0.2198982 0.33940521 HALLMARK_HYPOXIA -0.3292568 0.0906295 0.01407149 HALLMARK_CHOLESTEROL_HOMEOSTASIS -0.3566404 0.1532081 0.05151732 HALLMARK_MITOTIC_SPINDLE -0.3673806 0.1091566 -0.15295077 HALLMARK_WNT_BETA_CATENIN_SIGNALING -0.2352282 -0.1027337 -0.15598311 HALLMARK_TGF_BETA_SIGNALING -0.4387025 0.2336080 -0.14090342
limma差异通路分析
## limma #BiocManager::install('limma') library(limma) # 设置或导入分组 group <- factor(c(rep("Tumor", 3), rep("Normal", 4)), levels = c('Tumor', 'Normal')) design <- model.matrix(~0+group) colnames(design) = levels(factor(group)) rownames(design) = colnames(GSVA_hall) design # Tunor VS Normal compare <- makeContrasts(Tumor - Normal, levels=design) fit <- lmFit(GSVA_hall, design) fit2 <- contrasts.fit(fit, compare) fit3 <- eBayes(fit2) Diff <- topTable(fit3, coef=1, number=200) head(Diff)
> head(Diff) logFC AveExpr t P.Value HALLMARK_INTERFERON_GAMMA_RESPONSE -0.7080628 -0.021269395 -4.393867 0.0002713325 HALLMARK_INTERFERON_ALPHA_RESPONSE -0.7419587 -0.044864170 -4.299840 0.0003385128 HALLMARK_INFLAMMATORY_RESPONSE -0.5940473 -0.003525139 -4.193096 0.0004352397 HALLMARK_IL6_JAK_STAT3_SIGNALING -0.6117943 -0.038379008 -4.157977 0.0004727716 HALLMARK_TNFA_SIGNALING_VIA_NFKB -0.6670027 -0.043396767 -4.149307 0.0004825257 HALLMARK_ALLOGRAFT_REJECTION -0.4645747 0.015248663 -3.278981 0.0036971108 adj.P.Val B HALLMARK_INTERFERON_GAMMA_RESPONSE 0.004825257 0.43891329 HALLMARK_INTERFERON_ALPHA_RESPONSE 0.004825257 0.22763343 HALLMARK_INFLAMMATORY_RESPONSE 0.004825257 -0.01221232 HALLMARK_IL6_JAK_STAT3_SIGNALING 0.004825257 -0.09109393 HALLMARK_TNFA_SIGNALING_VIA_NFKB 0.004825257 -0.11056468 HALLMARK_ALLOGRAFT_REJECTION 0.030809257 -2.03863951
发散条形图绘制
## barplot dat_plot <- data.frame(id = row.names(Diff), t = Diff$t) # 去掉"HALLMARK_" library(stringr) dat_plot$id <- str_replace(dat_plot$id , "HALLMARK_","") # 新增一列 根据t阈值分类 dat_plot$threshold = factor(ifelse(dat_plot$t >-2, ifelse(dat_plot$t >= 2 ,'Up','NoSignifi'),'Down'),levels=c('Up','Down','NoSignifi')) # 排序 dat_plot <- dat_plot %>% arrange(t) # 变成因子类型 dat_plot$id <- factor(dat_plot$id,levels = dat_plot$id) # 绘制 library(ggplot2) library(ggtheme) # install.packages("ggprism") library(ggprism) p <- ggplot(data = dat_plot,aes(x = id,y = t,fill = threshold)) + geom_col()+ coord_flip() + scale_fill_manual(values = c('Up'= '#36638a','NoSignifi'='#cccccc','Down'='#7bcd7b')) + geom_hline(yintercept = c(-2,2),color = 'white',size = 0.5,lty='dashed') + xlab('') + ylab('t value of GSVA score, tumour versus non-malignant') + #注意坐标轴旋转了 guides(fill=F)+ # 不显示图例 theme_prism(border = T) + theme( axis.text.y = element_blank(), axis.ticks.y = element_blank() ) p # 添加标签 # 此处参考了:https://mp.weixin.qq.com/s/eCMwWCnjTyQvNX2wNaDYXg # 小于-2的数量 low1 <- dat_plot %>% filter(t < -2) %>% nrow() # 小于0总数量 low0 <- dat_plot %>% filter( t < 0) %>% nrow() # 小于2总数量 high0 <- dat_plot %>% filter(t < 2) %>% nrow() # 总的柱子数量 high1 <- nrow(dat_plot) # 依次从下到上添加标签 p <- p + geom_text(data = dat_plot[1:low1,],aes(x = id,y = 0.1,label = id), hjust = 0,color = 'black') + # 小于-1的为黑色标签 geom_text(data = dat_plot[(low1 +1):low0,],aes(x = id,y = 0.1,label = id), hjust = 0,color = 'grey') + # 灰色标签 geom_text(data = dat_plot[(low0 + 1):high0,],aes(x = id,y = -0.1,label = id), hjust = 1,color = 'grey') + # 灰色标签 geom_text(data = dat_plot[(high0 +1):high1,],aes(x = id,y = -0.1,label = id), hjust = 1,color = 'black') # 大于1的为黑色标签 ggsave("gsva_bar.pdf",p,width = 8,height = 8)
结果展示