1、为每个Case计算内部DEGS
pacman::p_load(Seurat,dplyr,ggplot2,forcats,future.apply);
plan(multisession(workers = 6));sprintf("FUTURE CURRENT WORKERS = %s", nbrOfWorkers())
## snrnaseq DEGs for multi batch
seu <- readRDS("</seurat-object/>.Rds")
future.apply::future_lapply("</batch/>", function(x){
cur_seu <- subset("</code/>")
cur_seu <- cur_seu %>% SCTransform(verbose = 0)
Idents(cur_seu) <- "</code/>"
degs_mat <- FindAllMarkers(cur_seu,only.pos = T) %>%
filter(p_val_adj < 5e-1) %>% filter(!grepl("^ENS",gene)) %>%
mutate(Ident = x)
}) %>% bind_rows() -> degLT.dat
if(F) degLT.dat %>% write.table("</FILE NAME/>",sep = "\t",col.names = T,row.names = F,quote = F)
2、通路富集与去冗余和简单可视化
pacman::p_load(clusterProfiler,enrichplot,DOSE,forcats)
### Load DEG for GO enrichment analysis
degLT.dat <- read.delim("</FILE NAME/>")
##### 多基因集 GO富集
Go.item <- compareCluster(gene ~ cluster, data = degLT.dat,
keyType = 'SYMBOL',
fun='enrichGO',
OrgDb = 'org.Hs.eg.db',
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
ont="BP",readable = T)
### 富集通路去冗余
Go.item %>% clusterProfiler::simplify() ->trim_GO.item
### Top 差异通路展示:
### includeAll=F 设置仅显示每个Cluster的top_n(根据p.adjust定义) item,并屏蔽该item在其它Cluster的显示
dotplot(trim_GO.item,includeAll = F,label_format = 100) + labs(x = "")
emapplot(pairwise_termsim(trim_GO.item),showCategory = 5,pie.params = list(pie = "count",legend_n = 3)) + scale_fill_brewer(palette = "Set1")
3、计算当前富集通路的富集强度分数 Fold Enrichment(FE)
### 添加计算指标 :Fold Enrichment(FE)用于描述给定基因集在GO的富集程度。
### FE>1表明该GO在基因集中富集;FE<1表明该GO在基因集中贫化;
trim_GO.item <- trim_GO.item %>% filter(p.adjust < 5e-2) %>% mutate(FE = parse_ratio(GeneRatio)/parse_ratio(BgRatio)) %>% select(-cluster)
### 可视化 FE分数,根据FE展示每个Cluster top_n个 GO_iterm
as.data.frame(trim_GO.item) %>%
group_by(Cluster) %>% slice_max(order_by = FE,n = 10,with_ties = F) %>% ungroup() %>%
ggplot(aes(x = FE,y= fct_reorder(Description,FE,mean,.desc=F))) +
geom_segment(aes(xend=0, yend = Description)) +
geom_point(aes(colour=p.adjust, size = Count)) +
scale_colour_gradientn(colours = rev(viridis::viridis(10, option = "D")),guide=guide_colorbar(reverse=T)) +
scale_size_continuous(range=c(2, 10)) + theme_bw() +
scale_x_continuous(expand = expansion(mult= c(0, .25))) +
labs(x = "Fold Enrichment(FE)", y =NULL) +
facet_grid(cols = vars(Cluster),scales = "free")
4、为每个CASE下富集到各Cluster的GO基因集计算GSEA
### 计算每个Cluster内已富集GO通路的 NES,该分数仅Normalized了通路基因集的大小,不同rank_list之间的NES不可比
library(future.apply);plan(multisession,workers = 4);sprintf("FUTURE CURRENT WORKERS = %s", nbrOfWorkers())
future_lapply(as.vector(unique(trim_GO.item@compareClusterResult$Cluster)), function(x){
### PP degs ranklist
degLT <- degLT.dat %>% filter(Ident == "</case/>",cluster == x) %>% arrange(desc(avg_log2FC))
rank_list <- degLT[["avg_log2FC"]] ; names(rank_list) <- degLT[["gene"]]
### format GO.iterm to .GMT and runGSEA
dat_tmp <- trim_GO.item@compareClusterResult %>% filter(Cluster == x) %>% select(ID,geneID)
gmt_dat <- stringr::str_split(dat_tmp[["geneID"]],"/",simplify = T) %>% data.frame() %>%
mutate(ID = dat_tmp[["ID"]]) %>%
tidyr::pivot_longer(cols = !c('ID'),values_to = "gene") %>%
select(-name) %>% filter(gene != "")
gsea_rslt <- GSEA(rank_list, TERM2GENE= gmt_dat,minGSSize = 1,pvalueCutoff = 1,verbose=F)
if (nrow(gsea_rslt) == 0) as.data.frame(gsea_rslt) %>% add_row(ID = gmt_dat$ID)
### NES游走图
### enrichplot::gseaplot2(gsea_rslt, geneSetID = "GO:0034329",rel_heights = c(1.7, 0.2, 1),subplots = 1:3,pvalue_table = F,color = "blue")
#### combine output
gsea_rslt <- as.data.frame(gsea_rslt) %>% select(c(ID,NES,pvalue,p.adjust,leading_edge,core_enrichment)) %>% tibble::remove_rownames()
gsea_rslt <- gsea_rslt %>% rename("gsea.p"=pvalue,"gsea.p.adj"=p.adjust)
gsea_rslt <- trim_GO.item@compareClusterResult %>% filter(Cluster == x) %>% select(-c(qvalue,BgRatio,Count,geneID)) %>% plyr::join(gsea_rslt,by = "ID")
return(data.frame(gsea_rslt))
}) %>% bind_rows()-> trim_GO.item_GSEA.dat
5、保存文件
#### ... save
GO_Result <- list(Go.item@compareClusterResult,trim_GO.item@compareClusterResult,trim_GO.item_GSEA.dat)
GO_Result <- setNames(GO_Result,c("GO.item","trim_GO.item","trim_GO.item_gsea"))
lapply(1:length(GO_Result), function(obt){
if(obt == 1){
GO_Result[[obt]] %>% data.frame %>% xlsx::write.xlsx(sprintf("</FILE NAME/>"), sheetName=names(GO_Result)[obt], row.names=F)
}else{
GO_Result[[obt]] %>% data.frame %>% xlsx::write.xlsx(sprintf("</FILE NAME/>"), sheetName=names(GO_Result)[obt], row.names=F,append = T)
}
})->dev.null;remove(dev.null)
Reference
includeAll 参数释义--(guangchuangyu.github.io)
ClusterProfile 官方文档 --(yulab-smu.top)
gseaplot2 plots 分辨率问题 (bioconductor.org)