前言
多组学文章经常出现非连续变量的热图或者叫格子图。举几个例子:
以上两个图都来自2021.09的一篇Cell,标题是Proteogenomic characterization of pancreatic ductal adenocarcinoma。今天就不细讲这两幅图了。这种图给我们展示离散/分类变量的差异提供了一个思路。今天就简单介绍几种常用的画这种图的方法。
常用方法
构建一个分类变量组成的示例数据。
# Packages used by the examples below: ggplot2 for plotting, reshape2 for
# melt(), RColorBrewer for the fill palette.
library(ggplot2)
library(tidyverse)
library(reshape2)
library(RColorBrewer)

# Toy clinical table: one row per patient, every column is categorical.
# The values come from sample(), so they differ between runs unless a seed
# is set beforehand.
clinical.df <- data.frame(
  patient = paste0("P", seq_len(15)),
  age = sample(c("young", "old"), 15, replace = TRUE),
  gender = sample(c("male", "female"), 15, replace = TRUE),
  symptom = sample(c("mild", "moderate", "severe"), 15, replace = TRUE),
  RNAseq = sample(c("yes", "no"), 15, replace = TRUE),
  WES = sample(c("yes", "no"), 15, replace = TRUE)
)
head(clinical.df)

# Reshape to long format (columns: patient, variable, value), one row per
# patient/variable pair. The plotting code further down reads clinical.df2.
clinical.df2 <- melt(clinical.df, id.vars = "patient")
head(clinical.df2)
> head(clinical.df)
  patient   age gender  symptom RNAseq WES
1      P1   old female moderate    yes  no
2      P2   old   male moderate    yes  no
3      P3   old   male moderate    yes yes
4      P4 young female   severe    yes yes
5      P5   old female moderate     no  no
6      P6 young   male moderate     no  no
接着用 reshape2 的 melt() 以 patient 为标识列,把宽格式的 clinical.df 转换为长格式的 clinical.df2(即 clinical.df2 <- melt(clinical.df, id.vars = "patient")),每一行对应一个患者的一个变量:
> head(clinical.df2)
  patient variable value
1      P1      age   old
2      P2      age   old
3      P3      age   old
4      P4      age young
5      P5      age   old
6      P6      age young
geom_tile
# Fill palette: nine colours from the ColorBrewer "Set3" palette.
Color <- brewer.pal(9, "Set3")

# Fix the factor order so patients appear as P1..P15 on the x axis instead of
# alphabetically (P1, P10, P11, ...).
clinical.df2$patient <- factor(
  clinical.df2$patient,
  levels = paste0("P", seq_len(15))
)
# ggplot2 draws the first level of a discrete y axis at the bottom, so the
# levels are listed bottom-to-top ("age" ends up as the top row).
clinical.df2$variable <- factor(
  clinical.df2$variable,
  levels = c("WES", "RNAseq", "symptom", "gender", "age")
)

# One tile per patient/variable pair, coloured by the category value.
ggplot(clinical.df2, aes(x = patient, y = variable, fill = value)) +
  # `linewidth` replaces the deprecated `size` for tile borders
  # (ggplot2 >= 3.4.0); on older versions use size = 0.25 instead.
  geom_tile(color = "white", linewidth = 0.25) +
  scale_fill_manual(name = "Category", values = Color) +
  theme(
    panel.background = element_blank(),
    plot.title = element_text(size = rel(1.2)),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.title = element_blank(),
    legend.position = "right"
  )
ggwaffle
# Install ggwaffle from GitHub only when it is missing, so re-running the
# script does not reinstall the package (or require network access) each time.
if (!requireNamespace("ggwaffle", quietly = TRUE)) {
  devtools::install_github("liamgilbey/ggwaffle")
}
library(ggwaffle)

# Same mapping as the geom_tile version, with geom_waffle() as the layer.
ggplot(clinical.df2, aes(patient, variable, fill = value)) +
  geom_waffle() +
  scale_fill_manual(name = "Category", values = Color) +
  theme(
    panel.background = element_blank(),
    plot.title = element_text(size = rel(1.2)),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    legend.title = element_blank(),
    legend.position = "right"
  )
和geom_tile异曲同工。
ComplexHeatmap
ComplexHeatmap应该是最能还原本文前言图的包,不过我这里暂时还没时间搞定,后续发复现版本的代码。
library(ComplexHeatmap)

# Build the heatmap matrix (variables in rows, patients in columns) without
# overwriting clinical.df, so this block can be re-run safely. Overwriting the
# table in place would make a second run fail with duplicate row names.
clinical.mat <- t(as.matrix(
  clinical.df[, setdiff(names(clinical.df), "patient"), drop = FALSE]
))
colnames(clinical.mat) <- clinical.df$patient
clinical.df3 <- data.frame(clinical.mat)

# Heatmap() expects a matrix; a character matrix is drawn as a discrete heatmap.
Heatmap(as.matrix(clinical.df3))
未经雕饰的图确实不是很美观。
总结
以上就是我所知的几种常用的画离散变量的热图的方法。如果大家有更巧妙的想法,欢迎在后台留言互相学习交流。