论文
Removing unwanted variation from large-scale RNA sequencing data with PRPS
https://www.nature.com/articles/s41587-022-01440-w#data-availability
数据链接
https://zenodo.org/record/6459560#.Y2D2NHZBzid
https://zenodo.org/record/6392171#.Y2D2SXZBzid
代码链接
https://github.com/RMolania/TCGA_PanCancer_UnwantedVariation
今天推文复现的图没有出现在论文正文中,而是论文所提供代码里的一个图
首先是示例数据集
这个数据集论文中并没有直接提供,需要运行一系列代码才能获取。获取这个数据集的代码这里就不介绍了,推文主要介绍如何绘图
读取数据集
# Load the sample annotation table; column 1 of the CSV becomes the row names.
sample.info.01 <- utils::read.csv(
  file = "example_data/sample_info_brca_tcga_rnaseq.csv",
  row.names = 1
)
# Quick inspection: table dimensions, then the first few rows.
dim(sample.info.01)
utils::head(sample.info.01)
ComplexHeatmap的帮助文档
https://jokergoo.github.io/ComplexHeatmap-reference/book/
ComplexHeatmap安装
# Install ComplexHeatmap from Bioconductor only when it is not already
# available, so re-running the snippet does not trigger a reinstall.
if (!requireNamespace("ComplexHeatmap", quietly = TRUE)) {
  BiocManager::install("ComplexHeatmap")
}
作图代码
library(ComplexHeatmap)
# Colour assigned to each year label (2009-2015); the names are the year
# values as character strings.
years.colors <- c(
  "2009" = "green",
  "2010" = "purple4",
  "2011" = "blue",
  "2012" = "brown",
  "2013" = "tan1",
  "2014" = "darkgreen",
  "2015" = "black"
)
# Heatmap of the `year_mda` annotation in reversed sample order, coloured
# with `years.colors`; column clustering is switched off.
H.year <- ComplexHeatmap::Heatmap(
  matrix = rev(sample.info.01$year_mda),
  name = "Time (years)",
  col = years.colors,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12),
    ncol = 2,
    color_bar = "discrete"
  )
)
# Display the heatmap.
H.year
还可以把好多个热图拼到一起
完整代码
# Load the sample annotation table; column 1 of the CSV becomes the row names.
sample.info.01 <- utils::read.csv(
  file = "example_data/sample_info_brca_tcga_rnaseq.csv",
  row.names = 1
)
# Quick inspection: table dimensions, then the first few rows.
dim(sample.info.01)
utils::head(sample.info.01)
library(ComplexHeatmap)
# Colour assigned to each year label (2009-2015); the names are the year
# values as character strings.
years.colors <- c(
  "2009" = "green",
  "2010" = "purple4",
  "2011" = "blue",
  "2012" = "brown",
  "2013" = "tan1",
  "2014" = "darkgreen",
  "2015" = "black"
)
# Heatmap of the `year_mda` annotation in reversed sample order, coloured
# with `years.colors`; column clustering is switched off.
H.year <- ComplexHeatmap::Heatmap(
  matrix = rev(sample.info.01$year_mda),
  name = "Time (years)",
  col = years.colors,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12),
    ncol = 2,
    color_bar = "discrete"
  )
)
# Display the heatmap.
H.year
### Plates
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Number of distinct plate labels (the author's note records 38).
n.plate <- length(unique(sample.info.01$plate_RNAseq))
# Interpolate the 'PRGn' Brewer palette (its 6th, middle colour dropped) to
# get one colour per plate.
colfunc <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, 'PRGn')[-6])
color.plates <- colfunc(n.plate)
# Heatmap of the `plate_RNAseq` annotation in reversed sample order, with row
# and column clustering switched off.
H.plate <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$plate_RNAseq),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = color.plates,
  name = 'Plates',
  heatmap_legend_param = list(
    color_bar = "discrete",
    ncol = 4,
    title_gp = grid::gpar(fontsize = 12)))
### TSS
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Number of distinct tissue-source-site labels (the author's note records 40).
n.tss <- length(unique(sample.info.01$tss_RNAseq))
# Interpolate the 'BrBG' Brewer palette (its 6th, middle colour dropped) to
# get one colour per site.
colfunc <- grDevices::colorRampPalette(
  RColorBrewer::brewer.pal(11, 'BrBG')[-6]
)
color.tss <- colfunc(n.tss)
# Heatmap of the `tss_RNAseq` annotation in reversed sample order, with row
# and column clustering switched off.
H.tss <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$tss_RNAseq),
  cluster_rows = FALSE,
  cluster_columns = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = color.tss,
  name = 'Tissue source sites',
  heatmap_legend_param = list(
    color_bar = "discrete",
    ncol = 4,
    title_gp = grid::gpar(fontsize = 12)))
### Tissue
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Heatmap of the `Tissue.Type` annotation in reversed sample order, with row
# clustering switched off.
# NOTE(review): the colours are unnamed and the legend labels are overridden
# by hand, so the colour/label pairing depends on how ComplexHeatmap orders
# the distinct values of `Tissue.Type` -- confirm the legend matches the data.
H.tissue <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$Tissue.Type),
  cluster_rows = FALSE,
  column_names_gp = grid::gpar(fontsize = 12),
  col = c("#252525", 'blue', "#D9D9D9"),
  name = 'Tissues',
  heatmap_legend_param = list(
    color_bar = "discrete",
    direction = "vertical",
    ncol = 1,
    title_gp = grid::gpar(fontsize = 12),
    labels = c(
      'Primary tumor',
      'Metastatic tumor',
      'Adjacent normal')))
### Purity
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Heatmap of the `purity_HTseq_FPKM` annotation in reversed sample order,
# coloured with a 10-step plasma palette; row clustering is switched off.
H.purity <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$purity_HTseq_FPKM),
  column_names_gp = grid::gpar(fontsize = 12),
  cluster_rows = FALSE,
  name = 'Tumor purity score',
  col = viridis::plasma(n = 10),
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)))
### library size
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Heatmap of the `libSize` annotation in reversed sample order, coloured with
# a 10-step viridis palette; row clustering is switched off.
H.ls <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$libSize),
  cluster_rows = FALSE,
  name = 'Library size',
  column_names_gp = grid::gpar(fontsize = 12),
  col = viridis::viridis(n = 10),
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)))
### PAM50
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Fix: `pam50.colors` is never defined in this script. If the caller has not
# supplied one, build a placeholder palette with one colour per subtype label
# found in either PAM50 column, so both PAM50 heatmaps can share it.
# NOTE(review): these fallback colours are NOT the ones used by the original
# analysis -- define `pam50.colors` beforehand to reproduce its palette.
if (!exists("pam50.colors")) {
  pam50.levels <- sort(unique(c(
    as.character(sample.info.01$Call),
    as.character(sample.info.01$pam50.geneFu.fpkm)
  )))
  pam50.colors <- stats::setNames(
    grDevices::hcl.colors(length(pam50.levels), palette = "Dark 3"),
    pam50.levels
  )
}
# Heatmap of the `Call` annotation in reversed sample order, with row
# clustering switched off.
H.pam50.tcga <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$Call),
  cluster_rows = FALSE,
  name = 'PAM50 (TCGA calls)',
  column_names_gp = grid::gpar(fontsize = 12),
  col = pam50.colors,
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)))
### PAM50 genefu
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Fix: `pam50.colors` is never defined in this script. If it does not exist
# yet, build a placeholder palette with one colour per subtype label found in
# either PAM50 column, so both PAM50 heatmaps can share it.
# NOTE(review): these fallback colours are NOT the ones used by the original
# analysis -- define `pam50.colors` beforehand to reproduce its palette.
if (!exists("pam50.colors")) {
  pam50.levels <- sort(unique(c(
    as.character(sample.info.01$Call),
    as.character(sample.info.01$pam50.geneFu.fpkm)
  )))
  pam50.colors <- stats::setNames(
    grDevices::hcl.colors(length(pam50.levels), palette = "Dark 3"),
    pam50.levels
  )
}
# Heatmap of the `pam50.geneFu.fpkm` annotation in reversed sample order,
# with row clustering switched off.
H.pam50.genefu <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$pam50.geneFu.fpkm),
  cluster_rows = FALSE,
  name = 'PAM50 (Genefu calls)',
  column_names_gp = grid::gpar(fontsize = 12),
  col = pam50.colors,
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12)))
### Flow cell chemistry
# Fix: the annotation table loaded by this script is `sample.info.01`; the
# original referred to an undefined object `sample.info`.
# Fix: `FcCh.colors` is never defined in this script. If the caller has not
# supplied one, build a placeholder palette with one colour per distinct
# `FcCh` label.
# NOTE(review): these fallback colours are NOT the ones used by the original
# analysis -- define `FcCh.colors` beforehand to reproduce its palette.
if (!exists("FcCh.colors")) {
  fcch.levels <- sort(unique(as.character(sample.info.01$FcCh)))
  FcCh.colors <- stats::setNames(
    grDevices::hcl.colors(length(fcch.levels), palette = "Set 2"),
    fcch.levels
  )
}
# Heatmap of the `FcCh` annotation in reversed sample order, with row
# clustering switched off and a horizontal legend.
H.fcch <- ComplexHeatmap::Heatmap(
  rev(sample.info.01$FcCh),
  cluster_rows = FALSE,
  name = 'Flow cell chemistry',
  column_names_gp = grid::gpar(fontsize = 12),
  col = FcCh.colors,
  heatmap_legend_param = list(
    title_gp = grid::gpar(fontsize = 12),
    direction = "horizontal"))
# Combine the nine single-column heatmaps side by side (left to right in the
# order listed) and draw them with separate legends placed on the right.
ComplexHeatmap::draw(
  H.year + H.fcch + H.plate + H.tss + H.tissue +
    H.pam50.tcga + H.pam50.genefu + H.ls + H.purity,
  heatmap_legend_side = "right",
  merge_legends = FALSE
)
今天的推文没有对代码的细节进行研究,主要目的是能够运行出结果。如果后续需要用到这个R包来作图,可以再仔细研究这个R包的函数。示例数据和代码可以在给推文点赞、点击在看之后留言获取。
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!