论文
A saturated map of common genetic variants associated with human height
https://www.nature.com/articles/s41586-022-05275-y
s41586-022-05275-y.pdf
代码没有公开,但是作图数据基本都公开了,争取把每个图都重复一遍
今天的推文重复论文中的Figure3
首先是作图数据
簇状柱形图
堆积柱形图
作图代码
getwd()
setwd("data/20221014")
library(readxl)
fig3a<-read_excel("Figure3.xlsx",
sheet = "fig3a")
fig3a
library(ggplot2)
library(tidyverse)
library(ggh4x)
library(latex2exp)
fig3a %>%
mutate(`Ancestries / Ethnicities`=
factor(`Ancestries / Ethnicities`,
levels = c('European','Hispanic',
'South-Asian','East-Asian',
'African')),
group=factor(group,
levels = c('within GWS loci','outside GWS loci'))) -> fig3a
fig3a %>%
group_by(`Ancestries / Ethnicities`) %>%
summarise(value=sum(Heritability)) %>%
ungroup() -> fig3a1
x_labels<-c("EUR\n(n = 28,645)",
"HIS\n(n = 4,939)",
"SAS\n(n = 9,257)",
"EAS\n(n = 49,939)",
"AFR\n(n = 15,149)")
ggplot(data=fig3a,aes(x=`Ancestries / Ethnicities`,
y=Heritability))+
geom_bar(stat = "identity",
position = position_dodge(),
aes(fill=group))+
geom_errorbar(aes(min=Heritability-SE,
max=Heritability+SE,
group=group),
position = position_dodge(0.9),
width=0.2,
color="red")+
scale_fill_manual(values = c("#00afbb","#e7b800"),
labels=c(TeX(r"(Within GWS loci (21% of the genome): \textit{h}${^2}$${_G}{_W}{_S}$)"),
TeX(r"(Outside GWS loci (79% of the genome): \textit{h}${^2}$${_o}{_t}{_h}{_e}{_r}$)")))+
scale_y_continuous(breaks = seq(0,0.5,by=0.1))+
scale_x_discrete(labels=x_labels)+
theme_bw()+
theme(panel.grid = element_blank(),
panel.border = element_blank(),
axis.line.y = element_line(),
axis.ticks.x = element_blank(),
axis.text.x = element_text(vjust=10),
legend.title = element_blank(),
legend.position = "top",
legend.justification = c(0,0),
legend.key.size = unit(0.2,'cm'))+
guides(y=guide_axis_truncated(trunc_lower = 0,
trunc_upper = 0.5),
fill=guide_legend(ncol = 1,
label.hjust = 0,
override.aes = list(size = 1)))+
geom_hline(yintercept = 0)+
geom_segment(data=fig3a1,
aes(x=1:5-0.4,xend=1:5+0.4,
y=value,yend=value),
color="#ee82ee",
lty="dashed")+
annotate(geom = "text",x=4.5,y=0.5,
label=TeX(r"(\textit{h}${^2}{_S}{_N}{_P}=$\textit{h}${^2}{_G}{_W}{_S}+$\textit{h}${^2}{_o}{_t}{_h}{_e}{_r}$)"),
vjust=-3)+
coord_cartesian(clip = "off")+
labs(x="Ancestries\n(sample size)") -> p1
p1
fig3b<-read_excel("Figure3.xlsx",
sheet = "fig3b")
fig3b %>%
mutate(`Ancestries / Ethnicities`=
factor(`Ancestries / Ethnicities`,
levels = c('European','Hispanic',
'South-Asian','East-Asian',
'African'))) -> fig3b
fig3b.text<-data.frame(
x=1:5,
y=0.5,
label=paste(c("310,092","348,498","334,237","304,478","371,622"),
"SNPs in GWS loci")
)
fig3b.text
ggplot(data=fig3b,aes(x=`Ancestries / Ethnicities`,
y=Heritability))+
geom_bar(stat = "identity",
position = position_fill(),
aes(fill=group),
show.legend = FALSE)+
scale_fill_manual(values = c("#e7b800","#00afbb"))+
geom_text(data=fig3b.text,
aes(x=x,y=y,label=label),
angle=90,
color="white")+
scale_x_discrete(labels=c(
"EUR\n(1,130,264)","HIS\n(1,277,112)","SAS\n(1,222,935)",
"EAS\n(1,110,588)","AFR\n(1,180,574)"))+
scale_y_continuous(breaks = seq(0,1,by=0.1),
expand = expansion(mult = c(0,0)))+
labs(x="Ancestries (total number of SNPs analysed)",
y=TeX(r"(Proportion of SNP-based heritability within GWS loci $(\textit{h}{^2}{_G}{_W}{_S}/\textit{h}{^2}{_S}{_N}{_P})$)"))+
theme_bw()+
theme(panel.grid = element_blank(),
panel.border = element_blank(),
axis.line.y = element_line(),
axis.ticks.x = element_blank())+
geom_hline(yintercept = 0.9,
color="red",
lty="dashed") -> p2
library(patchwork)
p1+p2+
plot_annotation(tag_levels = "a")
这里a小图上图例里的虚线不知道是怎么添加的,出图后借助其他软件来编辑吧
示例数据和代码可以给推文点赞 点击在看 最后留言获取
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!