论文
A saturated map of common genetic variants associated with human height
https://www.nature.com/articles/s41586-022-05275-y
s41586-022-05275-y.pdf
代码没有公开,但是作图数据基本都公开了,争取把每个图都重复一遍
今天的推文重复论文中的extended Figure5 频率分布直方图和柱形图添加误差线
其中图b的数据没有找到,我们只重复其他5个小图
首先是两个频率分布直方图
这两个作图代码是一样的
library(readxl)
dat01<-read_excel("data/20221014/extendFig5.xlsx",
sheet = "Source Data for Panels a - c")
head(dat01)
library(ggplot2)
library(ggh4x)
p1<-ggplot(data=dat01,aes(x=`Effect Size`))+
geom_histogram(bins = 70,color="black",fill="grey")+
geom_vline(xintercept = 0,lty="dashed",color="green")+
scale_x_continuous(breaks = seq(-1.5,1.5,by=0.5))+
scale_y_continuous(breaks = seq(0,1000,by=200))+
theme_classic()+
guides(x=guide_axis_truncated(trunc_lower = -1.5,
trunc_upper = 1.5),
y=guide_axis_truncated(trunc_lower = 0,
trunc_upper = 1000))+
labs(x="Estimated effect of minor haplotype",
y="Frequency")
p1
p3<-ggplot(data=dat01,aes(x=`Variance Explained`*100))+
geom_histogram(bins = 30,color="black",fill="grey")+
scale_x_continuous(breaks = seq(0,0.006,by=0.001))+
scale_y_continuous(breaks = seq(0,6000,by=2000))+
theme_classic()+
guides(x=guide_axis_truncated(trunc_lower = 0,
trunc_upper = 0.006),
y=guide_axis_truncated(trunc_lower = 0,
trunc_upper = 6000))+
labs(x="Variance explained by each haplotype (in %)",
y="Frequency")
p3
图d柱形图误差线叠加散点图
dat02<-read_excel("data/20221014/extendFig5.xlsx",
sheet = "Panel d")
dat02 %>% colnames()
library(tidyverse)
dat02 %>%
mutate(x=paste0(`Variance Explained by underlying Causal variants (q2)`*100,"%")) %>%
group_by(x) %>%
summarise(mean_value=mean(`Signal Density Detected`),
sd_value=sd(`Signal Density Detected`)) %>%
ungroup() -> dat02.1
dat02 %>%
mutate(x=paste0(`Variance Explained by underlying Causal variants (q2)`*100,"%")) -> dat02.2
p4<-ggplot()+
geom_errorbar(data=dat02.1,
aes(x=x,
ymin=mean_value-0.1,
ymax=mean_value+sd_value),
width=0.3,
color="#e27765")+
geom_col(data=dat02.1,
aes(x=x,y=mean_value),
fill="#daa421")+
geom_point(data=dat02.2,
aes(x=x,y=`Signal Density Detected`),
color="gray",size=3)+
theme_classic()+
scale_y_continuous(breaks = seq(0,10,by=2),
expand=expansion(mult = c(0,0.1)))+
scale_x_discrete(labels=c("0.5%\n(1.5)","1%\n(2.0)",
"2%\n(2.8)","5%\n(4.5)"))+
theme(axis.line.x = element_blank(),
axis.ticks.x = element_blank())+
guides(y=guide_axis_truncated(trunc_lower = 0,
trunc_upper = 10))+
coord_cartesian(clip = "off")+
labs(x="Variabce explained by causal variant\n(median allelic effect across simulation replicates - in SD)",
y="Mean density (+S.E.) of genome-wide\nsignigicant SNPs within 100kb")
p4
普通柱形图添加误差线
dat03<-read_excel("data/20221014/extendFig5.xlsx",
sheet = "Panel e")
dat03 %>% colnames()
p5<-dat03 %>%
mutate(Ancestries=factor(Ancestries,
levels = Ancestries)) %>%
ggplot(aes(x=Ancestries,y=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`))+
geom_col(fill="#fe7357")+
geom_errorbar(aes(ymin=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`-`Standard Error`,
ymax=`Variance in VNTR length explained by 25 GWS SNPs near ACAN`+`Standard Error`),
width=0.3,
color="#fe7357")+
theme_classic()+
theme(axis.line.x = element_blank(),
axis.ticks.x = element_blank())+
scale_y_continuous(breaks = seq(0,0.8,by=0.2),
limits = c(0,0.8),
expand = expansion(mult=c(0,0)))+
scale_x_discrete(labels=c("SAS\n(N=9,219)","EUR\n(N=414,429)",
"AFR\n(N=7,543)","EAS(N=1,496)"))
p5
水平柱形图添加误差线
dat04<-read_excel("data/20221014/extendFig5.xlsx",
sheet = "Panel f")
dat04 %>% colnames()
p6<-dat04 %>%
mutate(`Statistical Model`=factor(`Statistical Model`,
levels = `Statistical Model`)) %>%
ggplot(aes(x=`Variance explained`,y=`Statistical Model`))+
geom_col(fill="#006403")+
geom_errorbarh(aes(xmin=`Variance explained`-`Standard-Error`,
xmax=`Variance explained`+`Standard-Error`),
height=0.2,
color="#d19f84")+
theme_classic()+
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank())+
scale_y_discrete(labels=scales::label_wrap(30))+
scale_x_continuous(limits = c(0,0.0055),
breaks = seq(0,0.005,by=0.001))+
guides(x=guide_axis_truncated(trunc_lower = 0,
trunc_upper = 0.005))+
labs(y=NULL)
p6
最后是所有图组合到一起
library(patchwork)
(p1+theme(axis.title = element_text(size=10))+
plot_spacer()+
p3+
theme(axis.title = element_text(size=10)))/(p4+
theme(axis.title = element_text(size=10))+
p5+
theme(axis.title = element_text(size=10),
axis.text.x = element_text(size=10),)+
p6+
theme(axis.text.y = element_text(size=10)))+
plot_annotation(tag_levels = "a")
示例数据和代码可以给公众号推文点赞,点击在看,最后留言获取
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!