# 跟着Nature学作图：R语言ggplot2柱形图添加误差线和频率分布直方图

## 论文

A saturated map of common genetic variants associated with human height

https://www.nature.com/articles/s41586-022-05275-y

s41586-022-05275-y.pdf

## 首先是两个频率分布直方图

library(readxl)

# Load the source data shared by the two histograms below (panels a - c).
# NOTE(review): the original call was truncated down to its `sheet` argument.
# The workbook path is assumed to be the same file that the panel d-f calls
# read from -- confirm against the downloaded source data.
dat01 <- read_excel("data/20221014/extendFig5.xlsx",
                    sheet = "Source Data for Panels a - c")

library(ggplot2)
library(ggh4x)
# Histogram of the per-haplotype effect sizes stored in `dat01`.
# `Effect Size` contains a space, so it has to be backtick-quoted in aes();
# without the backticks the line does not parse.
p1 <- ggplot(data = dat01, aes(x = `Effect Size`)) +
  geom_histogram(bins = 70, color = "black", fill = "grey") +
  # Dashed vertical reference line at an effect of zero.
  geom_vline(xintercept = 0, lty = "dashed", color = "green") +
  scale_x_continuous(breaks = seq(-1.5, 1.5, by = 0.5)) +
  scale_y_continuous(breaks = seq(0, 1000, by = 200)) +
  theme_classic() +
  # ggh4x truncated axes: draw each axis line only across the listed range.
  guides(
    x = guide_axis_truncated(trunc_lower = -1.5, trunc_upper = 1.5),
    y = guide_axis_truncated(trunc_lower = 0, trunc_upper = 1000)
  ) +
  labs(x = "Estimated effect of minor haplotype",
       y = "Frequency")
p1

# Histogram of the variance explained by each haplotype.
# The column is multiplied by 100 so the x axis is in percent, matching the
# axis title. `Variance Explained` contains a space and must be
# backtick-quoted; without the backticks the line does not parse.
p3 <- ggplot(data = dat01, aes(x = `Variance Explained` * 100)) +
  geom_histogram(bins = 30, color = "black", fill = "grey") +
  scale_x_continuous(breaks = seq(0, 0.006, by = 0.001)) +
  scale_y_continuous(breaks = seq(0, 6000, by = 2000)) +
  theme_classic() +
  # ggh4x truncated axes: draw each axis line only across the listed range.
  guides(
    x = guide_axis_truncated(trunc_lower = 0, trunc_upper = 0.006),
    y = guide_axis_truncated(trunc_lower = 0, trunc_upper = 6000)
  ) +
  labs(x = "Variance explained by each haplotype (in %)",
       y = "Frequency")
p3


## 图d柱形图误差线叠加散点图

# tidyverse provides `%>%`, mutate(), group_by() and summarise(); it has to be
# attached before the first pipe below, otherwise `%>%` is not found.
library(tidyverse)

dat02 <- read_excel("data/20221014/extendFig5.xlsx",
                    sheet = "Panel d")
dat02 %>% colnames()

# Raw rows plus a percentage label `x` ("0.5%", "1%", ...) built from the q2
# column. Column names containing spaces/parentheses must be backtick-quoted.
dat02.2 <- dat02 %>%
  mutate(
    x = paste0(
      `Variance Explained by underlying Causal variants (q2)` * 100,
      "%"
    )
  )

# One row per `x` level: mean and standard deviation of the detected density.
dat02.1 <- dat02.2 %>%
  group_by(x) %>%
  summarise(mean_value = mean(`Signal Density Detected`),
            sd_value = sd(`Signal Density Detected`)) %>%
  ungroup()

p4 <- ggplot() +
  # The error bar is added before the column, so the column is drawn on top.
  # ymin sits just 0.1 below the mean -- presumably so that only the upper
  # whisker stays visible above the bar.
  # NOTE(review): the whisker length is the standard deviation, while the y
  # axis title says "S.E." -- confirm which statistic is intended.
  geom_errorbar(data = dat02.1,
                aes(x = x,
                    ymin = mean_value - 0.1,
                    ymax = mean_value + sd_value),
                width = 0.3,
                color = "#e27765") +
  geom_col(data = dat02.1,
           aes(x = x, y = mean_value),
           fill = "#daa421") +
  # Individual observations overlaid on the bars.
  geom_point(data = dat02.2,
             aes(x = x, y = `Signal Density Detected`),
             color = "gray", size = 3) +
  theme_classic() +
  scale_y_continuous(breaks = seq(0, 10, by = 2),
                     expand = expansion(mult = c(0, 0.1))) +
  # Labels are matched to the discrete levels by position.
  scale_x_discrete(labels = c("0.5%\n(1.5)", "1%\n(2.0)",
                              "2%\n(2.8)", "5%\n(4.5)")) +
  theme(axis.line.x = element_blank(),
        axis.ticks.x = element_blank()) +
  guides(y = guide_axis_truncated(trunc_lower = 0,
                                  trunc_upper = 10)) +
  coord_cartesian(clip = "off") +
  # Label typos fixed: "Variabce" -> "Variance", "signigicant" -> "significant".
  labs(x = "Variance explained by causal variant\n(median allelic effect across simulation replicates - in SD)",
       y = "Mean density (+S.E.) of genome-wide\nsignificant SNPs within 100kb")

p4

## 普通柱形图添加误差线

dat03 <- read_excel("data/20221014/extendFig5.xlsx",
                    sheet = "Panel e")
dat03 %>% colnames()

# Bar chart with symmetric error bars (value -/+ `Standard Error`).
# Column names containing spaces must be backtick-quoted.
p5 <- dat03 %>%
  # Fix the factor levels to the order of first appearance so the bars are not
  # re-sorted alphabetically; unique() keeps factor() from failing on
  # duplicated values.
  mutate(Ancestries = factor(Ancestries,
                             levels = unique(Ancestries))) %>%
  ggplot(aes(
    x = Ancestries,
    y = `Variance in VNTR length explained by 25 GWS SNPs near ACAN`
  )) +
  geom_col(fill = "#fe7357") +
  geom_errorbar(
    aes(
      ymin = `Variance in VNTR length explained by 25 GWS SNPs near ACAN` -
        `Standard Error`,
      ymax = `Variance in VNTR length explained by 25 GWS SNPs near ACAN` +
        `Standard Error`
    ),
    width = 0.3,
    color = "#fe7357"
  ) +
  theme_classic() +
  theme(axis.line.x = element_blank(),
        axis.ticks.x = element_blank()) +
  scale_y_continuous(breaks = seq(0, 0.8, by = 0.2),
                     limits = c(0, 0.8),
                     expand = expansion(mult = c(0, 0))) +
  # Labels are matched to the factor levels by position.
  # NOTE(review): assumes the sheet rows are ordered SAS, EUR, AFR, EAS --
  # confirm. The last label now has the same line break as the other three.
  scale_x_discrete(labels = c("SAS\n(N=9,219)", "EUR\n(N=414,429)",
                              "AFR\n(N=7,543)", "EAS\n(N=1,496)"))

p5

## 水平柱形图添加误差线

dat04 <- read_excel("data/20221014/extendFig5.xlsx",
                    sheet = "Panel f")

dat04 %>% colnames()

# Horizontal bar chart with horizontal error bars.
# All three column names are non-syntactic and must be backtick-quoted; an
# unquoted `Standard-Error` would be parsed as the subtraction Standard - Error.
p6 <- dat04 %>%
  # Fix the factor levels to the order of first appearance so the bars are not
  # re-sorted alphabetically; unique() keeps factor() from failing on
  # duplicated values.
  mutate(`Statistical Model` = factor(`Statistical Model`,
                                      levels = unique(`Statistical Model`))) %>%
  ggplot(aes(x = `Variance explained`, y = `Statistical Model`)) +
  geom_col(fill = "#006403") +
  geom_errorbarh(aes(xmin = `Variance explained` - `Standard-Error`,
                     xmax = `Variance explained` + `Standard-Error`),
                 height = 0.2,
                 color = "#d19f84") +
  theme_classic() +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank()) +
  # Wrap long model names at 30 characters.
  scale_y_discrete(labels = scales::label_wrap(30)) +
  scale_x_continuous(limits = c(0, 0.0055),
                     breaks = seq(0, 0.005, by = 0.001)) +
  guides(x = guide_axis_truncated(trunc_lower = 0,
                                  trunc_upper = 0.005)) +
  labs(y = NULL)
p6

## 最后是所有图组合到一起

library(patchwork)

# Assemble the final figure with patchwork: the two histograms on the top row
# (separated by an empty spacer cell) and the three bar charts on the bottom
# row. Each theme() call is added right after the plot whose text sizes it
# adjusts.
(
  p1 +
    theme(axis.title = element_text(size = 10)) +
    plot_spacer() +
    p3 +
    theme(axis.title = element_text(size = 10))
) / (
  p4 +
    theme(axis.title = element_text(size = 10)) +
    p5 +
    # The stray trailing comma after axis.text.x was removed: it passed an
    # empty argument to theme().
    theme(axis.title = element_text(size = 10),
          axis.text.x = element_text(size = 10)) +
    p6 +
    theme(axis.text.y = element_text(size = 10))
) +
  # Tag the panels a, b, c, ...
  plot_annotation(tag_levels = "a")

|
6月前
|

R语言绘图教程丨Nature论文都在用的多组比较箱线图，自动计算显著性并标注，附带误差线
R语言绘图教程丨Nature论文都在用的多组比较箱线图，自动计算显著性并标注，附带误差线
330 0
|
6月前
|

985 0
|
6月前
|

113 0
|
9月前
|

103 0
|
9月前
|

171 0
|
9月前
|

92 0
|