基础棒棒糖图
横坐标的值可以为数值变量或者分类变量
# 加载包 library(ggplot2) library(patchwork) # 创造数值变量数据 data1 <- data.frame(x=seq(1,30), y=abs(rnorm(30))) # 作图 p1 <- ggplot(data1, aes(x=x, y=y)) + geom_point() + geom_segment( aes(x=x, xend=x, y=0, yend=y)) # 创造分类变量数据 data2 <- data.frame( x=LETTERS[1:26], y=abs(rnorm(26)) ) # 作图 p2 <- ggplot(data2, aes(x=x, y=y)) + geom_point() + geom_segment( aes(x=x, xend=x, y=0, yend=y)) p1+p2
自定义点与线条
library(ggplot2) ggplot(data2, aes(x=x, y=y)) + geom_segment( aes(x=x, xend=x, y=0, yend=y),size=1, color="blue", linetype="dotdash" )+ geom_point( size=5, color="red", fill=alpha("orange", 0.3), alpha=0.7, shape=21, stroke=2)+ theme_light() + #coord_flip() + 控制图形是否水平放置 theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank() ) + xlab("") + ylab("Value of Y")
我们也利用fct_reorder()
函数按照Y值从小到大排个序,当然也可以改动因子水平去排序,相比之下前者会方便点。
library(ggplot2) library(forcats) library(dplyr) data2%>% mutate(x = fct_reorder(x,y))%>% #降序的话改为desc(y) ggplot(aes(x= x, y=y)) + geom_segment( aes(x=x, xend=x, y=0, yend=y),size=1, color="blue", linetype="dotdash" )+ geom_point( size=5, color="red", fill=alpha("orange", 0.3), alpha=0.7, shape=21, stroke=2)+ theme_light() + #coord_flip() + 控制图形是否水平放置 theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank() ) + xlab("") + ylab("Value of Y")
另外,我们也可以很容易地更改图表的基线。如果您感兴趣的数据中某个特定的阈值时候,那么它将提供更深入的理解,我们只需更改geom_segment()调用中的y参数。
ggplot(data2, aes(x=x, y=y)) + geom_segment( aes(x=x, xend=x, y=1, yend=y),size=1, color="blue", linetype="dotdash" )+ geom_point( size=5, color="red", fill=alpha("orange", 0.3), alpha=0.7, shape=21, stroke=2)+ theme_light() + #coord_flip() + 控制图形是否水平放置 theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank() ) + xlab("") + ylab("Value of Y")
高亮并添加注释
library(hrbrthemes) library(ggplot2) library(dplyr) data <- data.frame( x=LETTERS[1:26], y=abs(rnorm(26)) ) # 数据排序 data <- data %>% arrange(y) %>% mutate(x=factor(x,x)) # 绘图 p <- ggplot(data, aes(x=x, y=y)) + geom_segment( aes(x=x, xend=x, y=0, yend=y ), color=ifelse(data$x %in% c("A","D"), "purple", "grey"), size=ifelse(data$x %in% c("A","D"), 1.3, 0.7) ) + geom_point( color=ifelse(data$x %in% c("A","D"), "purple", "grey"), size=ifelse(data$x %in% c("A","D"), 5, 2) ) + theme_ipsum() + coord_flip() + theme( legend.position="none" ) + xlab("") + ylab("Value of Y") + ggtitle("How did groups A and D perform?") p # 添加注释 p + annotate("text", x=grep("D", data$x), y=data$y[which(data$x=="D")]*1.2, label = paste("Group D is very impressive\n (val=",data$y[which(data$x=="D")] %>% round(2),")",sep="" ) , color="purple", size=4 , angle=0, fontface="bold", hjust=0) + annotate("text", x = grep("A", data$x), y = data$y[which(data$x=="A")]*1.2, label = paste("Group A is not too bad\n (val=",data$y[which(data$x=="A")] %>% round(2),")",sep="" ) , color="purple", size=4 , angle=0, fontface="bold", hjust=0)
其实之前介绍过一种高亮图形的包---gghightlight
,此操作大家也可以换个包尝试一下~
多组情况
当我们再增添一组分类变量时候,可以绘制出分组情况下的棒棒图。
方式一:ggplot绘制
data2 <- data2%>% mutate(Group = rep(c('F','M'),13)) data2$Group <- factor(data2$Group) ggplot(data2,aes(x= x, y=y),) + geom_segment( aes(x=x, xend=x, y=0, yend=y,color = Group),size=1, linetype="dotdash" )+ geom_point(aes(color = Group), size=5)+ scale_color_manual(values = c('gold','pink'))+ theme_light() + # coord_flip() + 控制图形是否水平放置 theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank() ) + xlab("") + ylab("Value of Y")
也可采用分面形式呈现,代码添加facet_wrap(~Group,ncol = 2,scales = 'free_x')
[图片上传失败...(image-f6b491-1603677926805)]
方式二: ggpubr包
ggpubr包中有现成的函数ggdotchart
绘制棒棒糖图
library(ggpubr) ggdotchart(data2, x="x", y="y", color = "Group", palette = c("gold","pink"), # 配色 sorting = "ascending", # 排序 add = "segments", #添加棒棒 # rotate = TRUE, 改为垂直 ggtheme = theme_pubr()) #主题
多值情况
当我们遇到每个分类变量对应俩个值的情况时候,棒棒糖图也是非常合适的。对于每个分类变量,用不同的颜色为每个值绘制一个点。然后使用片段突出显示它们的差异。这种类型的可视化也称为Clevenland dot plots (克利夫兰点图)。
加载包及数据处理
# 加载包 library(ggplot2) library(dplyr) library(hrbrthemes) # 创造数据 value1 <- abs(rnorm(26))*2 data <- data.frame( x=LETTERS[1:26], value1=value1, value2=value1+1+rnorm(26, sd=1) ) head(data) x value1 value2 1 A 0.8301565 0.1326605 2 B 6.7433156 7.8045654 3 C 1.0231795 1.1628583 4 D 0.1416095 1.3683347 5 E 1.1354025 2.7985499 6 F 0.7394701 2.6515638 ## 数据处理计算出差值 data2 <- data %>% group_by(x) %>% mutate( mymean = mean(c(value1,value2) )) %>% # 添加value1与value2的均值 arrange(mymean) %>% mutate(x=factor(x, x)) head(data2) x value1 value2 mymean <fct> <dbl> <dbl> <dbl> 1 A 0.830 0.133 0.481 2 T 0.0898 1.03 0.559 3 S 0.688 0.529 0.609 4 D 0.142 1.37 0.755 5 M 0.621 1.24 0.931 6 Q 0.144 1.87 1.00
作图
ggplot(data2) + geom_segment( aes(x=x, xend=x, y=value1, yend=value2), color="grey") + geom_point( aes(x=x, y=value1), color='pink', size=3 ) + geom_point( aes(x=x, y=value2), color='gold', size=3 ) + coord_flip()+ theme_ipsum() + theme( legend.position = "none", ) + xlab("") + ylab("Value of Y")