本篇介绍如何使用R软件和ggplot2包来制作直方图。我们需要用到geom_histogram函数,也可以用geom_vline函数添加线条来展示平均值。
准备数据
# Build the example data set used by every plot below: 200 rows labelled "F"
# and 200 labelled "M", with normally distributed weights rounded to whole
# numbers. The seed is fixed so the draws are reproducible.
set.seed(1234)
df <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = round(c(
    rnorm(200, mean = 55, sd = 5),
    rnorm(200, mean = 65, sd = 5)
  ))
)

# Preview the first rows; the expected console output is shown below.
head(df)
##   sex weight
## 1   F     49
## 2   F     56
## 3   F     60
## 4   F     43
## 5   F     57
## 6   F     58
基础直方图
library(ggplot2)

# Histogram of weight with ggplot2's default binning.
ggplot(df, aes(x = weight)) +
  geom_histogram()

# Same histogram with an explicit bin width of 1.
ggplot(df, aes(x = weight)) +
  geom_histogram(binwidth = 1)

# Black bar outlines with white bars; the plot is stored in `p` and printed.
p <- ggplot(df, aes(x = weight)) +
  geom_histogram(color = "black", fill = "white")
p
增加平均值与密度图
# Add a dashed blue vertical line at the overall mean weight.
# The mean is passed as a constant instead of through aes() so a single line
# is drawn rather than one identical line per data row. `linewidth` replaces
# the `size` argument, which ggplot2 3.4.0 deprecated for line geoms.
p +
  geom_vline(
    xintercept = mean(df$weight),
    color = "blue",
    linetype = "dashed",
    linewidth = 1
  )

# Histogram on the density scale with a semi-transparent density curve on top.
# after_stat(density) is the supported spelling of the deprecated
# `..density..` notation.
ggplot(df, aes(x = weight)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "black",
    fill = "white"
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666")
改变线形与颜色
# Dark blue bar outlines with light blue bars.
ggplot(df, aes(x = weight)) +
  geom_histogram(color = "darkblue", fill = "lightblue")

# Dashed black bar outlines with light blue bars.
ggplot(df, aes(x = weight)) +
  geom_histogram(
    color = "black",
    fill = "lightblue",
    linetype = "dashed"
  )
分组展示
library(plyr)

# Mean weight per level of `sex`, stored in column `grp.mean`.
mu <- ddply(df, "sex", summarise, grp.mean = mean(weight))
head(mu)

# Outline colour mapped to sex, default bar positioning.
ggplot(df, aes(x = weight, color = sex)) +
  geom_histogram(fill = "white")

# Overlaid histograms: both groups drawn in place, semi-transparent.
ggplot(df, aes(x = weight, color = sex)) +
  geom_histogram(fill = "white", alpha = 0.5, position = "identity")

# Interleaved (dodged) histograms with the legend on top.
ggplot(df, aes(x = weight, color = sex)) +
  geom_histogram(fill = "white", position = "dodge") +
  theme(legend.position = "top")

# Dodged histograms plus one dashed mean line per group, taken from `mu`.
p <- ggplot(df, aes(x = weight, color = sex)) +
  geom_histogram(fill = "white", position = "dodge") +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  theme(legend.position = "top")
p
自定义线条颜色
如需自定义填充颜色,将下列函数名中的color改为fill即可(例如scale_fill_manual、scale_fill_brewer、scale_fill_grey),并把分组变量映射到fill。
# Hand-picked outline colours.
p +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))

# Outline colours from the Brewer "Dark2" palette.
p +
  scale_color_brewer(palette = "Dark2")

# Grey-scale outlines on a classic theme, legend kept on top.
p +
  scale_color_grey() +
  theme_classic() +
  theme(legend.position = "top")
自定义主题与文本
# Single histogram with an overall mean line, custom labels and classic theme.
# The mean is passed as a constant so one line is drawn instead of one
# identical line per data row.
# NOTE(review): fill = sex is mapped here, but the histogram layer sets a
# constant fill, so the mapping looks unused - confirm whether it is wanted.
ggplot(df, aes(x = weight, fill = sex)) +
  geom_histogram(fill = "white", color = "black") +
  geom_vline(
    xintercept = mean(df$weight),
    color = "blue",
    linetype = "dashed"
  ) +
  labs(title = "Weight histogram plot", x = "Weight(kg)", y = "Count") +
  theme_classic()

# Outline and fill colours by group, overlaid, with per-group mean lines.
ggplot(df, aes(x = weight, color = sex, fill = sex)) +
  geom_histogram(position = "identity", alpha = 0.5) +
  # geom_density(alpha = 0.6) +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  ) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  scale_fill_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  labs(title = "Weight histogram plot", x = "Weight(kg)", y = "Count") +
  theme_classic()

# Base plot reused by the three palette examples below.
p <- ggplot(df, aes(x = weight, color = sex)) +
  geom_histogram(fill = "white", position = "dodge") +
  geom_vline(
    data = mu,
    aes(xintercept = grp.mean, color = sex),
    linetype = "dashed"
  )

# The three examples below all use scale_color_brewer(), i.e. discrete
# palettes; they differ only in palette name and theme.

# Brewer "Paired" palette on a classic theme.
p +
  scale_color_brewer(palette = "Paired") +
  theme_classic() +
  theme(legend.position = "top")

# Brewer "Dark2" palette on a classic theme.
# A preceding theme_minimal() was dropped: theme_classic() is a complete
# theme and replaced it entirely.
p +
  scale_color_brewer(palette = "Dark2") +
  theme_classic() +
  theme(legend.position = "top")

# Brewer "Accent" palette on a minimal theme.
p +
  scale_color_brewer(palette = "Accent") +
  theme_minimal() +
  theme(legend.position = "top")