安装与加载
从CRAN安装:
# Install the released version of ggpubr from CRAN.
install.packages(pkgs = "ggpubr")
或者从GitHub上安装最新版本:
# Install the development version of ggpubr from GitHub.
# devtools is installed first when it is not already available;
# requireNamespace() checks availability without attaching the package.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("kassambara/ggpubr")
加载包
# Attach the packages used below; each library() call is its own statement.
library(ggplot2)
library(ggpubr)
分布图
# Build the example data set.
# set.seed() fixes the seed of the random number generator so the simulated
# values are reproducible; without a seed they cannot be reproduced.
set.seed(1234)
wdata <- data.frame(
  sex = factor(rep(c("F", "M"), each = 200)),
  weight = c(rnorm(200, 55), rnorm(200, 58))
)
head(wdata, 4)
> head(wdata, 4)
  sex   weight
1   F 53.79293
2   F 55.27743
3   F 56.08444
4   F 52.65430
密度分布图
# Density plot of weight, coloured and filled by sex,
# with a mean line (add = "mean") and a rug (rug = TRUE).
ggdensity(
  wdata,
  x = "weight",
  add = "mean",
  rug = TRUE,
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800")
)
# Parameter reference
# Usage:
# ggdensity(
#   data,
#   x,
#   y = "..density..",
#   combine = FALSE,                                # whether to facet for multi-variable data
#   merge = FALSE,                                  # whether to merge for multi-variable data
#   color = "black",                                # line colour
#   fill = NA,                                      # fill colour
#   palette = NULL,                                 # custom colour palette
#   size = NULL,                                    # point and line size
#   linetype = "solid",                             # line type
#   alpha = 0.5,                                    # transparency
#   title = NULL,                                   # plot title
#   xlab = NULL,                                    # x-axis title
#   ylab = NULL,                                    # y-axis title
#   facet.by = NULL,                                # grouping used for faceting
#   panel.labs = NULL,                              # titles of the facet panels
#   short.panel.labs = TRUE,
#   add = c("none", "mean", "median"),              # add a mean or median line
#   add.params = list(linetype = "dashed"),         # additional parameters
#   rug = FALSE,                                    # whether to add a marginal rug
#   label = NULL,                                   # column label
#   font.label = list(size = 11, color = "black"),  # label font
#   label.select = NULL,
#   repel = FALSE,                                  # whether to keep labels from overlapping
#   label.rectangle = FALSE,                        # whether to draw a box around labels
#   ggtheme = theme_pubr(),                         # plot theme
#   ...
# )
直方图
# Histogram of weight, coloured and filled by sex,
# with a mean line and a rug.
gghistogram(
  wdata,
  x = "weight",
  color = "sex",
  fill = "sex",
  palette = c("#00AFBB", "#E7B800"),
  add = "mean",
  rug = TRUE
)
箱线图和小提琴图
# Load the built-in ToothGrowth data set and preview its first rows.
data("ToothGrowth")
df <- ToothGrowth
head(df, 4)
> head(df, 4)
   len supp dose
1  4.2   VC  0.5
2 11.5   VC  0.5
3  7.3   VC  0.5
4  5.8   VC  0.5
箱线图
# Box plot of tooth length by dose, coloured by dose.
# The closing parenthesis and the final `p` are kept outside the inline
# comments so the call terminates and the plot is printed.
p <- ggboxplot(
  df,
  x = "dose",
  y = "len",
  color = "dose",
  palette = c("#00AFBB", "#E7B800", "#FC4E07"),
  add = "jitter",  # add jittered points
  shape = "dose"   # point shape
)
p
# Add p-values.
# Define the pairs of groups to compare.
my_comparisons <- list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))
p +
  stat_compare_means(comparisons = my_comparisons) +  # pairwise comparison p-values
  stat_compare_means(label.y = 50)                    # multi-group comparison p-value
# Parameter reference
# stat_compare_means(
#   mapping = NULL,
#   data = NULL,
#   method = NULL,           # test method; see the table below
#   paired = FALSE,          # whether the test is paired
#   method.args = list(),    # extra arguments for the test,
#                            # e.g. method.args = list(alternative = "greater")
#   ref.group = NULL,        # reference (control) group
#   comparisons = NULL,      # list of the groups to compare
#   hide.ns = FALSE,         # whether to hide the "ns" (not significant) symbol
#   label.sep = ", ",        # string separating the terms; default ", ",
#                            # to separate the correlation coefficient and the p-value
#   label = NULL,            # label
#   label.x.npc = "left",    # x-axis position of the label
#   label.y.npc = "top",     # y-axis position of the label
#                            # a number in 0~1, or a string
#                            # ('right', 'left', 'center', 'centre', 'middle')
#   label.x = NULL,
#   label.y = NULL,          # a numeric value giving the absolute coordinate of the label
#   vjust = 0,               # move the text up or down
#   tip.length = 0.03,
#   bracket.size = 0.3,
#   step.increase = 0,
#   symnum.args = list(),    # default: symnum.args <- list(
#                            #   cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1),
#                            #   symbols = c("****", "***", "**", "*", "ns"));
#                            # can be customised
#   geom = "text",
#   position = "identity",
#   na.rm = FALSE,           # if FALSE (default), missing values are removed with a
#                            # warning; if TRUE, they are removed silently
#   show.legend = NA,        # whether to include a legend
#   inherit.aes = TRUE,
#   ...
# )
常用的统计学方法: