一、概述
气泡图(Bubble Chart)是一种展示三个或更多变量之间关系的二维图表。它与散点图类似,但是在每个数据点处加上了一个圆圈,圆圈的大小和第三个变量相关。因此气泡图不仅可以用于显示两个变量之间的关系,还可以同时显示第三个变量的大小或重要性,增加了数据展示的可读性和信息量。
二、数据集
下面我将以R语言自带的mtcars数据集为例画出气泡图。
首先,我们需要加载ggplot2包和mtcars数据集,并指定想要用到的变量。
2.1 安装并加载 ggplot2
# Install ggplot2 only when it is not already available, so re-running the
# script does not trigger a fresh download every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

# Attach ggplot2 so ggplot(), aes(), geom_point(), ... can be called directly.
library(ggplot2)
2.2 读取数据
加载数据
# Load the mtcars data set that ships with R.
data(mtcars)

# Print a compact overview of the columns and their types.
str(mtcars)
数据集展示
'data.frame': 32 obs. of 11 variables:
 $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
 $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
 $ disp: num 160 160 108 258 360 ...
 $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
 $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
 $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
 $ qsec: num 16.5 17 18.6 19.4 17 ...
 $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
 $ am : num 1 1 1 0 0 0 0 0 0 0 ...
 $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
 $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
三、使用方法
3.1 基础方法
# Basic bubble chart: weight on x, fuel economy on y, bubble size from
# horsepower. Each layer sits on its own line so the trailing comments no
# longer swallow the rest of the `+` chain.
ggplot(data = mtcars, aes(x = wt, y = mpg)) +
  # Bubbles: size mapped to hp, fixed transparency and colour.
  geom_point(aes(size = hp), alpha = 0.7, color = "#0072B2") +
  # Title, axis labels and size-legend title.
  labs(
    title = "wt vs. mpg (by hp)",
    x = "Weight",
    y = "Miles per gallon",
    size = "Horsepower"
  ) +
  # Range of bubble sizes used for the smallest and largest hp values.
  scale_size_continuous(range = c(2, 12))
3.2 气泡颜色变化
# Bubble chart with bubble size from horsepower and bubble colour from
# displacement.
#
# shape = 21 is a fillable circle: its interior is controlled by `fill`, while
# `color` only affects the outline. Displacement is therefore mapped to `fill`
# (with a matching fill scale and legend title) so the whole bubble is
# coloured, not just its 0.2-wide border.
ggplot(data = mtcars, aes(x = wt, y = mpg, size = hp, fill = disp)) +
  # Bubbles: semi-transparent filled circles with a thin black outline.
  geom_point(alpha = 0.7, shape = 21, stroke = 0.2, color = "black") +
  # Title, axis labels and legend titles.
  labs(
    title = "wt vs. mpg (by hp and disp)",
    x = "Weight",
    y = "Miles per gallon",
    size = "Horsepower",
    fill = "Displacement"
  ) +
  # Range of bubble sizes.
  scale_size(range = c(2, 12)) +
  # Colour gradient from low to high displacement.
  scale_fill_gradient(low = "darkblue", high = "red")
3.3 聚类气泡图
接下来展示论文中常见的聚类气泡图,代码如下(使用R语言自带的iris数据集):
# Install any missing packages once, instead of reinstalling all of them on
# every run.
pkgs <- c("ggplot2", "dplyr", "ggrepel", "gridExtra", "ggalt")
missing_pkgs <- pkgs[
  !vapply(pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(ggplot2)
library(dplyr)
library(ggrepel)
library(gridExtra)
library(ggalt)

# Load the data set.
data(iris)
head(iris)

# Build the plotting data here so this snippet is runnable on its own:
# standardise the two sepal measurements, cluster them with k-means (k = 3),
# and store the result as columns x, y and cluster.
set.seed(1)  # k-means starts from random centres; fix the seed for stable output
scaled <- scale(iris[, c("Sepal.Length", "Sepal.Width")])
km_fit <- kmeans(scaled, centers = 3)
df <- data.frame(
  x = scaled[, "Sepal.Length"],
  y = scaled[, "Sepal.Width"],
  cluster = km_fit$cluster
)

# One colour per cluster, shared by the point/outline and fill scales.
cluster_colors <- c("#0072B2", "#D55E00", "#009E73")

# Scatter layer coloured by cluster.
p1 <- ggplot(df, aes(x = x, y = y, color = factor(cluster))) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = cluster_colors) +
  theme_bw()

# 95% ellipses per cluster. geom = "polygon" is required for the `fill`
# aesthetic to take effect; the default path geom only draws an outline.
p2 <- p1 +
  stat_ellipse(
    aes(fill = factor(cluster)),
    geom = "polygon",
    alpha = 0.2,
    level = 0.95
  ) +
  scale_fill_manual(values = cluster_colors)

# Title, no legend.
p3 <- p2 +
  ggtitle("Cluster Bubble Plot") +
  theme(legend.position = "none")

grid.arrange(p3, ncol = 1)
带标签的聚类气泡图
# Data preparation: standardise the two sepal measurements and cluster them
# with k-means (k = 3).
set.seed(1)  # fix the random start so cluster numbering/colours are stable
scaled <- iris %>%
  select(Sepal.Length, Sepal.Width) %>%
  scale()
km_fit <- kmeans(scaled, centers = 3)

# scale() returns a matrix, so convert it back to a data frame before using
# dplyr verbs; then attach the cluster id and rename to the x/y columns the
# plot expects.
df <- scaled %>%
  as.data.frame() %>%
  mutate(cluster = km_fit$cluster) %>%
  rename(x = Sepal.Length, y = Sepal.Width)

# Columns used to cluster mtcars.
cluster_data <- mtcars[, c("mpg", "disp", "hp")]

# Run k-means on mtcars and store the cluster id in the data set.
# NOTE(review): this result is not used by the plot below, which draws `df`
# (iris) only.
set.seed(1)
mtcars$cluster <- kmeans(cluster_data, centers = 3)$cluster

# One colour per cluster, shared by the point/outline and fill scales.
cluster_colors <- c("#0072B2", "#D55E00", "#009E73")

# Scatter layer coloured by cluster.
p1 <- ggplot(df, aes(x = x, y = y, color = factor(cluster))) +
  geom_point(size = 3, alpha = 0.8) +
  scale_color_manual(values = cluster_colors) +
  theme_bw()

# 95% ellipses per cluster. geom = "polygon" is required for the `fill`
# aesthetic to take effect; the default path geom only draws an outline.
p2 <- p1 +
  stat_ellipse(
    aes(fill = factor(cluster)),
    geom = "polygon",
    alpha = 0.2,
    level = 0.95
  ) +
  scale_fill_manual(values = cluster_colors)

# Label every point with its cluster id, add the title, hide the legend.
p3 <- p2 +
  geom_label_repel(aes(label = cluster), size = 3) +
  ggtitle("Cluster Bubble Plot") +
  theme(legend.position = "none")

grid.arrange(p3, ncol = 1)