简介
大多数期刊文章的第一个表(即 Table 1)是按暴露程度分层、描述研究人群基线特征的统计表。使用 table1 包可以很方便地在 R 中生成这样的表。输出格式为 HTML,其优点是便于复制到 Word 文档中。
Example 1
# Install the packages once if they are not available yet:
# install.packages("table1")
# install.packages("boot")

# Load table1 (the table builder) and boot (used here for its example data).
library(table1)
library(boot)
# Work on a copy of the melanoma example data (from the boot package).
melanoma2 <- melanoma

# Recode the numeric status codes as a labelled factor. The levels are listed
# as 2, 1, 3 so that "Alive" becomes the first level.
melanoma2$status <- factor(
  melanoma2$status,
  levels = c(2, 1, 3),
  labels = c("Alive", "Melanoma death", "Non-melanoma death")
)

# Preview the first rows of the example data.
head(melanoma2)
> head(melanoma2)
  time             status sex age year thickness ulcer
1   10 Non-melanoma death   1  76 1972      6.76     1
2   30 Non-melanoma death   1  56 1968      0.65     0
3   35              Alive   1  41 1977      1.34     0
4   99 Non-melanoma death   0  71 1968      2.90     0
5  185     Melanoma death   1  52 1965     12.08     1
6  204     Melanoma death   1  28 1971      4.84     1
# Draw a basic three-line table: the variables to summarise are joined with
# `+`, and the stratification variable follows `|`.
table1(
  ~ factor(sex) + age + factor(ulcer) + thickness | status,
  data = melanoma2
)
注意,table1 包使用了大家熟悉的公式接口:各变量之间用 + 分隔,条件符号 | 的右边为分层变量,参数 data 指定使用的数据集。
默认的变量名和分类取值可能不适合直接用于展示结果,因此可以为分类变量指定标签,为连续变量指定单位。下面为分类变量(sex 和 ulcer)创建带有描述性标签的因子,按需要为每个变量设置标签,并为连续变量(age 和 thickness)指定单位:
# Turn the coded categorical variables into factors with descriptive labels.
melanoma2$sex <- factor(
  melanoma2$sex,
  levels = c(1, 0),
  labels = c("Male", "Female")
)
melanoma2$ulcer <- factor(
  melanoma2$ulcer,
  levels = c(0, 1),
  labels = c("Absent", "Present")
)

# label() attaches a display label to each variable.
label(melanoma2$sex) <- "Sex"
label(melanoma2$age) <- "Age"
label(melanoma2$ulcer) <- "Ulceration"
label(melanoma2$thickness) <- "Thickness"

# units() attaches units to the continuous variables.
units(melanoma2$age) <- "years"
units(melanoma2$thickness) <- "mm"

# overall = "Total" sets the label of the all-subjects column;
# pass overall = FALSE to leave that column out.
table1(
  ~ sex + age + ulcer + thickness | status,
  data = melanoma2,
  overall = "Total"
)
Example 2
# As before, start by building a data set -- this time a simulated one.

# Draw `n` values from `x` with replacement and return them as a factor whose
# levels keep the order of `x`. Extra arguments (e.g. `prob`) are forwarded to
# sample().
f <- function(x, n, ...) {
  factor(sample(x, n, replace = TRUE, ...), levels = x)
}

set.seed(427)

n <- 146
dat <- data.frame(id = seq_len(n))
dat$treat <- f(c("Placebo", "Treated"), n, prob = c(1, 2)) # 2:1 randomization
dat$age <- sample(18:65, n, replace = TRUE)
dat$sex <- f(c("Female", "Male"), n, prob = c(.6, .4)) # 60% female
dat$wt <- round(exp(rnorm(n, log(70), 0.23)), 1)

# Add some missing data
dat$wt[sample.int(n, 5)] <- NA

# Display labels for the table rows.
label(dat$age) <- "Age"
label(dat$sex) <- "Sex"
label(dat$wt) <- "Weight"
label(dat$treat) <- "Treatment Group"

# Units for the continuous variables.
units(dat$age) <- "years"
units(dat$wt) <- "kg"

# Inspect the data set.
head(dat)
> head(dat)
  id   treat age    sex    wt
1  1 Treated  18 Female  62.6
2  2 Treated  50   Male  57.4
3  3 Treated  37   Male 104.6
4  4 Treated  25 Female  55.5
5  5 Placebo  60 Female  58.4
6  6 Treated  44 Female  41.9
也可以用两个变量进行分层
# Stratify by two variables at once (treatment group crossed with sex).
table1(
  ~ age + wt | treat * sex,
  data = dat
)
image-20210922160647960
也可以不分层
# No `|` term in the formula, so the table is not stratified.
table1(
  ~ treat + age + sex + wt,
  data = dat
)
# Extend the data set: the treated arm gets an additional dose subgroup.
# The comparison (treat != "Placebo") acts as 0/1 in the product, so placebo
# rows get 0 and treated rows get a randomly drawn 1 or 2.
dat$dose <- (dat$treat != "Placebo") * sample(1:2, n, replace = TRUE)

# Pin the levels explicitly so each label stays attached to its code even if
# one of the codes happens not to occur in the data.
dat$dose <- factor(
  dat$dose,
  levels = 0:2,
  labels = c("Placebo", "5 mg", "10 mg")
)

# Inspect the data set.
head(dat)
> head(dat)
  id   treat age    sex    wt    dose
1  1 Treated  18 Female  62.6    5 mg
2  2 Treated  50   Male  57.4    5 mg
3  3 Treated  37   Male 104.6    5 mg
4  4 Treated  25 Female  55.5   10 mg
5  5 Placebo  60 Female  58.4 Placebo
6  6 Treated  44 Female  41.9   10 mg
# Build the strata by hand: one data frame per dose level, followed by the
# pooled "All treated" subset and the complete data set as "Overall".
strata <- c(
  split(dat, dat$dose),
  list("All treated" = subset(dat, treat == "Treated")),
  list(Overall = dat)
)

# `variables` names the rows to show (age, sex, wt), each rendered from the
# corresponding column via render.varlabel(); `groups` holds the three group
# headers, of which only the middle one ("Treated") is non-empty.
labels <- list(
  variables = list(
    age = render.varlabel(dat$age),
    sex = render.varlabel(dat$sex),
    wt = render.varlabel(dat$wt)
  ),
  groups = list("", "Treated", "")
)

# groupspan = c(1, 3, 1) pairs with the three group headers above and sums to
# the five strata (presumably the number of columns under each header --
# confirm against the table1 documentation).
table1(strata, labels, groupspan = c(1, 3, 1))