# R语言生存分析数据分析可视化案例（下）

R语言生存分析数据分析可视化案例（上）：https://developer.aliyun.com/article/1493709

# Mantel-Haenszel logrank测试

Call:
survdiff(formula = su_obj ~ stage, data = orca)
N Observed Expected (O-E)^2/E (O-E)^2/V
stage=I    50       25     39.9     5.573     6.813
stage=II   77       51     63.9     2.606     3.662
stage=III  72       51     54.1     0.174     0.231
stage=IV   68       57     33.2    16.966    20.103
stage=unkn 71       45     37.9     1.346     1.642
Chisq= 27.2  on 4 degrees of freedom, p= 2e-05

# Peto＆Peto Gehan-Wilcoxon测试

survdiff(formula = su_obj ~ stage, data = orca, rho = 1)
N Observed Expected (O-E)^2/E (O-E)^2/V
stage=I    50     14.5     25.2     4.500     7.653
stage=II   77     29.3     39.3     2.549     4.954
stage=III  72     30.7     33.8     0.284     0.521
stage=IV   68     40.3     22.7    13.738    21.887
stage=unkn 71     32.0     25.9     1.438     2.359
Chisq= 30.9  on 4 degrees of freedom, p= 3e-06

# CoxPH模型

summary(m1)
Call:
coxph(formula = su_obj ~ sex + I((age - 65)/10) + stage, data = orca)
n= 338, number of events= 229
coef exp(coef) se(coef)     z Pr(>|z|)
sexMale          0.35139   1.42104  0.14139 2.485 0.012947
I((age - 65)/10) 0.41603   1.51593  0.05641 7.375 1.65e-13
stageII          0.03492   1.03554  0.24667 0.142 0.887421
stageIII         0.34545   1.41262  0.24568 1.406 0.159708
stageIV          0.88542   2.42399  0.24273 3.648 0.000265
stageunkn        0.58441   1.79393  0.25125 2.326 0.020016
exp(coef) exp(-coef) lower .95 upper .95
sexMale              1.421     0.7037    1.0771     1.875
I((age - 65)/10)     1.516     0.6597    1.3573     1.693
stageII              1.036     0.9657    0.6386     1.679
stageIII             1.413     0.7079    0.8728     2.286
stageIV              2.424     0.4125    1.5063     3.901
stageunkn            1.794     0.5574    1.0963     2.935
Concordance= 0.674  (se = 0.02 )
Rsquare= 0.226   (max possible= 0.999 )
Likelihood ratio test= 86.76  on 6 df,   p=<2e-16
Wald test            = 80.5  on 6 df,   p=3e-15
Score (logrank) test = 82.86  on 6 df,   p=9e-16

rho    chisq     p
sexMale          -0.00137 0.000439 0.983
I((age - 65)/10)  0.07539 1.393597 0.238
stageII          -0.04208 0.411652 0.521
stageIII         -0.06915 1.083755 0.298
stageIV          -0.10044 2.301780 0.129
stageunkn        -0.09663 2.082042 0.149
GLOBAL                 NA 4.895492 0.557

Cox模型的结果表明性别，年龄和阶段的显着影响。特别是，每增加10年，死亡率就会增加50％。与男性和女性相比，全因死亡率的HR为1.42。此外，估计数中第一阶段和第二阶段之间未发现任何差异。因此，谨慎的做法是将这些主题从数据中排除，并将前两个阶段组合为一个。

round(ci.exp(m2), 4)
exp(Est.)   2.5%  97.5%
sexMale             1.3284 0.9763 1.8074
I((age - 65)/10)    1.4624 1.2947 1.6519
st3III              1.3620 0.9521 1.9482
st3IV               2.3828 1.6789 3.3818

newd
sex age  st3 id
1    Male  40 I+II  1
2  Female  40 I+II  2
3    Male  80 I+II  3
4  Female  80 I+II  4
5    Male  40  III  5
6  Female  40  III  6
7    Male  80  III  7
8  Female  80  III  8
9    Male  40   IV  9
10 Female  40   IV 10
11   Male  80   IV 11
12 Female  80   IV 12

### AFT模型

Call:
flexsurvreg(formula = Surv(time, all) ~ sex + I((age - 65)/10) +
st3, data = orca2, dist = "weibull")
Estimates:
data mean  est       L95%      U95%      se        exp(est)  L95%
shape                   NA    0.93268   0.82957   1.04861   0.05575        NA        NA
scale                   NA   13.53151   9.97582  18.35456   2.10472        NA        NA
sexMale            0.53184   -0.33905  -0.66858  -0.00951   0.16813   0.71245   0.51243
I((age - 65)/10)  -0.15979   -0.41836  -0.54898  -0.28773   0.06665   0.65813   0.57754
st3III             0.26966   -0.32567  -0.70973   0.05839   0.19595   0.72204   0.49178
st3IV              0.25468   -0.95656  -1.33281  -0.58030   0.19197   0.38421   0.26374
U95%
shape                   NA
scale                   NA
sexMale            0.99053
I((age - 65)/10)   0.74996
st3III             1.06012
st3IV              0.55973
N = 267,  Events: 184,  Censored: 83
Total time at risk: 1620.864
Log-likelihood = -545.858, df = 6
AIC = 1103.716

Call:
weibreg(formula = Surv(time, all) ~ sex + I((age - 65)/10) +
st3, data = orca2)
Covariate           Mean       Coef Exp(Coef)  se(Coef)    Wald p
sex
Female    0.490     0         1           (reference)
Male    0.510     0.316     1.372     0.156     0.043
I((age - 65)/10)   -0.522     0.390     1.477     0.062     0.000
st3
I+II    0.551     0         1           (reference)
III    0.287     0.304     1.355     0.182     0.095
IV    0.162     0.892     2.440     0.178     0.000
log(scale)                    2.605    13.532     0.156     0.000
log(shape)                   -0.070     0.933     0.060     0.244
Events                    184
Total time at risk        1620.9
Max. log. likelihood      -545.86
LR test statistic         68.7
Degrees of freedom        4
Overall p-value           4.30767e-14

newd <- data.frame(sex = c("Male", "Female"), age = 65, st3 = "I+II")
summary(m2w, newdata = newd, fn = median.weibull, t = 1, B = 10000)
sex=Male, I((age - 65)/10)=0, st3=I+II
time      est      lcl      ucl
1    1 6.507834 4.898889 8.631952
sex=Female, I((age - 65)/10)=0, st3=I+II
time      est      lcl      ucl
1    1 9.134466 6.801322 12.33771

survfit(m2, newdata = newd)
Call: survfit(formula = m2, newdata = newd)
n events median 0.95LCL 0.95UCL
1 267    184   7.00    5.25    10.6
2 267    184   9.92    7.33    13.8

# 泊松回归

head(orca_splitted, 15)

拟合条件泊松回归，其中时间的影响（作为因子变量）可以被边缘化（不估计来提高计算效率）。

mod_poi <- gnm(all ~ sex + I((age-65)/10) + st3, data = orca_splitted,
family = poisson, eliminate = factor(time))
summary(mod_poi)

round(data.frame(cox = ci.exp(m2), poisson = ci.exp(mod_poi)), 4)
cox.exp.Est.. cox.2.5. cox.97.5. poisson.exp.Est.. poisson.2.5. poisson.97.5.
sexMale                 1.3284   0.9763    1.8074            1.3284       0.9763        1.8074
I((age - 65)/10)        1.4624   1.2947    1.6519            1.4624       1.2947        1.6519
st3III                  1.3620   0.9521    1.9482            1.3620       0.9521        1.9482
st3IV                   2.3828   1.6789    3.3818            2.3828       1.6789        3.3818

orca_splitted\$dur <- with(orca_splitted, time - tstart)
mod_poi2 <- glm(all ~ -1 + factor(time) + sex + I((age-65)/10) + st3,
data = orca_splitted, family = poisson, offset = log(dur))

newd <- data.frame(time = cuts, dur = 1,
sex = "Female", age = 65, st3 = "I+II")
blhaz <- data.frame(ci.pred(mod_poi2, newdata = newd))
ggplot(blhaz, aes(x = c(0, cuts[-138]), y = Estimate, xend = cuts, yend = Estimate)) + geom_segment() +
scale_y_continuous(trans = "log", limits = c(.05, 5), breaks = pretty_breaks()) +
theme_classic() + labs(x = "Time (years)", y = "Baseline hazard")

exp(Est.)  2.5%  97.5%
(Intercept)              0.074 0.040  0.135
ns(time, knots = k)1     0.402 0.177  0.912
ns(time, knots = k)2     1.280 0.477  3.432
ns(time, knots = k)3     0.576 0.220  1.509
ns(time, knots = k)4     1.038 0.321  3.358
ns(time, knots = k)5     4.076 0.854 19.452
ns(time, knots = k)6     1.040 0.171  6.314
sexMale                  1.325 0.975  1.801
I((age - 65)/10)         1.469 1.300  1.659
st3III                   1.360 0.952  1.942
st3IV                    2.361 1.665  3.347

### 比较不同的策略

newd <- data.frame(sex = "Female", age = 65, st3 = "I+II")

## 其他分析

### 非线性

Call:
coxph(formula = Surv(time, all) ~ sex + I(age - 65) + I((age -
65)^2) + st3, data = orca2)
n= 267, number of events= 184
coef exp(coef)  se(coef)     z Pr(>|z|)
sexMale         2.903e-01 1.337e+00 1.591e-01 1.825   0.0681
I(age - 65)     3.868e-02 1.039e+00 6.554e-03 5.902 3.59e-09
I((age - 65)^2) 9.443e-05 1.000e+00 3.576e-04 0.264   0.7917
st3III          3.168e-01 1.373e+00 1.838e-01 1.724   0.0847
st3IV           8.691e-01 2.385e+00 1.787e-01 4.863 1.16e-06
exp(coef) exp(-coef) lower .95 upper .95
sexMale             1.337     0.7481    0.9787     1.826
I(age - 65)         1.039     0.9621    1.0262     1.053
I((age - 65)^2)     1.000     0.9999    0.9994     1.001
st3III              1.373     0.7284    0.9576     1.968
st3IV               2.385     0.4193    1.6801     3.385
Concordance= 0.674  (se = 0.022 )
Rsquare= 0.216   (max possible= 0.999 )
Likelihood ratio test= 64.89  on 5 df,   p=1e-12
Wald test            = 63.11  on 5 df,   p=3e-12
Score (logrank) test = 67.64  on 5 df,   p=3e-13

age <- seq(20, 80, 1) - 65
geom_vline(xintercept = 65, lty = 2) + geom_hline(yintercept = 1, lty = 2)

### 时间依赖系数

cox.zph()函数可用于绘制个体预测因子随时间的影响，因此可用于诊断和理解非比例风险。

id    sex      age stage          event  st3 tstart  time all tgroup
1  2 Female 83.08783   III Oral ca. death  III      0 0.419   1      1
2  3   Male 52.59008    II    Other death I+II      0 5.000   0      1
3  3   Male 52.59008    II    Other death I+II      5 7.915   1      2
4  4   Male 77.08630     I    Other death I+II      0 2.480   1      1
5  5   Male 80.33622    IV Oral ca. death   IV      0 2.500   1      1
6  6 Female 82.58132    IV    Other death   IV      0 0.167   1      1

I((age - 65)/10) + st3, data = orca3)
coef exp(coef) se(coef)      z        p
I((age - 65)/10)                              0.38184   1.46498  0.06255  6.104 1.03e-09
st3III                                        0.28857   1.33451  0.18393  1.569   0.1167
st3IV                                         0.87579   2.40076  0.17963  4.876 1.09e-06
relevel(sex, 2)Male:strata(tgroup)tgroup=1    0.42076   1.52312  0.19052  2.209   0.0272
relevel(sex, 2)Female:strata(tgroup)tgroup=1       NA        NA  0.00000     NA       NA
relevel(sex, 2)Male:strata(tgroup)tgroup=2   -0.10270   0.90240  0.28120 -0.365   0.7149
relevel(sex, 2)Female:strata(tgroup)tgroup=2       NA        NA  0.00000     NA       NA
relevel(sex, 2)Male:strata(tgroup)tgroup=3    1.13186   3.10142  1.09435  1.034   0.3010
relevel(sex, 2)Female:strata(tgroup)tgroup=3       NA        NA  0.00000     NA       NA
Likelihood ratio test=68.06  on 6 df, p=1.023e-12
n= 416, number of events= 184

# 模拟生存百分位数

Call:
ctqr(formula = Surv(time, all) ~ st3, data = orca2, p = p)
Coefficients:
p = 0.25
(Intercept)   2.665
st3III       -1.369
st3IV        -1.877
Degrees of freedom: 267 total; 225 residuals

β0= 2.665 是参考组中死亡概率等于0.25的时间。另一个被解释为相对度量。

p = c(p, p - .005, p + .005)
)[-1, ]
= 1 - p, xend = time_ref,
yend = 1 - p))

ctqr(formula = Surv(time, all) ~ sex + I((age - 65)/10) + st3,
data = orca2, p = seq(0.1, 0.7, 0.1))
Coefficients:
p = 0.1   p = 0.2   p = 0.3   p = 0.4   p = 0.5   p = 0.6   p = 0.7
(Intercept)        1.44467   2.44379   4.65302   7.73909  10.81386  12.18348  15.19359
sexMale           -0.09218  -0.27385  -0.85720  -2.49580  -3.27962  -2.81428  -4.01656
I((age - 65)/10)  -0.19026  -0.39819  -1.20278  -1.93144  -2.39229  -3.03915  -3.52711
st3III            -0.60994  -1.08534  -1.89357  -2.23741  -3.10478  -2.00037  -1.59213
st3IV             -1.07679  -1.59566  -2.92700  -3.16652  -4.74759  -4.80838  -5.25810
Degrees of freedom: 267 total; 220 residuals

coef_q <- data.frame(coef(fit_q)) %>%
.96 * se
)

### CIF

mstate计算竞争事件的非参数CIF（也称为Aalen-Johansen估计）和相关的标准误差。

head(cif)
time      Surv CI.Oral ca. death CI.Other death      seSurv seCI.Oral ca. death
1 0.085 0.9925094       0.007490637    0.000000000 0.005276805         0.005276805
2 0.162 0.9887640       0.011235955    0.000000000 0.006450534         0.006450534
3 0.167 0.9812734       0.011235955    0.007490637 0.008296000         0.006450534
4 0.170 0.9775281       0.011235955    0.011235955 0.009070453         0.006450534
5 0.249 0.9737828       0.011235955    0.014981273 0.009778423         0.006450534
6 0.252 0.9662921       0.014981273    0.018726592 0.011044962         0.007434315
seCI.Other death
1      0.000000000
2      0.000000000
3      0.005276805
4      0.006450534
5      0.007434315
6      0.008296000

grid.arrange(
ncol = 2
)

• 针对事件特定风险的Cox模型，例如，兴趣在于预测因素对死亡率的生物效应非常疾病。
• 当我们想要评估因子对事件总体累积发生率的影响时。

### CIF Cox模型

round(ci.exp(m2haz2), 4)
exp(Est.)   2.5%  97.5%
sexMale             1.8103 1.1528 2.8431
I((age - 65)/10)    1.4876 1.2491 1.7715
st3III              1.2300 0.7488 2.0206
st3IV               1.6407 0.9522 2.8270

### CRR模型

crr()cmprsk竞争风险的情况下，包中的函数可用于子分布函数的回归建模。

Call:
crr(ftime = time, fstatus = event, cov1 = model.matrix(m2), failcode = "Oral ca. death")
coef exp(coef) se(coef)      z p-value
sexMale          -0.0953     0.909    0.213 -0.447 6.5e-01
I((age - 65)/10)  0.2814     1.325    0.093  3.024 2.5e-03
st3III            0.3924     1.481    0.258  1.519 1.3e-01
st3IV             1.0208     2.775    0.233  4.374 1.2e-05
exp(coef) exp(-coef)  2.5% 97.5%
sexMale              0.909      1.100 0.599  1.38
I((age - 65)/10)     1.325      0.755 1.104  1.59
st3III               1.481      0.675 0.892  2.46
st3IV                2.775      0.360 1.757  4.39
Num. cases = 267
Pseudo Log-likelihood = -501
Pseudo likelihood ratio test = 31.4  on 4 df,
m2fg2 <- with(orca2, crr(time, event, cov1 = model.matrix(m2), failcode = "Other death"))
summary(m2fg2, Exp = T)
Competing Risks Regression
Call:
crr(ftime = time, fstatus = event, cov1 = model.matrix(m2), failcode = "Other death")
coef exp(coef) se(coef)      z p-value
sexMale           0.544     1.723   0.2342  2.324   0.020
I((age - 65)/10)  0.197     1.218   0.0807  2.444   0.015
st3III            0.130     1.139   0.2502  0.521   0.600
st3IV            -0.212     0.809   0.2839 -0.748   0.450
exp(coef) exp(-coef)  2.5% 97.5%
sexMale              1.723      0.580 1.089  2.73
I((age - 65)/10)     1.218      0.821 1.040  1.43
st3III               1.139      0.878 0.698  1.86
st3IV                0.809      1.237 0.464  1.41
Num. cases = 267
Pseudo Log-likelihood = -471
Pseudo likelihood ratio test = 9.43  on 4 df,

|
16天前
|

|
5天前
|
JSON 数据挖掘 API

38 0
|
15天前
|
JavaScript Java 测试技术

7 0
|
1月前
|
JSON 数据挖掘 API

29 2
|
25天前
|

26 0
|
2月前
|

【R语言实战】聚类分析及可视化
【R语言实战】聚类分析及可视化
60 2
|
2月前
|

R语言逻辑回归logistic模型ROC曲线可视化分析2例：麻醉剂用量影响、汽车购买行为2
R语言逻辑回归logistic模型ROC曲线可视化分析2例：麻醉剂用量影响、汽车购买行为
66 2
|
2月前
|

411 3
|
2月前

52 3
|
2月前
|
Web App开发 数据可视化 数据挖掘

226 2