library(KMsurv)
Warning messages:
1: In doTryCatch(return(expr), name, parentenv, handler) :
liste de retraçage des affichages incomplète
2: In doTryCatch(return(expr), name, parentenv, handler) :
invalid graphics state
3: In doTryCatch(return(expr), name, parentenv, handler) :
invalid graphics state
library(tidyverse)
library(survival)
data("pneumon")
?pneumon
head(pneumon)
names(pneumon)
[1] "chldage" "hospital" "mthage" "urban" "alcohol"
[6] "smoke" "region" "poverty" "bweight" "race"
[11] "education" "nsibs" "wmonth" "sfmonth" "agepn"
mean(pneumon$alcohol>0)
[1] 0.3576369
glimpse(pneumon)
Observations: 3,470
Variables: 15
$ chldage [3m[38;5;246m<dbl>[39m[23m 12, 12, 3, 2, 4, 12, 7, 3, 7, 12, 12, 12, 3…
$ hospital [3m[38;5;246m<int>[39m[23m 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ mthage [3m[38;5;246m<int>[39m[23m 22, 20, 24, 22, 21, 20, 24, 24, 26, 21, 24,…
$ urban [3m[38;5;246m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ alcohol [3m[38;5;246m<int>[39m[23m 0, 1, 3, 2, 1, 0, 0, 3, 2, 1, 0, 0, 0, 2, 0…
$ smoke [3m[38;5;246m<int>[39m[23m 0, 0, 0, 2, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 1…
$ region [3m[38;5;246m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ poverty [3m[38;5;246m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ bweight [3m[38;5;246m<int>[39m[23m 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1…
$ race [3m[38;5;246m<int>[39m[23m 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
$ education [3m[38;5;246m<int>[39m[23m 10, 12, 12, 9, 12, 12, 12, 14, 12, 12, 16, …
$ nsibs [3m[38;5;246m<int>[39m[23m 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0…
$ wmonth [3m[38;5;246m<int>[39m[23m 1, 2, 1, 0, 0, 0, 0, 4, 1, 3, 0, 0, 0, 4, 0…
$ sfmonth [3m[38;5;246m<int>[39m[23m 1, 2, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 0, 2, 0…
$ agepn [3m[38;5;246m<int>[39m[23m 1, 12, 3, 2, 4, 12, 7, 3, 6, 12, 12, 12, 3,…
pneumon = mutate(pneumon , region = as.factor((region)))
pneumon = mutate(pneumon , race = as.factor((race)))
On calcule
KM_estimator = survfit(Surv(chldage,hospital) ~ 1, data= pneumon)
summary(KM_estimator)
Call: survfit(formula = Surv(chldage, hospital) ~ 1, data = pneumon)
time n.risk n.event survival std.err lower 95% CI upper 95% CI
1 3386 21 0.994 0.00135 0.991 0.996
2 3282 14 0.990 0.00176 0.986 0.993
3 3184 12 0.986 0.00205 0.982 0.990
4 3089 4 0.985 0.00215 0.980 0.989
5 2993 6 0.983 0.00229 0.978 0.987
6 2880 5 0.981 0.00241 0.976 0.986
7 2779 1 0.981 0.00243 0.976 0.985
8 2682 2 0.980 0.00249 0.975 0.985
9 2585 4 0.978 0.00260 0.973 0.983
10 2496 2 0.977 0.00265 0.972 0.983
11 2418 2 0.977 0.00271 0.971 0.982
La probabilité estimée de n’avoir pas eu de pneumonie à 6 mois est 0.981 (CI = [0.976 , 0.986])
On crée les variables dummies \(Z\) et alcohol_simple
à partir des variables wmonth
et alcohol
.
pneumon = mutate(pneumon , Z = recode(factor(wmonth > 0 ), 'FALSE' ="Never breastfed",'TRUE'= "Breastfed"))
pneumon = mutate(pneumon , alcohol_simple = factor(alcohol > 0 ))
On commence par représenter les courbes de survie dans les 2 groupes.
library(survminer)
Le chargement a nécessité le package : ggpubr
Le chargement a nécessité le package : magrittr
Attachement du package : ‘magrittr’
The following object is masked from ‘package:purrr’:
set_names
The following object is masked from ‘package:tidyr’:
extract
Registered S3 method overwritten by 'data.table':
method from
print.data.table
KM_estimator_bf = survfit(Surv(chldage,hospital) ~ Z, data= pneumon)
ggsurvplot(KM_estimator_bf,conf.int = TRUE,ylim=c(0.8,1))
Avec le test du log-rank :
survdiff(Surv(chldage,hospital) ~ Z, data= pneumon)
Call:
survdiff(formula = Surv(chldage, hospital) ~ Z, data = pneumon)
N Observed Expected (O-E)^2/E (O-E)^2/V
Z=Never breastfed 2036 59 42.7 6.22 15
Z=Breastfed 1434 14 30.3 8.77 15
Chisq= 15 on 1 degrees of freedom, p= 1e-04
La différence visible sur le graphique est bien significative (p-value = \(10^{-4}\)).
On recommence dans un modèle de Cox.
coxph_bf = coxph(Surv(chldage,hospital) ~ Z, data= pneumon)
summary(coxph_bf)
Call:
coxph(formula = Surv(chldage, hospital) ~ Z, data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.0970 0.3339 0.2973 -3.69 0.000224 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3339 2.995 0.1864 0.5979
Concordance= 0.614 (se = 0.023 )
Likelihood ratio test= 16.59 on 1 df, p=5e-05
Wald test = 13.62 on 1 df, p=2e-04
Score (logrank) test = 15.04 on 1 df, p=1e-04
Tous les tests concluent à un effet significatif de la variable \(Z\). Avec une division du risque par 3 (p-value Wald = \(2*10^{-4}\) et p-value LRT = \(5*10^{-5}\)) pour les enfants allaités.
Cela signifie qu’il y a une association entre l’allaitement et le risque de pneumonie
names(pneumon)
[1] "chldage" "hospital" "mthage"
[4] "urban" "alcohol" "smoke"
[7] "region" "poverty" "bweight"
[10] "race" "education" "nsibs"
[13] "wmonth" "sfmonth" "agepn"
[16] "Z" "alcohol_simple"
var = "mthage"
formule = paste(c("Surv(chldage,hospital) ~ Z",var),collapse = '+')
print(formule)
[1] "Surv(chldage,hospital) ~ Z+mthage"
coxph(as.formula(formule),data=pneumon)
Call:
coxph(formula = as.formula(formule), data = pneumon)
coef exp(coef) se(coef) z p
ZBreastfed -1.02651 0.35826 0.30096 -3.411 0.000648
mthage -0.06776 0.93448 0.04521 -1.499 0.133908
Likelihood ratio test=18.86 on 2 df, p=8.01e-05
n= 3470, number of events= 73
for (var in names(pneumon)[-c(1,2,5,16)]){
formule = paste(c("Surv(chldage,hospital) ~ Z",var),collapse = '+')
print(summary(coxph(as.formula(formule),data=pneumon) ))
}
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.02651 0.35826 0.30096 -3.411 0.000648 ***
mthage -0.06776 0.93448 0.04521 -1.499 0.133908
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3583 2.791 0.1986 0.6462
mthage 0.9345 1.070 0.8552 1.0211
Concordance= 0.635 (se = 0.028 )
Likelihood ratio test= 18.86 on 2 df, p=8e-05
Wald test = 15.86 on 2 df, p=4e-04
Score (logrank) test = 17.29 on 2 df, p=2e-04
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.0720 0.3423 0.2978 -3.60 0.000319 ***
urban -0.3819 0.6826 0.2496 -1.53 0.125997
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3423 2.921 0.1910 0.6137
urban 0.6826 1.465 0.4185 1.1133
Concordance= 0.638 (se = 0.029 )
Likelihood ratio test= 18.82 on 2 df, p=8e-05
Wald test = 16.01 on 2 df, p=3e-04
Score (logrank) test = 17.5 on 2 df, p=2e-04
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.0501 0.3499 0.2979 -3.525 0.000424 ***
smoke 0.4224 1.5257 0.1510 2.797 0.005156 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3499 2.8579 0.1951 0.6274
smoke 1.5257 0.6554 1.1348 2.0513
Concordance= 0.665 (se = 0.031 )
Likelihood ratio test= 23.83 on 2 df, p=7e-06
Wald test = 21.62 on 2 df, p=2e-05
Score (logrank) test = 23.42 on 2 df, p=8e-06
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.0658 0.3445 0.2976 -3.581 0.000342 ***
region -0.2168 0.8051 0.1236 -1.754 0.079389 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3445 2.903 0.1922 0.6173
region 0.8051 1.242 0.6319 1.0258
Concordance= 0.647 (se = 0.03 )
Likelihood ratio test= 19.63 on 2 df, p=5e-05
Wald test = 16.53 on 2 df, p=3e-04
Score (logrank) test = 17.93 on 2 df, p=1e-04
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.0919 0.3356 0.2977 -3.668 0.000245 ***
poverty -0.1331 0.8753 0.3981 -0.334 0.738039
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3356 2.980 0.1872 0.6015
poverty 0.8753 1.142 0.4012 1.9100
Concordance= 0.616 (se = 0.024 )
Likelihood ratio test= 16.69 on 2 df, p=2e-04
Wald test = 13.73 on 2 df, p=0.001
Score (logrank) test = 15.16 on 2 df, p=5e-04
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.0087 0.3647 0.3018 -3.342 0.00083 ***
bweight 0.4203 1.5224 0.2376 1.768 0.07698 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3647 2.7420 0.2019 0.6589
bweight 1.5224 0.6569 0.9555 2.4255
Concordance= 0.643 (se = 0.029 )
Likelihood ratio test= 19.7 on 2 df, p=5e-05
Wald test = 16.83 on 2 df, p=2e-04
Score (logrank) test = 18.41 on 2 df, p=1e-04
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -1.1212 0.3259 0.2995 -3.744 0.000181 ***
race -0.1108 0.8951 0.1638 -0.676 0.498879
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3259 3.069 0.1812 0.5861
race 0.8951 1.117 0.6493 1.2340
Concordance= 0.628 (se = 0.029 )
Likelihood ratio test= 17.05 on 2 df, p=2e-04
Wald test = 14.06 on 2 df, p=9e-04
Score (logrank) test = 15.48 on 2 df, p=4e-04
Call:
coxph(formula = as.formula(formule), data = pneumon)
n= 3470, number of events= 73
coef exp(coef) se(coef) z Pr(>|z|)
ZBreastfed -0.97282 0.37802 0.30023 -3.240 0.00119 **
education -0.14935 0.86127 0.05377 -2.777 0.00548 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
ZBreastfed 0.3780 2.645 0.2099 0.6809
education 0.8613 1.161 0.7751 0.9570
Concordance= 0.674 (se = 0.028 )
Likelihood ratio test= 23.53 on 2 df, p=8e-06
Wald test = 21.14 on 2 df, p=3e-05
Score (logrank) test = 22.22 on 2 df, p=1e-05
coxph(Surv(chldage,hospital)~urban,data=pneumon)
Call:
coxph(formula = Surv(chldage, hospital) ~ urban, data = pneumon)
coef exp(coef) se(coef) z p
urban -0.4528 0.6358 0.2492 -1.817 0.0691
Likelihood ratio test=3.12 on 1 df, p=0.07748
n= 3470, number of events= 73
chisq.test(pneumon$urban,pneumon$Z)
Pearson's Chi-squared test with Yates' continuity
correction
data: pneumon$urban and pneumon$Z
X-squared = 15.614, df = 1, p-value = 7.766e-05
coxph(Surv(chldage,hospital) ~ . - wmonth - alcohol ,data=pneumon)
Call:
coxph(formula = Surv(chldage, hospital) ~ . - wmonth - alcohol,
data = pneumon)
coef exp(coef) se(coef) z p
mthage -0.092385 0.911754 0.056524 -1.634 0.1022
urban -0.321787 0.724852 0.261757 -1.229 0.2189
smoke 0.327166 1.387032 0.167292 1.956 0.0505
region -0.224849 0.798637 0.130782 -1.719 0.0856
poverty -0.033725 0.966837 0.400294 -0.084 0.9329
bweight 0.107751 1.113771 0.258354 0.417 0.6766
race -0.002007 0.997995 0.185048 -0.011 0.9913
education -0.061997 0.939886 0.071524 -0.867 0.3861
nsibs 0.314316 1.369322 0.136263 2.307 0.0211
sfmonth -0.179828 0.835414 0.161441 -1.114 0.2653
agepn 0.003993 1.004001 0.028647 0.139 0.8892
ZBreastfed -0.383887 0.681209 0.434117 -0.884 0.3765
alcohol_simpleTRUE -0.019139 0.981043 0.259388 -0.074 0.9412
Likelihood ratio test=41.65 on 13 df, p=7.46e-05
n= 3470, number of events= 73
Procédure backward basée sur les tests de Wald
coxph(Surv(chldage,hospital) ~ . - wmonth - alcohol -race ,data=pneumon)
Call:
coxph(formula = Surv(chldage, hospital) ~ . - wmonth - alcohol -
race, data = pneumon)
coef exp(coef) se(coef) z p
mthage -0.092391 0.911748 0.056520 -1.635 0.1021
urban -0.322549 0.724300 0.252174 -1.279 0.2009
smoke 0.327725 1.387807 0.159175 2.059 0.0395
region -0.225020 0.798500 0.129814 -1.733 0.0830
poverty -0.033408 0.967144 0.399226 -0.084 0.9333
bweight 0.107361 1.113336 0.255829 0.420 0.6747
education -0.061919 0.939959 0.071162 -0.870 0.3842
nsibs 0.314294 1.369292 0.136249 2.307 0.0211
sfmonth -0.179790 0.835446 0.161396 -1.114 0.2653
agepn 0.003987 1.003994 0.028642 0.139 0.8893
ZBreastfed -0.383648 0.681371 0.433549 -0.885 0.3762
alcohol_simpleTRUE -0.018963 0.981216 0.258878 -0.073 0.9416
Likelihood ratio test=41.65 on 12 df, p=3.808e-05
n= 3470, number of events= 73
cox_final
Call:
coxph(formula = Surv(chldage, hospital) ~ mthage + smoke + nsibs +
sfmonth, data = pneumon)
coef exp(coef) se(coef) z p
mthage -0.12386 0.88351 0.04965 -2.495 0.0126
smoke 0.38743 1.47318 0.15091 2.567 0.0103
nsibs 0.38437 1.46869 0.12182 3.155 0.0016
sfmonth -0.32331 0.72375 0.12615 -2.563 0.0104
Likelihood ratio test=35.16 on 4 df, p=4.308e-07
n= 3470, number of events= 73
predict(cox_final , newdata = new_data, type ="risk")
1
0.7057091
pred_new_data$surv
[1] 1.0000000 0.9967477 0.9945196 0.9925484
[5] 0.9918737 0.9908333 0.9899358 0.9897507
[9] 0.9893687 0.9885813 0.9881738 0.9877555
[13] 0.9877555