# Setup library(survival) source("http://myweb.uiowa.edu/pbreheny/7210/f18/notes/fun.R") PBC <- subset(pbc, !is.na(trt)) PBC <- PBC[order(PBC$time),] GVHD <- read.delim("https://s3.amazonaws.com/pbreheny-data-sets/gvhd.txt") # Residuals: Cox-Snell ---------------------------------------------------- # Slide 4: Construction fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili, PBC) sfit <- survfit(fit) H0 <- -log(sfit$surv) H <- approxfun(c(0, sfit$time), c(0, H0), method='constant') e1 <- H(fit$y[,1])*exp(fit$linear.predictors) e2 <- fit$y[,2]-residuals(fit) head(e1) head(e2) # Slide 5: Diagnostic plot efit <- survfit(Surv(e1, fit$y[,2])~1) lim <- c(0,2) plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1, xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim) ciband(efit, fun=function(x) -log(x)) lines(lim, lim, col='red', lwd=2) # Slide 9: Alternative display plot(log(efit$time), (-log(efit$surv) - efit$time)/efit$std.err, type='l', col='red', lwd=2, bty='n', las=1, ylim=c(-3,3), xlab='Residual', ylab=expression((hat(Lambda)(e)-e)/SE), xaxt='n') at <- seq(-4, 1, len=4) axis(1, at=at, lab=round(exp(at), 2)) abline(h=c(-2,2), col='gray') # Slide 6: Simulate PH violation set.seed(14) n <- 500 x <- rbinom(n, 1, 0.5) y <- 2*x + rnorm(n) fit <- coxph(Surv(exp(y)) ~ x) e <- fit$y[,2]-residuals(fit) efit <- survfit(Surv(e, fit$y[,2])~1) lim <- c(0, 6) plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1, xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim) ciband(efit, fun=function(x) -log(x)) lines(lim, lim, col='red', lwd=2) # Slide 7: Zoom in lim <- c(0, 0.25) plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1, xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim) ciband(efit, fun=function(x) -log(x)) lines(lim, lim, col='red', lwd=2) # Slide 8: Alternative display plot(log(efit$time), (-log(efit$surv) - efit$time)/efit$std.err, type='l', col='red', lwd=2, bty='n', las=1, ylim=c(-3,3), xlab='Residual', ylab=expression((hat(Lambda)(e)-e)/SE), xaxt='n') at <- seq(-6, 2, 2) axis(1, at=at, lab=round(exp(at), 2)) abline(h=c(-2,2), col='gray') # Slide 10: GVHD fit <- coxph(Surv(Time, Status) ~ Group, GVHD) e <- fit$y[,2]-residuals(fit) efit <- survfit(Surv(e, fit$y[,2])~1) lim <- c(0, max(e)) plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, las=1, xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim) ciband(efit, fun=function(x) -log(x)) lines(lim, lim, col='red', lwd=2) plot(log(efit$time), (-log(efit$surv) - efit$time)/efit$std.err, type='l', col='red', lwd=2, bty='n', las=1, ylim=c(-3,3)) # Slide 11: GVHD, stratified efit <- survfit(Surv(e, fit$y[,2])~GVHD$Group) lim <- c(0, max(e)) plot(efit, fun='cumhaz', mark.time=FALSE, bty='n', conf.int=FALSE, lwd=2, col=pal(2), las=1, xlab='Residual', ylab='Cumulative hazard', xlim=lim, ylim=lim) lines(lim, lim, col='gray', lwd=2) # Residuals: Martingale --------------------------------------------------- # Slide 15: Martingales for PBC fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili, PBC) r <- residuals(fit) plot(PBC$time/365.25, residuals(fit), pch=19, las=1, bty='n', col=pal(2)[fit$y[,2]+1], xlab='Time', ylab='Martingale residual') toplegend(legend=c("Censored", "Died / Liver failure"), pch=19, col=pal(2)) PBC[which.min(residuals(fit)),] ecdf(PBC$bili)(PBC$bili[which.min(residuals(fit))]) ecdf(fit$linear.predictors)(fit$linear.predictors[which.min(residuals(fit))]) # Residuals: Deviance ----------------------------------------------------- # Slide 19: Construction fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili, PBC) m <- residuals(fit) d <- fit$y[,2] r <- sign(m)*sqrt(-2*(m + d*log(d-m))) head(r) head(residuals(fit, type='deviance')) # Slide 20: Base plot plot(PBC$time/365.25, r, pch=19, bty='n', las=1, col=pal(2)[fit$y[,2]+1], xlab='Time', ylab='Deviance residual') toplegend(legend=c("Censored", "Died / Liver failure"), pch=19, col=pal(2)) # Slide 22: Outliers cbind(fit$y, model.matrix(fit), r)[order(r)[1:5],] cbind(fit$y, model.matrix(fit), r, rank(-fit$linear.predictors))[order(-r)[1:5],] # Slide 24: vs albumin plot(PBC$albumin, r, pch=19, bty='n', las=1, col='gray60', xlab='Albumin', ylab='Deviance residual') lines(lowess(PBC$albumin, r), col='red', lwd=3) # Functional forms -------------------------------------------------------- # Slide 26-27: Albumin library(visreg) f <- function(x) {pmin(x, 3.5)} fit2 <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC) f2 <- function(x) {cbind(x, (x-3.5)*(x-3.5 > 0))} fit3 <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili + f2(albumin), PBC) visreg(fit2, 'albumin', xlab='Albumin', ylab="Linear predictor") visreg(fit3, 'albumin', xlab='Albumin', ylab="Linear predictor") visreg(fit2, 'albumin', xlab='Albumin', ylab="Hazard ratio", trans=exp, ylim=c(0, 8)) # Slide 28: Comparing AIC for albumin fit0 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili, PBC) fit1 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + albumin, PBC) fit2 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC) fit3 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f2(albumin), PBC) AIC(fit0) AIC(fit1) AIC(fit2) AIC(fit3) # Slide 29: Bilirubin fit0 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + f(albumin), PBC) fit1 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC) r0 <- residuals(fit0, type='deviance') r1 <- residuals(fit1, type='deviance') plot(PBC$bili, r0, pch=19, bty='n', las=1, col='gray60', xlab='Bilirubin', ylab='Deviance residual') mtext('without bili') lines(lowess(PBC$bili, r0), col='red', lwd=3) plot(PBC$bili, r1, pch=19, bty='n', las=1, col='gray60', xlab='Bilirubin', ylab='Deviance residual') mtext('with bili') lines(lowess(PBC$bili, r1), col='red', lwd=3) # Slide 30-31: log(Bilirubin) fit <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + log(bili) + f(albumin), PBC) visreg(fit, 'bili', xlab='Bilirubin', ylab="Linear predictor") 2^coef(fit)['log(bili)'] # Slide 33: Spline(Bilirubin) fit <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + pspline(bili) + f(albumin), PBC) visreg(fit, 'bili', xlab='Bilirubin', ylab="Linear predictor") # Slide 34: Comparing AIC for bilirubin fit0 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + f(albumin), PBC) fit1 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC) fit2 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + log(bili) + f(albumin), PBC) fit3 <- coxph(Surv(time, status!=0) ~ trt + stage + hepato + pspline(bili) + f(albumin), PBC) AIC(fit0) AIC(fit1) AIC(fit2) AIC(fit3) # Slide 35: Stage PBC$Stage <- factor(PBC$stage) fit <- coxph(Surv(time, status!=0) ~ trt + Stage + hepato + log(bili) + f(albumin), PBC) visreg(fit, 'Stage', ylab="Linear predictor") # Influence (delta-beta) -------------------------------------------------- # Slide 38: Setup/syntax fit <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + log(bili) + f(albumin), PBC) D <- residuals(fit, type='dfbeta') # Slide 39: Treatment plot(D[,1], type='h', col=pal(2)[PBC$trt], bty='n', las=1, lwd=1.5, xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij])) toplegend(legend=1:2, lwd=1.5, col=pal(2)) mtext("Treatment:", line=3) cbind(fit$y, model.matrix(fit))[order(-D[,1])[1],] PBC[order(-D[,1])[1],] # Slide 40: Stage plot(D[,2], type='h', col=pal(4)[PBC$Stage], bty='n', las=1, lwd=1.5, xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij])) toplegend(legend=levels(PBC$Stage), lwd=1.5, col=pal(4)) mtext("Stage:", line=3) PBC[order(D[,2])[1],] # Slide 41: Bili, log scale xx <- log(PBC$bili) xx <- (xx - min(xx)) / (diff(range(xx))) RGB <- colorRamp(c(pal(2)[2], 'gray60', pal(2)[1]))(xx) col <- apply(RGB, 1, function(x) rgb(x[1], x[2], x[3], max=255)) plot(D[,4], type='h', col=col, bty='n', las=1, lwd=1.5, xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij])) toplegend(legend=c("High bili", "Low bili"), lwd=1.5, col=pal(2)) mtext("Log scale", 3, line=4) PBC[order(D[,4])[1],] # Slide 41: Bili (original scale) fit2 <- coxph(Surv(time/365.25, status!=0) ~ trt + stage + hepato + bili + f(albumin), PBC) D2 <- residuals(fit2, type='dfbeta') xx <- PBC$bili xx <- (xx - min(xx)) / (diff(range(xx))) RGB <- colorRamp(c(pal(2)[2], 'gray60', pal(2)[1]))(xx) col <- apply(RGB, 1, function(x) rgb(x[1], x[2], x[3], max=255)) plot(D2[,4], type='h', col=col, bty='n', las=1, lwd=1.5, xlab='Index (ordered by time on study)', ylab=expression(hat(Delta)[ij])) toplegend(legend=c("High bili", "Low bili"), lwd=1.5, col=pal(2)) mtext("Original scale", 3, line=4)