R code for Chapter 11, Count Data ######## Poisson regression for rates. 10 year CHD mortality in British male doctors A <- c(1,2,3,4,5,1,2,3,4,5) S <- c(1,1,1,1,1,0,0,0,0,0) y <- c(32,104,206,186,102, 2,12,28,28,31) M <- c(52407,43248,28612,12663,5317,18790,10673,5710,2585,1462) # Model 2a from WB code. For log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i] PostMeanRate1 <- 10000*c(0.001034000,0.002381000,0.005488000,0.012660000,0.029230000, 0.000689500,0.001588000,0.003660000,0.008442000,0.019490000) # Model 2b from WB code. For log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i] + beta[4]*A[i]*S[i] PostMeanRate2 <- 10000*c(0.00113100,0.00250900,0.00556800,0.01237000,0.02751000, 0.00040880,0.00114900,0.00325000,0.00924700,0.02646000) # Model 2c from WB code. For log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*A[i]*A[i] + beta[4]*S[i] + # beta[5]*A[i]*S[i] + beta[6]*A[i]*A[i]*S[i] PostMeanRate3 <- 10000*c(0.0005805,0.0024720,0.0072250,0.0143800,0.0195300,0.0001549,0.0010390,0.0045520,0.0122200,0.0201800) ## Figure 11.1. Empirical rates plot(A[1:5],10000*y[1:5]/M[1:5], type='b',pch="S", xlab="Age", ylab="Deaths per 10,000 person years", cex=1.5,lwd=2,ylim=c(0,250),axes=F) lines(A[6:10],10000*y[6:10]/M[6:10], type='b',pch="N", lty=2, lwd=2, cex=1.5) legend("bottomright",lty=c(1,2),c("Smokers","Nonsmokers"),lwd=2) axis(2) axis(1,at=c(1,2,3,4,5),labels=c("35-44","45-54","55-64","65-74","75-84")) ## Figure 11.2. Empirical rates; log scale plot(A[1:5],log(10000*y[1:5]/M[1:5]), type='b',pch="S", ylim=c(0,6), xlab="Age", ylab="Deaths per 10,000 person years (log scale)", cex=1.5,lwd=2,axes=F) lines(A[6:10],log(10000*y[6:10]/M[6:10]), type='b',pch="N", lty=2, lwd=2, cex=1.5) legend("bottomright",lty=c(1,2),c("Smokers","Nonsmokers"),lwd=2) axis(2) axis(1,at=c(1,2,3,4,5),labels=c("35-44","45-54","55-64","65-74","75-84")) ## LPML for Model 2a. log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i] cpoinv <- c(35390.00,31.73,931600.00,753.90,9095000000.00,14680.00,25.01,64.69,45.45,20.88) cpo <- 1/cpoinv LPML1 <- sum(log(cpo)) LPML1 ## LPML for Model 2b. log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*S[i] + beta[4]*A[i]*S[i] cpoinv <- c(2882000.000,36.970,321300.000,2082.000,112400000.000,303.500,10.750,306.300,24.810,869.400) cpo <- 1/cpoinv LPML2 <- sum(log(cpo)) LPML2 ## LPML for Model 2c. log(theta[i]) <- beta[1] + beta[2]*A[i] + beta[3]*A[i]*A[i] + beta[4]*S[i] + beta[5]*A[i]*S[i] + beta[6]*A[i]*A[i]*S[i] cpoinv <- c(27.20,36.34,55.76,50.24,64.93,6.96,13.03,24.03,27.26,59.11) cpo <- 1/cpoinv LPML3 <- sum(log(cpo)) LPML3 ### MLE estimates of beta in Models 1-3. Can be used as starting values in the Gibbs sampler A2 <- A*A MLEfit1 <- glm(y ~ A + S , family=poisson, offset=log(M)) summary(MLEfit1) MLEfit2 <- glm(y ~ A + S + A*S, family=poisson, offset=log(M)) summary(MLEfit2) MLEfit3 <- glm(y ~ A + A2 + S + A*S + A2*S, family=poisson, offset=log(M)) summary(MLEfit3)