## The original codes for this analysis were written by Gelman and Hill. I have modified the codes for Stat211. 


# Getting the data and selecting the counties in Minnesota

library ("arm")
data <- read.table ("radon.dat", header=T, sep=",")

head(data)

mn <- data$state=="MN"
radon <- data$activity[mn]
log.radon <- log (ifelse (radon==0, .1, radon))
floor <- data$floor[mn]       # 0 for basement, 1 for first floor
n <- length(radon)
y <- log.radon
x <- floor

# get county index variable
county.name <- as.vector(data$county[mn])
uniq <- unique(county.name)
J <- length(uniq)
county <- rep (NA, J)
for (i in 1:J){
  county[county.name==uniq[i]] <- i
}


# Complete pooling
ybarbar = mean(y)

sample.size <- as.vector (table (county))

# Jittering sample sizes so the plots don't over on top of each other
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))

# county means, variance, and standard deviation
cty.mns = tapply(y,county,mean)
cty.vars = tapply(y,county,var)
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)

# no predictors
## no-pooling vs. pooling
par(mfrow=c(1,2))
par (mar=c(5,5,4,2)+.1)
plot (sample.size.jittered, cty.mns, cex.lab=1.1, cex.axis=1.1,
      xlab="sample size in county j",
      ylab="avgerage log radon in county j",
      pch=20, log="x", cex=.5, mgp=c(2,.5,0),
      ylim=c(0,3.2), yaxt="n", xaxt="n")
axis (1, c(1,3,10,30,100), cex.axis=1.1)
axis (2, seq(0,3), cex.axis=1.1)
for (j in 1:J){
  lines (rep(sample.size.jittered[j],2),
         cty.mns[j] + c(-1,1)*cty.sds[j], lwd=.5)
}
abline(h=ybarbar)
title("No pooling",cex.main=.9, line=1)


# Partial pooling with no predictors
# Random intercept model with no predictor 
M0 <- lmer (y ~ 1 + (1 | county), REML = FALSE)
summary(M0)

a.hat.M0 <- coef(M0)$county[,1]               
a.se.M0 <- se.coef(M0)$county

par (mar=c(5,5,4,2)+.1)
plot (sample.size.jittered, t(a.hat.M0), cex.lab=1.1, cex.axis=1.1,
  xlab="sample size in county j", ylab=expression (paste
  ("estimated intercept, ", alpha[j])),
  pch=20, log="x", cex=.5, mgp=c(2,.5,0), ylim=c(0,3.2), yaxt="n", xaxt="n")
axis (1, c(1,3,10,30,100), cex.axis=1.1)
axis (2, seq(0,3), cex.axis=1.1)
for (j in 1:J){
  lines (rep(sample.size.jittered[j],2),
    as.vector(a.hat.M0[j]) + c(-1,1)*a.se.M0[j], lwd=.5, col="gray10")
}
abline(h=ybarbar)
title("Random intercept model",cex.main=.9, line=1)





##########################

# Including a predictor, floor.



## Complete pooling regression
lm.pooled <- lm (y ~ x)
summary (lm.pooled)

## No pooling regression
lm.unpooled <- lm (y ~ x + factor(county) -1)
summary (lm.unpooled)


## Partial pooling
## Random intercept model where we include "floor" as a predictor
M1 <- lmer (y ~ x + (1 | county), REML = FALSE)
summary (M1)

# estimated regression coefficicents
coef (M1)

# fixed and random effects
fixef (M1)
ranef (M1)

# uncertainties in the estimated coefficients
se.fixef (M1)
se.ranef (M1)


x.jitter <- x + runif(n,-.05,.05)
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70)  # counties to be displayed
y.range <- range (y[!is.na(match(county,display8))])

par (mfrow=c(2,4), mar=c(4,4,3,1), oma=c(1,1,2,1))
for (j in display8){
  plot (x.jitter[county==j], y[county==j], xlim=c(-.05,1.05), ylim=y.range,
        xlab="floor", ylab="log radon level", cex.lab=1.2, cex.axis=1.1,
        pch=20, mgp=c(2,.7,0), xaxt="n", yaxt="n", cex.main=1,
        main=uniq[j])
  axis (1, c(0,1), mgp=c(2,.7,0), cex.axis=1.1)
  axis (2, seq(-1,3,2), mgp=c(2,.7,0), cex.axis=1.1)
  curve (coef(lm.pooled)[1] + coef(lm.pooled)[2]*x, lwd=.5, lty=2, add=TRUE)
  curve (coef(lm.unpooled)[j+1] + coef(lm.unpooled)[1]*x, lwd=.5, add=TRUE)
  curve (coef(M1)$county[j+1, 1] + coef(M1)$county[j+1, 2]*x, lwd=2, add=TRUE)
  
}



##
## Random intercept and random coefficient model 

M2 <- lmer (y ~ x + (x | county), REML = FALSE)
summary (M2)

  # estimated regression coefficicents
coef (M2)

  # fixed and random effects
fixef (M2)
ranef (M2)

  # uncertainties in the estimated coefficients
se.fixef (M2)
se.ranef (M2)


x.jitter <- x + runif(n,-.05,.05)
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70)  # counties to be displayed
y.range <- range (y[!is.na(match(county,display8))])

par (mfrow=c(2,4), mar=c(4,4,3,1), oma=c(1,1,2,1))
for (j in display8){
  plot (x.jitter[county==j], y[county==j], xlim=c(-.05,1.05), ylim=y.range,
        xlab="floor", ylab="log radon level", cex.lab=1.2, cex.axis=1.1,
        pch=20, mgp=c(2,.7,0), xaxt="n", yaxt="n", cex.main=1,
        main=uniq[j])
  axis (1, c(0,1), mgp=c(2,.7,0), cex.axis=1.1)
  axis (2, seq(-1,3,2), mgp=c(2,.7,0), cex.axis=1.1)
  curve (coef(lm.pooled)[1] + coef(lm.pooled)[2]*x, lwd=.5, lty=2, add=TRUE)
  curve (coef(lm.unpooled)[j+1] + coef(lm.unpooled)[1]*x, lwd=.5, add=TRUE)
  curve (coef(M2)$county[j+1, 1] + coef(M2)$county[j+1, 2]*x, lwd=2, add=TRUE)
  
}
