# Problem 1 # # assume the data is in a matrix named "fla" # # part (a) model1 <- lm(fla$bush04 ~ fla$etouch) summary(model1) # # part (b) index <- c(1:67) trt.ix <- index[fla$etouch == 1] ctl.ix <- index[fla$etouch == 0] ntrt <- length(trt.ix) nctl <- length(ctl.ix) covbal <- matrix(0, 13, 3) for (k in (4:16)) { sp <- sqrt(((ntrt-1)*var(fla[trt.ix,k]) + (nctl-1)*var(fla[ctl.ix,k])) / (ntrt + nctl - 2)) covbal[k-3,] <- c(mean(fla[trt.ix,k]), mean(fla[ctl.ix,k]), (mean(fla[trt.ix,k]) - mean(fla[ctl.ix,k])) / sp) } covbal # # part (c) model2 <- lm(fla$bush04 ~ fla$etouch + fla$votePer96.rep + fla$votePer00.rep + fla$regPer00.rep + fla$turnout00) summary(model2) # # part (d) model3 <- lm(fla$bush04 ~ fla$etouch + fla$votePer96.rep + fla$votePer00.rep + fla$hisp00 + fla$white00 + fla$black00) summary(model3) model4 <- lm(fla$bush04 ~ fla$etouch + fla$votePer96.rep + fla$votePer00.rep + fla$hisp00 + fla$black00 + fla$lowEduc00 + fla$foreignBorn00) summary(model4) # Problem 2 # # part (a) index <- c(1:67) trt.ix <- index[fla$etouch == 1] ctl.ix <- index[fla$etouch == 0] ntrt <- length(trt.ix) nctl <- length(ctl.ix) # # write distance function - here computes absolute diff between # percent registered republican in the two cases distance <- function(x,y) {abs(x[,10]-y[,10])} # # compute distance matrix between each treatment unit and all controls dis <- matrix(0, ntrt, nctl) for (i in (1:ntrt)) { for (j in (1:nctl)) { dis[i,j] <- distance(fla[trt.ix[i],],fla[ctl.ix[j],]) }} # # find matches (note: to get matching without replacement I have # set distances to a very large number after using a control) matches <- matrix(0,length(trt),3) for (i in (1:length(trt))) { dist <- dis[i,] match.ix <- which(dist == min(dist)) matches[i,] <- c(trt.ix[i], ctl.ix[match.ix], min(dist)) dis[,match.ix] <- 999999 } matches # # part (b) covbal <- matrix(0, 13, 3) for (k in (4:16)) { sp <- sqrt(((ntrt-1)*var(fla[matches[,1],k]) + (ntrt-1)*var(fla[matches[,2],k])) / (ntrt + ntrt - 2)) covbal[k-3,] <- c(mean(fla[matches[,1],k]), mean(fla[matches[,2],k]), (mean(fla[matches[,1],k]) - mean(fla[matches[,2],k])) / sp) } covbal # # part (c) diffs1 <- fla$bush04[matches[,1]] - fla$bush04[matches[,2]] treat1 <- mean(diffs1) stderr1 <- sqrt(var(diffs1)/ntrt) c(treat1,stderr1) # # part (d) z <- matches[,3] < .01 diffs2 <- fla$bush04[matches[z,1]] - fla$bush04[matches[z,2]] treat2 <- mean(diffs2) stderr2 <- sqrt(var(diffs2)/sum(z)) c(treat2,stderr2, sum(z)) z <- matches[,3] < .005 diffs3 <- fla$bush04[matches[z,1]] - fla$bush04[matches[z,2]] treat3 <- mean(diffs3) stderr3 <- sqrt(var(diffs3)/sum(z)) c(treat3,stderr3, sum(z)) # # part (e) (define new distance function and then repeat above steps; only # modification required is to part (d) where I used 25 and 12 as cuts) vtrt <- var(fla[trt.ix,4:16]) vctl <- var(fla[ctl.ix,4:16]) v <- (ntrt*vtrt + nctl*vctl) / (ntrt + nctl) distance <- function(x,y) { zz <- x[,4:16]-y[,4:16] dim(zz) <- c(13,1) zz <- t(zz) t(zz)%*%solve(v)%*%zz }