# Problem 1 # # assume the data is in a matrix named "fla" # # part (a) round(cor(fla[,2:16]),2)[,1:2] # # part (b) # # first propensity score model uses variable with highest corr with etouch prop1 <- glm(fla$etouch ~ fla$regPer00.rep, family=binomial) summary(prop1) plot(fla$etouch, prop1$linear.predictor) # # loop below this tries to add each variable to current model out <- rep(0,13) for (i in (1:13)) { prop <- glm(fla$etouch ~ fla[,10] + fla[,i+3], family=binomial) out[i] <- prop$deviance } out # # that yields propensity score model 2 (prop2) prop2 <- glm(fla$etouch ~ fla$regPer00.rep + fla$votePer96.dem, family=binomial) summary(prop2) plot(fla$etouch, prop2$linear.predictor) # # additional work yields propensity score model 3 (prop3) prop3 <- glm(fla$etouch ~ fla$regPer00.rep + fla$votePer96.dem + fla$votePer00.rep, family=binomial) summary(prop3) plot(fla$etouch, prop3$linear.predictor) # # a bit of R gamesmanship to plot all 3 on the same graph index <- c(1:67) trt.ix <- index[fla$etouch == 1] ctl.ix <- index[fla$etouch == 0] plot(fla$etouch, prop3$linear.predictor) points(fla$etouch[ctl.ix] + 0.05, prop2$linear.predictor[ctl.ix]) points(fla$etouch[trt.ix] - 0.05, prop2$linear.predictor[trt.ix]) points(fla$etouch[ctl.ix] + 0.10, prop1$linear.predictor[ctl.ix]) points(fla$etouch[trt.ix] - 0.10, prop1$linear.predictor[trt.ix]) # Problem 2 # # part (a)(i) index <- c(1:67) trt.ix <- index[fla$etouch == 1] ctl.ix <- index[fla$etouch == 0] ntrt <- length(trt.ix) nctl <- length(ctl.ix) # # compute distance between each treatment unit and all controls # where distance is gap between propensity score linear predictors # NOTE: can run the rest of (a) code for different propensity score models prop <- prop3 dis <- matrix(0, ntrt, nctl) for (i in (1:ntrt)) { for (j in (1:nctl)) { dis[i,j] <- abs(prop$linear.predictor[trt.ix[i]] - prop$linear.predictor[ctl.ix[j]]) }} # # find matches (note: to get matching without replacement I have # set distances to a very large number after using a control) matches <- matrix(0,length(trt),3) a <- 16 - rank(prop$linear.predictor[trt.ix]) for (i in (1:length(trt))) { im <- which(a == i) dist <- dis[im,] match.ix <- which(dist == min(dist)) matches[i,] <- c(trt.ix[im], ctl.ix[match.ix], min(dist)) dis[,match.ix] <- 999999 } matches # # part (a)(ii) covbal <- matrix(0, 13, 3) for (k in (4:16)) { sp <- sqrt(((ntrt-1)*var(fla[matches[,1],k]) + (ntrt-1)*var(fla[matches[,2],k])) / (ntrt + ntrt - 2)) covbal[k-3,] <- c(mean(fla[matches[,1],k]), mean(fla[matches[,2],k]), (mean(fla[matches[,1],k]) - mean(fla[matches[,2],k])) / sp) } covbal # # part (a)(iii) diffs1 <- fla$bush04[matches[,1]] - fla$bush04[matches[,2]] treat1 <- mean(diffs1) stderr1 <- sqrt(var(diffs1)/ntrt) c(treat1,stderr1) z <- matches[,3] < 4 diffs2 <- fla$bush04[matches[z,1]] - fla$bush04[matches[z,2]] treat2 <- mean(diffs2) stderr2 <- sqrt(var(diffs2)/sum(z)) c(treat2,stderr2, sum(z)) # # part (b)(i) cut1 <- mean(sort(prop$linear.predictor[trt.ix])[10:11]) cut2 <- mean(sort(prop$linear.predictor[trt.ix])[5:6]) cut3 <- sort(prop$linear.predictor[trt.ix])[1] - 0.25 t1 <- fla$etouch == 1 & prop$linear.predictor > cut1 t2 <- fla$etouch == 1 & prop$linear.predictor <= cut1 & prop$linear.predictor > cut2 t3 <- fla$etouch == 1 & prop$linear.predictor <= cut2 & prop$linear.predictor > cut3 c1 <- fla$etouch == 0 & prop$linear.predictor > cut1 c2 <- fla$etouch == 0 & prop$linear.predictor <= cut1 & prop$linear.predictor > cut2 c3 <- fla$etouch == 0 & prop$linear.predictor <= cut2 & prop$linear.predictor > cut3 # # part (b)(ii) subclass <- matrix(0,3,8) subclass[1,] <- c(sum(t1),mean(prop$linear.predictor[t1]),mean(fla$bush04[t1]),sqrt(var(fla$bush04[t1])),sum(c1),mean(prop$linear.predictor[c1]),mean(fla$bush04[c1]),sqrt(var(fla$bush04[c1]))) subclass[2,] <- c(sum(t2),mean(prop$linear.predictor[t2]),mean(fla$bush04[t2]),sqrt(var(fla$bush04[t2])),sum(c2),mean(prop$linear.predictor[c2]),mean(fla$bush04[c2]),sqrt(var(fla$bush04[c2]))) subclass[3,] <- c(sum(t3),mean(prop$linear.predictor[t3]),mean(fla$bush04[t3]),sqrt(var(fla$bush04[t3])),sum(c3),mean(prop$linear.predictor[c3]),mean(fla$bush04[c3]),sqrt(var(fla$bush04[c3]))) effect <- subclass[,3] - subclass[,7] se <- sqrt(subclass[,4]^2/subclass[,1] + subclass[,8]^2/subclass[,5]) # # part (b)(iii) estim <- mean(effect) stderr <- sqrt(sum(se^2)/9) # # output cbind(subclass,effect,se) c(estim,stderr)