> cps = read.csv("http://www.macalester.edu/~kaplan/ism/datasets/cps.csv") > attach(cps) > head(cps) wage educ race sex hispanic 1 9.0 10 W M NH 2 5.5 12 W M NH 3 3.8 12 W F NH 4 10.5 12 W F NH 5 15.0 12 W M NH 6 9.0 16 W F NH south married exper union age 1 NS Married 27 Not 43 2 NS Married 20 Not 38 3 NS Single 4 Not 22 4 NS Married 29 Not 47 5 NS Married 40 Union 58 6 NS Married 27 Not 49 sector 1 const 2 sales 3 sales 4 clerical 5 const 6 clerical > colors() [1] "white" [2] "aliceblue" ... [653] "yellow1" [654] "yellow2" [655] "yellow3" [656] "yellow4" [657] "yellowgreen" > ?points # Copy and paste code at bottom of help file > pchShow <- + function(extras = c("*",".", "o","O","0","+","-","|","%","#"), + cex = 3, ## good for both .Device=="postscript" and "x11" + col = "red3", bg = "gold", coltext = "brown", cextext = 1.2, + main = paste("plot symbols : points (... pch = *, cex =", + cex,")")) + { + nex <- length(extras) + np <- 26 + nex + ipch <- 0:(np-1) + k <- floor(sqrt(np)) + dd <- c(-1,1)/2 + rx <- dd + range(ix <- ipch %/% k) + ry <- dd + range(iy <- 3 + (k-1)- ipch %% k) + pch <- as.list(ipch) # list with integers & strings + if(nex > 0) pch[26+ 1:nex] <- as.list(extras) + plot(rx, ry, type="n", axes = FALSE, xlab = "", ylab = "", + main = main) + abline(v = ix, h = iy, col = "lightgray", lty = "dotted") + for(i in 1:np) { + pc <- pch[[i]] + ## 'col' symbols with a 'bg'-colored interior (where available) : + points(ix[i], iy[i], pch = pc, col = col, bg = bg, cex = cex) + if(cextext > 0) + text(ix[i] - 0.3, iy[i], pc, col = coltext, cex = cextext) + } + } > > pchShow() > > plot(c(0,1),c(0,1),type="n") > for(i in 1:6){ + abline(0+(i/10),1,lty=i) + } > legend(0.6,0.5,paste("lty = ",1:6),lty=1:6) > plot(wage~sex) > plot(wage~age) > plot(wage~age, type="n", xlab="Age (years)", ylab="Wage ($/hr)") > points(wage[sex=="F"]~age[sex=="F"], col="dodgerblue", pch=16) > points(wage[sex=="M"]~age[sex=="M"], col="green2", pch=15) > legend(locator(1),c("Female","Male"),col=c("dodgerblue","green2"), pch=c(16,15)) > mod1 = lm(wage~age, data=cps) > # Equivalent to 1+age (intercept always included by default) > summary(mod1) Call: lm(formula = wage ~ age, data = cps) Residuals: Min 1Q Median 3Q Max -8.425 -3.503 -1.106 2.276 36.704 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 6.16747 0.72280 8.533 < 2e-16 *** age 0.07755 0.01870 4.147 3.92e-05 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 5.063 on 532 degrees of freedom Multiple R-squared: 0.03132, Adjusted R-squared: 0.0295 F-statistic: 17.2 on 1 and 532 DF, p-value: 3.917e-05 > y = matrix(wage) > head(y) [,1] [1,] 9.0 [2,] 5.5 [3,] 3.8 [4,] 10.5 [5,] 15.0 [6,] 9.0 > X = cbind(rep(1,length(wage)),age) > head(X) age [1,] 1 43 [2,] 1 38 [3,] 1 22 [4,] 1 47 [5,] 1 58 [6,] 1 49 > t(X)%*%X age 534 19669 age 19669 797769 > solve(t(X)%*%X) age 0.0203829241 -5.025411e-04 age -0.0005025411 1.364365e-05 > solve(t(X)%*%X)%*%t(X)%*%y [,1] 6.16746829 age 0.07755463 > P = X%*%solve(t(X)%*%X)%*%t(X) > P [,1] [,2] [,3] [,4] [,5] [1,] 0.0023914969 0.001970818 6.246441e-04 2.728040e-03 0.0036535346 [2,] 0.0019708177 0.001891230 1.636548e-03 2.034488e-03 0.0022095815 [3,] 0.0006246441 0.001636548 4.874641e-03 -1.848791e-04 -0.0024110681 ... [187,] 0.001895778 6.473836e-04 0.002173198 0.0024506194 7.860940e-04 [,136] [,137] [,138] [,139] [,140] [1,] 3.485263e-03 3.316991e-03 7.929158e-04 1.297731e-03 0.0016342743 [2,] 2.177746e-03 2.145911e-03 1.668383e-03 1.763889e-03 0.0018275593 ... [187,] 0.0013409358 1.202225e-03 1.202225e-03 5.086731e-04 [ reached getOption("max.print") -- omitted 347 rows ] > dim(cps) [1] 534 11 > head(P%*%y) [,1] [1,] 9.502317 [2,] 9.114544 [3,] 7.873670 [4,] 9.812536 [5,] 10.665637 [6,] 9.967645 > head(y) [,1] [1,] 9.0 [2,] 5.5 [3,] 3.8 [4,] 10.5 [5,] 15.0 [6,] 9.0 > (diag(1,534,534)-P)%*%y [,1] [1,] -0.502317196 [2,] -3.614544067 [3,] -4.073670055 [4,] 0.687464301 [5,] 4.334363418 [6,] -0.967644951 ... [534,] 11.208556816 > t((diag(1,534,534)-P)%*%y)%*%((diag(1,534,534)-P)%*%y) [,1] [1,] 13635.85