> cps = read.csv("http://www.macalester.edu/~kaplan/ism/datasets/cps.csv") > attach(cps) > summary(cps) wage educ race Min. : 1.000 Min. : 2.00 NW: 67 1st Qu.: 5.250 1st Qu.:12.00 W :467 Median : 7.780 Median :12.00 Mean : 9.024 Mean :13.02 3rd Qu.:11.250 3rd Qu.:15.00 Max. :44.500 Max. :18.00 sex hispanic south married F:245 Hisp: 27 NS:378 Married:350 M:289 NH :507 S :156 Single :184 exper union age Min. : 0.00 Not :438 Min. :18.00 1st Qu.: 8.00 Union: 96 1st Qu.:28.00 Median :15.00 Median :35.00 Mean :17.82 Mean :36.83 3rd Qu.:26.00 3rd Qu.:44.00 Max. :55.00 Max. :64.00 sector prof :105 clerical: 97 service : 83 manuf : 68 other : 68 manag : 55 (Other) : 58 > mod2 = lm(wage~sex, data=cps) > summary(mod2) Call: lm(formula = wage ~ sex, data = cps) Residuals: Min 1Q Median 3Q Max -8.995 -3.529 -1.072 2.394 36.621 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 7.8789 0.3216 24.50 < 2e-16 *** sexM 2.1161 0.4372 4.84 1.7e-06 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 5.034 on 532 degrees of freedom Multiple R-squared: 0.04218, Adjusted R-squared: 0.04038 F-statistic: 23.43 on 1 and 532 DF, p-value: 1.703e-06 > head(sex) [1] M M F F M F Levels: F M > head(sex=="F") [1] FALSE FALSE TRUE TRUE FALSE TRUE > fem = as.numeric(sex=="F") > fem [1] 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 0 0 [33] 0 0 0 1 1 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1 [65] 0 1 0 0 1 1 0 1 0 0 0 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 0 0 1 0 0 [97] 1 0 1 1 0 1 0 1 1 1 1 0 0 1 0 1 1 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 [129] 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 [161] 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 0 1 1 1 0 0 0 0 1 1 0 1 1 0 0 0 1 [193] 1 1 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 [225] 1 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 1 1 1 1 0 1 1 0 1 1 1 0 1 [257] 0 0 1 0 1 1 0 0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 1 0 0 0 [289] 0 0 1 1 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 1 1 
0 1 1 1 1 0 1 0 [321] 0 0 1 0 0 0 1 0 1 1 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 1 1 1 [353] 1 0 0 1 0 0 0 1 1 1 1 0 0 0 0 1 1 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 [385] 0 0 0 0 1 0 0 1 0 0 1 1 0 1 0 1 1 0 0 1 0 0 1 0 0 1 1 0 0 1 1 0 [417] 0 1 1 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 0 0 1 [449] 0 0 0 1 1 0 0 0 1 0 1 0 1 0 0 1 0 0 0 1 0 0 0 1 1 1 1 1 1 1 0 1 [481] 1 0 1 0 0 1 1 1 0 1 1 0 0 0 0 0 1 0 1 1 0 0 1 0 0 0 0 1 1 1 0 0 [513] 1 0 1 1 0 0 1 1 1 0 0 1 0 1 0 1 1 1 1 0 0 0 > summary(lm(wage~fem)) Call: lm(formula = wage ~ fem) Residuals: Min 1Q Median 3Q Max -8.995 -3.529 -1.072 2.394 36.621 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 9.9949 0.2961 33.75 < 2e-16 *** fem -2.1161 0.4372 -4.84 1.7e-06 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 5.034 on 532 degrees of freedom Multiple R-squared: 0.04218, Adjusted R-squared: 0.04038 F-statistic: 23.43 on 1 and 532 DF, p-value: 1.703e-06 > plot(wage~as.numeric(sex=="M")) > abline(7.88,2.12) > summary(mod2) Call: lm(formula = wage ~ sex, data = cps) Residuals: Min 1Q Median 3Q Max -8.995 -3.529 -1.072 2.394 36.621 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 7.8789 0.3216 24.50 < 2e-16 *** sexM 2.1161 0.4372 4.84 1.7e-06 *** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 5.034 on 532 degrees of freedom Multiple R-squared: 0.04218, Adjusted R-squared: 0.04038 F-statistic: 23.43 on 1 and 532 DF, p-value: 1.703e-06 > attributes(mod2) $names [1] "coefficients" "residuals" "effects" "rank" [5] "fitted.values" "assign" "qr" "df.residual" [9] "contrasts" "xlevels" "call" "terms" [13] "model" $class [1] "lm" > mod2$coef (Intercept) sexM 7.878857 2.116056 > mod2$coefficients (Intercept) sexM 7.878857 2.116056 > head(mod2$resid) 1 2 3 4 5 6 -0.9949135 -4.4949135 -4.0788571 2.6211429 5.0050865 1.1211429 > head(mod2$fitted) 1 2 3 4 5 6 9.994913 9.994913 7.878857 7.878857 9.994913 7.878857 > sse = sum(mod2$resid^2) > sse [1] 13482.98 > mod2$df [1] 532 > dim(cps) [1] 534 11 > sse/mod2$df [1] 25.34396 > sqrt(sse/mod2$df) [1] 5.034278 > summary(mod2) Call: lm(formula = wage ~ sex, data = cps) Residuals: Min 1Q Median 3Q Max -8.995 -3.529 -1.072 2.394 36.621 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 7.8789 0.3216 24.50 < 2e-16 *** sexM 2.1161 0.4372 4.84 1.7e-06 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 5.034 on 532 degrees of freedom Multiple R-squared: 0.04218, Adjusted R-squared: 0.04038 F-statistic: 23.43 on 1 and 532 DF, p-value: 1.703e-06 > confint(mod2) 2.5 % 97.5 % (Intercept) 7.247040 8.510674 sexM 1.257215 2.974898 > t = qt(.975,532) > t [1] 1.964433 > 2.116+c(-1,1)*t*.4372 [1] 1.25715 2.97485 > confint(mod2,level=.90) 5 % 95 % (Intercept) 7.348903 8.408811 sexM 1.395679 2.836434 > # Analysis of Covariance: wage ~ sex + age (additive), then wage ~ sex*age (with interaction) > mod3 = lm(wage~sex+age, data=cps) > summary(mod3) Call: lm(formula = wage ~ sex + age, data = cps) Residuals: Min 1Q Median 3Q Max -9.508 -3.427 -1.001 2.313 38.056 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 4.65425 0.76105 6.116 1.87e-09 *** sexM 2.27469 0.43029 5.286 1.82e-07 *** age 0.08522 0.01830 4.656 4.08e-06 *** --- Signif. 
codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 4.939 on 531 degrees of freedom Multiple R-squared: 0.07975, Adjusted R-squared: 0.07628 F-statistic: 23.01 on 2 and 531 DF, p-value: 2.612e-10 > > mod4 = lm(wage~sex*age, data=cps) > # The formula sex*age is shorthand for 1 + sex + age + sex:age > summary(mod4) Call: lm(formula = wage ~ sex * age, data = cps) Residuals: Min 1Q Median 3Q Max -10.034 -3.230 -1.104 2.272 37.229 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) 6.51298 1.04021 6.261 7.9e-10 *** sexM -1.23108 1.41216 -0.872 0.38373 age 0.03610 0.02621 1.377 0.16901 sexM:age 0.09490 0.03643 2.605 0.00944 ** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 4.913 on 530 degrees of freedom Multiple R-squared: 0.09138, Adjusted R-squared: 0.08624 F-statistic: 17.77 on 3 and 530 DF, p-value: 5.294e-11 >