# October 12, 2005
# Solutions for HW #5

# Sum of squares of a vector.
#
# y    - numeric vector
# mean - if TRUE, return the corrected (centered) sum of squares
#        sum((y - ybar)^2); if FALSE (default), the raw sum(y^2).
# NOTE: the argument name `mean` shadows base::mean(), but R resolves
# mean(y) in a call position to the function, so this is safe.
ss <- function(y, mean = FALSE) {
  if (mean) {
    sum((y - mean(y))^2)
  } else {
    sum(y^2)
  }
}

##### Problem 7.7 ----

### (a) Extra sums of squares via sequential model fits
cp <- read.table("CH06PR18.txt")               # (y,x1,x2,x3,x4) n=81

ssto <- ss(cp[, 1], TRUE)                      # 236.5575 (corrected total SS)

r4 <- lsfit(cp[, 5], cp[, 1])                  # regress y on x4
sse4 <- ss(r4$res)                             # 168.7824
ssr4 <- ssto - sse4                            # 67.7751

r14 <- lsfit(cp[, c(2, 5)], cp[, 1])           # regress y on x1,x4
ssr14 <- ssto - ss(r14$res)                    # 110.0497
# ssr(1,4) = ssr(4) + ssr(1|4)
ssr1.4 <- ssr14 - ssr4                         # 42.27457

r124 <- lsfit(cp[, c(2, 3, 5)], cp[, 1])       # regress y on x1,x2,x4
ssr124 <- ssto - ss(r124$res)                  # 137.9072
# ssr(1,2,4) = ssr(4) + ssr(1|4) + ssr(2|1,4)
ssr2.14 <- ssr124 - ssr4 - ssr1.4              # 27.85749

r1234 <- lsfit(cp[, c(2, 3, 4, 5)], cp[, 1])   # regress y on x1,x2,x3,x4
ssr1234 <- ssto - ss(r1234$res)                # 138.3269
# ssr(1,2,3,4) = ssr(4) + ssr(1|4) + ssr(2|1,4) + ssr(3|1,2,4)
ssr3.124 <- ssr1234 - ssr4 - ssr1.4 - ssr2.14  # 0.4197463

### (b) Test beta3 = 0 vs != 0, formulas 7.23-7.24
sse1234 <- ss(r1234$res)                       # 98.2306 (p=5 including intercept)
f <- (ssr3.124 / 1) / (sse1234 / (81 - 5))     # 0.3247534 ~ F(1,76)
crit <- qf(.99, 1, 76)                         # 6.980578 ==> FAIL TO REJECT
pval <- 1 - pf(0.3247534, 1, 76)               # 0.5704457

##### Problem 7.8 ----
# In full model test b2 = b3 = 0
# ssr(1,2,3,4) = ssr(4) + ssr(1|4) + ssr(2,3|1,4)
ssr23.14 <- ssr1234 - ssr4 - ssr1.4            # 28.27724
f <- (ssr23.14 / 2) / (sse1234 / (81 - 5))     # 10.93890 ~ F(2,76)
crit <- qf(.99, 2, 76)                         # 4.89584 ==> REJECT
pval <- 1 - pf(10.93890, 2, 76)                # 6.682161e-05

##### Problem 7.15 ----

# r2(y4) = ssr4 / ssto
r2.y4 <- ssr4 / ssto                           # 0.2865058

# r2(y1) = ssr1 / ssto
r1 <- lsfit(cp[, 2], cp[, 1])                  # regress y on x1
ssr1 <- ssto - ss(r1$res)                      # 14.81852
r2.y1 <- ssr1 / ssto                           # 0.06264236

# r2(y1|4) = ssr(1|4) / sse4
r2.y1.4 <- ssr1.4 / sse4                       # 0.2504679

# r2(14) means r2(y14) ???
sse14 <- ss(r14$res)                           # 126.5078
r2.y14 <- ssr14 / ssto                         # 0.4652132

# r2(y2|14) = ssr(2|14) / sse14
r2.y2.14 <- ssr2.14 / sse14                    # 0.2202037

# r2(y3|124) = ssr(3|124) / sse124
sse124 <- ss(r124$res)                         # 98.65034
r2.y3.124 <- ssr3.124 / sse124                 # 0.004254889

# r2 (full model) = ssr1234 / ssto
r2 <- ssr1234 / ssto                           # 0.5847496

# Marginal linear association between Y and X1
# increases when adjusted for X4 (from a little to a bit)

##### Problem 7.22 ----
# x1,x2,x3 all significant by themselves
# however, only 2 of x1-x7 significant
#
# so fewer predictors (say x2 and x6) are
# giving better predictions than x1-x3
#
# is there an anomaly? no, x6 is very predictive

##### Problem 7.23 ----
# 2 predictor variables are highly correlated
#
# difficult to assess individual predictor effects
#
# the R^2 is very high
#
# unimportant to figure out individual predictor effects
#
# it is correct to say that it is "not possible" to
# figure out individual predictor effects.... it depends
# on the situation if it is also "unimportant"

##### Problem 7.38 ----
senic <- read.table("APPENC01.txt")            # 113 by 12
y <- as.numeric(senic[, 2])                    # average length of stay
X <- as.matrix(senic[, c(3, 4, 5, 10, 11, 12)])  # six predictor variables
dimnames(X) <- list(NULL,
                    c("age", "infect", "culture",
                      "census", "nurses", "facilities"))

### (a) Coefficients of partial determination given x1, x2 in the model
# r2.y3.12 = ssr(3|12)/sse(12) = ( sse(12)-sse(123) ) / sse(12)
sse12 <- ss(lsfit(X[, c(1, 2)], y)$res)        # 278.2504
sse123 <- ss(lsfit(X[, c(1, 2, 3)], y)$res)    # 275.0024
r2.y3.12 <- (sse12 - sse123) / sse12           # 0.01167293

sse124 <- ss(lsfit(X[, c(1, 2, 4)], y)$res)    # 240.3518
r2.y4.12 <- (sse12 - sse124) / sse12           # 0.1362033

sse125 <- ss(lsfit(X[, c(1, 2, 5)], y)$res)    # 267.8532
r2.y5.12 <- (sse12 - sse125) / sse12           # 0.03736635

sse126 <- ss(lsfit(X[, c(1, 2, 6)], y)$res)    # 268.1252
r2.y6.12 <- (sse12 - sse126) / sse12           # 0.03638879

### (b)
# X4 (average census) adds the most predictive power
# Yes, it adds the most SS

### (c)
# full model has intercept, x1, x2, and x4
# reduced model has beta4 = 0
# Partial F test for beta4 = 0 in the model with x1, x2, x4
ssto <- ss(y, TRUE)                            # 409.2104
n <- 113                                       # sample size
sse12 <- ss(lsfit(X[, c(1, 2)], y)$res)        # 278.2504
sse124 <- ss(lsfit(X[, c(1, 2, 4)], y)$res)    # 240.3518
p <- 4                                         # parameters in full model (incl. intercept)
ssr4.12 <- sse12 - sse124                      # 37.89864
f <- (ssr4.12 / 1) / (sse124 / (n - p))        # 17.18710 ~ F(1,109)
crit <- qf(.95, 1, 109)                        # 3.928195 ==> REJECT NULL
pval <- 1 - pf(17.18710, 1, 109)               # 6.722486e-05
# The other 3 variables (x3,x5,x6) would not have
# such large F-statistics....could be significant though