# September 6, 2005 Homework 2 ##### ------------ get data and set labels ------------- wheat <- scan("wheat.potato",n=48) attr(wheat,"label") <- "wheat yield" potato <-scan("wheat.potato",n=96)[49:96] attr(potato,"label") <- "potato yield" miles <- scan("miles.emissions",n=11) attr(miles,"label") <- "1000s miles" emissions <- scan("miles.emissions",n=22)[12:22] attr(emissions,"label") <- "hydrocarbon emissions" ##### ------------- define computing and plotting functions -------- myreg <- function(x,y) { if(length(x)!=length(y)) stop("lengths do not match") b1 <- sum( (x-mean(x))*(y-mean(y)) ) / sum( (x-mean(x))^2 ) b0 <- mean(y) - b1 * mean(x) yhat <- b0 + b1*x; attr(yhat,"label") <- "predictions" ehat <- y - yhat; attr(ehat,"label") <- "residuals" mse <- sum(ehat^2)/(length(x)-2) list(b=c(b0,b1),x=x,y=y,yhat=yhat,ehat=ehat,mse=mse) } regplot <- function( ans, ciopt=F, ypadd=0.07 ) { yab <- range(ans\$y) + c(-ypadd,ypadd) * diff(range(ans\$y)) plot(ans\$x,ans\$y,ylim=yab,pch="o", xlab=attr(ans\$x,"label"),ylab=attr(ans\$y,"label"), main=paste("Regression of",attr(ans\$y,"label"), "on",attr(ans\$x,"label"))) abline(ans\$b) if(ciopt) { n <- length(ans\$x); xb <- mean(ans\$x) xab <- seq(min(ans\$x),max(ans\$x),l=101) # plotting range tcrit <- qt(.975,n-2) # critical t value # confidence interval for conditional mean stdyhat <- sqrt( ans\$mse * ( 1/n + (xab-xb)^2 / sum((ans\$x-xb)^2) ) ) yhatab <- ans\$b[1] + ans\$b[2] * xab lines( xab, yhatab + tcrit * stdyhat, lty=2 ) lines( xab, yhatab - tcrit * stdyhat, lty=2 ) # confidence interval for conditional population stdynew <- sqrt( ans\$mse * ( 1 + 1/n + (xab-xb)^2 / sum((ans\$x-xb)^2) ) ) lines( xab, yhatab + tcrit * stdynew, lty=3 ) lines( xab, yhatab - tcrit * stdynew, lty=3 ) } ## browser() } par(mfrow=c(2,2)) ans1 <- myreg(wheat,potato) regplot(ans1, ypadd=0.20) regplot(ans1,T,0.20) regplot(myreg( wheat,ans1\$ehat)) regplot(myreg(ans1\$yhat,ans1\$ehat)) scan(n=1) ans2 <- myreg(miles,emissions) regplot(ans2, ypadd=0.50) regplot(ans2,T,0.50) regplot(myreg( miles,ans2\$ehat)) regplot(myreg(ans2\$yhat,ans2\$ehat))