# October 5, 2005 # Look at U.S. Airline Data yr <- seq(1949+1/24,by=1/12,length=96) airline <- scan("airline.dat") myplt <- function(x,y,lines=F,xl=c(1949,1957)) { plot(x,y,pch=16,ylab="Intl Air Traffice") if(lines) lines(x,y,lwd=2) } # fit polynomial model airtrvl.2 <- function() { # if(length(dev.list())==1) { my.ps.color() } x <- (yr-1949)/12; xx <- seq(0, 2/3,l=351) X <- rep(1,96); XX <- rep(1,length(xx)) Y <- log10(airline) par(mfrow=c(2,1)); myplt(yr,Y); myplt(yr,Y,T); browser() for(k in 1:5) { X <- cbind(X,x^k); XX <- cbind(XX,xx^k) ab <- lsfit(X,Y,int=F)$coef; yhh <- X %*% ab; yh <- XX %*% ab myplt(yr,Y,T); lines(12*xx+1949,yh,lwd=2,col=3) title(paste("polynomial order ",k)) ei <- Y - yhh; myplt(yr,ei,T) print(" use ps.copy and mv to poly1.eps ") browser() } } # airtrvl.2() airtrvl.3 <- function() { # if(length(dev.list())==1) { my.ps.color() } x <- (yr-1949)/12; xx <- seq(0, 2/3,l=351) X <- rep(1,96); XX <- rep(1,length(xx)) Y <- log10(airline) par(mfrow=c(2,1)); myplt(yr,Y); myplt(yr,Y,T); browser() for(k in 1:1) { X <- cbind(X,x^k); XX <- cbind(XX,xx^k) ab <- lsfit(X,Y,int=F)$coef; yhh <- X %*% ab; yh <- XX %*% ab myplt(yr,Y,T); lines(12*xx+1949,yh,lwd=2,col=3) title(paste("polynomial order ",k)) ei <- Y - yhh; myplt(yr,ei,T) print(" use ps.copy and mv to poly1.eps ") browser() } # ei are residuals to fit with b2 sin( par(mfrow=c(1,1)) myplt(x,ei,F); lines(xx, 0.1*sin(12*xx*2*pi + 2*pi*0/100),lwd=2,col=2); browser() for(k in 1:100) { lines(xx, 0.1*sin(12*xx*2*pi + 2*pi*(k-1)/100),lwd=2,col="white") points(x,ei,pch=16) lines(xx, 0.1*sin(12*xx*2*pi + 2*pi*k/100),lwd=2,col=2); runif(50000) } browser() par(mfrow=c(1,1)); sse <- rep(0,100) myplt(x,ei,F); xs <- sin(12*x*2*pi + 2*pi*0/100); ab <- lsfit(xs,ei)$coef lines(xx, ab[1]+ ab[2]*sin(12*xx*2*pi + 2*pi*0/100),lwd=2,col=2) for(k in 1:100) { lines(xx, ab[1]+ ab[2]*sin(12*xx*2*pi + 2*pi*(k-1)/100),lwd=2,col="white") points(x,ei,pch=16) xs <- sin(12*x*2*pi + 2*pi*k/100) tmp <- lsfit(xs,ei); ab <- tmp$coef; sse[k] <- sum(tmp$res^2) lines(xx, ab[1]+ ab[2]*sin(12*xx*2*pi + 2*pi*k/100),lwd=2,col=2); runif(50000) } browser(); plot(1:100,sse,type="l",lwd=2,xlab="phase",main="Best phase angle") browser(); kk <- seq(100)[ sse==min(sse) ][1] xs <- sin(12*x*2*pi + 2*pi*kk/100); ab <- lsfit(xs,ei)$coef yh <- ab[1]+ ab[2]*sin(12*xx*2*pi + 2*pi*0/100) ab0 <- lsfit(x,Y)$coef yy <- ab0[1] + ab0[2]*xx + yh plot(x,Y,pch=16); lines(xx,yy,lwd=2,col=2); browser() # indicator variables XXX <- x # normalized year ttt <- rep(0, 200); ttt[ seq(200)%%12 ==1 ] <- 1 XXX <- cbind(XXX,ttt[1:96]) for(k in 2:11) XXX <- cbind(XXX, c( rep(0,k-1),ttt[1:(96-k+1)])) ab <- lsfit(XXX,Y) plot(ab$coef[c(-1,-2)],type="l",main="monthly coefficients",xlim=c(1,12)) points(12,0,pch="o") browser() yhat <- ab$coef[1] + XXX %*% ab$coef[-1] plot(x,Y,type="l",lwd=2) lines(x,yhat,lwd=2,col=2) } airtrvl.3()