# Built on the air quality data frame allyrsum. #Preliminary names(allyrsum) # What are my variables? allyrsum$locs<-category(allyrsum$airs) #Create locs variable to make it easier to extract site information. attach(allyrsum) # So that I can refer to variables directly. # How to deal with missing values? # We will deal with missing values in a simple way. Namely, we # will simulate a normal random variate for each missing value # The distribution has mean and variance are computed from # the middle 50% of the data at the given hour. Note if there # are more than one NA's per hour then the standard error # is computed from a "n" which is too large. oznm<-oz for (k in 1:12){ ndays<-max(allyrsum$d[m==k]) for (i in 1:ndays){ x<-allyrsum$oz[d==i & m==k] mm<-mean(x,trim=.25,na.rm=T) ss<-sqrt(cor(x,trim=.25,na.method="omit")/(length(x)-1)) for (j in 1:15){ if (is.na(x[j])==T) x[j]<-rnorm(1,mean=mm,sd=ss) } oznm[d==i & m==k]<-x } } #update the augtwo datafram and attach allyrsum$oznm<-oznm rm(oznm) attach(allyrsum) #Let's take a quick look at the data for the 15 sites siten<-seq(1,15) #c(7,14) par(mfrow=c(4,2)) #set up plot rm(x1,x2,y1,y2) x1<-c(1,32,61,92,122,153,183,214,245,275,306,336) x2<-c(1,367) y1<-x1*0 y2<-y1+200 c<-levels(locs[locs==1]) for (i in siten){ tsplot(seq(1,367)*0,seq(1,367)*0+200,lty=1,col=1) segments(x2,y1,x2,y2) segments(x1,y1,x1,y2) tslines(oz[locs==i],lty=1) tslines(oznm[locs==i],lty=2) title(main=c[i]) } # Let's take a look at one site, namely 2010059 (L on the map) par(mfrow=c(2,2)) x<-oznm[locs==9] tsplot(oznm[locs==9]) title("1997 Ambient Ozone, 1pm, Site 2010059") acf(x, type="correlation", plot=T) acf(x, type="partial", plot=T) spectrum(x, method="pgram", plot=T) x<-log(oznm[locs==9]+1) tsplot(x) title("1997 Ambient log(Ozone), 1pm, Site 2010059") acf(x, type="correlation", plot=T) acf(x, type="partial", plot=T) spectrum(x, method="pgram", plot=T) xx<-acm.ave(x,gm) tsplot(xx$smo,x) title("1997 Ambient smoothed(Ozone), 1pm, Site 2010059") acf(xx$smo, type="correlation", plot=T) acf(xx$smo, type="partial", plot=T) spectrum(xx$smo, method="pgram", plot=T)