# Index of the first record in `y` for each distinct zip code (column 3).
#
# Reads the table `y` from the enclosing environment (it is not passed in).
# Records with a missing zip code are treated as having the zip code of the
# first record.  Returns an integer vector `ii` such that y[ii,] contains the
# 1st instance of each zip code, in order of first appearance.
#
# NOTE(review): the name `zc` is rebound to the zip-code table by the
# read.table() call right below, so this function is only reachable before
# that line runs -- confirm whether it is still needed.
zc = function() {
  y3 = y[, 3]                      # zipcodes
  y3[is.na(y3)] = y[1, 3]
  ii = match(unique(y3), y3)       # position of the first occurrence of each
  ii
}

# Mean of `pr` for every calendar month of `years` (12 values per year,
# January of the first year first), using only records where `sel` is TRUE.
# `yr` and `mo` give the year and month of each element of `pr`.
# Months without any record yield NaN.
monthly.means = function(pr, yr, mo, sel = TRUE, years = 2003:2008) {
  out = rep(NA_real_, 12 * length(years))
  k = 0
  for (i in years) {
    for (j in 1:12) {
      k = k + 1
      # which() drops NA comparisons and never recycles the mask
      out[k] = mean(pr[which(yr == i & mo == j & sel)], na.rm = TRUE)
    }
  }
  out
}

# Zip-code table: column 1 = zip code, columns 2/3 = long/lat
# (394 unique zip codes according to the original note).
zc = read.table("zipcodes.txt", header = TRUE)
n.zc = nrow(zc)

# pointer from zipcode to zc row (0 = zip code not in the table)
zcp = rep(0, 100000)
zcp[zc[, 1]] = seq_len(n.zc)

# Drop records without a zip code.  A logical mask is used instead of
# x[-iomit, ]: with no missing zip codes `iomit` is empty and a negative
# empty index would select zero rows.
x.zc = x[, 3]
nn = length(x.zc)                  # number of records before filtering
keep = !is.na(x.zc)
iomit = which(!keep)
x.zc = x[keep, 3]
x.pr = log10(x[keep, 5])           # log10 of column 5 (price)
x.dt = x[keep, 10]                 # column 10 (date, year in chars 1-4, month in 6-7)

# price by zip code
pr.by.zc = rep(0, n.zc)            # mean log10 price per zip code
p2.by.zc = rep(0, n.zc)            # median log10 price per zip code
for (i in seq_len(n.zc)) {
  pr.i = x.pr[x.zc == zc[i, 1]]
  pr.by.zc[i] = mean(pr.i)
  p2.by.zc[i] = median(pr.i)
}
plot(p2.by.zc, pr.by.zc, pch = 16)

# mean price rescaled to [0, 1]; drives both symbol size and colour
pr.scaled = (pr.by.zc - min(pr.by.zc, na.rm = TRUE)) /
  diff(range(pr.by.zc, na.rm = TRUE))

# symbol size proportional to mean price
cex.pr = 0.5 + 1.5 * pr.scaled
plot(zc[, 2], zc[, 3], pch = 16, cex = cex.pr)
library("maps")
map("county", "california", add = TRUE)
plot(zc[, 2], zc[, 3], pch = 16, cex = cex.pr,
     xlim = c(-122.7, -122.2), ylim = c(37.5, 38.0))

# symbol colour (1..32 on the rainbow palette) by mean price
col.pr = trunc(1 + 31 * pr.scaled)
plot(zc[, 2], zc[, 3], pch = 16, col = rainbow(32)[col.pr])
map("county", "california", add = TRUE)
plot(zc[, 2], zc[, 3], pch = 16, col = rainbow(32)[col.pr],
     xlim = c(-122.7, -122.2), ylim = c(37.5, 38.0))

# price by date
x.yr = as.integer(substr(x.dt, 1, 4))
x.mo = as.integer(substr(x.dt, 6, 7))
hist(x.yr, seq(2002.5, 2008.5))
hist(x.mo, seq(.5, 12.5))

# yearly mean of log10 price, 2003..2008
pr.yr = rep(0, 6)
for (i in 2003:2008) {
  pr.yr[i - 2002] = mean(x.pr[x.yr == i])
}
plot(2003:2008, pr.yr, type = "b", pch = 16)
text(c(2003.5, 2006), log10(c(465000, 661000)), c("$465,000", "$661,000"))

# monthly mean of log10 price; yr.mo holds the 72 mid-month abscissae
yr.mo = seq(2003 + 1/24, 2009, 1/12)
pr.mo = monthly.means(x.pr, x.yr, x.mo)
plot(yr.mo, pr.mo, type = "b", pch = 16)
text(c(2008, 2006.8), log10(c(377000, 697000)), c("$377,000", "$697,000"))

# price by date and zip code
zc.list = c(94957, 94528, 93920, 94513, 94565, 94533, 94509)
plot(yr.mo, pr.mo, type = "b", pch = 16, ylim = c(5, 6.5))
text(c(2008, 2006.8), log10(c(377000, 697000)), c("$377,000", "$697,000"))
for (iz in seq_along(zc.list)) {
  z = zc.list[iz]
  # monthly series restricted to zip code z, overlaid on the overall curve
  pr.mo.z = monthly.means(x.pr, x.yr, x.mo, x.zc == z)
  lines(yr.mo, pr.mo.z)
  text(2002.5 + iz, 5.2, as.character(z))
}