################################################################################
#### By Jimin Ding, 01/21/2017
## Sir Francis Galton (1822-1911) studied a data set of 1078 heights of
## fathers and sons.
## Let's load, summarize, and visualize the data.
##
## This script is meant to be stepped through interactively: it opens help
## pages, prints summaries to the console, and draws several plots.

######################## Step 1. Load Data ################################

# read.table() does not treat the first row as a header by default, so the
# columns get generic names; descriptive names are assigned right after.
father.son <- read.table(
  "http://www.math.wustl.edu/~jmding/math3200/pearson.dat"
)
names(father.son) <- c("fheight", "sheight")
head(father.son)
?read.table

## Other ways to find the data
# Install UsingR only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("UsingR", quietly = TRUE)) {
  install.packages("UsingR")
}
library(UsingR)
# NOTE: this replaces the `father.son` object read from the URL above with
# the copy shipped in UsingR.
data(father.son)

################ Step 2. Summarize the heights of Father and Son ##############

# Measures of center.
mean(father.son$fheight)
mean(father.son$sheight)
median(father.son$fheight)
median(father.son$sheight)

# Extremes and quantiles of the fathers' heights.
min(father.son$fheight)
max(father.son$fheight)
range(father.son$fheight)
quantile(father.son$fheight)
# Deciles: `probs` is spelled out in full rather than relying on partial
# argument matching (`prob`).
quantile(father.son$fheight, probs = seq(from = 0, to = 1, by = 0.1))

# Measures of spread.
var(father.son$fheight)
sd(father.son$fheight)

# Distribution plots.
hist(father.son$fheight)
hist(father.son$sheight, xlab = "Son's Height")
boxplot(father.son)
?boxplot

# Normal Q-Q plot of the fathers' heights with a reference line.
qqnorm(father.son$fheight)
qqline(father.son$fheight)

################ Step 3. Relationship Between the heights #####################

# Basic scatter plot of the two columns.
plot(
  father.son,
  xlab = "Father's Height (inches)",
  ylab = "Son's Height (inches)",
  main = "Heights of Fathers and Their Full Grown Sons"
)

# Simple linear regression of son's height on father's height.
regfit <- lm(sheight ~ fheight, data = father.son)

# Scatter plot again, this time with two reference lines overlaid.
plot(
  sheight ~ fheight,
  data = father.son,
  bty = "l",
  pch = 20,
  xlab = "Father's Height (inches)",
  ylab = "Son's Height (inches)",
  main = "Heights of Fathers and Their Full Grown Sons"
)
# Dashed identity line (intercept 0, slope 1): son's height = father's height.
abline(a = 0, b = 1, lty = 2, lwd = 2)
# Solid red line: the fitted regression line.
abline(regfit, lty = 1, lwd = 2, col = "red")

# Covariance and correlation matrices of the two height columns.
cov(father.son)
cor(father.son)

# Residual diagnostics: autocorrelation of the residuals (in row order) and
# a plot of the residuals against their index.
acfvalues <- acf(regfit$residuals)
plot(regfit$residuals)