# R code to analyze the math proficiency data using robust regression.
#
# The data set consists of the response "mathprof" for 40 states/provinces,
# along with several possible predictor variables.

# Packages for the robust fits: MASS provides rlm(), quantreg provides rq().
library(MASS)
library(quantreg)

# Read the data ----

# The file is read directly from the URL below. To work from a local copy,
# save the data file into a directory and pass its full path name as `file`.
# The file has no header row, so the column names are supplied here.
mathprof.data <- read.table(
  file = "http://people.stat.sc.edu/Hitchcock/mathproficiencydata.txt",
  header = FALSE,
  col.names = c(
    "state", "mathprof", "parents", "homelib",
    "reading", "tvwatch", "absences"
  )
)

# NOTE: the data frame is deliberately not attach()ed. Every model below
# receives it through the `data` argument, so the formulas cannot pick up
# same-named objects from the workspace by accident.

# Full model: all five predictors ----

# The ordinary least-squares fit:
mathprof.reg <- lm(
  mathprof ~ parents + homelib + reading + tvwatch + absences,
  data = mathprof.data
)
summary(mathprof.reg)

# Influence measures for this regression:
influence.measures(mathprof.reg)

# Normal Q-Q plot of residuals shows a couple of possible outliers:
qqnorm(resid(mathprof.reg))

# Robust regression alternatives ----

# Least Absolute Residuals (LAR) regression:
mathprof.lar <- rq(
  mathprof ~ parents + homelib + reading + tvwatch + absences,
  data = mathprof.data
)
summary(mathprof.lar)

# Huber's method:
mathprof.huber <- rlm(
  mathprof ~ parents + homelib + reading + tvwatch + absences,
  data = mathprof.data
)
summary(mathprof.huber)

# Reduced model: homelib, reading, tvwatch ----

# The book suggests a simpler model with only (homelib, reading, tvwatch)
# as predictors. The objects mathprof.reg and mathprof.huber are reused
# below, so from here on they hold the reduced fits, not the full ones.

# Least-squares regression with these 3 predictors:
mathprof.reg <- lm(
  mathprof ~ homelib + reading + tvwatch,
  data = mathprof.data
)
summary(mathprof.reg)

# Huber's method robust regression with these 3 predictors:
mathprof.huber <- rlm(
  mathprof ~ homelib + reading + tvwatch,
  data = mathprof.data
)
summary(mathprof.huber)

# Compare these fitted equations with the ones on pg. 447.