# R code to calculate influence / outlier detection
# We use the body fat data example.
#
# The script reads the body fat data (columns: triceps, thigh, midarm,
# bodyfat), fits a two-predictor and a three-predictor linear model,
# draws residual and added-variable plots, and prints outlier and
# influence diagnostics for the three-predictor model.

# Reading the data.
# If the data file has been saved locally, that copy is used (edit the
# path to match your machine); otherwise the file is read off the web.
# (Reading from a URL may not work in some implementations of R.)
bodyfat.data <- local({
  local.path <- "y:/My Documents/teaching/stat_701/bodyfatdata.txt"
  web.url <- "http://www.stat.sc.edu/~hitchcock/stat701/bodyfatdata.txt"
  # read.table() accepts either a file path or a complete URL.
  data.source <- if (file.exists(local.path)) local.path else web.url
  read.table(
    file = data.source,
    header = FALSE,
    col.names = c("triceps", "thigh", "midarm", "bodyfat")
  )
})

# attaching the data frame:
# NOTE(review): kept only in case code outside this section refers to the
# columns by bare name. Nothing below depends on it: the fits use
# `data =` and the plots use with(), so a global variable that happens to
# share a column name cannot mask the data.
attach(bodyfat.data)

# fitting the regression model with just two predictors:
bodyfat.reg.1 <- lm(bodyfat ~ triceps + thigh, data = bodyfat.data)

# plotting the residuals against the predictor "triceps":
with(bodyfat.data, plot(triceps, resid(bodyfat.reg.1)))

# plotting the residuals against the predictor "thigh":
with(bodyfat.data, plot(thigh, resid(bodyfat.reg.1)))

# Added-variable plots (partial regression plots)
# for each of those two predictors:

# For "triceps": residuals of bodyfat on thigh (vertical axis) against
# residuals of triceps on thigh (horizontal axis).
with(
  bodyfat.data,
  plot(resid(lm(triceps ~ thigh)), resid(lm(bodyfat ~ thigh)),
       main = "Partial regression plot for triceps")
)

# For "thigh": residuals of bodyfat on triceps (vertical axis) against
# residuals of thigh on triceps (horizontal axis).
with(
  bodyfat.data,
  plot(resid(lm(thigh ~ triceps)), resid(lm(bodyfat ~ triceps)),
       main = "Partial regression plot for thigh")
)

##################################################
#
# OUTLIER AND INFLUENCE DIAGNOSTICS
#
##################################################

# fitting the regression model with all three predictors:
bodyfat.reg <- lm(bodyfat ~ triceps + thigh + midarm, data = bodyfat.data)

# getting the summary regression output:
summary(bodyfat.reg)

# getting the ANOVA table:
anova(bodyfat.reg)

# rstandard gives the INTERNALLY studentized residuals
# (what SAS calls "Student Residual")
rstandard(bodyfat.reg)

# rstudent gives the EXTERNALLY studentized residuals
# (what SAS calls "RStudent")
rstudent(bodyfat.reg)

# getting the measures of influence:
# Gives DFFITS, Cook's Distance, Hat diagonal elements, and some others.
influence.measures(bodyfat.reg)