# R code to analyze the body fat data # using multiple linear regression # reading the file off the web: # (This may not work in some implementations of R) bodyfat.data <- read.table(file = url("http:/www.stat.sc.edu/~hitchcock/stat701/bodyfatdata.txt"), header=FALSE, col.names = c('triceps', 'thigh', 'midarm', 'bodyfat')) # Or, save the data file into a directory and # use the full path name: bodyfat.data <- read.table(file = "y:/My Documents/teaching/stat_701/bodyfatdata.txt", header=FALSE, col.names = c('triceps', 'thigh', 'midarm', 'bodyfat')) # attaching the data frame: attach(bodyfat.data) # fitting the regression model: bodyfat.reg <- lm(bodyfat ~ triceps + thigh + midarm) # getting the summary regression output: summary(bodyfat.reg) # getting the ANOVA table: anova(bodyfat.reg) # To test whether beta_2=beta_3=0, given that X1 is in the model # must use SS's for thigh and midarm # (note these must have been listed LAST in the lm statement) # According to the formula: F.stat <- ((33.17 + 11.55)/2)/6.15 F.stat # Getting the P-value (with the appropriate d.f. = (2,16)) pf(F.stat, 2, 16, lower=F) ################## Investigating multicollinearity ######### # Noting correlation among predictors: cor(cbind(triceps, thigh, midarm)) # To get VIF's, first copy this function (by Bill Venables) into R: #################################### ## # vif <- function(object, ...) UseMethod("vif") vif.default <- function(object, ...) stop("No default method for vif. Sorry.") vif.lm <- function(object, ...) { V <- summary(object)$cov.unscaled Vi <- crossprod(model.matrix(object)) nam <- names(coef(object)) if(k <- match("(Intercept)", nam, nomatch = F)) { v1 <- diag(V)[-k] v2 <- (diag(Vi)[-k] - Vi[k, -k]^2/Vi[k,k]) nam <- nam[-k] } else { v1 <- diag(V) v2 <- diag(Vi) warning("No intercept term detected. Results may surprise.") } structure(v1*v2, names = nam) } # ## ##################################### # Then use it as follows: vif(bodyfat.reg) ############## Interaction model? ##################### # Centering the predictor variables: ctriceps = triceps - mean(triceps) cthigh = thigh - mean(thigh) cmidarm = midarm - mean(midarm) /* Now we fit the model with all pairwise interactions */ bodyfat.reg.int <- lm(bodyfat ~ ctriceps + cthigh + cmidarm + ctriceps:cthigh + ctriceps:cmidarm + cthigh:cmidarm) # getting the summary regression output: summary(bodyfat.reg.int) # getting the ANOVA table: anova(bodyfat.reg.int) # To test whether beta_4=beta_5=beta_6=0, given that X1,X2,X3 are in the model # must use SS's for the interaction terms # (note these interactions must have been listed LAST in the lm statement) # According to the formula: F.stat <- ((1.50 + 2.70 + 6.51)/3)/6.75 F.stat # Getting the P-value (with the appropriate d.f. = (3,13)) pf(F.stat, 3, 13, lower=F)