# R code to analyze the body fat data using ridge regression.
#
# Steps:
#   1. read the body fat data,
#   2. let MASS report its suggested biasing constants (lambda),
#   3. fit the ridge regression at the chosen lambda,
#   4. compare its SSE with the SSE of the ordinary least-squares fit.

# Save the data file into a directory and use the full path name:
bodyfat.data <- read.table(
  file = "z:/My Documents/teaching/stat_704/bodyfatdata.txt",
  header = FALSE,
  col.names = c("triceps", "thigh", "midarm", "bodyfat")
)

# The models below receive the data frame through `data =` rather than
# attach(), so the columns never sit on the search path where they could
# mask, or be masked by, other objects of the same name.

# Must load the MASS package first (it provides lm.ridge):
library(MASS)

# The model formula shared by every fit in this script.
bodyfat.formula <- bodyfat ~ triceps + thigh + midarm

# Using R's automatic selection methods to select the biasing constant.
# R calls this constant "lambda".  select() prints its suggestions to the
# console; MASS:: is spelled out so that another attached package with its
# own select() cannot be picked up by mistake.
MASS::select(
  lm.ridge(bodyfat.formula, data = bodyfat.data, lambda = seq(0, 1, 0.001))
)

# The generalized cross-validation (GCV) criterion says
# the optimal biasing constant is .019
bodyfat.ridge.reg <- lm.ridge(
  bodyfat.formula,
  data = bodyfat.data,
  lambda = 0.019
)

# Printing the ridge-regression coefficient estimates for this problem:
bodyfat.ridge.reg

###########################
# Comparing the ridge-regression fit to the original least-squares fit:

# The X matrix for this problem (a column of ones for the intercept,
# followed by the three predictors):
X.matrix <- with(
  bodyfat.data,
  cbind(rep(1, length.out = length(bodyfat)), triceps, thigh, midarm)
)

# Getting the fitted values for the ridge-regression fit.
# coef() returns the intercept followed by the slopes on the original data
# scale, in the same order as the columns of X.matrix.  Taking them from the
# fitted object, instead of retyping rounded estimates by hand, avoids
# rounding error and keeps them in step with the data and lambda.
fitted.vals <- X.matrix %*% coef(bodyfat.ridge.reg)

# Getting the SSE for the ridge-regression fit:
sse.ridge <- sum((bodyfat.data$bodyfat - fitted.vals)^2)
sse.ridge

# The original least-squares fit:
bodyfat.reg <- lm(bodyfat.formula, data = bodyfat.data)

# Getting the SSE for the original least-squares fit:
sum(resid(bodyfat.reg)^2)

# The SSE for the ridge-regression fit is not much higher, which is good.