# STAT_516_Lec_04.R link <- url("https://gregorkb.github.io/data/KNLIcp.txt") cp <- read.table(link,col.names=c("rent","age","optx","vac","sqft")) cp$sqft <- cp$sqft/10000 # rescale sqft head(cp) plot(cp) lm_out <- lm(rent ~ age + optx + vac + sqft, data = cp) lm_out Y <- cp$rent # build the ANOVA table Yhat <- lm_out$fitted.values ehat <- lm_out$residuals Ybar <- mean(Y) SStot <- sum((Y - Ybar)^2) SSreg <- sum((Yhat - Ybar)^2) SSerror <- sum((Y - Yhat)^2) p <- 4 n <- length(Y) MSreg <- SSreg / p MSerror <- SSerror / (n - (p+1)) Ftest <- MSreg / MSerror Ftest alpha <- 0.05 Fcrit <- qf(1 - alpha, p,n-(p+1)) Fcrit Ftest pval <- 1 - pf(Ftest,p,n-(p+1)) pval summary(lm_out) # test with full/reduced model F test # significance of vac and optx covariates lm_full <- lm(rent ~ age + optx + vac + sqft, data = cp) lm_red <- lm(rent ~ age + sqft, data = cp) SSEfull <- sum(lm_full$residuals**2) SSEred <- sum(lm_red$residuals**2) s <- 2 Ftest <- (SSEred - SSEfull)/s / ( SSEfull / (n - (p+1))) Ftest Fcrit <- qf(1-alpha,s,n-(p+1)) Fcrit pval <- 1 - pf(Ftest, s, n- (p+1)) pval # test with full/reduced model F test # significance of vac lm_full <- lm(rent ~ age + optx + vac + sqft, data = cp) lm_red <- lm(rent ~ age + optx + sqft, data = cp) SSEfull <- sum(lm_full$residuals**2) SSEred <- sum(lm_red$residuals**2) s <- 1 Ftest <- (SSEred - SSEfull) / s / ( SSEfull / (n - (p+1))) Ftest Fcrit <- qf(1-alpha,s,n-(p+1)) Fcrit pval <- 1 - pf(Ftest, s, n- (p+1)) pval summary(lm_full) sqrt(Ftest) ### Get VIFs for the predictors lm_age <- lm(age ~ optx + vac + sqft, data = cp) SSE_age <- sum(lm_age$residuals**2) SST_age <- sum((cp$age - mean(cp$age))**2) Rsq_age <- 1 - SSE_age/SST_age VIF_age <- 1/(1 - Rsq_age) VIF_age library(car) vif(lm_out) # variable selection # leaps() library(leaps) regsubsets_out <- regsubsets(rent ~ vac + age + optx + sqft, data = cp) summary(regsubsets_out) summary(regsubsets_out)$cp # pull out C_p statistics 2**28