# STAT_526_Lec_03 link <- url("https://gregorkb.github.io/data/KNLIcp.txt") cp <- read.table(link,col.names=c("rent","age","optx","vac","sqft")) cp$sqft <- cp$sqft/10000 # rescale sqft head(cp) # make scatterplots of all pairs of variables plot(cp) # getting least squares estimators using matrix calculations Y <- cp$rent X <- cbind(1,cp$age,cp$optx,cp$vac,cp$sqft) # put columns side by side bhat <- solve(t(X) %*% X) %*% t(X) %*% Y # solve() takes the inverse bhat # get LS estimators more easily lm_out <- lm(rent ~ age + optx + vac + sqft, data = cp) lm_out Yhat <- lm_out$fitted.values ehat <- lm_out$residuals n <- nrow(cp) # get number of rows p <- 4 sigmahat <- sqrt( sum(ehat^2) / (n - (p + 1)) ) sigmahat # more easily get sigmahat from output of lm() function: summary(lm_out) # get CIs for betas confint(lm_out) confint(lm_out,level = .99) # get p-values for tests about betas summary(lm_out) # build CI for true regression function (average) Ynew at xnew xnew <- data.frame(age = 10, optx = 7, vac = 0.20, sqft = 8) predict(lm_out, newdata=xnew, int='conf') # pred interval predict(lm_out, newdata=xnew, int='pred') predict(lm_out, newdata=xnew, int='pred',level=0.99)