# Example R code, Section 11.10: # Example 1: # entering the data y <- c(39.1,38.9,53.2,89.3,31.3,31.5,89.6,54.5,28.8,46.9,67.9,49.5,41,49.7,55.3,84.5,46.5,54.1,57.9,91.2,29.8,89.1) x1 <- c(1065,950,1090,1350,930,985,1390,1170,950,990,1035,845,1000,1065,1065,1325,1035,1005,918,1370,970,1375) x2 <- c(9.482,13.149,9.418,26.969,8.489,8.329,29.605,13.154,10.887,6.046,14.889,11.694,9.911,9.371,14.051,18.420,13.302,8.098,12.999,35.393,5.518,35.669) # matrix calculations: # creating the X matrix: X <- cbind(rep(1,times=length(y)), x1, x2) # the least-squares estimated coefficients: betahat <- solve(t(X) %*% X) %*% (t(X) %*% y) print(betahat) # A quick way: lm(y ~ x1 + x2) ## Example 2 (quadratic regression with ski-racer data): # times until exhaustion when running on treadmill: x <- c(8.4, 9.0, 9.6, 10.0, 11.0) # 20 km ski race times: y <- c(71.4, 68.7, 69.4, 63.0, 62.6) # matrix calculations: # creating the X matrix: X <- cbind(rep(1,times=length(y)), x, x^2) # the least-squares estimated coefficients: betahat <- solve(t(X) %*% X) %*% (t(X) %*% y) print(betahat) # A quick way: xsq <- x^2 lm(y ~ x + xsq) ######################################################################################################### # Example R code, Section 11.11: # Example 1 again: # entering the data y <- c(39.1,38.9,53.2,89.3,31.3,31.5,89.6,54.5,28.8,46.9,67.9,49.5,41,49.7,55.3,84.5,46.5,54.1,57.9,91.2,29.8,89.1) x1 <- c(1065,950,1090,1350,930,985,1390,1170,950,990,1035,845,1000,1065,1065,1325,1035,1005,918,1370,970,1375) x2 <- c(9.482,13.149,9.418,26.969,8.489,8.329,29.605,13.154,10.887,6.046,14.889,11.694,9.911,9.371,14.051,18.420,13.302,8.098,12.999,35.393,5.518,35.669) # matrix calculations: # creating the X matrix: X <- cbind(rep(1,times=length(y)), x1, x2) # The (X'X)^{-1} matrix: solve(t(X) %*% X) # Getting SSE and MSE: betahat <- solve(t(X) %*% X) %*% (t(X) %*% y) SSE <- t(y) %*% y - t(betahat) %*% t(X) %*% y; print(SSE) MSE <- SSE/(22-2-1); print(MSE) # A quick way to get estimated V(beta_1-hat) and MSE: summary(lm(y ~ x1 + x2)) # Note: 0.02303^2 = estimate of V(beta_1-hat) # Note: 9.172^2 = MSE, the estimate of sigma^2 ######################################################################################################### # Example R code, Section 11.12: # Example 2 again: ## (quadratic regression with ski-racer data): # times until exhaustion when running on treadmill: x <- c(8.4, 9.0, 9.6, 10.0, 11.0) # 20 km ski race times: y <- c(71.4, 68.7, 69.4, 63.0, 62.6) # matrix calculations: # creating the X matrix: X <- cbind(rep(1,times=length(y)), x, x^2) # The (X'X)^{-1} matrix: solve(t(X) %*% X) # the least-squares estimated coefficients: betahat <- solve(t(X) %*% X) %*% (t(X) %*% y) print(betahat) SSE <- t(y) %*% y - t(betahat) %*% t(X) %*% y; print(SSE) MSE <- SSE/(5-2-1); print(MSE) # A quick way: xsq <- x^2 summary(lm(y ~ x + xsq)) ## Example 1 again: # entering the data y <- c(39.1,38.9,53.2,89.3,31.3,31.5,89.6,54.5,28.8,46.9,67.9,49.5,41,49.7,55.3,84.5,46.5,54.1,57.9,91.2,29.8,89.1) x1 <- c(1065,950,1090,1350,930,985,1390,1170,950,990,1035,845,1000,1065,1065,1325,1035,1005,918,1370,970,1375) x2 <- c(9.482,13.149,9.418,26.969,8.489,8.329,29.605,13.154,10.887,6.046,14.889,11.694,9.911,9.371,14.051,18.420,13.302,8.098,12.999,35.393,5.518,35.669) # matrix calculations: # creating the X matrix: X <- cbind(rep(1,times=length(y)), x1, x2) # Finding the a'(X'X)^{-1}a in this case: a <- c(1, 1050, 15) t(a) %*% solve(t(X) %*% X) %*% a # Quick way to get 90% CI for E(Y) at x1=1050 and x2=15: collegefit <- lm(y ~ x1 + x2) predict(collegefit, newdata=data.frame(x1=1050, x2=15), interval="confidence", level=0.90) # The (X'X)^{-1} matrix: solve(t(X) %*% X) ################################################################# # Section 11.13 calculations: # Quick way to get 90% PI for Y* at x1=1050 and x2=15: predict(collegefit, newdata=data.frame(x1=1050, x2=15), interval="prediction", level=0.90) # Example 2 again: ## (quadratic regression with ski-racer data): # times until exhaustion when running on treadmill: x <- c(8.4, 9.0, 9.6, 10.0, 11.0) # 20 km ski race times: y <- c(71.4, 68.7, 69.4, 63.0, 62.6) # matrix calculations: # creating the X matrix: X <- cbind(rep(1,times=length(y)), x, x^2) # Finding the a'(X'X)^{-1}a in this case: a <- c(1, 9, 81) t(a) %*% solve(t(X) %*% X) %*% a # A quick way: xsq <- x^2 skiquadfit <- lm(y ~ x + xsq) predict(skiquadfit, newdata=data.frame(x=9, xsq=81), interval="prediction", level=0.90) ################################################################# # Section 11.14 calculations: # Example 2 again: ## (quadratic regression with ski-racer data): # times until exhaustion when running on treadmill: x <- c(8.4, 9.0, 9.6, 10.0, 11.0) # 20 km ski race times: y <- c(71.4, 68.7, 69.4, 63.0, 62.6) # A quick way to do the Complete vs. Reduced F-test in R: xsq <- x^2 skiquadfit <- lm(y ~ x + xsq) skiquadfit.reduced <- lm(y ~ 1) # this fits the model with only an intercept term anova(skiquadfit.reduced, skiquadfit) ## Example 1 again: # entering the data y <- c(39.1,38.9,53.2,89.3,31.3,31.5,89.6,54.5,28.8,46.9,67.9,49.5,41,49.7,55.3,84.5,46.5,54.1,57.9,91.2,29.8,89.1) x1 <- c(1065,950,1090,1350,930,985,1390,1170,950,990,1035,845,1000,1065,1065,1325,1035,1005,918,1370,970,1375) x2 <- c(9.482,13.149,9.418,26.969,8.489,8.329,29.605,13.154,10.887,6.046,14.889,11.694,9.911,9.371,14.051,18.420,13.302,8.098,12.999,35.393,5.518,35.669) # Quick way to do the Complete vs. Reduced F-test in R: collegefit.firstorder <- lm(y ~ x1 + x2) x1sq <- x1^2 x2sq <- x2^2 collegefit.secondorder <- lm(y ~ x1 + x2 + x1sq + x2sq + x1:x2) anova(collegefit.firstorder, collegefit.secondorder)