###################################################################### # R commands Stat 704 # Lab 5 2017 University of South Carolina ###################################################################### # This file contains the R commands for the lab. # # Lines beginning with the symbol '#' are comments in R. All other # lines contain code. # # In R for Windows, you may wish to open this file from the menu bar # (File:Display file); you can then copy commands into the command # window. (Use the mouse to highlight one or more lines; then # right-click and select "Paste to console".) ###################################################################### tolucaURL<-"http://people.stat.sc.edu/hoyen/Stat704/Data/tolucadata.txt" dat<-read.table(file=tolucaURL, header=F, sep="", stringsAsFactors=F) colnames(dat)<-c("LotSize", "WorkHour") attach(dat) fit1<-lm(WorkHour ~ LotSize) summary(fit1) anova(fit1) setwd("/Users/yen-yiho/Desktop/STAT704/Notes/Lecture5SLR-1") pdf("Toluca.pdf") plot(dat[,1], dat[,2], pch=16, xlab="Lot Size", ylab="Work Hours", main="Toluca Data") abline(v=mean(dat[,1]), lty=2, col="lightgrey") abline(h=mean(dat[,2]), lty=2, col="lightgrey") text(25, mean(dat[,2])+1, "312.28", col="red", cex=1.3) text(mean(dat[,1]+1), 110, "70", col="blue", cex=1.3) dev.off() pdf("TolucaSLR.pdf") plot(dat[,1], dat[,2], pch=16, xlab="Lot Size", ylab="Work Hours", main="Toluca Data") abline(fit1) dev.off() confint(fit1) fit0<-lm(WorkHour ~1) summary(fit0) anova(fit1, fit0) fishz<-function(r){ ans<-0.5*log((1+r)/(1-r)) return(ans) } r<-seq(-1,1, by=0.01) plot(r, fishz(r), type="b") abline(h=0) abline(v=0) ##### centering xstar<-dat[,1]-mean(dat[,1]) fit2<-lm(dat[,2] ~ xstar) summary(fit2) #### confidence interval and prediction intervals ci<-predict.lm(fit1, interval="confidence") ci pi<-predict.lm(fit1, new=data.frame(LotSize=65), interval="prediction") pi ci2<-predict.lm(fit1,new=data.frame(LotSize=65), interval="confidence") ci2 ci3<-predict.lm(fit1,new=data.frame(LotSize=100), interval="prediction") ci3 ci4<-predict.lm(fit1,new=data.frame(LotSize=100), interval="confidence") ci4 xrange<-seq(20, 120, by=10) setwd("/Users/yen-yiho/Desktop/STAT704/Notes/Lecture5SLR-1") pdf("cipi.pdf") plot(dat[,1], dat[,2], pch=16, xlab="Lot Size", ylab="Work Hour", main="Toluca Data") abline(fit1, lwd=2) conf<-predict.lm(fit1, new=data.frame(LotSize=xrange), interval="confidence") lines(xrange, conf[,2], col="blue", lty=2, lwd=2) lines(xrange, conf[,3], col="blue", lty=2, lwd=2) predi<-predict.lm(fit1, new=data.frame(LotSize=xrange), interval="prediction") lines(xrange, predi[,2], col="grey", lty=2, lwd=2) lines(xrange, predi[,3], col="grey", lty=2, lwd=2) legend(20, 540, c("CI for yhat", "PI for a data point"), lty=c(2,2), col=c("blue", "grey"), bty="n", cex=1.3, lwd=2) dev.off() #################################################################### ##### ggplot in library(ggplot2) produce pretty figures with shaded band #################################################################### ###### confidence interval for b0, b1 dat<-read.table(file=tolucaURL, header=F, sep="", stringsAsFactors=F) colnames(dat)<-c("LotSize", "WorkHour") attach(dat) fit1<-lm(WorkHour ~ LotSize) summary(fit1) confint(fit1) ##### Simultaneous confidence band for a regression line simultaneous_CBs <- function(linear_model, newdata, level = 0.95){ # Working-Hotelling 1 – α confidence bands for the model linear_model # at points newdata with α = 1 - level # estimate of residual standard error lm_summary <- summary(linear_model) # degrees of freedom p <- lm_summary$df[1] # residual degrees of freedom nmp <-lm_summary$df[2] # F-distribution Fvalue <- qf(level,p,nmp) # multiplier W <- sqrt(p*Fvalue) # confidence intervals for the mean response at the new points CI <- predict(linear_model, newdata, se.fit = TRUE, interval = "confidence", level = level) # mean value at new points Y_h <- CI$fit[,1] # Working-Hotelling 1 – α confidence bands LB <- Y_h - W*CI$se.fit UB <- Y_h + W*CI$se.fit sim_CB <- data.frame(LowerBound = LB, Mean = Y_h, UpperBound = UB) } xrange<-seq(20, 120, by=10) new=data.frame(LotSize=xrange) rband<-simultaneous_CBs(fit1, newdata=new, level=0.95) pdf("CIregline.pdf") plot(dat[,1], dat[,2], pch=16, xlab="Lot Size", ylab="Work Hour", main="Toluca Data") abline(fit1, lwd=2) conf<-predict.lm(fit1, new=data.frame(LotSize=xrange), interval="confidence") lines(xrange, conf[,2], col="blue", lty=2, lwd=2) lines(xrange, conf[,3], col="blue", lty=2, lwd=2) predi<-predict.lm(fit1, new=data.frame(LotSize=xrange), interval="prediction") lines(xrange, predi[,2], col="grey", lty=2, lwd=2) lines(xrange, predi[,3], col="grey", lty=2, lwd=2) lines(xrange, rband[,1], col="red", lty=2, lwd=2) lines(xrange, rband[,3], col="red", lty=2, lwd=2) legend(20, 540, c("CI for yhat", "Band for Regression Line", "PI for a data point"), lty=c(2,2,2), col=c("blue", "red", "grey"), bty="n", cex=1.3, lwd=2) dev.off() detach(dat)