# R code to analyze the surgical unit data
# and do a transformation of variable.
#
# Objects created at top level by this section:
#   surg.data       data frame with columns x1..x8 (predictors) and y (response)
#   lny             natural log of y
#   surg.reg1       lm fit of y on x1
#   surg.reg2       lm fit of ln(y) on x1
#   surg.mult.reg1  lm fit of y on x1..x8
#   surg.mult.reg2  lm fit of ln(y) on x1..x8

# Save the data file into a directory and
# use the full path name:
surg.file <- "z:/My Documents/teaching/stat_704/surgicalunitdata1.txt"

# The file has no header row, so the column names are supplied here.
surg.data <- read.table(
  file = surg.file,
  header = FALSE,
  col.names = c("x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "y")
)

# Attaching the data frame.
# Kept for backward compatibility: code run after this block may refer to the
# columns by bare name. Any copy attached by a previous run is detached first,
# so re-running the script does not stack masked copies on the search path.
# The models below pass `data = surg.data` explicitly and therefore do not
# depend on the search path at all.
if ("surg.data" %in% search()) {
  detach("surg.data", character.only = TRUE)
}
attach(surg.data)

# Draw the two diagnostic plots used throughout this script for a fitted
# linear model:
#   1. residuals against fitted values, with a horizontal reference line at 0
#   2. normal Q-Q plot of the residuals
# `fit` is a fitted model object (here, the result of lm()).
# Returns `fit` invisibly.
plot_resid_diagnostics <- function(fit) {
  plot(fitted(fit), resid(fit), ylab = "Residuals", xlab = "Fitted Values")
  abline(h = 0)
  qqnorm(resid(fit))
  invisible(fit)
}

# Fitting a simple linear regression model:
surg.reg1 <- lm(y ~ x1, data = surg.data)

# Residual plot (against fitted values) and Q-Q plot of residuals:
plot_resid_diagnostics(surg.reg1)

# Defining a log-transformed variable:
lny <- log(surg.data$y)

# Fitting a simple linear regression model using ln(y).
# `lny` is not a column of surg.data, so lm() takes it from the environment
# of the formula; the predictor comes from surg.data.
surg.reg2 <- lm(lny ~ x1, data = surg.data)

# Residual plot (against fitted values) and Q-Q plot of residuals:
plot_resid_diagnostics(surg.reg2)

# Have the model violations been alleviated?

#####################################################

## With several predictor variables:

# Fitting a multiple linear regression model:
surg.mult.reg1 <- lm(
  y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8,
  data = surg.data
)

# Residual plot (against fitted values) and Q-Q plot of residuals:
plot_resid_diagnostics(surg.mult.reg1)

# Fitting that multiple linear regression model using ln(y):
surg.mult.reg2 <- lm(
  lny ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8,
  data = surg.data
)

# Residual plot (against fitted values) and Q-Q plot of residuals:
plot_resid_diagnostics(surg.mult.reg2)

# Have the model violations been alleviated?
### Box-Cox transformation in R:

# boxcox() comes from the 'MASS' package; loading it once is enough for
# both analyses below.
library(MASS)

# For the simple linear regression of Y on X1:
# The model is fitted with an explicit `data` argument so that boxcox(),
# which re-evaluates the model call internally, finds the variables in
# surg.data rather than relying on the data frame being attached.
surg.reg1 <- lm(y ~ x1, data = surg.data)

# Default grid of lambdas is -2 to 2 by 0.1:
boxcox(surg.reg1)

# Could change grid of lambda values:
boxcox(surg.reg1, lambda = seq(-3, 3, by = 0.25))

# For the multiple regression of y on x1,...,x8:
surg.mult.reg1 <- lm(
  y ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8,
  data = surg.data
)
boxcox(surg.mult.reg1)