# R example: Regression with qualitative (categorical) predictors

# We use the Insurance innovation data in Table 8.2 of the book
# type = 0 indicates a mutual company
# type = 1 indicates a stock company

# Save the data file into a directory and
# use the full path name:

insur.inn.data <- read.table(
  file = "z:/My Documents/teaching/stat_705/insuranceinnovationdata.txt",
  header = FALSE,
  col.names = c("time", "size", "type")
)

# Fitting the regression model through ordinary least squares.
# The data frame is supplied through `data =` rather than attach(): both data
# sets in this script have a column named `size`, so attaching them would let
# one silently mask the other on the search path.

insur.inn.reg <- lm(time ~ size + type, data = insur.inn.data)

summary(insur.inn.reg)
anova(insur.inn.reg)

# Quantities shared by both confidence intervals below, computed once:
# the coefficient table (rows: intercept, size, type), the significance
# level, the error degrees of freedom and the matching t critical value.

insur.inn.coef <- coef(summary(insur.inn.reg))
alpha <- 0.05
error.df <- df.residual(insur.inn.reg)
t.crit <- qt(1 - alpha / 2, df = error.df)

# getting a 95% confidence interval for the true beta_1 (an indirect way;
# confint(insur.inn.reg) reports the same t-based intervals directly):

b1 <- insur.inn.coef["size", "Estimate"]
s.b1 <- insur.inn.coef["size", "Std. Error"]
lower <- b1 - t.crit * s.b1
upper <- b1 + t.crit * s.b1
print(paste(100*(1-alpha), "percent CI for beta_1:", lower, upper))

# getting a 95% confidence interval for the true beta_2 (an indirect way):

b2 <- insur.inn.coef["type", "Estimate"]
s.b2 <- insur.inn.coef["type", "Std. Error"]
lower <- b2 - t.crit * s.b2
upper <- b2 + t.crit * s.b2
print(paste(100*(1-alpha), "percent CI for beta_2:", lower, upper))

#############################################################

# Example of predictor with 4 categories
# shirt size example

# Save the data file into a directory and
# use the full path name:

shirt.data <- read.table(
  file = "z:/My Documents/teaching/stat_705/shirtdata.txt",
  header = FALSE,
  col.names = c("amount", "size")
)

# Indicator (0/1) variables for three of the shirt sizes. Rows with none of
# these sizes have all three indicators equal to 0 and form the baseline
# group (the "small" size, per the interpretation further down).

x1 <- ifelse(shirt.data$size == "medium", 1, 0)
x2 <- ifelse(shirt.data$size == "large", 1, 0)
x3 <- ifelse(shirt.data$size == "xlarge", 1, 0)

# Looking at the data set:

shirt.design <- cbind(shirt.data, x1, x2, x3)
print(shirt.design)

# Fitting the regression model through ordinary least squares:

shirt.reg <- lm(amount ~ x1 + x2 + x3, data = shirt.design)

summary(shirt.reg)
anova(shirt.reg)

# Interpreting estimated regression coefficients:

# We estimate that shoppers with "medium" shirt size spend $13.80 LESS
# on AVERAGE than shoppers with "small" shirt size.

# We estimate that shoppers with "large" shirt size spend $18.50 LESS
# on AVERAGE than shoppers with "small" shirt size.