######################### # # # STAT 599 Spring 2013 # # # # Example R code # # # # Chapter 13 # # # ######################### ### Installing the add-on packages needed for this course: # If you haven't already done so, # type at the command line (without the preceding # ): # install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4')) # and choose a U.S. mirror site when the window pops up. # This will install the needed packages for you. ###################################### ## # Regression Trees ## ###################################### # load the 'faraway' package: library(faraway) # load the 'rpart' package: library(rpart) # loading the 'ozone' dataset: data(ozone) # attaching the 'ozone' dataset: # attach(ozone) # examining a scatterplot matrix: pairs(ozone, pch=".") # fitting a regression tree, with default settings: oz.tree <- rpart(O3 ~ vh + wind + humidity + temp + ibh + dpg + ibt + vis + doy, data=ozone) # basic summary output: print(oz.tree) # plotting the graph of the tree: plot(oz.tree); text(oz.tree) # plotting the graph of the tree (with uniform spacing): plot(oz.tree, compress=T, uniform=T, branch=0.4); text(oz.tree) ## Diagnostics based on residuals: plot(predict(oz.tree), residuals(oz.tree), xlab='Predicted', ylab='Residuals') qqnorm(residuals(oz.tree), main='Normal Q-Q plot of Residuals') #### Predictions: # Predict the O3 for an observation that is "typical" in all predictor variables: x.med <- apply(ozone[,-1],2,median) predict(oz.tree, data.frame(t(x.med))) # Predicting the O3 for an observation with # vh=6000, wind=6, humidity=55, temp=70, ibh=2000, dpg=20, ibt=235, vis=63, doy=282: x.val <- c(vh=6000, wind=6, humidity=55, temp=70, ibh=2000, dpg=20, ibt=235, vis=63, doy=282) predict(oz.tree, data.frame(t(x.val))) ################################################# ## ## Section 13.2: Tree Pruning (Selection) ## ################################################# # cp = ratio of lambda to the RSS on the no-branch tree # A larger cp encourages a less complex tree # A smaller cp allows a tree with lots of splits and branches oz.tree.cp <- rpart(O3 ~ vh + wind + humidity + temp + ibh + dpg + ibt + vis + doy, data=ozone, cp=0.001) printcp(oz.tree.cp) # Select cp that minimizes the 'xerror': oz.tree.best <- prune.rpart(oz.tree.cp, cp=0.0109137) # Plotting the 'best' tree: plot(oz.tree.best); text(oz.tree.best) ## This is the same solution as before. ##### A simpler tree (with higher cp): oz.tree.simp <- prune.rpart(oz.tree.cp, cp=0.0267557) # Plotting this tree: plot(oz.tree.simp); text(oz.tree.simp) ##### A more complicated tree (with lower cp): oz.tree.comp <- prune.rpart(oz.tree.cp, cp=0.0044712) # Plotting this tree: plot(oz.tree.comp); text(oz.tree.comp) ################################# # ## 13.3 Classification Trees # ################################# # load the 'faraway' package: library(faraway) # load the 'rpart' package: library(rpart) data(hsb) attach(hsb) # A classification tree to classify the students into one of three program types (academic, vocational, and general) # based on numerous predictor variables (gender,race,ses,schtyp,read,write,math,science,socst). tree.prog <- rpart(prog ~ gender+race+ses+schtyp+read+write+math+science+socst, data=hsb) # Plotting this tree: plot(tree.prog); text(tree.prog) # Predicting the program type for a student with # gender='female',race='white',ses='low',schtyp='public',read=55,write=51,math=44,science=49,socst=56: # Since some predictors have "character" values and some predictors have numeric values, # we must put the values directly into a data frame rather than creating a vector first: predict(tree.prog, data.frame(gender='female',race='white',ses='low',schtyp='public',read=55,write=51,math=44,science=49,socst=56))