######################### # # # STAT 599 Spring 2013 # # # # Example R code # # # # Chapter 9 # # # ######################### ### Installing the add-on packages needed for this course: # If you haven't already done so, # type at the command line (without the preceding # ): # install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4')) # and choose a U.S. mirror site when the window pops up. # This will install the needed packages for you. # load the 'faraway' package: library(faraway) # loading the 'psid' dataset: data(psid) # attaching the 'psid' dataset: attach(psid) ## Section 9.1: # loading the 'lattice' package: library(lattice) # Plotting 'income' against 'year', separately for each person (first 20 people): xyplot(income ~ year | person, data=psid, type='l', subset = (person < 21), strip=F) # Plotting log of 'income' against 'year', separately for each level of 'sex': xyplot(log(income+100) ~ year | sex, data=psid, type='l') ##### ## A simple 'response feature' analysis: ##### # Fitting simple regression lines for each person's record: slopes <- numeric(85) # setting up a vector of zeros (there are 85 individuals) intercepts <- numeric(85) # setting up a vector of zeros (there are 85 individuals) # We center the independent variable (year) # around the median of 78 (i.e., year 1978): for (i in 1:85){ lmod <- lm(log(income) ~ I(year-78), subset=(person==i), data=psid) intercepts[i] <- coef(lmod)[1] slopes[i] <- coef(lmod)[2] } # plotting the 85 intercepts and slopes in a scatter plot: psex <- psid$sex[match(1:85,psid$person)] plot(intercepts, slopes, xlab='Intercept', ylab='Slope', type='n') # creates blank plot text(intercepts, slopes, labels=psex) # fills in plot with labels by sex # The men and women seem to differ by intercept, not as much by slope. # Boxplots of the slopes, separated by sex: boxplot(split(slopes, psex)) # A formal test of whether the slopes differ for the two sexes: t.test(slopes[psex=="M"], slopes[psex=="F"]) # A formal test of whether the intercepts differ for the two sexes: t.test(intercepts[psex=="M"], intercepts[psex=="F"]) ########################## ## ## Fitting a mixed effects model: ## ########################## # loading the 'lme4' package: library(lme4) # creating a centered year variable: psid$cyear <- psid$year - 78 # Fitting a mixed model with a random intercept term and a random coefficient for year: # Also, age, sex, and education level are fixed effects: mmod <- lmer(log(income) ~ cyear*sex + age + educ + (cyear|person), data=psid) # We are allowing for an interaction between cyear and sex... summary(mmod) # Normal Q-Q plots of residuals plotted separately for sexes: qqmath(~resid(mmod) | sex, data=psid) # residuals vs. fitted values, plotted separately for 3 educational levels: xyplot(resid(mmod)~fitted(mmod) | cut(educ, c(0,8.5,12.5,20)), data=psid, layout=c(3,1), xlab='Fitted', ylab='Residuals') ################## # # Redoing the above analysis without the log transformation of income: # ################## mmod2 <- lmer(income ~ cyear*sex + age + educ + (cyear|person), data=psid) # We are allowing for an interaction between cyear and sex... summary(mmod2) # Normal Q-Q plots of residuals plotted separately for sexes: qqmath(~resid(mmod2) | sex, data=psid) # residuals vs. fitted values, plotted separately for 3 educational levels: xyplot(resid(mmod2)~fitted(mmod2) | cut(educ, c(0,8.5,12.5,20)), data=psid, layout=c(3,1), xlab='Fitted', ylab='Residuals') # Some improvement, but new problems with the model have been introduced. # Should keep trying some other transformations... ########################################################## ## ## Section 9.2: # load the 'faraway' package: library(faraway) # loading the 'vision' dataset: data(vision) # attaching the 'vision' dataset: attach(vision) # Creating a numerical version of the 'power' variable # (just for the following plots): vision$npower <- rep(1:4,14) # load the 'lattice' package: library(lattice) # Time series-type plots of acuity, separate for each eye: xyplot(acuity~npower|subject, data=vision, type='l', groups=eye, lty=1:2, layout=c(4,2)) # Note the unusual pattern in subject 6: # load the 'lme4' package: library(lme4) mmod.vis <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision) summary(mmod.vis) # F-test for fixed effect (power) anova(mmod.vis) ##### ## Same model, fit without the suspect left-eye data value for subject 6: mmod.vis2 <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision, subset=-43) summary(mmod.vis2) # F-test for fixed effect (power) anova(mmod.vis2) ## Using a Helmert contrast to compare the mean acuity at the highest power to # the mean acuity for the other three levels of power: op<-options(contrasts=c("contr.helmert", "contr.poly")) mmod.vis2c <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision, subset=-43) summary(mmod.vis2c) options(op) contr.helmert(4) # The t-test about the last-column contrast is the test of interest. ### Some diagnostic plots: plot(resid(mmod.vis2c)~fitted(mmod.vis2c),xlab="Fitted",ylab="Residuals"); abline(h=0) x11() qqnorm(ranef(mmod.vis2c)$"subject:eye"[[1]], main="Normal Q-Q plot for eye random effects") ## Same model, fit without entire data for subject 6: mmod.vis3 <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision, subset=-(41:48)) summary(mmod.vis3) # F-test for fixed effect (power) anova(mmod.vis3) ### Some diagnostic plots: plot(resid(mmod.vis3)~fitted(mmod.vis3),xlab="Fitted",ylab="Residuals"); abline(h=0) x11() qqnorm(ranef(mmod.vis3)$"subject:eye"[[1]], main="Normal Q-Q plot for eye random effects") ################################ ### #### 9.3 Multiple Response Multilevel Models # load the 'faraway' package: library(faraway) # loading the 'jsp' dataset: data(jsp) # attaching the 'jsp' dataset: attach(jsp) # Only using the math scores for the THIRD year (the third year is coded year=2): jspr <- jsp[jsp$year==2,] # Scaling both the math (out of 40) scores and the English (out of 100) scores # to make them both between 0 and 1: mjspr <- data.frame(rbind(jspr[,1:6],jspr[,1:6]), subject=factor(rep(c('english','math'),c(953,953))), score=c(jspr$english/100,jspr$math/40)) # Looking at the first few observations of 'mjspr': head(mjspr) # load the 'lattice' package: library(lattice) # Some plots to investiagate the relation between test score and raven score, # separate by subject and gender: xyplot(score~raven | subject*gender, data=mjspr) # Centering the 'raven' variable: mjspr$craven <- mjspr$raven - mean(mjspr$raven) # load the 'lme4' package: library(lme4) # Fitting a rather complicated mixed model: mmod.scores <- lmer(score ~ subject*gender + craven*subject + social + (1|school) + (1|school:class) + (1|school:class:id), data=mjspr) summary(mmod.scores) # Testing about fixed effects: anova(mmod.scores) # Some residual plots, separate by subject: xyplot(residuals(mmod.scores) ~ fitted(mmod.scores)|subject,mjspr,pch='.', xlab="Fitted", ylab="Residuals")