#########################
#                       #
# STAT 599 Spring 2013  #
#                       #
# Example R code        #
#                       #
# Chapter 9             #
#                       #
#########################

### Installing the add-on packages needed for this course:

# If you haven't already done so, 
# type at the command line (without the preceding # ):

# install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4'))

# and choose a U.S. mirror site when the window pops up.
# This will install the needed packages for you.

# load the 'faraway' package:

library(faraway)

# loading the 'psid' dataset:

data(psid)

# attaching the 'psid' dataset:

attach(psid)

## Section 9.1:

# loading the 'lattice' package:

library(lattice)

# Plotting 'income' against 'year', separately for each person (first 20 people):

xyplot(income ~ year | person, data=psid, type='l', subset = (person < 21), strip=F)

# Plotting log of 'income' against 'year', separately for each level of 'sex':

xyplot(log(income+100) ~ year | sex, data=psid, type='l')

#####
## A simple 'response feature' analysis:
#####

# Fitting simple regression lines for each person's record:

slopes <- numeric(85) # setting up a vector of zeros (there are 85 individuals)
intercepts <- numeric(85) # setting up a vector of zeros (there are 85 individuals)

# We center the independent variable (year) 
# around the median of 78 (i.e., year 1978):

for (i in 1:85){
 lmod <- lm(log(income) ~ I(year-78), subset=(person==i), data=psid)
 intercepts[i] <- coef(lmod)[1]
 slopes[i] <- coef(lmod)[2] 
}

# plotting the 85 intercepts and slopes in a scatter plot:

psex <- psid$sex[match(1:85,psid$person)]

plot(intercepts, slopes, xlab='Intercept', ylab='Slope', type='n') # creates blank plot
text(intercepts, slopes, labels=psex) # fills in plot with labels by sex

# The men and women seem to differ by intercept, not as much by slope.

# Boxplots of the slopes, separated by sex:

boxplot(split(slopes, psex))

# A formal test of whether the slopes differ for the two sexes:

t.test(slopes[psex=="M"], slopes[psex=="F"])

# A formal test of whether the intercepts differ for the two sexes:

t.test(intercepts[psex=="M"], intercepts[psex=="F"])

##########################
##
## Fitting a mixed effects model:
##
##########################

# loading the 'lme4' package:

library(lme4)

# creating a centered year variable:

psid$cyear <- psid$year - 78

# Fitting a mixed model with a random intercept term and a random coefficient for year:
# Also, age, sex, and education level are fixed effects:

mmod <- lmer(log(income) ~ cyear*sex + age + educ + (cyear|person), data=psid)
# We are allowing for an interaction between cyear and sex...
summary(mmod)

# Normal Q-Q plots of residuals plotted separately for sexes:

qqmath(~resid(mmod) | sex, data=psid)

# residuals vs. fitted values, plotted separately for 3 educational levels:

xyplot(resid(mmod)~fitted(mmod) | cut(educ, c(0,8.5,12.5,20)), data=psid, layout=c(3,1), xlab='Fitted', ylab='Residuals')

##################
#
# Redoing the above analysis without the log transformation of income:
#
##################


mmod2 <- lmer(income ~ cyear*sex + age + educ + (cyear|person), data=psid)
# We are allowing for an interaction between cyear and sex...
summary(mmod2)

# Normal Q-Q plots of residuals plotted separately for sexes:

qqmath(~resid(mmod2) | sex, data=psid)

# residuals vs. fitted values, plotted separately for 3 educational levels:

xyplot(resid(mmod2)~fitted(mmod2) | cut(educ, c(0,8.5,12.5,20)), data=psid, layout=c(3,1), xlab='Fitted', ylab='Residuals')

# Some improvement, but new problems with the model have been introduced.
# Should keep trying some other transformations...

##########################################################
##
## Section 9.2: 

# load the 'faraway' package:

library(faraway)

# loading the 'vision' dataset:

data(vision)

# attaching the 'vision' dataset:

attach(vision)

# Creating a numerical version of the 'power' variable 
# (just for the following plots):
vision$npower <- rep(1:4,14)

# load the 'lattice' package:

library(lattice)

# Time series-type plots of acuity, separate for each eye:
xyplot(acuity~npower|subject, data=vision, type='l', groups=eye, lty=1:2, layout=c(4,2))

# Note the unusual pattern in subject 6:

# load the 'lme4' package:

library(lme4)

mmod.vis <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision)
summary(mmod.vis)

# F-test for fixed effect (power)
anova(mmod.vis)

#####
## Same model, fit without the suspect left-eye data value for subject 6:

mmod.vis2 <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision, subset=-43)
summary(mmod.vis2)

# F-test for fixed effect (power)
anova(mmod.vis2)

## Using a Helmert contrast to compare the mean acuity at the highest power to
# the mean acuity for the other three levels of power:

op<-options(contrasts=c("contr.helmert", "contr.poly"))
mmod.vis2c <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision, subset=-43)
summary(mmod.vis2c)
options(op)
contr.helmert(4)

# The t-test about the last-column contrast is the test of interest.

### Some diagnostic plots:

plot(resid(mmod.vis2c)~fitted(mmod.vis2c),xlab="Fitted",ylab="Residuals"); abline(h=0)
x11()
qqnorm(ranef(mmod.vis2c)$"subject:eye"[[1]], main="Normal Q-Q plot for eye random effects")

## Same model, fit without entire data for subject 6:

mmod.vis3 <- lmer(acuity~power + (1|subject) + (1|subject:eye), data=vision, subset=-(41:48))
summary(mmod.vis3)

# F-test for fixed effect (power)
anova(mmod.vis3)

### Some diagnostic plots:

plot(resid(mmod.vis3)~fitted(mmod.vis3),xlab="Fitted",ylab="Residuals"); abline(h=0)
x11()
qqnorm(ranef(mmod.vis3)$"subject:eye"[[1]], main="Normal Q-Q plot for eye random effects")

################################
###
#### 9.3 Multiple Response Multilevel Models

# load the 'faraway' package:

library(faraway)

# loading the 'jsp' dataset:

data(jsp)

# attaching the 'jsp' dataset:

attach(jsp)

# Only using the math scores for the THIRD year (the third year is coded year=2):

jspr <- jsp[jsp$year==2,]

# Scaling both the math (out of 40) scores and the English (out of 100) scores 
# to make them both between 0 and 1:

mjspr <- data.frame(rbind(jspr[,1:6],jspr[,1:6]), subject=factor(rep(c('english','math'),c(953,953))), score=c(jspr$english/100,jspr$math/40))

# Looking at the first few observations of 'mjspr':
head(mjspr)

# load the 'lattice' package:

library(lattice)

# Some plots to investiagate the relation between test score and raven score,
# separate by subject and gender:

xyplot(score~raven | subject*gender, data=mjspr)

# Centering the 'raven' variable:
mjspr$craven <- mjspr$raven - mean(mjspr$raven)

# load the 'lme4' package:

library(lme4)

# Fitting a rather complicated mixed model:

mmod.scores <- lmer(score ~ subject*gender + craven*subject + social + (1|school) + (1|school:class) + (1|school:class:id), data=mjspr)
summary(mmod.scores)

# Testing about fixed effects:
anova(mmod.scores)

# Some residual plots, separate by subject:

xyplot(residuals(mmod.scores) ~ fitted(mmod.scores)|subject,mjspr,pch='.', xlab="Fitted", ylab="Residuals")