#########################
#                       #
# STAT 599 Spring 2013  #
#                       #
#    Example R code     #
#                       #
#      Chapter 11       #
#                       #
#########################

### Installing the add-on packages needed for this course:

# If you haven't already done so,
# type at the command line (without the preceding # ):
#
# install.packages(c('faraway','mgcv','acepack','mda','brlr','sm','wavethresh','SuppDists','lme4'))
#
# and choose a U.S. mirror site when the window pops up.
# This will install the needed packages for you.


######################################
##
## Section on Nonlinear Regression
##
######################################

## Some plots of common nonlinear regression functions:

# 3-parameter exponential model:

beta0 = 3; beta1 = 4; beta2 = -2;
xgrid <- seq(0, 10, by=.1)
EY <- beta0 + beta1*exp(beta2*xgrid)
plot(xgrid, EY, type='l')

beta0 = 3; beta1 = 4; beta2 = 2;
xgrid <- seq(0, 4, by=.1)
EY <- beta0 + beta1*exp(beta2*xgrid)
plot(xgrid, EY, type='l')

# 3-parameter logistic model:

beta0 = 3; beta1 = 4; beta2 = -2;
xgrid <- seq(0, 3, by=.1)
EY <- beta0 / (1 + beta1*exp(beta2*xgrid))
plot(xgrid, EY, type='l')

beta0 = 3; beta1 = 4; beta2 = 2;
xgrid <- seq(0, 3, by=.1)
EY <- beta0 / (1 + beta1*exp(beta2*xgrid))
plot(xgrid, EY, type='l')

# Michaelis-Menten model:

beta1 = 4; beta2 = 2;
xgrid <- seq(0, 10, by=.1)
EY <- (beta1*xgrid)/(xgrid + beta2)
plot(xgrid, EY, type='l')


# Example 1:

# The response is the prognosis for long-term recovery
# (a larger value indicates a better prognosis):
prognosis <- c(54,50,45,37,35,25,20,16,18,13,8,11,8,4,6)
# The predictor is the number of days of hospitalization:
days <- c(2,5,7,10,14,19,26,31,34,38,45,52,53,60,65)

plot(days, prognosis)

# We will fit the nonlinear model E(Y) = beta1*exp(beta2*X).

# A first attempt, with poorly chosen starting values
# (nls may fail to converge from here):
nlm.1 <- nls(prognosis ~ beta1*exp(beta2*days), start = list(beta1 = 75, beta2 = -10))
summary(nlm.1)

# Let's linearize to get a sense of the initial values:
# taking logs gives log E(Y) = log(beta1) + beta2*X, so a simple linear
# regression of log(Y) on X suggests starting values for beta1 and beta2:
coef(lm(log(prognosis) ~ days))

# Try some better initial values:
nlm.1 <- nls(prognosis ~ beta1*exp(beta2*days), start = list(beta1 = exp(4.037), beta2 = -0.038))
summary(nlm.1)
# That worked.
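
## A quick follow-up on the fitted model: approximate 95% Wald confidence
## intervals for beta1 and beta2, built from the asymptotic standard errors
## reported by summary(). (A minimal sketch added for illustration;
## profile-based intervals via confint(nlm.1) are an alternative.)

est <- coef(summary(nlm.1))   # columns include 'Estimate' and 'Std. Error'
ci <- est[, "Estimate"] + outer(est[, "Std. Error"], qnorm(c(.025, .975)))
dimnames(ci) <- list(rownames(est), c("2.5%", "97.5%"))
ci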
# Plot of fitted model:

plot(days, prognosis)
xgrid <- seq(min(days), max(days), length=200)
lines(xgrid, coef(nlm.1)[1]*exp(coef(nlm.1)[2]*xgrid))


# Example 2:

# Response = proportion of sampled individuals in a city
# who would be likely to purchase a computer:
propbuy <- c(.9,.8,.65,.46,.34,.26,.17,.15,.06,.04,.93,.77,.63,.5,.3,.24,.19,.12,.08,.05)
# Predictor 1 = selling price of computer (in 100s of dollars):
price <- c(1,2.5,5,10,20,30,40,50,75,100,1,2.5,5,10,20,30,40,50,75,100)
# Predictor 2 = city (A or B):
city <- c('A','A','A','A','A','A','A','A','A','A','B','B','B','B','B','B','B','B','B','B')
# Recoding 'city' as 0 for city A and 1 for city B:
city <- ifelse(city=='A', 0, 1)

# A plot of the data:
plot(price, propbuy, pch=city+21)  # circle for city=0, square for city=1

# We will fit the nonlinear model E(Y) = beta0 + beta1*exp(beta2*X1) + beta3*X2.

# Linearizing to get initial values for beta1 and beta2:
coef(lm(log(propbuy) ~ price))

nlm.2 <- nls(propbuy ~ beta0 + beta1*exp(beta2*price) + beta3*city,
             start = list(beta0=0, beta1=exp(-0.347), beta2=-0.03, beta3=0))
summary(nlm.2)

# Plot of fitted model:

plot(price, propbuy, pch=city+21)  # circle for city=0, square for city=1
xgrid <- seq(min(price), max(price), length=200)
lines(xgrid, coef(nlm.2)[1] + coef(nlm.2)[2]*exp(coef(nlm.2)[3]*xgrid), lty=1)                   # when city=0
lines(xgrid, coef(nlm.2)[1] + coef(nlm.2)[2]*exp(coef(nlm.2)[3]*xgrid) + coef(nlm.2)[4], lty=2)  # when city=1

# We can see visually that the 'city' predictor has little to no effect
# on the mean response.

## Residual plot:
plot(fitted(nlm.2), residuals(nlm.2))
# There seems to be a bit of a curved pattern -- do we need to consider
# a different nonlinear mean response function?
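
## The visual impression that 'city' matters little can be checked more
## formally: fit the reduced model without beta3 and compare the nested nls
## fits with anova(), which gives an approximate F test. (A sketch;
## 'nlm.2.red' is a name introduced here for the reduced fit.)

nlm.2.red <- nls(propbuy ~ beta0 + beta1*exp(beta2*price),
                 start = list(beta0=0, beta1=exp(-0.347), beta2=-0.03))
anova(nlm.2.red, nlm.2)   # approximate F test of H0: beta3 = 0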

######################################
##
## Section on Nonparametric Regression
##
######################################

# load the 'faraway' package:
library(faraway)
# loading the 'exa' and 'exb' datasets:
data(exa); data(exb)
# attaching the 'exa' and 'exb' datasets:
# attach(exa); attach(exb)

####
# Plots of the data and the true regression function
####

# Example A:
plot(y ~ x, data=exa, main='Example A', pch=".")
lines(m ~ x, data=exa)

# Example B:
plot(y ~ x, data=exb, main='Example B', pch=".")
lines(m ~ x, data=exb)

# 'Old Faithful' data:
data(faithful)
plot(waiting ~ eruptions, data=faithful, main='Old Faithful', pch=".")

################
## Interesting check: How close does 'nls' come to recovering
## the true regression function in Example A?
################

exa.y <- exa$y; exa.x <- exa$x
nlm.exa <- nls(exa.y ~ (sin(beta1*(exa.x^beta2)))^3, start = list(beta1=6, beta2=3.2))
summary(nlm.exa)

######################################
##
## The Nadaraya-Watson kernel smoothing estimator
## on the 'Old Faithful' data set:
##

# A too-small bandwidth:
plot(waiting ~ eruptions, data=faithful, main="bandwidth=0.1", pch=".")
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", bandwidth=0.1))

# A reasonable bandwidth:
plot(waiting ~ eruptions, data=faithful, main="bandwidth=0.5", pch=".")
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", bandwidth=0.5))

# A too-large bandwidth:
plot(waiting ~ eruptions, data=faithful, main="bandwidth=2", pch=".")
lines(ksmooth(faithful$eruptions, faithful$waiting, "normal", bandwidth=2))

###
### An automated cross-validation procedure to choose the bandwidth:

# load the 'sm' package:
library(sm)

hm <- hcv(faithful$eruptions, faithful$waiting, display="lines")
print(hm)  # prints the chosen bandwidth
x11()
sm.regression(faithful$eruptions, faithful$waiting, h=hm,
              xlab="Eruption duration", ylab="Waiting Time")

# For Example A data:
hm <- hcv(exa$x, exa$y, display="lines")
print(hm)  # prints the chosen bandwidth
x11()
sm.regression(exa$x, exa$y, h=hm, xlab="x", ylab="y")

# For Example B data:
hm <- hcv(exb$x, exb$y, display="lines")
# Some problems with the cv approach ...
# Let's try a small value for the bandwidth:
sm.regression(exb$x, exb$y, h=.01, xlab="x", ylab="y")
# Way too small! The cross-validation approach has failed us here.
sm.regression(exb$x, exb$y, h=.5, xlab="x", ylab="y")
# This is possibly better...
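
## Why might cross-validation struggle on Example B? A brute-force sketch of
## the leave-one-out CV criterion for a hand-rolled Nadaraya-Watson estimator
## with a Gaussian kernel. ('nw.at' and 'cv.score' are helper functions
## written here for illustration; they are not part of any package.)

nw.at <- function(x0, x, y, h) {
  w <- dnorm((x - x0)/h)   # Gaussian kernel weights
  sum(w*y)/sum(w)          # kernel-weighted average at x0
}
cv.score <- function(h, x, y) {
  mean(sapply(seq_along(y),
              function(i) (y[i] - nw.at(x[i], x[-i], y[-i], h))^2))
}
hgrid <- seq(0.02, 0.5, length=25)
cv <- sapply(hgrid, cv.score, x=exb$x, y=exb$y)
plot(hgrid, cv, type='l', xlab="bandwidth", ylab="LOO CV score")
hgrid[which.min(cv)]   # the outlier in Example B tends to drag this toward tiny bandwidths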

##
## A smoothing spline estimator
## on the 'Old Faithful' data set:
##

# load the 'faraway' package:
library(faraway)
# loading the 'exa' and 'exb' datasets:
data(exa); data(exb)

# 'Old Faithful' data:
data(faithful)
plot(waiting ~ eruptions, data=faithful, main='Old Faithful', pch=".")
lines(smooth.spline(faithful$eruptions, faithful$waiting))
# uses the default lambda (chosen by cross-validation)

# Example A:
plot(y ~ x, data=exa, main='Example A', pch=".")
lines(exa$x, exa$m)                        # solid curve = true mean function
lines(smooth.spline(exa$x, exa$y), lty=2)  # dashed curve = smooth.spline fit

# Example B:
plot(y ~ x, data=exb, main='Example B', pch=".")
lines(exb$x, exb$m)                        # solid curve = true mean function
lines(smooth.spline(exb$x, exb$y), lty=2)  # dashed curve = smooth.spline fit

# Poor fit for Example B! What was the smoothing parameter?
smooth.spline(exb$x, exb$y)$spar

# Some experimentation produces a better choice for 'spar':
plot(y ~ x, data=exb, main='Example B', pch=".")
lines(exb$x, exb$m)                                # solid curve = true mean function
lines(smooth.spline(exb$x, exb$y, spar=1), lty=2)  # dashed curve = smooth.spline fit

##
## Regression spline estimation
## on the 'Example A' data set:
##

# A picture of piecewise linear spline basis functions:
rhs <- function(x, c) ifelse(x > c, x - c, 0)
# Equally spaced knots at 0, 0.1, 0.2, ..., 0.9:
knots <- (0:9)/10
print(knots)
dm <- outer(exa$x, knots, rhs)
matplot(exa$x, dm, type="l", col=1)

# The resulting piecewise linear spline fit:
g <- lm(exa$y ~ dm)
plot(y ~ x, data=exa, main='Example A', pch=".")
lines(exa$x, predict(g))

# A piecewise linear spline fit using more knots in the areas of more curvature:
newknots <- c(0, 0.5, 0.6, .65, .7, .75, .8, .85, .9, .95)
dm.new <- outer(exa$x, newknots, rhs)
g.new <- lm(exa$y ~ dm.new)
plot(y ~ x, data=exa, main='Example A', pch=".")
lines(exa$x, predict(g.new))

# A visually smoother fit can be obtained using cubic B-splines:
library(splines)  # loads the 'splines' package

# A picture of the cubic B-spline basis functions:
matplot(bs(seq(0, 1, length=1000), df=12), type='l', ylab="", col=1)

# A fit with 12 B-splines:
sm1 <- lm(y ~ bs(x, 12), data=exa)
plot(y ~ x, data=exa, pch=".")
lines(m ~ x, data=exa)                    # solid curve = true mean function
lines(predict(sm1) ~ x, data=exa, lty=2)  # dashed curve = B-spline fit

# A cubic B-spline fit using more knots in the areas of more curvature:
sm2 <- lm(y ~ bs(x, df=NULL, knots=c(.4, .45, .5, .55, .6, .7, .8, .85, .9, .95),
                 degree=3, intercept=TRUE), data=exa)
plot(y ~ x, data=exa, pch=".")
lines(m ~ x, data=exa)                    # solid curve = true mean function
lines(predict(sm2) ~ x, data=exa, lty=2)  # dashed curve = B-spline fit

##
## Local polynomial regression
##

# load the 'faraway' package:
library(faraway)
# loading the 'exa' and 'exb' datasets:
data(exa); data(exb)

# 'Old Faithful' data:
data(faithful)
plot(waiting ~ eruptions, data=faithful, main='Old Faithful', pch=".")
loess.fit.OF <- loess(waiting ~ eruptions, data=faithful)
i <- order(faithful$eruptions)
lines(loess.fit.OF$x[i], loess.fit.OF$fitted[i])  # loess fit with default span (0.75)

# Example A:
plot(y ~ x, data=exa, main='Example A', pch=".")
lines(exa$x, exa$m, lty=1)                         # solid curve = true mean function
loess.fit.A <- loess(y ~ x, data=exa)
lines(loess.fit.A$x, loess.fit.A$fitted, lty=2)    # dashed curve = loess fit with default span (0.75)
loess.fit.A2 <- loess(y ~ x, data=exa, span=0.22)
lines(loess.fit.A2$x, loess.fit.A2$fitted, lty=5)  # long-dashed curve = loess fit with span = 0.22

# Example B:
plot(y ~ x, data=exb, main='Example B', pch=".")
lines(exb$x, exb$m, lty=1)                         # solid curve = true mean function
loess.fit.B <- loess(y ~ x, data=exb)
lines(loess.fit.B$x, loess.fit.B$fitted, lty=2)    # dashed curve = loess fit with default span (0.75)
loess.fit.B2 <- loess(y ~ x, data=exb, span=1)
lines(loess.fit.B2$x, loess.fit.B2$fitted, lty=5)  # long-dashed curve = loess fit with span = 1 (each local fit uses all the data)

###
### Section 11.7: Multivariate Predictors:

library(faraway)
# load the 'sm' package:
library(sm)

# load the 'savings' data set:
data(savings)

# Create a one-column response:
# The response is the savings rate:
y <- savings$sr
# Create a two-column predictor matrix (each column is a different predictor variable).
# The predictors are 'pop15' (percent of population under 15) and 'ddpi' (growth):
x <- cbind(savings$pop15, savings$ddpi)
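
## Rather than fixing the bandwidths by hand below, hcv() can also be asked
## to choose them by cross-validation; it accepts a two-column covariate
## matrix. (A sketch, assuming the default search grid is sensible here.)

hm2 <- hcv(x, y)
print(hm2)
sm.regression(x, y, h=hm2, xlab="pop15", ylab="growth", zlab="savings rate")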

# Kernel regression with a bandwidth of 1 in each dimension:
sm.regression(x, y, h=c(1,1), xlab="pop15", ylab="growth", zlab="savings rate")

# Kernel regression with a bandwidth of 5 in each dimension:
sm.regression(x, y, h=c(5,5), xlab="pop15", ylab="growth", zlab="savings rate")
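
## For comparison, loess() also fits a local-regression surface with two
## predictors. A minimal sketch: predict over a grid and display the fitted
## surface with persp(). (The grid objects below are introduced here for
## illustration only.)

lo.sav <- loess(sr ~ pop15 + ddpi, data=savings)
pop15.grid <- seq(min(savings$pop15), max(savings$pop15), length=30)
ddpi.grid  <- seq(min(savings$ddpi),  max(savings$ddpi),  length=30)
newdat <- expand.grid(pop15=pop15.grid, ddpi=ddpi.grid)
fit.surf <- matrix(predict(lo.sav, newdata=newdat), 30, 30)
persp(pop15.grid, ddpi.grid, fit.surf, theta=45,
      xlab="pop15", ylab="growth", zlab="savings rate")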