############################

# Author: Joshua M. Tebbs  #

# Date: 20 Dec 2009        #

# Update: 25 Jul 2011      #

# Purpose: STAT 520 R code #

# CHAPTER 10               #

############################

# Example 10.1.

# US milk production data

# Page 268

data(milk)

plot(milk,ylab="Amount of milk produced",xlab="Year",type="o")

# Page 269

plot(diff(milk),ylab="Amount of milk produced: First differences",xlab="Year",type="o")

points(y=diff(milk),x=time(diff(milk)),pch=as.vector(season(diff(milk))),cex=1)

# Example 10.2.

# MA(1) simulation with s=12

# Page 271

seasonal.ma1.sim <- arima.sim(list(order = c(0,0,12), ma = c(rep(0,11),0.9)), n = 200)

plot(seasonal.ma1.sim,ylab=expression(Y[t]),xlab="Time",type="o")

# ACF/PACF: Population and sample

# ARMAacf function includes the k=0 lag for ACF

# Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF

# Page 272

par(mfrow=c(2,2))

y = ARMAacf(ma = c(rep(0,11),0.9), lag.max = 50)

y = y[2:51]

plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",

ylab = "Autocorrelation", main = "Population ACF")

abline(h = 0)

y = ARMAacf(ma = c(rep(0,11),0.9), lag.max = 50, pacf=TRUE)

plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",

ylab = "Partial autocorrelation", main = "Population PACF")

abline(h = 0)

acf(seasonal.ma1.sim,main="Sample ACF",lag.max=50)

pacf(seasonal.ma1.sim,main="Sample PACF",lag.max=50)

# Example 10.3.

# AR(1) simulation with s=12

# Page 275

seasonal.ar1.sim <- arima.sim(list(order = c(12,0,0), ar = c(rep(0,11),0.9)), n = 200)

plot(seasonal.ar1.sim,ylab=expression(Y[t]),xlab="Time",type="o")

# ACF/PACF: Population and sample

# ARMAacf function includes the k=0 lag for ACF

# Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF

# Page 276

par(mfrow=c(2,2))

y = ARMAacf(ar = c(rep(0,11),0.8), lag.max = 50)

y = y[2:51]

plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",

ylab = "Autocorrelation", main = "Population ACF")

abline(h = 0)

y = ARMAacf(ar = c(rep(0,11),0.8), lag.max = 50, pacf=TRUE)

plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",

ylab = "Partial autocorrelation", main = "Population PACF")

abline(h = 0)

acf(seasonal.ar1.sim,main="Sample ACF",lag.max=50)

pacf(seasonal.ar1.sim,main="Sample PACF",lag.max=50)

# MA(1) x MA(1)_12 process

# MA(1) x AR(1)_12 process

# Theoretical ACF and PACF

# Page 282

par(mfrow=c(2,2))

phi.ar12=c(rep(0,11),0.9)

theta.Theta=c(-0.5,rep(0,10),-0.9,0.45)

# ARMAacf function includes the k=0 lag for ACF

# Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF

y = acf.ma.ma = ARMAacf(ma=theta.Theta,lag.max=50)

y = y[2:51]

plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",

ylab = "Autocorrelation", main = "MA1*MA1(12) ACF")

abline(h=0)

pacf.ma.ma = ARMAacf(ma=theta.Theta,lag.max=50,pacf=T)

plot(pacf.ma.ma,type='h',xlab="k",ylab="PACF",main = "MA1*MA1(12) PACF")

abline(h=0)

y = acf.ma.ar = ARMAacf(ar=phi.ar12,ma=-0.5,lag.max=50)

y = y[2:51]

plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k",

ylab = "Autocorrelation", main = "MA1*AR1(12) ACF")

abline(h=0)

pacf.ma.ar = ARMAacf(ar=phi.ar12,ma=-0.5,lag.max=50,pacf=T)

plot(pacf.ma.ar,type='h',xlab="k",ylab="PACF",main = "MA1*AR1(12) PACF")

abline(h=0)

# Example 10.4

# Denver, CO Bus/Rail boarding data

# Log-transformed

# Page 283

data(boardings)

# "boardings" is a multivariate dataset (2 series)

# Number of boardings is the first series

boardings = boardings[,1]

plot(boardings,ylab="Number of boardings (log-transformed)",xlab="Year",type="o")

points(y=boardings,x=time(boardings),pch=as.vector(season(boardings)))

# Sample ACF and PACF

# Plots were constructed separately

# Page 285

acf(as.vector(boardings),main="Sample ACF",lag.max=40)

pacf(as.vector(boardings),main="Sample PACF",lag.max=40)

# Fit ARMA(0,3) x ARMA(1,0)_{12}

boardings.arma03.arma10 = arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12))

boardings.arma03.arma10

# Residual analysis

hist(rstandard(boardings.arma03.arma10),xlab="Standardised residuals",main="")

qqnorm(rstandard(boardings.arma03.arma10),main="")

qqline(rstandard(boardings.arma03.arma10))

# Shapiro-Wilk and runs tests

shapiro.test(rstandard(boardings.arma03.arma10))

runs(rstandard(boardings.arma03.arma10))

tsdiag(boardings.arma03.arma10,gof=20,omit.initial=F)

# Overfitting

arima(boardings,order=c(1,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12))

arima(boardings,order=c(0,0,4),method='ML',seasonal=list(order=c(1,0,0),period=12))

arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(2,0,0),period=12))

arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,1),period=12))

# Forecasting

# Full data set: 8/00-3/06

boardings.arma03.arma10.fit = arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12))

# MMSE forecasts on log scale

boardings.arma03.arma10.predict <- predict(boardings.arma03.arma10.fit,n.ahead=12)

round(boardings.arma03.arma10.predict\$pred,3)

round(boardings.arma03.arma10.predict\$se,3)

# Display prediction intervals (12 months ahead) on log scale

year.temp = c(2006.250,2006.333,2006.416,2006.500,2006.583,2006.666,2006.750,2006.833,2006.916,2007,2007.083,2007.166)

# Compute prediction intervals (on log scale)

lower.pi<-boardings.arma03.arma10.predict\$pred-qnorm(0.975,0,1)*boardings.arma03.arma10.predict\$se

upper.pi<-boardings.arma03.arma10.predict\$pred+qnorm(0.975,0,1)*boardings.arma03.arma10.predict\$se

data.frame(Month=year.temp,lower.pi,upper.pi)

# Original series starts at Month = Aug 2000

# Note: Argument n1=c(2003,1) starts plot at Time = Jan 2003

# Note: Argument pch=16 produces a small black circle (MMSE prediction)

plot(boardings.arma03.arma10.fit,n.ahead=12,col='red',type='b',pch=16,n1=c(2003,1),ylab="Number of boardings (log-transformed)",xlab="Year")

# Put horizontal line at ML estimate of overall mean

abline(h=coef(boardings.arma03.arma10.fit)[names(coef(boardings.arma03.arma10.fit))=='intercept'])

# Put prediction interval lines on plot (darker than default)

lines(y=lower.pi,x=year.temp,lwd=2,col="red",lty="dashed")

lines(y=upper.pi,x=year.temp,lwd=2,col="red",lty="dashed")

# MMSE forecasts back-transformed (to original scale)

denver.boardings.predict <- round(exp(boardings.arma03.arma10.predict\$pred + (1/2)*(boardings.arma03.arma10.predict\$se)^2),3)

denver.boardings.predict

# Compute prediction intervals (on original scale)

data.frame(Month=year.temp,lower.pi=exp(lower.pi),upper.pi=exp(upper.pi))

# Example 10.5.

# US milk production data

# Plots were constructed separately

# Page 292

data(milk)

plot(milk,ylab="Amount of milk produced",xlab="Year",type="o")

plot(diff(milk),ylab="First differences",xlab="Year",type="o")

plot(diff(milk,lag=12),ylab="First seasonal differences (s=12)",xlab="Year",type="o")

plot(diff(diff(milk),lag=12),ylab="Combined first differences",xlab="Year",type="o")

# Model specification

# d=1, D=1, and s=12

# Plots were constructed separately

# Page 295

# Combined differences

milk.diff = diff(diff(milk,lag=12))

acf(as.vector(milk.diff),main="Sample ACF",lag.max = 48)

pacf(as.vector(milk.diff),main="Sample PACF",lag.max = 48)

# Fit and diagnosis

# This model was considered but not displayed in the notes.

# ARIMA(0,1,0) x ARIMA(0,1,1)_{12}

milk.arima010.arima011 = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(0,1,1),period=12))

milk.arima010.arima011

# ARIMA(0,1,0) x ARIMA(3,1,0)_{12}

milk.arima010.arima310 = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,0),period=12))

milk.arima010.arima310

# Residual analysis

hist(rstandard(milk.arima010.arima310),xlab="Standardised residuals",main="")

qqnorm(rstandard(milk.arima010.arima310),main="")

qqline(rstandard(milk.arima010.arima310))

# Shapiro-Wilk and runs tests

shapiro.test(rstandard(milk.arima010.arima310))

runs(rstandard(milk.arima010.arima310))

tsdiag(milk.arima010.arima310,gof=30,omit.initial=T)

# Overfitting

arima(milk,order=c(1,1,0),method='CLS',seasonal=list(order=c(3,1,0),period=12))

arima(milk,order=c(0,1,1),method='ML',seasonal=list(order=c(3,1,0),period=12))

arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,0),period=12))

arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,1),period=12))

# Forecasting

# Full data set: 1/94-12/05

milk.arima010.arima310.fit = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,0),period=12))

# MMSE forecasts

milk.arima010.arima310.predict <- predict(milk.arima010.arima310.fit,n.ahead=24)

round(milk.arima010.arima310.predict\$pred,3)

round(milk.arima010.arima310.predict\$se,3)

# Display prediction intervals (24 months ahead)

year.temp = c(2006,2006.083,2006.166,2006.250,2006.333,2006.416,2006.500,2006.583,2006.666,2006.750,2006.833,2006.916)

year.temp.2 = c(year.temp,year.temp+1)

# Compute prediction intervals

lower.pi<-milk.arima010.arima310.predict\$pred-qnorm(0.975,0,1)*milk.arima010.arima310.predict\$se

upper.pi<-milk.arima010.arima310.predict\$pred+qnorm(0.975,0,1)*milk.arima010.arima310.predict\$se

data.frame(Month=year.temp.2,lower.pi,upper.pi)

# Original series starts at Month = Jan 1994

# Note: Argument n1=c(2004,1) starts plot at Time = Jan 2004

# Note: Argument pch=16 produces a small black circle (MMSE prediction)

plot(milk.arima010.arima310.fit,n.ahead=24,col='red',type='b',pch=16,n1=c(2004,1),ylab="Amount of milk produced",xlab="Year")

# Put prediction interval lines on plot (darker than default)

lines(y=lower.pi,x=year.temp.2,lwd=2,col="red",lty="dashed")

lines(y=upper.pi,x=year.temp.2,lwd=2,col="red",lty="dashed")

# Example 10.6

# Australian clay brick production

# Page 301

brick <- ts(read.table(file = "C:\\Users\\Tebbs\\My Documents\\texfiles\\Classes\\USC\\stat520\\f11\\data\\brick.txt"),start=1956,freq=4)

plot(brick,ylab="Australian clay brick production (in millions)",xlab="Time",type="o")

# Box Cox analysis

BoxCox.ar(brick)

# Square root transformation

sqrt.brick = sqrt(brick)

# Plots were constructed separately

plot(sqrt.brick,ylab="Brick production (Square-root transformed)",xlab="Time",type="o")

plot(diff(sqrt.brick),ylab="First differences",xlab="Year",type="o")

plot(diff(sqrt.brick,lag=4),ylab="First seasonal differences (s=4)",xlab="Year",type="o")

plot(diff(diff(sqrt.brick),lag=4),ylab="Combined first differences",xlab="Year",type="o")

# Combined differences of sqrt.brick

sqrt.brick.diff = diff(diff(sqrt.brick),lag=4)

# Plots were constructed separately

acf(as.vector(sqrt.brick.diff),main="Sample ACF",lag.max=32)

pacf(as.vector(sqrt.brick.diff),main="Sample PACF",lag.max=32)

# Initial model choice

sqrt.brick.arima010.arima410 = arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,0),period=4))

sqrt.brick.arima010.arima410

# Residual analysis

hist(rstandard(sqrt.brick.arima010.arima410),xlab="Standardised residuals",main="")

qqnorm(rstandard(sqrt.brick.arima010.arima410),main="")

qqline(rstandard(sqrt.brick.arima010.arima410))

# Shapiro-Wilk and runs tests

shapiro.test(rstandard(sqrt.brick.arima010.arima410))

runs(rstandard(sqrt.brick.arima010.arima410))

tsdiag(sqrt.brick.arima010.arima410,gof=20,omit.initial=T)

# Overfitting

arima(sqrt.brick,order=c(1,1,0),method='ML',seasonal=list(order=c(4,1,0),period=4))

arima(sqrt.brick,order=c(0,1,1),method='ML',seasonal=list(order=c(4,1,0),period=4))

arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(5,1,0),period=4))

arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,1),period=4))