############################ # Author: Joshua M. Tebbs # # Date: 20 Dec 2009 # # Update: 25 Jul 2011 # # Purpose: STAT 520 R code # # CHAPTER 10 # ############################ # Example 10.1. # US milk production data # Page 268 data(milk) plot(milk,ylab="Amount of milk produced",xlab="Year",type="o") # Page 269 plot(diff(milk),ylab="Amount of milk produced: First differences",xlab="Year",type="o") points(y=diff(milk),x=time(diff(milk)),pch=as.vector(season(diff(milk))),cex=1) # Example 10.2. # MA(1) simulation with s=12 # Page 271 seasonal.ma1.sim = arima.sim(list(order = c(0,0,12), ma = c(rep(0,11),0.9)), n = 200) plot(seasonal.ma1.sim,ylab=expression(Y[t]),xlab="Time",type="o") # ACF/PACF: Population and sample # ARMAacf function includes the k=0 lag for ACF # Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF # Page 272 par(mfrow=c(2,2)) y = ARMAacf(ma = c(rep(0,11),0.9), lag.max = 50) y = y[2:51] plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k", ylab = "Autocorrelation", main = "Population ACF") abline(h = 0) y = ARMAacf(ma = c(rep(0,11),0.9), lag.max = 50, pacf=TRUE) plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k", ylab = "Partial autocorrelation", main = "Population PACF") abline(h = 0) acf(seasonal.ma1.sim,main="Sample ACF",lag.max=50) pacf(seasonal.ma1.sim,main="Sample PACF",lag.max=50) # Example 10.3. # AR(1) simulation with s=12 # Page 275 seasonal.ar1.sim = arima.sim(list(order = c(12,0,0), ar = c(rep(0,11),0.9)), n = 200) plot(seasonal.ar1.sim,ylab=expression(Y[t]),xlab="Time",type="o") # ACF/PACF: Population and sample # ARMAacf function includes the k=0 lag for ACF # Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF # Page 276 par(mfrow=c(2,2)) y = ARMAacf(ar = c(rep(0,11),0.8), lag.max = 50) y = y[2:51] plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k", ylab = "Autocorrelation", main = "Population ACF") abline(h = 0) y = ARMAacf(ar = c(rep(0,11),0.8), lag.max = 50, pacf=TRUE) plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k", ylab = "Partial autocorrelation", main = "Population PACF") abline(h = 0) acf(seasonal.ar1.sim,main="Sample ACF",lag.max=50) pacf(seasonal.ar1.sim,main="Sample PACF",lag.max=50) # MA(1) x MA(1)_12 process # MA(1) x AR(1)_12 process # Theoretical ACF and PACF # Page 282 par(mfrow=c(2,2)) phi.ar12=c(rep(0,11),0.9) theta.Theta=c(-0.5,rep(0,10),-0.9,0.45) # ARMAacf function includes the k=0 lag for ACF # Use y = y[2:51] to remove k=0 lag from ARMAacf output; only for ACF y = acf.ma.ma = ARMAacf(ma=theta.Theta,lag.max=50) y = y[2:51] plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k", ylab = "Autocorrelation", main = "MA1*MA1(12) ACF") abline(h=0) pacf.ma.ma = ARMAacf(ma=theta.Theta,lag.max=50,pacf=T) plot(pacf.ma.ma,type='h',xlab="k",ylab="PACF",main = "MA1*MA1(12) PACF") abline(h=0) y = acf.ma.ar = ARMAacf(ar=phi.ar12,ma=-0.5,lag.max=50) y = y[2:51] plot(y, x = 1:50, type = "h", ylim = c(-1,1), xlab = "k", ylab = "Autocorrelation", main = "MA1*AR1(12) ACF") abline(h=0) pacf.ma.ar = ARMAacf(ar=phi.ar12,ma=-0.5,lag.max=50,pacf=T) plot(pacf.ma.ar,type='h',xlab="k",ylab="PACF",main = "MA1*AR1(12) PACF") abline(h=0) # Example 10.4 # Denver, CO Bus/Rail boarding data # Log-transformed # Page 283 data(boardings) # "boardings" is a multivariate dataset (2 series) # Number of boardings is the first series boardings = boardings[,1] plot(boardings,ylab="Number of boardings (log-transformed)",xlab="Year",type="o") points(y=boardings,x=time(boardings),pch=as.vector(season(boardings))) # Sample ACF and PACF # Plots were constructed separately # Page 285 acf(as.vector(boardings),main="Sample ACF",lag.max=40) pacf(as.vector(boardings),main="Sample PACF",lag.max=40) # Fit ARMA(0,3) x ARMA(1,0)_{12} boardings.arma03.arma10 = arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12)) boardings.arma03.arma10 # Residual analysis hist(rstandard(boardings.arma03.arma10),xlab="Standardised residuals",main="") qqnorm(rstandard(boardings.arma03.arma10),main="") qqline(rstandard(boardings.arma03.arma10)) # Shapiro-Wilk and runs tests shapiro.test(rstandard(boardings.arma03.arma10)) runs(rstandard(boardings.arma03.arma10)) tsdiag(boardings.arma03.arma10,gof=20,omit.initial=F) # Overfitting arima(boardings,order=c(1,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12)) arima(boardings,order=c(0,0,4),method='ML',seasonal=list(order=c(1,0,0),period=12)) arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(2,0,0),period=12)) arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,1),period=12)) # Forecasting # Full data set: 8/00-3/06 boardings.arma03.arma10.fit = arima(boardings,order=c(0,0,3),method='ML',seasonal=list(order=c(1,0,0),period=12)) # MMSE forecasts on log scale boardings.arma03.arma10.predict = predict(boardings.arma03.arma10.fit,n.ahead=12) round(boardings.arma03.arma10.predict$pred,3) round(boardings.arma03.arma10.predict$se,3) # Display prediction intervals (12 months ahead) on log scale year.temp = c(2006.250,2006.333,2006.416,2006.500,2006.583,2006.666,2006.750,2006.833,2006.916,2007,2007.083,2007.166) # Compute prediction intervals (on log scale) lower.pi=boardings.arma03.arma10.predict$pred-qnorm(0.975,0,1)*boardings.arma03.arma10.predict$se upper.pi=boardings.arma03.arma10.predict$pred+qnorm(0.975,0,1)*boardings.arma03.arma10.predict$se data.frame(Month=year.temp,lower.pi,upper.pi) # Original series starts at Month = Aug 2000 # Note: Argument n1=c(2003,1) starts plot at Time = Jan 2003 # Note: Argument pch=16 produces a small black circle (MMSE prediction) plot(boardings.arma03.arma10.fit,n.ahead=12,col='red',type='b',pch=16,n1=c(2003,1),ylab="Number of boardings (log-transformed)",xlab="Year") # Put horizontal line at ML estimate of overall mean abline(h=coef(boardings.arma03.arma10.fit)[names(coef(boardings.arma03.arma10.fit))=='intercept']) # Put prediction interval lines on plot (darker than default) lines(y=lower.pi,x=year.temp,lwd=2,col="red",lty="dashed") lines(y=upper.pi,x=year.temp,lwd=2,col="red",lty="dashed") # MMSE forecasts back-transformed (to original scale) denver.boardings.predict = round(exp(boardings.arma03.arma10.predict$pred + (1/2)*(boardings.arma03.arma10.predict$se)^2),3) denver.boardings.predict # Compute prediction intervals (on original scale) data.frame(Month=year.temp,lower.pi=exp(lower.pi),upper.pi=exp(upper.pi)) # Example 10.5. # US milk production data # Plots were constructed separately # Page 292 data(milk) plot(milk,ylab="Amount of milk produced",xlab="Year",type="o") plot(diff(milk),ylab="First differences",xlab="Year",type="o") plot(diff(milk,lag=12),ylab="First seasonal differences (s=12)",xlab="Year",type="o") plot(diff(diff(milk),lag=12),ylab="Combined first differences",xlab="Year",type="o") # Model specification # d=1, D=1, and s=12 # Plots were constructed separately # Page 295 # Combined differences milk.diff = diff(diff(milk,lag=12)) acf(as.vector(milk.diff),main="Sample ACF",lag.max = 48) pacf(as.vector(milk.diff),main="Sample PACF",lag.max = 48) # Fit and diagnosis # This model was considered but not displayed in the notes. # ARIMA(0,1,0) x ARIMA(0,1,1)_{12} milk.arima010.arima011 = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(0,1,1),period=12)) milk.arima010.arima011 # ARIMA(0,1,0) x ARIMA(3,1,0)_{12} milk.arima010.arima310 = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,0),period=12)) milk.arima010.arima310 # Residual analysis hist(rstandard(milk.arima010.arima310),xlab="Standardised residuals",main="") qqnorm(rstandard(milk.arima010.arima310),main="") qqline(rstandard(milk.arima010.arima310)) # Shapiro-Wilk and runs tests shapiro.test(rstandard(milk.arima010.arima310)) runs(rstandard(milk.arima010.arima310)) tsdiag(milk.arima010.arima310,gof=30,omit.initial=T) # Overfitting arima(milk,order=c(1,1,0),method='CLS',seasonal=list(order=c(3,1,0),period=12)) arima(milk,order=c(0,1,1),method='ML',seasonal=list(order=c(3,1,0),period=12)) arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,0),period=12)) arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,1),period=12)) # Forecasting # Full data set: 1/94-12/05 milk.arima010.arima310.fit = arima(milk,order=c(0,1,0),method='ML',seasonal=list(order=c(3,1,0),period=12)) # MMSE forecasts milk.arima010.arima310.predict = predict(milk.arima010.arima310.fit,n.ahead=24) round(milk.arima010.arima310.predict$pred,3) round(milk.arima010.arima310.predict$se,3) # Display prediction intervals (24 months ahead) year.temp = c(2006,2006.083,2006.166,2006.250,2006.333,2006.416,2006.500,2006.583,2006.666,2006.750,2006.833,2006.916) year.temp.2 = c(year.temp,year.temp+1) # Compute prediction intervals lower.pi=milk.arima010.arima310.predict$pred-qnorm(0.975,0,1)*milk.arima010.arima310.predict$se upper.pi=milk.arima010.arima310.predict$pred+qnorm(0.975,0,1)*milk.arima010.arima310.predict$se data.frame(Month=year.temp.2,lower.pi,upper.pi) # Original series starts at Month = Jan 1994 # Note: Argument n1=c(2004,1) starts plot at Time = Jan 2004 # Note: Argument pch=16 produces a small black circle (MMSE prediction) plot(milk.arima010.arima310.fit,n.ahead=24,col='red',type='b',pch=16,n1=c(2004,1),ylab="Amount of milk produced",xlab="Year") # Put prediction interval lines on plot (darker than default) lines(y=lower.pi,x=year.temp.2,lwd=2,col="red",lty="dashed") lines(y=upper.pi,x=year.temp.2,lwd=2,col="red",lty="dashed") # Example of using actual data to verify model # Milk data # Tim, 12-1-2015 # create two subsets # ss1=first 10 years of data # ss2=last 2 years of data ss1=window(milk,start=c(1994,1),end=c(2003,12)) ss2=window(milk,start=c(2004,1),end=c(2005,12)) f2=arima(ss1,order=c(0,1,0),seasonal=list(order=c(3,1,0),period=12)) # fit model using ss1, see how well we predict last two years plot(f2,n.ahead=24,col='red',type='n',pch=1,n1=c(2002,1),ylab="Amount of milk produced",xlab="Year") points(ss2,pch=16) # Example 10.6 # Australian clay brick production # Page 301 brick = ts(read.table(file = "http://people.stat.sc.edu/hansont/stat520/brick.txt"),start=1956,freq=4) plot(brick,ylab="Australian clay brick production (in millions)",xlab="Time",type="o") # Box Cox analysis BoxCox.ar(brick) # Square root transformation sqrt.brick = sqrt(brick) # Plots were constructed separately plot(sqrt.brick,ylab="Brick production (Square-root transformed)",xlab="Time",type="o") plot(diff(sqrt.brick),ylab="First differences",xlab="Year",type="o") plot(diff(sqrt.brick,lag=4),ylab="First seasonal differences (s=4)",xlab="Year",type="o") plot(diff(diff(sqrt.brick),lag=4),ylab="Combined first differences",xlab="Year",type="o") # Combined differences of sqrt.brick sqrt.brick.diff = diff(diff(sqrt.brick),lag=4) # Plots were constructed separately acf(as.vector(sqrt.brick.diff),main="Sample ACF",lag.max=32) pacf(as.vector(sqrt.brick.diff),main="Sample PACF",lag.max=32) # Initial model choice sqrt.brick.arima010.arima410 = arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,0),period=4)) sqrt.brick.arima010.arima410 # Residual analysis hist(rstandard(sqrt.brick.arima010.arima410),xlab="Standardised residuals",main="") qqnorm(rstandard(sqrt.brick.arima010.arima410),main="") qqline(rstandard(sqrt.brick.arima010.arima410)) # Shapiro-Wilk and runs tests shapiro.test(rstandard(sqrt.brick.arima010.arima410)) runs(rstandard(sqrt.brick.arima010.arima410)) tsdiag(sqrt.brick.arima010.arima410,gof=20,omit.initial=T) # Overfitting arima(sqrt.brick,order=c(1,1,0),method='ML',seasonal=list(order=c(4,1,0),period=4)) arima(sqrt.brick,order=c(0,1,1),method='ML',seasonal=list(order=c(4,1,0),period=4)) arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(5,1,0),period=4)) arima(sqrt.brick,order=c(0,1,0),method='ML',seasonal=list(order=c(4,1,1),period=4))