# Chapter 10 R examples # Exhibit 10.1 library(TSA) data(co2) # The stats package has another dataset of the same name 'co2', so # make sure that you have loaded the TSA package before loading the co2 data. #win.graph(width=4.875, height=3,pointsize=8) plot(co2,ylab='CO2',main='Monthly Carbon Dioxide Levels at Alert, NWT, Canada') # Exhibit 10.2 plot(window(co2,start=c(2000,1)),main='Carbon Dioxide Levels with Monthly Symbols', ylab='CO2') # specify the y-label as "CO2", otherwise it will use "window(co2,start=c(2000,1))" Month=c("J","F","M","A","M","J","J","A","S","O","N","D") points(window(co2,start=c(2000,1)),pch=Month) # First let's try a seasonal trend model, plus a time trend # (that is, using the methods of Chapter 3): # A seasonal means model, plus a time trend data(co2) month.=season(co2) # the period sign is included to make the printout from # the commands two lines below clearer model.s=lm(co2~time(co2) + month.-1) # -1 removes the intercept term summary(model.s) # Checking the fit of the model with a residual plot, and a plot of the ACF of the residuals: plot(rstandard(model.s),ylab='Standardized residuals',type='l') abline(h=0) # Some pattern in the residual plot is still evident. acf(residuals(model.s),main='Sample ACF of Residuals from Seasonal Means Model for co2') # Note the significant autocorrelations among the residuals. #### A harmonic regression plus time trend model: # first creates the first pair of harmonic functions and then fit the model har.=harmonic(co2,m=1) # The m=1 creates one pair of harmonic functions (one cosine term and one sine term). # If you want additional pairs, you could increase m. model.h=lm(co2~har. + time(co2)) summary(model.h) plot(rstandard(model.h),ylab='Standardized residuals',type='l') abline(h=0) # A pattern in the residual plot is very clear. acf(residuals(model.h),main='Sample ACF of Residuals from Harmonic Model for co2') # Still significant autocorrelations among the residuals. # What if we add more cosine and sine terms? har.=harmonic(co2,m=3) model.h3=lm(co2~har. + time(co2)) summary(model.h3) plot(rstandard(model.h3),ylab='Standardized residuals',type='l') abline(h=0) acf(residuals(model.h3),main='Sample ACF of Residuals from Harmonic Model for co2') # Somewhat better, but far from perfect. # Exhibit 10.3 # First divide the plotting area into a 1 by 2 matrix of plots, i.e. one row # of two figures. #win.graph(width=4.875, height=3,pointsize=8) par(mfrow=c(1,2)) # The ARMAacf function computes the theoretical acf of an ARMA model. # Note that the seasonal MA part (1+0.5B)(1+0.8B**12)=(1+0.5B+0.8B**12+0.4B**13). plot(y=ARMAacf(ma=c(0.5,rep(0,10),0.8,0.4),lag.max=13)[-1],x=1:13,type='h', xlab='Lag k',ylab=expression(rho[k]),axes=F,ylim=c(0,0.6)) points(y=ARMAacf(ma=c(0.5,rep(0,10),0.8,0.4),lag.max=13)[-1],x=1:13,pch=20) abline(h=0) axis(1,at=1:13, labels=c(1,NA,3,NA,5,NA,7,NA,9,NA,11,NA,13)) axis(2) text(x=7,y=.5,labels=expression(list(theta==-0.5,Theta==-0.8))) plot(y=ARMAacf(ma=c(-0.5,rep(0,10),0.8,-0.4),lag.max=13)[-1],x=1:13,type='h', xlab='Lag k',ylab=expression(rho[k]),axes=F) points(y=ARMAacf(ma=c(-0.5,rep(0,10),0.8,-0.4),lag.max=13)[-1],x=1:13,pch=20) abline(h=0) axis(1,at=1:13, labels=c(1,NA,3,NA,5,NA,7,NA,9,NA,11,NA,13)) axis(2) text(x=7,y=.35,labels=expression(list(theta==0.5,Theta==-0.8))) # Exhibit 10.4 plot(y=ARMAacf(ar=c(rep(0,11), 0.75), ma=0.4,lag.max=61)[-1],x=1:61,type='h', xlab='Lag k',ylab=expression(rho[k]),axes=F,ylim=c(-0.4,.8),xlim=c(0,61)) points(y=ARMAacf(ar=c(rep(0,11), 0.75), ma=0.4,lag.max=61)[c(1,11,12,13, 23,24,25,35,36,37,47,48,49,59,60,61)+1], x=c(1,11,12,13,23,24,25,35,36,37,47,48,49,59,60,61),pch=20) abline(h=0) axis(1,at=c(0,1,12,24,36,48,60,61),labels=c(NA,1,12,24,36,48,60,NA)) axis(2, at=c(-0.4,0.0,0.4,0.8)) text(x=40,y=.8,labels=expression(list(Phi==0.75,theta==-0.4))) plot(y=ARMAacf(ar=c(rep(0,11), 0.75), ma=-0.4,lag.max=61)[-1],x=1:61,type='h', xlab='Lag k',ylab=expression(rho[k]),axes=F,ylim=c(-0.4,.8)) points(y=ARMAacf(ar=c(rep(0,11), 0.75), ma=-0.4,lag.max=61)[c(1,11,12,13, 23,24,25,35,36,37,47,48,49,59,60,61)+1], x=c(1,11,12,13,23,24,25,35,36,37,47,48,49,59,60,61),pch=20) abline(h=0) axis(1,at=c(0,1,12,24,36,48,60,61),labels=c(NA,1,12,24,36,48,60,NA)) axis(2, at=c(-0.4,0.0,0.4,0.8)) text(x=40,y=.8,labels=expression(list(Phi==0.75,theta==0.4))) #### An example of a simulated seasonal AR series, with Phi=0.9, from Shumway and Stoffer: library(astsa) set.seed(666) phi=c(rep(0,11),0.9) sAR=arima.sim(list(order=c(12,0,0),ar=phi),n=37) sAR=ts(sAR,freq=12) layout(matrix(c(1,2,1,3),nc=2)) par(mar=c(3,3,2,1),mgp=c(1.6,0.6,0)) plot(sAR, axes=F, main='seasonal AR(P=1)', xlab='year',type='c') Months=c('J','F','M','A','M','J','J','A','S','O','N','D') points(sAR, pch=Months, cex=1.25,font=4,col=1:4) axis(1,1:4);abline(v=1:4,lty=2,col=gray(0.6)) axis(2); box() # Plotting the TRUE ACF and PACF for this model: ACF=ARMAacf(ar=phi,ma=0,100) PACF=ARMAacf(ar=phi,ma=0,100,pacf=T) plot(ACF,type='h',xlab='lag',ylim=c(-.1,1)) abline(h=0) plot(PACF,type='h',xlab='lag',ylim=c(-.1,1)) abline(h=0) # Compare to the SAMPLE ACF and PACF for this simulated series: x11() # opens new plotting window par(mfrow=c(2,1)) acf(sAR,lag=36) pacf(sAR,lag=36) par(mfrow=c(1,1)) # An example of a real seasonal data set # (differenced logged monthly births in U.S. over time): par(mfrow=c(2,1)) plot(birth) plot(diff(log(birth))) acf(diff(log(birth)),61) pacf(diff(log(birth)),61) par(mfrow=c(1,1)) ## Analysis of the co2 data: library(TSA) data(co2) # Exhibit 10.5 par(mfrow=c(1,1)) acf(as.vector(co2),lag.max=36, main=expression(Sample~~ACF~~of~~CO[2]~~Levels)) # Exhibit 10.6 plot(diff(co2),main=expression(Time~~Series~~Plot~~of~~the~~First~~Differences~~of~~ CO[2]~~Levels), ylab=expression(First~~Difference~~of~~CO[2])) # Exhibit 10.7 acf(as.vector(diff(co2)),lag.max=36, main=expression(Sample~~ACF~~of~~the~~First~~Differences~~of~~ CO[2]~~Levels)) # Exhibit 10.8 plot(diff(diff(co2),lag=12),main=expression(Time~~Series~~Plot~~of~~the~~First~~and~~ Seasonal~~Differences~~of~~CO[2]~~Levels), ylab=expression(First~~and~~Seasonal~~Difference~~of~~C~O[2])) # Exhibit 10.9 acf(as.vector(diff(diff(co2),lag=12)),lag.max=36,ci.type='ma', main=expression(Sample~~ACF~~of~~the~~First~~and~~Seasonal~~Differences~~of~~ CO[2]~~Levels)) # Exhibit 10.10 # Do remember that in the book the MA parameterization uses the minus convention but R uses # the positive convention, lest you find the estimates from R to be different from the reported values # in the book! m1.co2=arima(co2,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12)) m1.co2 # Exhibit 10.11 # first thirteen residuals are omitted from the plot. plot(window(rstandard(m1.co2),start=c(1995,2)),ylab='Standardized Residuals', type='o', main=expression(Residuals~~from~~the~~ARIMA(list(0,1,1))%*%(list(0,1,1))[12]~~Model)) abline(h=0) # Exhibit 10.12 acf(as.vector(window(rstandard(m1.co2),start=c(1995,2))),lag.max=36, main=expression(ACF~~of~~Residuals~~from~~the~~ARIMA(list(0,1,1))%*%(list(0,1,1))[12]~~Model)) # The figures in the above two exhibits can also be obtained as the first two sub-plots from the # following command. The plotting convention is slightly different for the first sub-plot. #win.graph(width=4.875, height=5,pointsize=8) # by default, the first 13 residuals are ommited; please use ?tsdiag to learn # more about the function. tsdiag(m1.co2, gof.lag=36) # Exhibit 10.13 hist(window(rstandard(m1.co2),start=c(1995,2)),xlab='Standardized Residuals', main=expression(Residuals~~from~~the~~ARIMA(list(0,1,1))%*%(list(0,1,1))[12]~~Model)) # Exhibit 10.14 #win.graph(width=4, height=4,pointsize=8) qqnorm(window(rstandard(m1.co2),start=c(1995,2)),main=expression(Normal~~Q-Q~~Plot)) title(main=expression(Residuals~~from~~the~~ARIMA(list(0,1,1))%*%(list(0,1,1))[12]~~Model), line=3) qqline(window(rstandard(m1.co2),start=c(1995,2))) # Shapiro-Wilk test on the residuals: shapiro.test(rstandard(m1.co2)) # Overfitting: # Exhibit 10.15 m2.co2=arima(co2,order=c(0,1,2),seasonal=list(order=c(0,1,1),period=12)) m2.co2 #### Analysis of the airline data: library(astsa) data(AirPassengers) y = AirPassengers plot(y,type='o') # nonstationary, with increasing variance over time # Take log transformation, then differences: ly=log(y); dly=diff(ly); ddly=diff(dly, lag=12) plot.ts(cbind(y,ly,dly,ddly), yax.flip=T, main="") # a plot to show seasonality: par(mfrow=c(2,1)) monthplot(dly); monthplot(ddly) par(mfrow=c(1,1)) # Sample ACF and PACF of the first-differenced and seasonally differenced series: acf2(ddly, 50) # ACF is cutting off after 1 seasonal lag, and PACF is tailing off at 1s, 2s, 3s,...? # A little ambiguous, but we will try the SARIMA model with Q=1: # At the lower lags 1,2,3,... both the ACF and PACF are tailing off. # We will first try an ARMA(p=1,q=1) for the nonseasonal piece: sarima(ly, 1,1,1, 0,1,1, 12) # specifies p=1,d=1,q=1, P=0,D=1,Q=1, s=12. # The diagnostics look good, but the nonseasonal AR coefficient is nonsignificant. # Let's try a couple of simpler models: sarima(ly, 0,1,1, 0,1,1, 12) # specifies p=0,d=1,q=1, P=0,D=1,Q=1, s=12. sarima(ly, 1,1,0, 0,1,1, 12) # specifies p=1,d=1,q=0, P=0,D=1,Q=1, s=12. # The information criteria prefer the # ARIMA(0,1,1)x(0,1,1)_12 model (the "airline model"). # The diagnostics look good for this model as well, so this will be our final model. ### Forecasting with Seasonal Models: # Forecasting with the co2 series: library(TSA); data(co2) # Exhibit 10.16 #win.graph(width=4.875, height=3,pointsize=8) plot(m1.co2,n1=c(2003,1),n.ahead=24,col='red',xlab='Year',type='o', ylab=expression(CO[2]~~Levels), main=expression(Forecasts~~and~~Forecast~~Limits~~'for'~~the~~CO[2]~~Model)) # Note that for is a reserved word in R so it has to be enclosed in quotation marks. # Exhibit 10.17 plot(m1.co2,n1=c(2004,1),n.ahead=48,col='red',xlab='Year',type='o', ylab=expression(CO[2]~~Levels), main=expression(Long~~Term~~Forecasts~~'for'~~the~~CO[2]~~Model)) # Showing the forecasts and standard errors: predict(m1.co2, n.ahead=48) # Forecasting with the airline passengers data: # (this forecasts 24 time units (months, here) into the future): sarima.for(ly, 24, 0,1,1, 0,1,1, 12) # An example of a series that is differenced, but NOT seasonally differenced: # Forecasting with the U.S. births data: lbirth = log(birth) plot.ts(cbind(birth,lbirth,diff(lbirth)), yax.flip=T, main="") # Recall we considered a seasonal AR(P=1) or AR(P=2) model # for the differenced logged birth data: acf2(diff(lbirth),36) # Probably P=2 or even P=3 is best, but note there are # substantial autocorrelations at the lower lags. # may need a multiplicative model with nonzero p and/or q. m.lb1=arima(lbirth,order=c(0,1,0),seasonal=list(order=c(1,0,0),period=12)) m.lb1 m.lb2=arima(lbirth,order=c(0,1,0),seasonal=list(order=c(2,0,0),period=12)) m.lb2 m.lb3=arima(lbirth,order=c(0,1,0),seasonal=list(order=c(3,0,0),period=12)) m.lb3 # The seasonal AR(P=3) model seems preferred. # Trying a multiplicative model (AIC is even better, but more parameters to estimate): m.lb3.mult=arima(lbirth,order=c(2,1,2),seasonal=list(order=c(3,0,0),period=12)) m.lb3.mult # Checking diagnostics for a couple of candidate models: sarima(lbirth, 0,1,0, 3,0,0, 12) # some problems with residuals' ACF sarima(lbirth, 2,1,2, 3,0,0, 12) # looks better # Forecasting 24 months into the future: sarima.for(lbirth, 24, 2,1,2, 3,0,0, 12)