# Chapter 11, part 2 R code library(TSA) # Exhibit 11.1 ## Plot of the airmiles time series #win.graph(width=4.875, height=2.5,pointsize=8) data(airmiles) plot(log(airmiles),ylab='Log(airmiles)',xlab='Year', ) points(y=log(airmiles),x=time(airmiles),pch=as.vector(season(airmiles))) library(astsa) acf2(window(log(airmiles),end=c(2001,8)), 48) # Seasonality is clearly apparent, # and so is the nonstationarity from the initial time series plot. # # After taking first differences and seasonal differences, we see a pattern indicating an MA(1) model # Exhibit 11.5 acf(as.vector(diff(diff(window(log(airmiles),end=c(2001,8)),12))),lag.max=48) # ACF values cutting off after lag 1 ... pacf(as.vector(diff(diff(window(log(airmiles),end=c(2001,8)),12))),lag.max=48) # PACF values tailing off? # So try ARIMA(0,1,1)x(0,1,0)_12 model for the pre-intervention data: air.pre=arima(window(log(airmiles),end=c(2001,8)),order=c(0,1,1),seasonal=list(order=c(0,1,0),period=12) ) tsdiag(air.pre) # Note one major outlier in the residuals of the seasonal model. We will discuss more later. ### Handling Outliers # Exhibit 11.9 set.seed(12345) y=arima.sim(model=list(ar=.8,ma=.5),n.start=158,n=100) y[10] y[10]=10 y=ts(y,freq=1,start=1) plot(y,type='o') acf(y) pacf(y) eacf(y) m1=arima(y,order=c(1,0,0)) m1 detectAO(m1) detectAO(m1, robust=F) # non-robust version of the test has less power to detect outliers detectIO(m1) tsdiag(m1) # note the major outlier m2=arima(y,order=c(1,0,0),xreg=data.frame(AO=seq(y)==10)) m2 detectAO(m2) detectIO(m2) tsdiag(m2) m3=arima(y,order=c(1,0,1),xreg=data.frame(AO=seq(y)==10)) detectAO(m3) detectIO(m3) tsdiag(m3) m3 plot(y,type='b') arrows(30,6, 11,9.8, length=.1,angle=20) text(34,6.2, "AO") # Exhibit 11.10 library(TSA) data(co2) m1.co2=arima(co2,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12)) m1.co2 tsdiag(m1.co2) # note the one large standardized residual detectAO(m1.co2) detectIO(m1.co2) time(co2) time(co2)[57] m4.co2=arimax(co2,order=c(0,1,1),seasonal=list(order=c(0,1,1),period=12), io=c(57)) m4.co2 tsdiag(m4.co2) # the signs of the MA coefficient estimates appear to be opposite to # those in the book, because R uses the plus convention in the MA parameterization. ## Back to the airmiles example: air.m1.tentative=arimax(log(airmiles),order=c(0,1,1),seasonal=list(order=c(0,1,0), period=12),xtransf=data.frame(I911=1*(seq(airmiles)==69), I911=1*(seq(airmiles)==69)), transfer=list(c(0,0),c(1,0)),method='ML') # Transfer function components are incorporated by the xtransf and transfer # arguments. # Here, the transfer function consists of two parts omega0*P(t) and # omega1/(1-omega2*B)P(t) where the inputs of the two transfer # functions are identical and equals the dummy variable that is 1 at September # 2001 (the 69th data point) and zero otherwise. # xtransf is a matrix whose columns are the input variables. # transfer is a list consisting of the pair of (AR order, MA order) of each # transfer function, which in this examples is (0,0) and (1,0). air.m1.tentative tsdiag(air.m1.tentative) # Note the significant lag-12 value in the ACF plot for the residuals. # We should add a seasonal MA(1) term to account for this: detectAO(air.m1.tentative) detectIO(air.m1.tentative) # We also include the three additive outliers using # dummy variables in the xreg argument: # The detectAO function marks observation 25 as an AO, # but due to the seasonal differencing and first differencing, # we see that the observations 12 and 13 are really the ones that break from the pattern: plot(log(airmiles),ylab='Log(airmiles)',xlab='Year', ) points(y=log(airmiles),x=time(airmiles),pch=as.vector(season(airmiles))) points(1996.917, log(airmiles)[12],col='red',cex=2.5) points(1997, log(airmiles)[13],col='red',cex=2.5) points(2002.917, log(airmiles)[84],col='red',cex=2.5) # Exhibit 11.6 air.m1=arimax(log(airmiles),order=c(0,1,1),seasonal=list(order=c(0,1,1), period=12),xtransf=data.frame(I911=1*(seq(airmiles)==69), I911=1*(seq(airmiles)==69)), transfer=list(c(0,0),c(1,0)),xreg=data.frame(Dec96=1*(seq(airmiles)==12), Jan97=1*(seq(airmiles)==13),Dec02=1*(seq(airmiles)==84)),method='ML') # Additive outliers are incorporated as dummy variables in xreg. # Transfer function components are incorporated by the xtransf and transfer # arguments. # Here, the transfer function consists of two parts omega0*P(t) and # omega1/(1-omega2*B)P(t) where the inputs of the two transfer # functions are identical and equals the dummy variable that is 1 at September # 2001 (the 69th data point) and zero otherwise. # xtransf is a matrix whose columns are the input variables. # transfer is a list consisting of the pair of (AR order, MA order) of each # transfer function, which in this example is (0,0) and (1,0). # Note in our model for m_t: # The first term has order 0 for B in both numerator and denominator (B does not appear) # The second term has order 0 for B in the numerator and 1 in the denominator. # See pages 451-454 of the Cryer and Chan textbook for full details of specifying # the transfer argument with an intervention model. air.m1 # Exhibit 11.7 plot(log(airmiles),ylab="log(airmiles)") # The line plot shows the observed data. points(fitted(air.m1)) # The open circles are the fitted values under the final model. # Looks like a fairly good fit. ## Plotting the intervention effect over time: # Exhibit 11.8 Nine11p=1*(seq(airmiles)==69) plot(ts(Nine11p*(-0.0949)+ filter(Nine11p,filter=.8139,method='recursive',side=1)*(-0.2715), frequency=12,start=1996),type='h',ylab='9/11 Effects') abline(h=0) ## A time series regression example with an outlier: # Exhibit 11.24 data(boardings) plot(boardings,yax.flip=T) # The Denver dataset has three time series. Here, we only plot the first # two series. log.boardings=boardings[,1] log.price=boardings[,2] library(astsa) acf2(diff(log.price)) # ACF has damped sine wave, PACF cuts off after 2 lags # AR(2) on first differences --> ARIMA(2,1,0) for logged prices. # Exhibit 11.25 # Prewhitening based on ARIMA(2,1,0) for logged prices: m1=arima(boardings[,2],order=c(2,1,0)) prewhiten(x=boardings[,2],y=boardings[,1],x.model=m1) # Prewhitening based on generically fitting a long AR model: prewhiten(x=diff(boardings)[,2],y=diff(boardings)[,1],ylab='CCF') # Exhibit 11.26 log.boardings=boardings[,1] log.price=boardings[,2] # Initially just fitting an OLS model of log boardings on log price: boardings.m.lm = lm(log.boardings~log.price) acf2(residuals(boardings.m.lm)) # Early PACFs cut off after lag 2, but note significant PACF at lag 12 (and 13). # ---> seasonal $ARIMA(2,0,0) \times (1,0,0)_{12}$ model for the noise process. boardings.m1.tent=arima(log.boardings,order=c(2,0,0),seasonal=list(order=c(1,0,0),period=12), xreg=data.frame(log.price)) boardings.m1.tent # AR(2) coefficient not significant, so we remove it, and this improves the model: boardings.m1=arima(log.boardings,order=c(1,0,0),seasonal=list(order=c(1,0,0),period=12), xreg=data.frame(log.price)) boardings.m1 tsdiag(boardings.m1) plot(rstandard(boardings.m1)) # One or more notable residuals detectAO(boardings.m1) detectIO(boardings.m1) # An AO is detected at time point 32, as well as an IO detected at time point 44 # Since the test statistic of the AO has larger magnitude, an AO is added to the # model fitted below. dummy.32 = c(rep(0,31),1,rep(0,36)) # takes value 1 only at time 32 boardings.m2.tent1=arima(log.boardings,order=c(1,0,0),seasonal=list(order=c(1,0,0),period=12), xreg=data.frame(log.price,outlier=dummy.32)) boardings.m2.tent1 tsdiag(boardings.m2.tent1) # Significant lag-3 ACF value of residuals, so we add an MA(3) component: boardings.m2.tent2=arima(log.boardings,order=c(1,0,3),seasonal=list(order=c(1,0,0),period=12), xreg=data.frame(log.price,outlier=dummy.32)) boardings.m2.tent2 tsdiag(boardings.m2.tent2) # MA1 and MA2 coefficients not significant, so we can fix them at 0: boardings.m2=arima(log.boardings,order=c(1,0,3),seasonal=list(order=c(1,0,0),period=12), xreg=data.frame(log.price,outlier=dummy.32), fixed=c(NA,0,0,rep(NA,5))) boardings.m2 detectAO(boardings.m2) detectIO(boardings.m2) # No outliers are detected! tsdiag(boardings.m2,tol=.15,gof.lag=24) # Model diagnostics appear to be OK. ## Compare the model with the outlier unaccounted for: boardings.m1 ## to the model with the outlier incorporated into it: boardings.m2