# Chapter 11, part 1 R code ## Sample CCF of the simulated data, ## where X leads Y by 2 time units ## and the errors are white noise # Exhibit 11.11 #win.graph(width=4.875, height=2.5,pointsize=8) set.seed(12345) X=rnorm(105) Y=zlag(X,2)+.5*rnorm(105) X=ts(X[-(1:5)],start=1,freq=1) Y=ts(Y[-(1:5)],start=1,freq=1) ccf(X,Y,ylab='CCF') # Exhibit 11.12 phi=seq(0,.95,.15) rejection=2*(1-pnorm(1.96*sqrt((1-phi^2)/(1+phi^2)))) M=signif(rbind(phi,rejection),2) rownames(M)=c("phi", "Error Rate") M # Exhibit 11.13 set.seed(23457) correlation.v=NULL B=1000 n=500 for (i in 1:B) {x=cumsum(arima.sim(model=list(ma=.8),n=n)) y=cumsum(arima.sim(model=list(ma=.8),n=n)) correlation.v=c(correlation.v,ccf(x,y,lag.max=1,plot=F)$acf[2]) } hist(correlation.v,prob=T,xlab=expression(r[0](X,Y))) # Milk/electricity example: Spurious correlation # Exhibit 11.14 data(milk) data(electricity) milk.electricity=ts.intersect(milk,log(electricity)) # The ts.intersect function merges serveral time series into a panel of time # series over the time frame where each series has data. plot(milk.electricity,yax.flip=T) # the option yax.flip=T flips the y-axis for the series alternately so as # to make the labeling clearer. # Exhibit 11.15 ccf(as.numeric(milk.electricity[,1]),as.numeric(milk.electricity[,2]), main='milk & electricity',ylab='CCF') # The as.numeric function strips the time series attribute from the time series. # This is done to nullify the default way the ccf function plots the cross-correlations. # You may want to repeat the command without the as.numeric function to see # the default labels of the lags according to the period of the data. # ccf((milk.electricity[,1]),(milk.electricity[,2]), main='milk & electricity',ylab='CCF') ## prewhitening to deal with the spurious correlation: # Exhibit 11.16 me.dif=ts.intersect(diff(diff(milk,12)),diff(diff(log(electricity),12))) prewhiten(as.numeric(me.dif[,1]),as.numeric(me.dif[,2]), ,ylab='CCF' ) ## Bluebird Potato Chip example # Exhibit 11.17 data(bluebird) plot(bluebird,yax.flip=T) sales=bluebird[,1] price=bluebird[,2] # Exhibit 11.18 prewhiten(y=diff(bluebird)[,1],x=diff(bluebird)[,2],main="Price & Sales",ylab='CCF') # As the time series are of unit period, there is no need to apply the as.numeric # function. # Note the 'sales' vector has ALREADY been log-transformed. # Try: hist(sales) # and: qqnorm(sales) # to see that the transformation has achieved approximate symmetry/normality. # Exhibit 11.19 sales=bluebird[,1] price=bluebird[,2] chip.m1=lm(sales~price,data=bluebird) summary(chip.m1) # Exhibit 11.20 acf(residuals(chip.m1),ci.type='ma') # Exhibit 11.21 pacf(residuals(chip.m1)) # Exhibit 11.22 eacf(residuals(chip.m1)) # Exhibit 11.23 chip.m2=arima(sales,order=c(1,0,4),xreg=data.frame(price)) chip.m2 # MA(1)& MA(3) estimates are not significant, at 5% level, # so they are constrained to be zero in the model fit below. chip.m3=arima(sales,order=c(1,0,4),xreg=data.frame(price),fixed=c(NA,0,NA,0,NA,NA,NA)) # The MA(1) & MA(3) estimates are the second and fourth coefficients listed # in the model fit chip.m2. They are set to be zero using the fixed option. # NAs in the fixed option correspond to free parameters. chip.m3 # Now, the AR(1) coefficient estimate also seems not significant, so it is # removed in the next fitted model. chip.m4=arima(sales,order=c(0,0,4),xreg=data.frame(price),fixed=c(0,NA,0,NA,NA,NA)) chip.m4 # model diagnostics can be done by running the tsdiag command. tsdiag(chip.m4) # Forecasting the next 3 months of logged sales, # if the next 3 months' prices are 1.75, 1.70, and 1.68: plot(n1=95,chip.m4,n.ahead=3,newxreg=data.frame(c(1.75,1.70, 1.68))) # the plot of the forecasts # n1=95 tells R to only plot the orginal time series values starting from time 95 # the actual forecasts and the standard errors: predict(chip.m4,n.ahead=3,newxreg=data.frame(c(1.75,1.70, 1.68))) ## A Shumway and Stoffer example: library(astsa) data(cmort) trend = time(cmort); temp = tempr - mean(tempr); temp2 = temp^2 fit = lm(cmort~trend + temp + temp2 + part, na.action=NULL) #OLS fit acf2(resid(fit), 52) # implies AR2 process for the noise #sarima(cmort, 2,0,0, xreg=cbind(trend, temp, temp2, part) ) # WLS fit using 'sarima' fit.wls=arima(cmort,order=c(2,0,0),xreg=cbind(trend, temp, temp2, part) ) # WLS fit using 'arima' fit.wls # model diagnostics can be done by running the tsdiag command. plot(resid(fit.wls)) tsdiag(fit.wls) qqnorm(resid(fit.wls)) # Do the residuals from the WLS fit look like white noise? ## An example with a Lagged Covariate and SARIMA errors: lag2.plot(soi, rec, 8) # to see the appropriate choice of lag # The strongest linear association seems to be at lag 6. # But notice the linear trend is only there when SOI > 0, # so we use a dummy (indicator) variable for that in our model: dummy = ifelse(soi<0, 0, 1) fish = ts.intersect(rec, soiL6=lag(soi,-6), dL6=lag(dummy,-6), dframe=TRUE) summary(fit <- lm(rec ~soiL6*dL6, data=fish, na.action=NULL)) attach(fish) plot(resid(fit)) acf2(resid(fit)) # indicates AR(2) (fit1 = arima(rec,order=c(2,0,0), xreg = cbind(soiL6, dL6, I(soiL6*dL6) ))) acf2(resid(fit1)) # indicates seasonal AR intract = soiL6*dL6 # interaction term # try seasonal AR order 1 ... (fit2 = arima(rec, order=c(2,0,0), seasonal=list(order=c(1,0,0),period=12), xreg = cbind(soiL6, dL6, intract) )) acf2(resid(fit2)) # ... and then seasonal AR order 2, which appears to be best (fit3 = arima(rec, order=c(2,0,0), seasonal=list(order=c(2,0,0),period=12), xreg = cbind(soiL6, dL6, intract) )) acf2(resid(fit3)) plot(rstandard(fit3))