# Chapter 11, part 1 R code


## Sample CCF of the simulated data, 
## where X leads Y by 2 time units
## and the errors are white noise

# Exhibit 11.11
#win.graph(width=4.875, height=2.5,pointsize=8)
set.seed(12345)
X=rnorm(105)
Y=zlag(X,2)+.5*rnorm(105)
X=ts(X[-(1:5)],start=1,freq=1)
Y=ts(Y[-(1:5)],start=1,freq=1)
ccf(X,Y,ylab='CCF')


# Exhibit 11.12
phi=seq(0,.95,.15)
rejection=2*(1-pnorm(1.96*sqrt((1-phi^2)/(1+phi^2))))
M=signif(rbind(phi,rejection),2)
rownames(M)=c("phi", "Error Rate")
M


# Exhibit 11.13
set.seed(23457)
correlation.v=NULL
B=1000
n=500
for (i in 1:B) {x=cumsum(arima.sim(model=list(ma=.8),n=n))
y=cumsum(arima.sim(model=list(ma=.8),n=n))
correlation.v=c(correlation.v,ccf(x,y,lag.max=1,plot=F)$acf[2])
}
hist(correlation.v,prob=T,xlab=expression(r[0](X,Y)))


# Milk/electricity example:  Spurious correlation


# Exhibit 11.14
data(milk)
data(electricity)
milk.electricity=ts.intersect(milk,log(electricity))
# The ts.intersect function merges serveral time series into a panel of time
# series over the time frame where each series has data.

plot(milk.electricity,yax.flip=T) 
# the option yax.flip=T flips the y-axis for the series alternately so as
# to make the labeling clearer.

# Exhibit 11.15
ccf(as.numeric(milk.electricity[,1]),as.numeric(milk.electricity[,2]),
main='milk & electricity',ylab='CCF')
# The as.numeric function strips the time series attribute from the time series.
# This is done to nullify the default way the ccf function plots the cross-correlations.
# You may want to repeat the command without the as.numeric function to see 
# the default labels of the lags according to the period of the data.
# ccf((milk.electricity[,1]),(milk.electricity[,2]), main='milk & electricity',ylab='CCF')

## prewhitening to deal with the spurious correlation:

# Exhibit 11.16
me.dif=ts.intersect(diff(diff(milk,12)),diff(diff(log(electricity),12)))
prewhiten(as.numeric(me.dif[,1]),as.numeric(me.dif[,2]),
,ylab='CCF' )


## Bluebird Potato Chip example

# Exhibit 11.17
data(bluebird)
plot(bluebird,yax.flip=T)

sales=bluebird[,1]
price=bluebird[,2]

# Exhibit 11.18
prewhiten(y=diff(bluebird)[,1],x=diff(bluebird)[,2],main="Price & Sales",ylab='CCF')
# As the time series are of unit period, there is no need to apply the as.numeric 
# function.

# Note the 'sales' vector has ALREADY been log-transformed.

# Try: hist(sales)
# and: qqnorm(sales)
# to see that the transformation has achieved approximate symmetry/normality.

# Exhibit 11.19
sales=bluebird[,1]
price=bluebird[,2]
chip.m1=lm(sales~price,data=bluebird)
summary(chip.m1)

# Exhibit 11.20
acf(residuals(chip.m1),ci.type='ma')

# Exhibit 11.21
pacf(residuals(chip.m1))

# Exhibit 11.22
eacf(residuals(chip.m1))

# Exhibit 11.23
chip.m2=arima(sales,order=c(1,0,4),xreg=data.frame(price))
chip.m2
# MA(1)& MA(3) estimates are not significant, at 5% level,
# so they are constrained to be zero in the model fit below.

chip.m3=arima(sales,order=c(1,0,4),xreg=data.frame(price),fixed=c(NA,0,NA,0,NA,NA,NA))
# The MA(1) & MA(3) estimates are the second and fourth coefficients listed
# in the model fit chip.m2. They are set to be zero using the fixed option.
# NAs in the fixed option correspond to free parameters. 
chip.m3

# Now, the AR(1) coefficient estimate also seems not significant, so it is
# removed in the next fitted model.
chip.m4=arima(sales,order=c(0,0,4),xreg=data.frame(price),fixed=c(0,NA,0,NA,NA,NA))
chip.m4

# model diagnostics can be done by running the tsdiag command.
tsdiag(chip.m4)

# Forecasting the next 3 months of logged sales, 
# if the next 3 months' prices are 1.75, 1.70, and 1.68:

plot(n1=95,chip.m4,n.ahead=3,newxreg=data.frame(c(1.75,1.70, 1.68)))  # the plot of the forecasts
# n1=95 tells R to only plot the orginal time series values starting from time 95

# the actual forecasts and the standard errors:
predict(chip.m4,n.ahead=3,newxreg=data.frame(c(1.75,1.70, 1.68))) 


## A Shumway and Stoffer example:

library(astsa)
data(cmort)

trend = time(cmort); temp = tempr - mean(tempr); temp2 = temp^2
fit = lm(cmort~trend + temp + temp2 + part, na.action=NULL)  #OLS fit
acf2(resid(fit), 52) # implies AR2 process for the noise
#sarima(cmort, 2,0,0, xreg=cbind(trend, temp, temp2, part) )  # WLS fit using 'sarima'

fit.wls=arima(cmort,order=c(2,0,0),xreg=cbind(trend, temp, temp2, part) )  # WLS fit using 'arima'
fit.wls

# model diagnostics can be done by running the tsdiag command.

plot(resid(fit.wls))
tsdiag(fit.wls)
qqnorm(resid(fit.wls))

# Do the residuals from the WLS fit look like white noise?


## An example with a Lagged Covariate and SARIMA errors:

lag2.plot(soi, rec, 8) # to see the appropriate choice of lag

# The strongest linear association seems to be at lag 6.

# But notice the linear trend is only there when SOI > 0, 
# so we use a dummy (indicator) variable for that in our model:

dummy = ifelse(soi<0, 0, 1)
fish = ts.intersect(rec, soiL6=lag(soi,-6), dL6=lag(dummy,-6), dframe=TRUE)
summary(fit <- lm(rec ~soiL6*dL6, data=fish, na.action=NULL))
attach(fish)
plot(resid(fit))
acf2(resid(fit)) # indicates AR(2)
(fit1 = arima(rec,order=c(2,0,0), xreg = cbind(soiL6, dL6, I(soiL6*dL6) )))
acf2(resid(fit1)) # indicates seasonal AR
intract = soiL6*dL6 # interaction term
# try seasonal AR order 1 ...
(fit2 = arima(rec, order=c(2,0,0), seasonal=list(order=c(1,0,0),period=12), xreg = cbind(soiL6, dL6, intract) ))
acf2(resid(fit2))

# ... and then seasonal AR order 2, which appears to be best
(fit3 = arima(rec, order=c(2,0,0), seasonal=list(order=c(2,0,0),period=12), xreg = cbind(soiL6, dL6, intract) ))
acf2(resid(fit3))

plot(rstandard(fit3))