# R code from Chapter 6 of Cryer and Chan

# Chap6.R

library(TSA)

# Exhibit 6.5
data(ma1.1.s)
#win.graph(width=4.875, height=3,pointsize=8)
acf(ma1.1.s,xaxp=c(0,20,10)) # if the argument xaxp is omitted, the
# tick marks will be generated according to the default convention. 
# Run the command ?par to learn more the xaxp argment.  
acf(ma1.1.s) # see the new tickmarks

# Note the acf values (mostly) cut off after lag 1 (do you see the exceptions to this?)

# So how was xaxp determined? First run the command without xaxp to find out
# how many lags are on the x-axis. Suppose there are 20 lags. Then we specify
# xaxp=c(0,20,10), i.e. the two extreme tickmarks are 0 and 20, and we want
# 10 tickmarks in between. How to set the xaxp argument if we want 1 tickmark for
# each lag?

pacf(ma1.1.s)

# Note the PACF values decay toward zero (roughly) rather than cut off.

# Exhibit 6.6
acf(ma1.1.s,ci.type='ma',xaxp=c(0,20,10))

# Exhibit 6.7
data(ma1.2.s)
acf(ma1.2.s,xaxp=c(0,20,10))

# Exhibit 6.8
data(ma2.s)
acf(ma2.s,xaxp=c(0,20,10))

# Exhibit 6.9
acf(ma2.s,ci.type='ma',xaxp=c(0,20,10))

# Exhibit 6.10
data(ar1.s)
acf(ar1.s,xaxp=c(0,20,10))


# Exhibit 6.11
pacf(ar1.s,xaxp=c(0,20,10))

# Exhibit 6.12
data(ar2.s)
acf(ar2.s,xaxp=c(0,20,10))

# Exhibit 6.13
pacf(ar2.s,xaxp=c(0,20,10))

# Exhibit 6.14
data(arma11.s)
plot(arma11.s, type='b',ylab=expression(y[t]))

# Exhibit 6.15
acf(arma11.s,xaxp=c(0,20,10))

# Exhibit 6.16
pacf(arma11.s,xaxp=c(0,20,10))

# Exhibit 6.17
eacf(arma11.s)

# If desirable,  a title can be added to the EACF table by the
# following command.

cat('Exhibit 6.17\n');eacf(arma11.s)

# Exhibit 6.18
data(oil.price)
acf(as.vector(oil.price),main='Sample ACF of the Oil Price Time Series',xaxp=c(0,24,12))
# The as.vector function strips the ts attaribute of oil.price in order to 
# prevent the plotting convention for seasonal time series.
# The main argument supplies the main heading of the figure.
# Try the following command to appreciate the effects of applying the as.vector
# function.

acf(oil.price,main='Sample ACF of the Oil Price Time Series')
# The tickmark 1 on the x-axis now refers to a lag of
# 1 period with each period containing 12 months, so it is lag 12!

# Exhibit 6.19
acf(diff(as.vector(log(oil.price))),main='Sample ACF of the Difference of the Oil Price Time Series',xaxp=c(0,24,12))

# Sample ACF and PACF of a simulated AR(2) Model (from Shumway/Stoffer book)
library(astsa)


set.seed(8675309)    # Jenny, it's me again
ar2 = arima.sim(list(order=c(2,0,0), ar=c(1.5,-.75)), n = 144)
plot(ar2, axes=FALSE, xlab="Time")
axis(2); axis(1, at=seq(0,144,by=12)); box()  # work the plot machine
abline(v=seq(0,144,by=12), lty=2)

# The 'acf2' function in the 'astsa' package plots both the ACF and PACF at once:
acf2(ar2)

ACF = ARMAacf(ar=c(1.5,-.75), ma=0, 50)
plot(ACF, type="h", xlab="lag")
abline(h=0)

## NOTE: The 'ARMAacf' function computes the THEORETICAL ACF (or PACF, if you use pacf=TRUE) for an ARMA-type model.
## The inputs for the 'ARMAacf' function are the true AR and/or MA coefficients of 
## the process, and the number of lags to plot.
## This is different from the 'acf' and 'pacf' functions, 
## which compute a SAMPLE ACF (or PACF) of an observed time series.

ar2.acf = ARMAacf(ar=c(1.5,-.75), ma=0, 24)[-1]
ar2.pacf = ARMAacf(ar=c(1.5,-.75), ma=0, 24, pacf=TRUE)
par(mfrow=c(1,2))
plot(ar2.acf, type="h", xlab="lag")
abline(h=0)
plot(ar2.pacf, type="h", xlab="lag")
abline(h=0)
par(mfrow=c(1,1))


# Exhibit 6.20
data(rwalk)
acf(diff(rwalk,difference=2),ci.type='ma',xaxp=c(0,18,9))

# Exhibit 6.21
acf(diff(rwalk),ci.type='ma',xaxp=c(0,18,9))

#### Augmented Dickey-Fuller Testing:

# Carry out the Dickey-Fuller unit root tests
# Find out the AR order for the differenced series:

ar(diff(rwalk))

# order 8 is indicated by AIC


library(fUnitRoots)  # may need to install package 'fUnitRoots'

# Testing for difference nonstationarity:

# The type='c' assumes just a constant (intercept), not a linear trend:

adfTest(rwalk,lags=8,type='c')  # The AIC indicates the correct order is k=8

# In comparison, setting the true order k to be 0:

adfTest(rwalk,lags=0,type='c')  
# The truth for this example is that k=0:
# Since Y_t is a random walk, the first differences are white noise, i.e., AR(0).

# Testing for trend nonstationarity:

# Repeat the test with the alternative have an intercept term and a linear trend

adfTest(rwalk,lags=8,type='ct')  # Based on the order 8 that the AIC indicates

adfTest(rwalk,lags=0,type='ct')  # Based on the order 0 that is the truth, but in reality would be unknown


# Another function to test for trend nonstationarity:

library(tseries) # may need to install package 'tseries'

adf.test(rwalk,k=8,alternative="stationary")  # Based on the order 8 that the AIC indicates

adf.test(rwalk,k=0,alternative="stationary")  # Based on the order 0 that is the truth, but in reality would be unknown

# Augmented Dickey-Fuller testing on some real data sets:

data(larain); plot(larain,type='o')

ar(diff(larain))

adfTest(larain,lags=4,type='c')

# reject H0:  conclude the LA rain series is stationary.

data(oil.price); plot(log(oil.price),ylab="Logged Oil prices",xlab="Year",type="o")

ar(diff(log(oil.price)))

adfTest(log(oil.price),lags=2,type='c')

# Fail to reject H0:  Conclude the logged oil price data is difference nonstationary.

library(astsa); data(jj); plot(log(jj), type='o')

ar(diff(log(jj)))

adfTest(log(jj),lags=5,type='ct')

# Fail to reject H0:  Conclude the logged Johnson & Johnson data is trend nonstationary.

# For the last example, note that:
# library(tseries); adf.test(log(jj),k=5,alternative="stationary") 
# does the same thing.


#############################

# Best subset ARMA model selection using BIC:

# Exhibit 6.22 
set.seed(92397)
test=arima.sim(model=list(ar=c(rep(0,11),.8),ma=c(rep(0,11),0.7)),n=120)
res=armasubsets(y=test,nar=14,nma=14,y.name='test',ar.method='ols')
plot(res)

# A title may be added to the plot, by adding the main="..." option
# in the above plot function. However, the title may crash with other
# labels. A better approach is to add a title by the 
# following command.

title(main="Exhibit 6.22", line=6)

# The option line=6 means put the title 6 lines from the edge of the
# plot region. You can experiment with this option to fine tune the
# placement of the label.

# Los Angeles Rain example
# Exhibit 6.23
data(larain)
#win.graph(width=3, height=3,pointsize=8)
qqnorm(log(larain))
qqline(log(larain))

# Exhibit 6.24
#win.graph(width=4.875, height=3,pointsize=8)
acf(log(larain),xaxp=c(0,20,10)) # note the main heading now includes the data name.

# Exhibit 6.25
data(color)
acf(color,ci.type='ma')

# Exhibit 6.26
pacf(color)

# Exhibit 6.27  
#win.graph(width=4, height=4,pointsize=8)
data(hare)
bxh=BoxCox.ar(hare)
bxh$mle # the mle of the power parameter
bxh$ci # corresponding 95% C.I.

# Exhibit 6.28
acf(hare^.5)

# Exhibit 6.29
pacf(hare^.5)

# Augmented Dickey-Fuller test for stationarity in the logged oil price data was done previously.
# the conclusion was that the logged oil price data is difference nonstationary.

# Exhibit 6.30
eacf(diff(log(oil.price)))

# Exhibit 6.31
res=armasubsets(y=diff(log(oil.price)),nar=7,nma=7,
y.name='test', ar.method='ols')
plot(res)

# Exhibit 6.32
acf(as.vector(diff(log(oil.price))),xaxp=c(0,22,11))

# Exhibit 6.33
pacf(as.vector(diff(log(oil.price))),xaxp=c(0,22,11))


# Shumway-Stoffer examples with real data:
# Analysis of recruitment data, indicates AR(2) model?

acf2(rec, 48)     # will produce values and a graphic 

# Analysis of SOI data:

acf2(soi, 48)     # doesn't match a stationary ARMA model...

acf2(diff(soi), 48) # first differences are not looking good either...

acf2(diff(soi, differences=2), 48) # Series of second differences looks like MA(1).

# Analysis of logged earnings data:

acf2(log(jj),40)

# The time series is not stationary, so we could try analyzing the differences:

acf2(diff(log(jj)),40)

# Analysis of airmiles data:

library(TSA)

data(airmiles)
acf2(airmiles)

eacf(airmiles)

res=armasubsets(y=airmiles,nar=14,nma=14,
y.name='test', ar.method='ols')
plot(res)

# In the ARMA(14,14) model, which subset of terms has nonzero coefficients in the model considered "best" by BIC?

######################################################################
#
# Using the 'auto.arima' function in the 'forecast' package 
# to do automated model selection based on AIC or BIC:

library(forecast)  # May need to install the 'forecast' package first

# LA rain series:
data(larain)
larain.best.aic <- auto.arima(larain,ic='aic')
print(larain.best.aic)

larain.best.bic <- auto.arima(larain,ic='bic')
print(larain.best.bic)

# With the color series:
data(color)

color.best.aic <- auto.arima(color,ic='aic')
print(color.best.aic)

color.best.bic <- auto.arima(color,ic='bic')
print(color.best.bic)

# With the hare series:

hare.best.aic <- auto.arima(hare,ic='aic')
print(hare.best.aic)

hare.best.bic <- auto.arima(hare,ic='bic')
print(hare.best.bic)

# We see that sometimes the AIC and BIC criteria disagree about which is the best model.

# With more complicated time series, the auto.arima function will sometimes choose
# more complicated models, like seasonal ARIMA models, which we will study in Chapter 10,
# or like models with drift terms, which we will study in Chapter 9:

logjj.best.aic <- auto.arima(log(jj),ic='aic')
print(logjj.best.aic)

# If we don't want to consider such complicated models, we can tell auto.arima not to consider them:

logjj.best.aic <- auto.arima(log(jj),ic='aic',seasonal=FALSE, allowdrift=FALSE)
print(logjj.best.aic)




# Estimation of recruitment data, assuming AR(2) model:
#(regr = ar.ols(rec, order=2, demean=F, intercept=TRUE))  # regression
#regr$asy.se.coef  # standard errors