# chap9.R

library(TSA)

## Forecasting with simple trend models:

# Linear trend for the logged earnings data
library(astsa)
modeljj=lm(log(jj)~time(jj))
summary(modeljj)

# predicting for Qtr 3, 1982:
future.time=1982.5
y.hat.pred = as.numeric(coef(modeljj) %*% c(1,future.time) ); print(y.hat.pred) # predicted logged earnings for Qtr 3, 1982
exp(y.hat.pred) # predicted earnings for Qtr 3, 1982

# Harmonic regression for Dubuque temperature data:
data(tempdub)
har.=harmonic(tempdub,m=1) 
model4=lm(tempdub~har.)
summary(model4)

# predicting for June 1976:
future.time = 1976.41667
y.hat.pred = as.numeric(coef(model4) %*% c(1,cos(2*pi*future.time),sin(2*pi*future.time)) ); 
print(y.hat.pred)

### Forecasting with ARIMA-type models

# Exhibit 9.1
data(color)
m1.color=arima(color,order=c(1,0,0))
m1.color 

# The MLE's:

phi.hat = coef(m1.color)["ar1"]
mu.hat = coef(m1.color)["intercept"]

# The most recent observed data value:

Y.t = color[length(color)]

# Forecasting "l" time units ahead:
# can change to l=2, l=3, l=20, etc.:
l=1; 
Y.t.l.hat = mu.hat + (phi.hat^l)*(Y.t - mu.hat); 
print(Y.t.l.hat)

# Prediction Intervals in the AR(1) model:

# Prediction limits "l" time units ahead:
# can change to l=2, l=3, l=20, etc.:

l=1; 
Y.t.l.hat = mu.hat + (phi.hat^l)*(Y.t - mu.hat); 

sigma.sq.e.hat = m1.color$sigma

alpha=0.05

P.L=Y.t.l.hat - qnorm(1-alpha/2)*sqrt( (sigma.sq.e.hat)*( (1-phi.hat^(2*l)) / (1-phi.hat^2) ) )
P.U=Y.t.l.hat + qnorm(1-alpha/2)*sqrt( (sigma.sq.e.hat)*( (1-phi.hat^(2*l)) / (1-phi.hat^2) ) )

# Printing the prediction interval:
print(paste(100*(1-alpha), "% PI for observation", l, "time units into future is:", signif(P.L,4), signif(P.U, 4) ))


# Exhibit 9.2

data(tempdub)

# append 2 years of missing values to the tempdub data as we want to forecast
# the temperature for two years. (this step may not be necessary...)

tempdub1=ts(c(tempdub,rep(NA,24)),start=start(tempdub),freq=frequency(tempdub)) 

# creates the first pair of harmonic functions and then fit the model
har.=harmonic(tempdub,1)
m5.tempdub=arima(tempdub,order=c(0,0,0),xreg=har.)
m5.tempdub
# The result is same as that from the fit using lm function.
har.=harmonic(tempdub,1)
model4=lm(tempdub~har.)
summary(model4)


# create the harmonic functions over the period of forecast.
newhar.=harmonic(ts(rep(1,24), start=c(1976,1),freq=12),1)
# Compute and plot the forecasts.
#win.graph(width=4.875, height=3,pointsize=8)
plot(m5.tempdub,n.ahead=24,n1=c(1972,1),newxreg=newhar.,
 type='b',ylab='Temperature',xlab='Year')

# Exhibit 9.3 

# Predicting future values of the color property data based on an AR(1) model:
# This goes 12 time units into the future:

data(color)
m1.color=arima(color,order=c(1,0,0))
plot(m1.color,n.ahead=12,type='b', xlab='Time', ylab='Color Property')
# add the horizontal line at the estimated mean ("intercept") 
abline(h=coef(m1.color)[names(coef(m1.color))=='intercept'])

# Showing the forecasted values and the standard errors of the forecasts:

predict(m1.color, n.ahead=12)

## Can see 95% prediction intervals for the forecast at any particular lead time:

# For one time unit ahead:

color.pred <- predict(m1.color, n.ahead=12)

c(color.pred$'pred'[1]-1.96*color.pred$se[1], color.pred$'pred'[1]+1.96*color.pred$se[1] )

# For five time units ahead:

c(color.pred$'pred'[5]-1.96*color.pred$se[5], color.pred$'pred'[5]+1.96*color.pred$se[5] )



# Exhibit 9.4

# Predicting future values of the (square-root transformed) hare data based on an AR(3) model:
# This goes 25 time units (years, here) into the future:

data(hare)
m1.hare=arima(sqrt(hare),order=c(3,0,0))
plot(m1.hare, n.ahead=25,type='b',xlab='Year',ylab='Sqrt(hare)')
abline(h=coef(m1.hare)[names(coef(m1.hare))=='intercept'])

# Showing the forecasted values and the standard errors of the forecasts:

predict(m1.hare, n.ahead=25)

# This goes 100 time units (years, here) into the future:

data(hare)
m1.hare=arima(sqrt(hare),order=c(3,0,0))
plot(m1.hare, n.ahead=100,type='b',xlab='Year',ylab='Sqrt(hare)')
abline(h=coef(m1.hare)[names(coef(m1.hare))=='intercept'])

# A forecasting example from Shumway and Stoffer:

library(astsa)

# The 'sarima' and 'sarima.for' functions can be used for
# estimating ARIMA-type models and forecasting with them:

# An AR(2) model for the recruitment data:

acf2(rec)  # Clearly an AR(2) model is reasonable.

sarima(rec,2,0,0)

# This goes 24 time units (months, here) into the future:
sarima.for(rec,24,2,0,0)
# Optionally add a horizonal line at the estimated mean:
# abline(h=61.8585, lty=3, col='green')

# Note that 'sarima' prints the forecasts to the console, as well as providing a plot
# of the forecasts, plus or minus 2 standard errors (pointwise 95% prediction limits)

#############################
##
## Forecasting using a model for DIFFERENCED data:  Check whether the constant term is needed!

library(TSA)

library(astsa)

# We have seen in chapter 7 that the differenced logged GNP series follows either an MA(2) or an AR(1) model.

# Fitting an ARIMA(0,1,2) model (i.e., an IMA(1,2) model) to the logged GNP series with the 'arima' function, 
# it assumes the differenced series has mean zero and cannot include a mean or constant term in the model:

arima(log(gnp), order=c(0,1,2))

# The 'sarima' function in the 'astsa' package CAN assume a nonzero constant term for the differenced series:
sarima(log(gnp), 0,1,2)

#...or you can tell 'sarima' not to include the constant:
sarima(log(gnp), 0,1,2, no.constant=T)

# In this case the constant term appears significantly different from zero.
# Note that including the constant term changes the forecasts substantially!

sarima.for(log(gnp), 40, 0,1,2)
dev.new()
sarima.for(log(gnp), 40, 0,1,2, no.constant=T)

## The above forecasts are in terms of the logged gnp units, not the original GNP units.
## See below for an example of back-transforming to the original units.

## LESSON:  When forecasting with a NONSTATIONARY series, use 'sarima' rather than 'arima' to fit the model, 
## and check whether the constant term is needed.
##
#############################

# Another real data example:
# An IMA(1,1) model for the logged oil price time series:

data(oil.price)
sarima(log(oil.price),0,1,1)

# In this logged oil price example, we could optionally remove the constant term by using no.constant=T
# since the constant is not significantly different from zero.
# Removing the constant would change the forecasts SLIGHTLY here.

# I'll go ahead and show the forecasts based on the model WITH the constant term:

sarima.for(log(oil.price),35,0,1,1)

## The original oil price data were measured in dollars per barrel.
## We did the analysis on the log-transformed data, so the forecasts 
## are in terms of logged units (not as interpretable?)

## To get forecasts (or prediction intervals) in terms of dollars, just reverse-transform (with the 'exp' function, here)
## Compare the following:

# Forecasts and 95% prediction intervals for next 6 months in logged units:

forecast.logged<- sarima.for(log(oil.price),6,0,1,1)
print(forecast.logged$pred)

# 95% prediction intervals (using 1.96 to get 95% intervals) for next 6 months in logged units:
rbind(forecast.logged$pred-1.96*forecast.logged$se,
forecast.logged$pred+1.96*forecast.logged$se)

# Forecasts and 95% prediction intervals for next 6 months in original units:

print(exp(forecast.logged$pred))

# 95% prediction intervals for next 6 months in original units:
exp(  rbind(forecast.logged$pred-1.96*forecast.logged$se,
forecast.logged$pred+1.96*forecast.logged$se)  )


######################################################################

## A DETAILED EXAMPLE OF FORECASTING WITH A NONSTATIONARY TIME SERIES:

library (TSA)
library (astsa)

# Two approaches to forecasting the chicken price series, a nonstationary process:

data(chicken)
plot(chicken,type='o') # clearly nonstationary

## APPROACH 1: Fitting a linear trend model and detrending:

lin.mod.chick <- lm(chicken~time(chicken))
summary(lin.mod.chick)
detrend.chick <- resid(lin.mod.chick)  # getting the detrended series

# Does the detrended series look like white noise?
plot(detrend.chick,type='o') # Plot of detrended series
acf2(detrend.chick)

# Detrended data don't look like white noise; maybe AR(2) if it's stationary at all.

# Recall the times when the chicken price series was measured:

time(chicken)

# Let's predict the value of the residual series for October 2016 (3 time units into the future):

# Fit an AR(2) model to the detrended series:

# Does the detrended series have mean zero?  Let's try it both ways
arima(detrend.chick, order=c(2,0,0))  #includes the process mean mu to be estimated
arima(detrend.chick, order=c(2,0,0), include.mean=F)  #assumes mu=0

# Looks like we can assume the mean of the detrended series is zero.
detrend.chick.ar2<- arima(detrend.chick, order=c(2,0,0), include.mean=F)

detrend.chick.pred <- predict(detrend.chick.ar2, n.ahead=12) # Forecasting the detrended series for the next 12 months


# Adding the predicted trend model value in October 2016 to get a forecast for the chicken price in October 2016:
future.time=2016.75
y.hat.pred = as.numeric(coef(lin.mod.chick) %*% c(1,future.time) )

# Point prediction of the chicken series for October 2016:
y.hat.pred + detrend.chick.pred$'pred'[3]
# 95% prediction interval for the forecast of the chicken series for October 2016:
y.hat.pred + c(detrend.chick.pred$'pred'[3]-1.96*detrend.chick.pred$se[3], detrend.chick.pred$'pred'[3]+1.96*detrend.chick.pred$se[3])


## APPROACH 2 (Simpler approach):  Work with an ARIMA model for the differenced data.

plot(diff(chicken), type ='o') # The series of first differences looks stationary
acf2(diff(chicken))

# Some evidence that the differenced series follows an AR(2) model, but there may be some subtle seasonality.

# Let's fit an ARIMA(2,1,0) model to the original chicken series and forecast the chicken price for October 2016:

sarima(chicken, 2,1,0) 
# Using the 'sarima' function since d>0 and I may want to include a constant term in the model.

sarima.for(chicken, 12, 2,1,0)

diff.chick.pred <- sarima.for(chicken, 12, 2,1,0)

diff.chick.pred$pred[3]
# 95% prediction interval for the forecast of the chicken series for October 2016:
c(diff.chick.pred$pred[3]-1.96*diff.chick.pred$se[3], diff.chick.pred$pred[3]+1.96*diff.chick.pred$se[3])


## DETAILED EXAMPLES OF SIMULATING FUTURE VALUES OF A TIME SERIES:


# Simulating future realizations of the color time series:

library(forecast)

data(color)
m1.color.a=auto.arima(color) # using the auto.arima function to pick the best model
m1.color.a # Making sure the selected model is our AR(1) choice.  It is.

# how many future values do we want to simulate?
n.future.vals <- 3
future.vals.sim <- simulate(m1.color.a,future=T,nsim=n.future.vals) 
# could include the argument bootstrap=T if the normal-errors assumption is violated
print(future.vals.sim)

# Plotting the simulated series (in gray) after the observed series:

observedtimes<- 1:(length(color) + n.future.vals) # In other examples this could be a set of years
plot(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],future.vals.sim), xlab='Time', ylab='Color Property', type='o', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,90))
## Change the ylim values above as needed!  
lines(observedtimes[1:length(color)],color[1:length(color)],type="o",lty=1)

## Rerun the above two blocks of code to see another simulated set of three future values...

# Some code to simulate many future trajectories of the series and plot all the trajectories at once:

number.paths <- 20
future.path.mat <- matrix(0,nrow=number.paths, ncol=n.future.vals)
for (i in 1:number.paths){
future.path <- simulate(m1.color.a,future=T,nsim=n.future.vals)
future.path.mat[i,] <- as.vector(future.path)
}
plot(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],future.path.mat[1,]), xlab='Time', ylab='Color Property', type='l', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,90))
## Change the ylim values above as needed! 
for (i in 2:number.paths){
lines(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],future.path.mat[i,]),type='l',col='gray')
}
lines(observedtimes[1:length(color)],color[1:length(color)],type="o",lty=1)

# And now adding the forecast to the plot...
library(astsa)
color.fore <- sarima.for(color, n.future.vals, 1,0,0,plot=F)$pred
lines(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],color.fore),type="o",lty=5, lwd=2) 


## Simulating future realizations of the chicken time series (it's nonstationary):


data(chicken)
m1.chicken.a=auto.arima(chicken) # using the auto.arima function to pick the best model
m1.chicken.a # Making sure the selected model is our ARIMA(2,1,0) choice.  It is NOT.
# Let's point auto.arima in the right direction...
m1.chicken.a=auto.arima(chicken,d=1,seasonal=F,max.p=2,max.q=0)
m1.chicken.a # There we go ...

# how many future values do we want to simulate?
n.future.vals <- 6
future.vals.sim <- simulate(m1.chicken.a,future=T,nsim=n.future.vals) 
# could include the argument bootstrap=T if the normal-errors assumption is violated
print(future.vals.sim)

# Plotting the simulated series (in gray) after the observed series:

future.times <- as.vector(time(chicken))[length(chicken)] + (1:n.future.vals)/12
# type print(future.times) to see the result
observedtimes<- c(as.vector(time(chicken)), future.times)

plot(observedtimes[length(chicken):(length(chicken)+n.future.vals)],c(chicken[length(chicken)],future.vals.sim), xlab='Time', ylab='Chicken Price', type='o', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,120))
## Change the ylim values above as needed!  
lines(observedtimes[1:length(chicken)],chicken[1:length(chicken)],type="o",lty=1)

# Some code to simulate many future trajectories of the series and plot all the trajectories at once:

number.paths <- 20
future.path.mat <- matrix(0,nrow=number.paths, ncol=n.future.vals)
for (i in 1:number.paths){
future.path <- simulate(m1.chicken.a,future=T,nsim=n.future.vals)
future.path.mat[i,] <- as.vector(future.path)
}
plot(observedtimes[length(chicken):(length(chicken)+n.future.vals)],c(chicken[length(chicken)],future.path.mat[1,]), xlab='Time', ylab='Chicken Price', type='l', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,120))
## Change the ylim values above as needed! 
for (i in 2:number.paths){
lines(observedtimes[length(chicken):(length(chicken)+n.future.vals)],c(chicken[length(chicken)],future.path.mat[i,]),type='l',col='gray')
}
lines(observedtimes[1:length(chicken)],chicken[1:length(chicken)],type="o",lty=1)

# And now adding the forecast to the plot...
library(astsa)
chicken.fore <- sarima.for(chicken, n.future.vals, 2,1,0,plot=F)$pred
lines(observedtimes[length(chicken):(length(chicken)+n.future.vals)],
c(chicken[length(chicken)],chicken.fore),type="o",lty=5, lwd=2)