# chap9.R library(TSA) ## Forecasting with simple trend models: # Linear trend for the logged earnings data library(astsa) modeljj=lm(log(jj)~time(jj)) summary(modeljj) # predicting for Qtr 3, 1982: future.time=1982.5 y.hat.pred = as.numeric(coef(modeljj) %*% c(1,future.time) ); print(y.hat.pred) # predicted logged earnings for Qtr 3, 1982 exp(y.hat.pred) # predicted earnings for Qtr 3, 1982 # Harmonic regression for Dubuque temperature data: data(tempdub) har.=harmonic(tempdub,m=1) model4=lm(tempdub~har.) summary(model4) # predicting for June 1976: future.time = 1976.41667 y.hat.pred = as.numeric(coef(model4) %*% c(1,cos(2*pi*future.time),sin(2*pi*future.time)) ); print(y.hat.pred) ### Forecasting with ARIMA-type models # Exhibit 9.1 data(color) m1.color=arima(color,order=c(1,0,0)) m1.color # The MLE's: phi.hat = coef(m1.color)["ar1"] mu.hat = coef(m1.color)["intercept"] # The most recent observed data value: Y.t = color[length(color)] # Forecasting "l" time units ahead: # can change to l=2, l=3, l=20, etc.: l=1; Y.t.l.hat = mu.hat + (phi.hat^l)*(Y.t - mu.hat); print(Y.t.l.hat) # Prediction Intervals in the AR(1) model: # Prediction limits "l" time units ahead: # can change to l=2, l=3, l=20, etc.: l=1; Y.t.l.hat = mu.hat + (phi.hat^l)*(Y.t - mu.hat); sigma.sq.e.hat = m1.color$sigma alpha=0.05 P.L=Y.t.l.hat - qnorm(1-alpha/2)*sqrt( (sigma.sq.e.hat)*( (1-phi.hat^(2*l)) / (1-phi.hat^2) ) ) P.U=Y.t.l.hat + qnorm(1-alpha/2)*sqrt( (sigma.sq.e.hat)*( (1-phi.hat^(2*l)) / (1-phi.hat^2) ) ) # Printing the prediction interval: print(paste(100*(1-alpha), "% PI for observation", l, "time units into future is:", signif(P.L,4), signif(P.U, 4) )) # Exhibit 9.2 data(tempdub) # append 2 years of missing values to the tempdub data as we want to forecast # the temperature for two years. (this step may not be necessary...) tempdub1=ts(c(tempdub,rep(NA,24)),start=start(tempdub),freq=frequency(tempdub)) # creates the first pair of harmonic functions and then fit the model har.=harmonic(tempdub,1) m5.tempdub=arima(tempdub,order=c(0,0,0),xreg=har.) m5.tempdub # The result is same as that from the fit using lm function. har.=harmonic(tempdub,1) model4=lm(tempdub~har.) summary(model4) # create the harmonic functions over the period of forecast. newhar.=harmonic(ts(rep(1,24), start=c(1976,1),freq=12),1) # Compute and plot the forecasts. #win.graph(width=4.875, height=3,pointsize=8) plot(m5.tempdub,n.ahead=24,n1=c(1972,1),newxreg=newhar., type='b',ylab='Temperature',xlab='Year') # Exhibit 9.3 # Predicting future values of the color property data based on an AR(1) model: # This goes 12 time units into the future: data(color) m1.color=arima(color,order=c(1,0,0)) plot(m1.color,n.ahead=12,type='b', xlab='Time', ylab='Color Property') # add the horizontal line at the estimated mean ("intercept") abline(h=coef(m1.color)[names(coef(m1.color))=='intercept']) # Showing the forecasted values and the standard errors of the forecasts: predict(m1.color, n.ahead=12) ## Can see 95% prediction intervals for the forecast at any particular lead time: # For one time unit ahead: color.pred <- predict(m1.color, n.ahead=12) c(color.pred$'pred'[1]-1.96*color.pred$se[1], color.pred$'pred'[1]+1.96*color.pred$se[1] ) # For five time units ahead: c(color.pred$'pred'[5]-1.96*color.pred$se[5], color.pred$'pred'[5]+1.96*color.pred$se[5] ) # Exhibit 9.4 # Predicting future values of the (square-root transformed) hare data based on an AR(3) model: # This goes 25 time units (years, here) into the future: data(hare) m1.hare=arima(sqrt(hare),order=c(3,0,0)) plot(m1.hare, n.ahead=25,type='b',xlab='Year',ylab='Sqrt(hare)') abline(h=coef(m1.hare)[names(coef(m1.hare))=='intercept']) # Showing the forecasted values and the standard errors of the forecasts: predict(m1.hare, n.ahead=25) # This goes 100 time units (years, here) into the future: data(hare) m1.hare=arima(sqrt(hare),order=c(3,0,0)) plot(m1.hare, n.ahead=100,type='b',xlab='Year',ylab='Sqrt(hare)') abline(h=coef(m1.hare)[names(coef(m1.hare))=='intercept']) # A forecasting example from Shumway and Stoffer: library(astsa) # The 'sarima' and 'sarima.for' functions can be used for # estimating ARIMA-type models and forecasting with them: # An AR(2) model for the recruitment data: acf2(rec) # Clearly an AR(2) model is reasonable. sarima(rec,2,0,0) # This goes 24 time units (months, here) into the future: sarima.for(rec,24,2,0,0) # Optionally add a horizonal line at the estimated mean: # abline(h=61.8585, lty=3, col='green') # Note that 'sarima' prints the forecasts to the console, as well as providing a plot # of the forecasts, plus or minus 2 standard errors (pointwise 95% prediction limits) ############################# ## ## Forecasting using a model for DIFFERENCED data: Check whether the constant term is needed! library(TSA) library(astsa) # We have seen in chapter 7 that the differenced logged GNP series follows either an MA(2) or an AR(1) model. # Fitting an ARIMA(0,1,2) model (i.e., an IMA(1,2) model) to the logged GNP series with the 'arima' function, # it assumes the differenced series has mean zero and cannot include a mean or constant term in the model: arima(log(gnp), order=c(0,1,2)) # The 'sarima' function in the 'astsa' package CAN assume a nonzero constant term for the differenced series: sarima(log(gnp), 0,1,2) #...or you can tell 'sarima' not to include the constant: sarima(log(gnp), 0,1,2, no.constant=T) # In this case the constant term appears significantly different from zero. # Note that including the constant term changes the forecasts substantially! sarima.for(log(gnp), 40, 0,1,2) dev.new() sarima.for(log(gnp), 40, 0,1,2, no.constant=T) ## The above forecasts are in terms of the logged gnp units, not the original GNP units. ## See below for an example of back-transforming to the original units. ## LESSON: When forecasting with a NONSTATIONARY series, use 'sarima' rather than 'arima' to fit the model, ## and check whether the constant term is needed. ## ############################# # Another real data example: # An IMA(1,1) model for the logged oil price time series: data(oil.price) sarima(log(oil.price),0,1,1) # In this logged oil price example, we could optionally remove the constant term by using no.constant=T # since the constant is not significantly different from zero. # Removing the constant would change the forecasts SLIGHTLY here. # I'll go ahead and show the forecasts based on the model WITH the constant term: sarima.for(log(oil.price),35,0,1,1) ## The original oil price data were measured in dollars per barrel. ## We did the analysis on the log-transformed data, so the forecasts ## are in terms of logged units (not as interpretable?) ## To get forecasts (or prediction intervals) in terms of dollars, just reverse-transform (with the 'exp' function, here) ## Compare the following: # Forecasts and 95% prediction intervals for next 6 months in logged units: forecast.logged<- sarima.for(log(oil.price),6,0,1,1) print(forecast.logged$pred) # 95% prediction intervals (using 1.96 to get 95% intervals) for next 6 months in logged units: rbind(forecast.logged$pred-1.96*forecast.logged$se, forecast.logged$pred+1.96*forecast.logged$se) # Forecasts and 95% prediction intervals for next 6 months in original units: print(exp(forecast.logged$pred)) # 95% prediction intervals for next 6 months in original units: exp( rbind(forecast.logged$pred-1.96*forecast.logged$se, forecast.logged$pred+1.96*forecast.logged$se) ) ###################################################################### ## A DETAILED EXAMPLE OF FORECASTING WITH A NONSTATIONARY TIME SERIES: library (TSA) library (astsa) # Two approaches to forecasting the chicken price series, a nonstationary process: data(chicken) plot(chicken,type='o') # clearly nonstationary ## APPROACH 1: Fitting a linear trend model and detrending: lin.mod.chick <- lm(chicken~time(chicken)) summary(lin.mod.chick) detrend.chick <- resid(lin.mod.chick) # getting the detrended series # Does the detrended series look like white noise? plot(detrend.chick,type='o') # Plot of detrended series acf2(detrend.chick) # Detrended data don't look like white noise; maybe AR(2) if it's stationary at all. # Recall the times when the chicken price series was measured: time(chicken) # Let's predict the value of the residual series for October 2016 (3 time units into the future): # Fit an AR(2) model to the detrended series: # Does the detrended series have mean zero? Let's try it both ways arima(detrend.chick, order=c(2,0,0)) #includes the process mean mu to be estimated arima(detrend.chick, order=c(2,0,0), include.mean=F) #assumes mu=0 # Looks like we can assume the mean of the detrended series is zero. detrend.chick.ar2<- arima(detrend.chick, order=c(2,0,0), include.mean=F) detrend.chick.pred <- predict(detrend.chick.ar2, n.ahead=12) # Forecasting the detrended series for the next 12 months # Adding the predicted trend model value in October 2016 to get a forecast for the chicken price in October 2016: future.time=2016.75 y.hat.pred = as.numeric(coef(lin.mod.chick) %*% c(1,future.time) ) # Point prediction of the chicken series for October 2016: y.hat.pred + detrend.chick.pred$'pred'[3] # 95% prediction interval for the forecast of the chicken series for October 2016: y.hat.pred + c(detrend.chick.pred$'pred'[3]-1.96*detrend.chick.pred$se[3], detrend.chick.pred$'pred'[3]+1.96*detrend.chick.pred$se[3]) ## APPROACH 2 (Simpler approach): Work with an ARIMA model for the differenced data. plot(diff(chicken), type ='o') # The series of first differences looks stationary acf2(diff(chicken)) # Some evidence that the differenced series follows an AR(2) model, but there may be some subtle seasonality. # Let's fit an ARIMA(2,1,0) model to the original chicken series and forecast the chicken price for October 2016: sarima(chicken, 2,1,0) # Using the 'sarima' function since d>0 and I may want to include a constant term in the model. sarima.for(chicken, 12, 2,1,0) diff.chick.pred <- sarima.for(chicken, 12, 2,1,0) diff.chick.pred$pred[3] # 95% prediction interval for the forecast of the chicken series for October 2016: c(diff.chick.pred$pred[3]-1.96*diff.chick.pred$se[3], diff.chick.pred$pred[3]+1.96*diff.chick.pred$se[3]) ## DETAILED EXAMPLES OF SIMULATING FUTURE VALUES OF A TIME SERIES: # Simulating future realizations of the color time series: library(forecast) data(color) m1.color.a=auto.arima(color) # using the auto.arima function to pick the best model m1.color.a # Making sure the selected model is our AR(1) choice. It is. # how many future values do we want to simulate? n.future.vals <- 3 future.vals.sim <- simulate(m1.color.a,future=T,nsim=n.future.vals) # could include the argument bootstrap=T if the normal-errors assumption is violated print(future.vals.sim) # Plotting the simulated series (in gray) after the observed series: observedtimes<- 1:(length(color) + n.future.vals) # In other examples this could be a set of years plot(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],future.vals.sim), xlab='Time', ylab='Color Property', type='o', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,90)) ## Change the ylim values above as needed! lines(observedtimes[1:length(color)],color[1:length(color)],type="o",lty=1) ## Rerun the above two blocks of code to see another simulated set of three future values... # Some code to simulate many future trajectories of the series and plot all the trajectories at once: number.paths <- 20 future.path.mat <- matrix(0,nrow=number.paths, ncol=n.future.vals) for (i in 1:number.paths){ future.path <- simulate(m1.color.a,future=T,nsim=n.future.vals) future.path.mat[i,] <- as.vector(future.path) } plot(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],future.path.mat[1,]), xlab='Time', ylab='Color Property', type='l', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,90)) ## Change the ylim values above as needed! for (i in 2:number.paths){ lines(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],future.path.mat[i,]),type='l',col='gray') } lines(observedtimes[1:length(color)],color[1:length(color)],type="o",lty=1) # And now adding the forecast to the plot... library(astsa) color.fore <- sarima.for(color, n.future.vals, 1,0,0,plot=F)$pred lines(observedtimes[length(color):(length(color)+n.future.vals)],c(color[length(color)],color.fore),type="o",lty=5, lwd=2) ## Simulating future realizations of the chicken time series (it's nonstationary): data(chicken) m1.chicken.a=auto.arima(chicken) # using the auto.arima function to pick the best model m1.chicken.a # Making sure the selected model is our ARIMA(2,1,0) choice. It is NOT. # Let's point auto.arima in the right direction... m1.chicken.a=auto.arima(chicken,d=1,seasonal=F,max.p=2,max.q=0) m1.chicken.a # There we go ... # how many future values do we want to simulate? n.future.vals <- 6 future.vals.sim <- simulate(m1.chicken.a,future=T,nsim=n.future.vals) # could include the argument bootstrap=T if the normal-errors assumption is violated print(future.vals.sim) # Plotting the simulated series (in gray) after the observed series: future.times <- as.vector(time(chicken))[length(chicken)] + (1:n.future.vals)/12 # type print(future.times) to see the result observedtimes<- c(as.vector(time(chicken)), future.times) plot(observedtimes[length(chicken):(length(chicken)+n.future.vals)],c(chicken[length(chicken)],future.vals.sim), xlab='Time', ylab='Chicken Price', type='o', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,120)) ## Change the ylim values above as needed! lines(observedtimes[1:length(chicken)],chicken[1:length(chicken)],type="o",lty=1) # Some code to simulate many future trajectories of the series and plot all the trajectories at once: number.paths <- 20 future.path.mat <- matrix(0,nrow=number.paths, ncol=n.future.vals) for (i in 1:number.paths){ future.path <- simulate(m1.chicken.a,future=T,nsim=n.future.vals) future.path.mat[i,] <- as.vector(future.path) } plot(observedtimes[length(chicken):(length(chicken)+n.future.vals)],c(chicken[length(chicken)],future.path.mat[1,]), xlab='Time', ylab='Chicken Price', type='l', col='gray', xlim=c(min(observedtimes),max(observedtimes)), ylim=c(60,120)) ## Change the ylim values above as needed! for (i in 2:number.paths){ lines(observedtimes[length(chicken):(length(chicken)+n.future.vals)],c(chicken[length(chicken)],future.path.mat[i,]),type='l',col='gray') } lines(observedtimes[1:length(chicken)],chicken[1:length(chicken)],type="o",lty=1) # And now adding the forecast to the plot... library(astsa) chicken.fore <- sarima.for(chicken, n.future.vals, 2,1,0,plot=F)$pred lines(observedtimes[length(chicken):(length(chicken)+n.future.vals)], c(chicken[length(chicken)],chicken.fore),type="o",lty=5, lwd=2)