# Chap7.R # R code for Chapter 7 examples: library(TSA) # Below is a function that computes the method of moments estimator of # the MA(1) coefficient of an MA(1) model. estimate.ma1.mom=function(x){r=acf(x,plot=F)$acf[1]; if (abs(r)<0.5) return((-1+sqrt(1-4*r^2))/(2*r)) else return(NA)} # Exhibit 7.1 code # Method of Moments Estimation: data(ma1.2.s) # The true theta is -0.9 for this simulated data set estimate.ma1.mom(ma1.2.s) data(ma1.1.s) # The true theta is 0.9 for this simulated data set estimate.ma1.mom(ma1.1.s) set.seed(1234) ma1.3.s=arima.sim(list(ma=c(.9)),n=60) # The true theta is -0.9 for this simulated data set estimate.ma1.mom(ma1.3.s) # Since r1 = 0.544 for these simulated data, no MOM estimate of theta exists here. ma1.4.s=arima.sim(list(ma=c(-0.5)),n=60) # The true theta is 0.5 for this simulated data set estimate.ma1.mom(ma1.4.s) data(ar1.s) ar(ar1.s,order.max=1,aic=F,method='yw') # the true phi is 0.9 for this simulated data set data(ar1.2.s) ar(ar1.2.s,order.max=1,aic=F,method='yw') # the true phi is 0.4 for this simulated data set data(ar2.s) ar(ar2.s,order.max=2,aic=F,method='yw') # the true phi_1 is 1.5 and the true phi_2 is -0.75 for this simulated data set # MOM estimation of the square-root transformed hare data, based on an AR(2) model: data(hare) mean(sqrt(hare)) # Estimate of mu is 5.82. ar(sqrt(hare),order.max=2,aic=F,method='yw') # estimate of phi_1 is 1.1178, estimate of phi_2 is -0.519. acf(sqrt(hare),plot=F) # We see r1=0.736, r2=0.304. var(sqrt(hare)) # Sample variance is 5.88. # So the estimate of the noise variance is (recall formula for AR(2) model): (1 - 1.1178 * 0.736 - (-0.519) * 0.304) * (5.88) # MOM estimation of the differences of the log-transformed oil price data, based on an MA(1) model: data(oil.price) estimate.ma1.mom(diff(log(oil.price))) # estimate of theta is -0.222. # sample mean of differences of the logged oil prices is 0.004: mean(diff(log(oil.price))) # sample variance of differences of the logged oil prices is 0.0072: var(diff(log(oil.price))) # So the estimate of the noise variance is (recall formula for MA(1) model): 0.0072/(1+(-0.222)^2) # Note: Using MOM to estimate models containing MA terms is NOT recommended, # so the example just above is for illustration purposes only! ######################################################################### ### Examples of LS and ML estimation of some simulated time series: # Least-squares estimation of the simulated MA(1) series from earlier: arima(ma1.4.s,order=c(0,0,1),method='CSS',include.mean=F) # Exhibit 7.4 # A simulated AR(1) series (The true parameter value here is phi = 0.9): data(ar1.s) ar(ar1.s,order.max=1,aic=F,method='yw') # method of moments ar(ar1.s,order.max=1,aic=F,method='ols') # conditional sum of squares ar(ar1.s,order.max=1,aic=F,method='mle') # maximum likelihood # The AIC option is set to be False otherwise the function will choose # the AR order by minimizing AIC, so that zero order might be chosen. # Another simulated AR(1) series (The true parameter value here is phi = 0.4): data(ar1.2.s) ar(ar1.2.s,order.max=1,aic=F,method='yw') # method of moments ar(ar1.2.s,order.max=1,aic=F,method='ols') # conditional sum of squares ar(ar1.2.s,order.max=1,aic=F,method='mle') # maximum likelihood # Exhibit 7.5 # A simulated AR(2) series (the true parameter values here are phi_1 = 1.5 and phi_2 = -0.75): data(ar2.s) ar(ar2.s,order.max=2,aic=F,method='yw') # method of moments ar(ar2.s,order.max=2,aic=F,method='ols') # conditional sum of squares ar(ar2.s,order.max=2,aic=F,method='mle') # maximum likelihood # How do these methods compare with a smaller sample size? ar(ar2.s[1:20],order.max=2,aic=F,method='yw') # method of moments ar(ar2.s[1:20],order.max=2,aic=F,method='ols') # conditional sum of squares ar(ar2.s[1:20],order.max=2,aic=F,method='mle') # maximum likelihood # For this example, MOM appears less accurate with the smaller sample size... # Exhibit 7.6 # A simulated ARMA(1,1) series (the true parameter values here are phi=0.6, theta = -0.3): data(arma11.s) arima(arma11.s, order=c(1,0,1),method='CSS') # conditional sum of squares # order=c(1,0,1) specifies p=1, d=0, q=1, so an ARMA(1,1) model. arima(arma11.s, order=c(1,0,1),method='ML') # maximum likelihood # # Recall that R uses the plus convention whereas our book uses the minus # convention in the specification of the MA part, i.e. R specifies an # ARMA(1,1) model as z_t=theta_0+phi*z_{t-1}+e_t+theta_1*e_{t-1} # versus our convention # z_t=theta_0+phi*z_{t-1}+e_t-theta_1*e_{t-1} # # To match the result on page 165 of the book, reverse the sign of the estimates of theta. # Parameter Estimation with some real time series: # Exhibit 7.7 # Estimation of Color property data, based on an AR(1) model. data(color) ar(color,order.max=1,aic=F,method='yw') # method of moments ar(color,order.max=1,aic=F,method='ols') # conditional sum of squares ar(color,order.max=1,aic=F,method='mle') # maximum likelihood # The ML estimate of phi is 0.57. # The estimates using the other methods are fairly similar. # An important distinction between the 'ar' function and the more sophisticated 'arima' function: # The 'arima' function displays standard errors for the estimates. # The 'arima' function also shows the estimated mean of the time series process (unless include.mean=F is specified). # In the 'color' time series, the process mean is clearly not zero (see plot): plot(color, type='o') # Using the 'arima' function (below, with the 'ML' method), allows us to see the estimate of mu, the mean of the process: arima(color,order=c(1,0,0),method='ML') # Note the estimate of theta is (virtually) the same as we saw with the 'ar' function, but 'arima' provides more output. # Exhibit 7.8 # estimation of square-root transformed hare data, based on an $AR(3)$ model: data(hare) arima(sqrt(hare),order=c(3,0,0)) # Note that an ARIMA model with p=3, d=0, q=0 is just an AR(3) model... # Exhibit 7.9 data(oil.price) arima(log(oil.price),order=c(0,1,1),method='CSS') # conditional sum of squares arima(log(oil.price),order=c(0,1,1),method='ML') # maximum likelihood # Here, order=c(0,1,1) specifies an ARIMA model with # p=0, d=1, q=1, that is, an MA(1) model on the FIRST DIFFERENCES # of the logged oil prices. # Shumway-Stoffer examples: library(astsa) # Yule-Walker (MOM) estimation of AR(2) model for recruitment data: # Since the 'arima' function does not offer an option for MOM estimation, we have to use the 'ar' function. # You have to do a lot of extra work to get the output that the 'arima' function provides: rec.yw = ar.yw(rec, order=2) rec.yw$x.mean # = 62.26278 (mean estimate) rec.yw$ar # = 1.3315874, -.4445447 (parameter estimates) sqrt(diag(rec.yw$asy.var.coef)) # = .04222637, .04222637 (standard errors) rec.yw$var.pred # = 94.79912 (error variance estimate) # Compare the MOM estimates above with the ML estimates here for the recruitment data: arima(rec,order=c(2,0,0),method='ML') # glacial varve series data(varve) plot(varve,type='o') acf2(varve) # Apparent non-stationarity in the varve series... plot(diff(log(varve)),type='o') acf2(diff(log(varve))) # an MA(1) model seems appropriate # for the differences of the logged thickness values arima(log(varve),order=c(0,1,1),method='CSS') # conditional sum of squares arima(log(varve),order=c(0,1,1),method='ML') # maximum likelihood # The U.S. GNP data: plot(gnp) acf2(gnp,50) gnpgr = diff(log(gnp)) plot(gnpgr) acf2(gnpgr) arima(gnpgr,order=c(1,0,0),method='ML') # fits an AR(1) model using ML arima(gnpgr,order=c(0,0,2),method='ML') # fits an MA(2) model using ML # In the code above, we fit an AR(1) or MA(2) model for the DIFFERENCED logged data. # Could we (should we) fit an ARIMA(1,1,0) or ARIMA(0,1,2) model to the logged data (not taking differences)? # Answer: We can and probably should, but the best approach does not use the 'arima' function. # We will explore the best approach for this in Chapter 9. # Some large sample CIs: ## 95% CI for the phi in the AR(1) model for the differenced logged gnp data: alpha=0.05 # Directly using outputted standard error from R: round(c(0.3467 - qnorm(1-alpha/2)*0.0627, 0.3467 + qnorm(1-alpha/2)*0.0627),4) arima(diff(log(oil.price)),order=c(0,0,1),method='ML',include.mean=F) # fits MA(1) model using ML alpha=0.10 # Directly using outputted standard error from R: round(c(0.2956 - qnorm(1-alpha/2)*0.0693, 0.2956 + qnorm(1-alpha/2)*0.0693),4) # Exhibit 7.10 library(TSA); data(hare); res=arima(sqrt(hare),order=c(3,0,0),include.mean=T) set.seed(12345) coefm.cond.norm=arima.boot(res,cond.boot=T,is.normal=T,B=1000,init=sqrt(hare)) signif(apply(coefm.cond.norm,2,function(x){quantile(x,c(.025,.975),na.rm=T)}),3) # Method I coefm.cond.replace=arima.boot(res,cond.boot=T,is.normal=F,B=1000,init=sqrt(hare)) signif(apply(coefm.cond.replace,2,function(x){quantile(x,c(.025,.975),na.rm=T)}),3) # Method II coefm.norm=arima.boot(res,cond.boot=F,is.normal=T,ntrans=100,B=1000,init=sqrt(hare)) signif(apply(coefm.norm,2,function(x){quantile(x,c(.025,.975),na.rm=T)}),3) # Method III coefm.replace=arima.boot(res,cond.boot=F,is.normal=F,ntrans=100,B=1000,init=sqrt(hare)) signif(apply(coefm.replace,2,function(x){quantile(x,c(.025,.975),na.rm=T)}),3) # Method IV # Some bootstrap series may be explosive which will be discarded. To see # the number of usable bootstrap series, run the command dim(coefm.replace) # the output should be # [1] 952 5 # i.e. we have only 952 usable (i.e. finite) bootstrap time series even though # we simulate 1000 series. # The theoretical confidence intervals were computed by the output in Exhibit 7.8. # Compute the quasi-period of the bootstrap series based on the method of # stationary bootstrap with the errors drawn from the residuals with replacement. period.replace=apply(coefm.replace,1,function(x){ roots=polyroot(c(1,-x[1:3])) # find the complex root with smalles magnitude min1=1.e+9 rootc=NA for (root in roots) { if( abs(Im(root))<1e-10) next if (Mod(root)< min1) {min1=Mod(root); rootc=root} } if(is.na(rootc)) period=NA else period=2*pi/abs(Arg(rootc)) period }) sum(is.na(period.replace)) # number of bootstap series that do not admit a well-defined quasi-period. quantile(period.replace, c(.025,.975),na.rm=T) # Exhibit 7.11 #win.graph(width=3.9,height=3.8,pointsize=8) hist(period.replace,prob=T,main="",xlab="quasi-period",axes=F,xlim=c(5,16)) axis(2) axis(1,c(4,6,8,10,12,14,16),c(4,6,8,10,12,14,NA)) # Exhibit 7.12 #win.graph(width=3,height=3,pointsize=8) qqnorm(period.replace,main="") #Normal Q-Q Plot for the Bootstrap Quasi-period Estimates") qqline(period.replace)