########## ## # Reading the data into R: my.datafile <- tempfile() cat(file=my.datafile, " 20.96 127.3 21.40 130.0 21.96 132.7 21.52 129.4 22.39 135.0 22.76 137.1 23.48 141.2 23.66 142.8 24.10 145.5 24.01 145.3 24.54 148.3 24.30 146.4 25.00 150.2 25.64 153.1 26.36 157.3 26.98 160.7 27.52 164.2 27.78 165.6 28.24 168.7 28.78 171.7 ", sep=" ") blais <- read.table(my.datafile, header=FALSE, col.names=c("company", "industry")) attach(blais) # Company Sales=Y, Industry Sales=X plot(industry, company) # may need to install the 'lmtest' package first # in order to use the dwtest function: library(lmtest) blais.ols <- lm(company ~ industry) summary(blais.ols) # Durbin-Watson test for positive autocorrelation of errors: dwtest(blais.ols, alternative='greater') # Could also do: #dwtest(blais.ols, alternative='less') #dwtest(blais.ols, alternative='two.sided') # time series plot of residuals (note data are already ordered by time): plot(resid(blais.ols), type='b');abline(h=0) # Cochrane-Orcutt procedure with simple linear regression: e.vec <- resid(blais.ols) e.t1 <- c(0,e.vec[-(length(e.vec))]) r.best <- (sum(e.vec*e.t1))/(sum(e.t1^2)) Y<-company X<-industry Y.t1 <- c(0,Y[-(length(Y))]) X.t1 <- c(0,X[-(length(X))]) Y.prime <- Y - r.best*Y.t1 X.prime <- X - r.best*X.t1 blais.CO <- lm(Y.prime[-1] ~ X.prime[-1]) summary(blais.CO) # Has the autocorrelation been fixed? dwtest(blais.CO, alternative='greater') #Back-transforming to get b0, s{b0}: b0 <- summary(blais.CO)[[4]][1,1]/(1-r.best); print(b0) s.b0 <- summary(blais.CO)[[4]][1,2]/(1-r.best) b1 <- summary(blais.CO)[[4]][2,1]; print(b1) s.b1 <- summary(blais.CO)[[4]][2,2] correct.y.hats <- b0 + b1*X correct.MSE <- summary(blais.CO)$sigma^2; print(correct.MSE) # Plotting the estimated regression line: plot(X,Y) abline(b0,b1) # Forecasting and Prediction Interval for Y_n+1 : X.n.plus.1 <- 175.3 X.n <- rev(X)[1] X.n.plus.1.prime <- X.n.plus.1 - r.best*X.n X.bar.prime <- mean(X.prime[-1]) s.pred <- sqrt(correct.MSE*(1 + (1/length(Y)) + (X.n.plus.1.prime - X.bar.prime)^2/(sum((X.prime[-1]-X.bar.prime)^2)) ) ) # Point forecast: Y.hat.n.plus.1 <- b0 + b1*X.n.plus.1 Y.n <- rev(Y)[1] e.n <- Y.n - (b0 + b1*X.n) Y.hat.FORECAST.n.plus.1 <- Y.hat.n.plus.1 + r.best*e.n print(paste("forecasted response at time n+1 is:", round(Y.hat.FORECAST.n.plus.1,4) )) # Prediction interval: alpha <- 0.05 pred.L <- Y.hat.FORECAST.n.plus.1 - qt(1-alpha/2,df=length(Y)-3)*s.pred pred.U <- Y.hat.FORECAST.n.plus.1 + qt(1-alpha/2,df=length(Y)-3)*s.pred print(paste(100*(1-alpha) ,"percent PI for response at time n+1 is:", round(pred.L,4), ",", round(pred.U,4) )) # Hildreth-Lu procedure with simple linear regression: # This doesn't work too well in this example. Y<-company X<-industry Y.t1 <- c(0,Y[-(length(Y))]) X.t1 <- c(0,X[-(length(X))]) r.values <- seq(from=-1,to=1,by=0.01) SSE.r <- rep(0,times=length(r.values)) for (i in 1:length(r.values)){ Y.prime <- Y - r.values[i]*Y.t1 X.prime <- X - r.values[i]*X.t1 fit.r <- lm(Y.prime[-1] ~ X.prime[-1]) SSE.r[i] <- sum((Y.prime[-1] - fitted(fit.r))^2) } # cbind(r.values,SSE.r) # if you want to see all the r values and their SSE's print(paste("H-L estimate of rho is:", r.values[which(SSE.r==min(SSE.r))] )) # Use r = 0.96 based on this search. r.best <- 0.96 Y.prime <- Y - r.best*Y.t1 X.prime <- X - r.best*X.t1 blais.HL <- lm(Y.prime[-1] ~ X.prime[-1]) summary(blais.HL) # Has the autocorrelation been fixed? dwtest(blais.HL) #Back-transforming to get b0, s{b0}: b0 <- summary(blais.HL)[[4]][1,1]/(1-r.best); print(b0) s.b0 <- summary(blais.HL)[[4]][1,2]/(1-r.best) b1 <- summary(blais.HL)[[4]][2,1]; print(b1) s.b1 <- summary(blais.HL)[[4]][2,2] correct.y.hats <- b0 + b1*X correct.MSE <- summary(blais.HL)$sigma^2; print(correct.MSE)