##########
##
# Reading the data into R:

my.datafile <- tempfile()
cat(file=my.datafile, "
 20.96 127.3
 21.40 130.0
 21.96 132.7
 21.52 129.4
 22.39 135.0
 22.76 137.1
 23.48 141.2
 23.66 142.8
 24.10 145.5
 24.01 145.3
 24.54 148.3
 24.30 146.4
 25.00 150.2
 25.64 153.1
 26.36 157.3
 26.98 160.7
 27.52 164.2
 27.78 165.6
 28.24 168.7
 28.78 171.7
", sep=" ")

blais <- read.table(my.datafile, header=FALSE, col.names=c("company", "industry")) 

attach(blais)

# Company Sales=Y, Industry Sales=X 

plot(industry, company)

# may need to install the 'lmtest' package first
# in order to use the dwtest function:

library(lmtest) 

blais.ols <- lm(company ~ industry)
summary(blais.ols)

# Durbin-Watson test for positive autocorrelation of errors:

dwtest(blais.ols, alternative='greater')

# Could also do:
#dwtest(blais.ols, alternative='less')
#dwtest(blais.ols, alternative='two.sided')

# time series plot of residuals (note data are already ordered by time):
plot(resid(blais.ols), type='b');abline(h=0)

# Cochrane-Orcutt procedure with simple linear regression:
e.vec <- resid(blais.ols)
e.t1 <- c(0,e.vec[-(length(e.vec))])

r.best <- (sum(e.vec*e.t1))/(sum(e.t1^2))

Y<-company
X<-industry
Y.t1 <- c(0,Y[-(length(Y))])
X.t1 <- c(0,X[-(length(X))])

Y.prime <- Y - r.best*Y.t1
X.prime <- X - r.best*X.t1

blais.CO <- lm(Y.prime[-1] ~ X.prime[-1])
summary(blais.CO)

# Has the autocorrelation been fixed?

dwtest(blais.CO, alternative='greater')

#Back-transforming to get b0, s{b0}:

b0 <- summary(blais.CO)[[4]][1,1]/(1-r.best); print(b0)
s.b0 <- summary(blais.CO)[[4]][1,2]/(1-r.best)
b1 <- summary(blais.CO)[[4]][2,1]; print(b1)
s.b1 <- summary(blais.CO)[[4]][2,2]
correct.y.hats <- b0 + b1*X

correct.MSE <- summary(blais.CO)$sigma^2; print(correct.MSE)

# Plotting the estimated regression line:

plot(X,Y)
abline(b0,b1)

# Forecasting and Prediction Interval for Y_n+1 :

X.n.plus.1 <- 175.3
X.n <- rev(X)[1]
X.n.plus.1.prime <- X.n.plus.1 - r.best*X.n

X.bar.prime <- mean(X.prime[-1])

s.pred <- sqrt(correct.MSE*(1 + (1/length(Y)) + (X.n.plus.1.prime - X.bar.prime)^2/(sum((X.prime[-1]-X.bar.prime)^2)) )  )

# Point forecast:

Y.hat.n.plus.1 <- b0 + b1*X.n.plus.1
Y.n <- rev(Y)[1]
e.n <- Y.n - (b0 + b1*X.n)
Y.hat.FORECAST.n.plus.1 <- Y.hat.n.plus.1 + r.best*e.n

print(paste("forecasted response at time n+1 is:", round(Y.hat.FORECAST.n.plus.1,4) ))

# Prediction interval:

alpha <- 0.05
pred.L <- Y.hat.FORECAST.n.plus.1 - qt(1-alpha/2,df=length(Y)-3)*s.pred
pred.U <- Y.hat.FORECAST.n.plus.1 + qt(1-alpha/2,df=length(Y)-3)*s.pred


print(paste(100*(1-alpha) ,"percent PI for response at time n+1 is:", round(pred.L,4), ",", round(pred.U,4) ))



# Hildreth-Lu procedure with simple linear regression:
# This doesn't work too well in this example.

Y<-company
X<-industry
Y.t1 <- c(0,Y[-(length(Y))])
X.t1 <- c(0,X[-(length(X))])
r.values <- seq(from=-1,to=1,by=0.01)
SSE.r <- rep(0,times=length(r.values))
for (i in 1:length(r.values)){
Y.prime <- Y - r.values[i]*Y.t1
X.prime <- X - r.values[i]*X.t1
fit.r <- lm(Y.prime[-1] ~ X.prime[-1])
SSE.r[i] <- sum((Y.prime[-1] - fitted(fit.r))^2)
}
# cbind(r.values,SSE.r)  # if you want to see all the r values and their SSE's

print(paste("H-L estimate of rho is:", r.values[which(SSE.r==min(SSE.r))] ))

# Use r = 0.96 based on this search.

r.best <- 0.96
Y.prime <- Y - r.best*Y.t1
X.prime <- X - r.best*X.t1

blais.HL <- lm(Y.prime[-1] ~ X.prime[-1])
summary(blais.HL)

# Has the autocorrelation been fixed?

dwtest(blais.HL)

#Back-transforming to get b0, s{b0}:

b0 <- summary(blais.HL)[[4]][1,1]/(1-r.best); print(b0)
s.b0 <- summary(blais.HL)[[4]][1,2]/(1-r.best)
b1 <- summary(blais.HL)[[4]][2,1]; print(b1)
s.b1 <- summary(blais.HL)[[4]][2,2]
correct.y.hats <- b0 + b1*X

correct.MSE <- summary(blais.HL)$sigma^2; print(correct.MSE)