# R example for piecewise regression # We use the raw materials data set from class # y = cost here # x1 = shipment size # Entering the data: y <- c(2.57,4.40,4.52,1.39,4.75,3.55,2.49,4.27,4.42,4.10,2.21,2.90) x1 <- c(650,340,400,800,300,570,720,480,425,460,675,600) # Original scatter plot of Y against x1: plot(x1, y) # Defining indicator x2: x2 <- as.numeric(x1 > 500) # Defining x2star: x2star <- (x1 - 500)*x2 # Fitting the piecewise regression function with a # changepoint at X = 500: raw.piece.reg <- lm(y ~ x1 + x2star) # Recall x2star = (x1 - 500)*x2 here! summary(raw.piece.reg) # Plotting the estimated piecewise regression curve on top of the data: myplotgrid <- seq(min(x1),max(x1),length=400) my.predicted.values <- predict(raw.piece.reg, newdata=data.frame(x1 = myplotgrid, x2star = (myplotgrid - 500)*as.numeric(myplotgrid > 500))) plot(x1, y) lines(myplotgrid, my.predicted.values) ############################################### # Allowing for a discontinuity in the regression function: # Defining another indicator x3: x3 <- as.numeric(x1 > 500) # Fitting a discontinuous piecewise regression function with a # changepoint at X = 500: raw.piece.reg.disc <- lm(y ~ x1 + x2star + x3) # Recall x2star = (x1 - 500)*x2 here! summary(raw.piece.reg.disc) # Based on the output, beta_3 may be zero (P-value = 0.3645), # and the continuous piecewise regression model may be fine. # Plotting the discontinuous piecewise regression function: myplotgrid2 <- seq(min(x1),max(x1),length=400) my.predicted.values2 <- predict(raw.piece.reg.disc, newdata=data.frame(x1 = myplotgrid2, x2star = (myplotgrid2 - 500)*as.numeric(myplotgrid2 > 500), x3 = as.numeric(myplotgrid2 > 500))) plot(x1, y) points(myplotgrid2, my.predicted.values2, pch=20, cex=0.8) ############################################### # When the appropriate changepoint is not known: # This is an R function to pick the optimal changepoint for a # continuous piecewise linear regression, based on a SSE criterion: # Copy this function into R: #### # pick.changepoint <- function(response, predictor, min.changept, max.changept, gridlength){ my.changept.choices <- pretty(c(min.changept, max.changept), n=gridlength) results <- matrix(0, nrow=length(my.changept.choices), ncol=2) for (i in 1:length(my.changept.choices)) { x2star <- (predictor - my.changept.choices[i])*as.numeric(predictor > my.changept.choices[i]) fit <- lm(response ~ predictor + x2star) results[i,1] <- my.changept.choices[i] results[i,2] <- anova(fit)["Residuals", "Sum Sq"] } print(paste("Optimal changepoint:", results[results[,2]==min(results[,2]),1] )) return(results) } # #### # Run the function on the raw materials data set: pick.changepoint(response=y, predictor=x1, min.changept=400, max.changept=600, gridlength=50) # For this data set, using a changepoint of X = 480 produces the smallest SSE.