#We will simulate a regression of stopping distance on speed using the cars data set data(cars) plot(cars,xlab="Speed (mph)",ylab="Stopping distance (ft)") cars.lm <- lm(dist~speed,data=cars) summary(cars.lm) lboot=function(x,B=20,fitplot=TRUE){ nl=dim(x)[1] cl=NULL b=NULL plot(x[,3],x[,4],xlab="Speed (mph)",ylab="Stopping distance (ft)") lines(x[,3],x[,1],type="l",col="red") for(i in 1:B){ bsamp=x[,1]+sample(x[,2],nl,replace=TRUE) Blm=lm(bsamp~x[,3]) cfit=Blm$fitted bB=Blm$coefficients[[2]] cl=cbind(cl,cfit) b=c(b,bB) if (fitplot==TRUE) lines(x[,3],cfit) } out=list(cl,b) return(out) } #Quick check to make sure program works attach(cars) py=lboot(cbind(cars.lm$fitted,cars.lm$residuals,speed,dist)) #This section looks at empirical confidence bands for the regression line. It can be skipped plot(cars,xlab="Speed (mph)",ylab="Stopping distance (ft)") cband=apply(py[[1]],1,quantile,probs=c(.05,.95)) lines(speed,cband[1,],col="red") lines(speed,cband[2,],col="blue") title("Car data with 90% empirical confidence bands") #Confidence interval for the slope parameter py=lboot(cbind(cars.lm$fitted,cars.lm$residuals,speed,dist),B=500,fitplot=FALSE) clslope=quantile(py[[2]],probs=c(0.025,.975)) #Regular empirical bootstrap CI (percentile method) clslope confint(cars.lm,'speed',level=0.95) #Inversion method b1=cars.lm$coefficients[2] d1=b1-clslope[1] d2=clslope[2]-b1 paste0("Reflection CI: ",round(b1-d2,4),", ",round(b1+d1,4)) #Some local robust regression commands as well using the water quality data for Cedar Creek C076.df <- read.delim("C076.txt",header=T,row.names=NULL) C076.df$Date <- as.Date(C076.df$Date, format = "%m/%d/%Y") plot(C076.df$Date,C076.df$Fecal_Coliform) plot(C076.df$Date,log(C076.df$Fecal_Coliform)) #Over-smoothed? C076.loess <- loess(log(Fecal_Coliform)~as.numeric(Date),C076.df) lines(C076.loess$x,predict(C076.loess)) #Possibly under-smoothed. Symmetric is better than default C076.loess <- loess(log(Fecal_Coliform)~as.numeric(Date),C076.df,family="symmetric",span=.3) lines(C076.loess$x,predict(C076.loess),col="red") C076.loess <- loess(log(Fecal_Coliform)~as.numeric(Date),C076.df,family="symmetric",span=.4) lines(C076.loess$x,predict(C076.loess),col="blue")