# Scatter plot smoothing # The response is a measure of the amount of ammonia lost # in a chemical process data(stackloss) attach(stackloss) # Plot a simple regression line plot(Water.Temp,stack.loss,xlab="Water Temperature",ylab="Stack Loss") regress.f=lm(stack.loss~Water.Temp) lines(Water.Temp,regress.f$fitted.values,col="red") # Order the data by Water Temperature so we have # well-defined neighborhoods for our x variable ox=Water.Temp[order(Water.Temp)] ox oy=stack.loss[order(Water.Temp)] oy length(ox) # Regress for subsets of size 11 centered at data points 6,11,16 # Draw lines for these mini-regressions regress.f1=lm(oy[1:11]~ox[1:11]) lines(ox[1:11],regress.f1$fitted.values,col="green") regress.f2=lm(oy[6:16]~ox[6:16]) lines(ox[6:16],regress.f2$fitted.values,col="blue") regress.f3=lm(oy[11:21]~ox[11:21]) lines(ox[11:21],regress.f3$fitted.values,col="orange") # Add symbols to see the predicted values for data points 6,11,16 points(ox[6],regress.f1$fitted.values[6],col="green",pty=19,cex=2) points(ox[11],regress.f2$fitted.values[6],col="blue",pty=19,cex=2) points(ox[16],regress.f3$fitted.values[6],col="orange",pty=19,cex=2) # In a new window, get rid of the regression lines, plot # the predicted values for points 6, 11, and 16 and then # connect them windows() plot(Water.Temp,stack.loss,xlab="Water Temperature",ylab="Stack Loss") points(ox[6],regress.f1$fitted.values[6],col="green",pty=19,cex=2) points(ox[11],regress.f2$fitted.values[6],col="blue",pty=19,cex=2) points(ox[16],regress.f3$fitted.values[6],col="orange",pty=19,cex=2) lines(c(ox[6],ox[11],ox[16]),c(regress.f1$fitted.values[6], regress.f2$fitted.values[6],regress.f3$fitted.values[6])) # In a new window, let's repeat the regression for all data points plot.reg=function(x,y){ n=length(x) out=NULL for(i in 1:n){ il=max(c(1,i-5)) iu=min(c(n,i+5)) regress.f1=lm(oy[il:iu]~ox[il:iu]) lines(ox[il:iu],regress.f1$fitted.values) pty=ifelse(i<6,i,6) points(ox[i],regress.f1$fitted.values[pty],pty=19,cex=2) out=rbind(out,c(ox[i],regress.f1$fitted.values[pty])) out } } plot(Water.Temp,stack.loss,xlab="Water Temperature",ylab="Stack Loss",pch=3) fit.curve=plot.reg(ox,oy) uniquex=!duplicated(fit.curve[,1]) plot(Water.Temp,stack.loss,xlab="Water Temperature",ylab="Stack Loss",pch=3) lines(fit.curve[uniquex,]) # Let's do this using loess commands plot(Water.Temp,stack.loss,xlab="Water Temperature",ylab="Stack Loss") loess.stack=loess(stack.loss~Water.Temp,degree=1) summary(loess.stack) attributes(loess.stack) # The default span is just about right lines(ox,predict(loess.stack,ox)) # A span of .3 is too bumpy loess.stack.3=loess(stack.loss~Water.Temp,span=.3,degree=1) lines(ox,predict(loess.stack.3,ox),col="red",lty=2)