###################################### ## # Section on Nonlinear Regression ## ###################################### ## Some plots of common nonlinear regression functions" # 3-parameter exponential model: beta0 = 3; beta1 = 4; beta2 = -2; xgrid <- seq(0,10,by=.1) EY <- beta0 + beta1*exp(beta2*xgrid) plot(xgrid, EY, type='l') beta0 = 3; beta1 = 4; beta2 = 2; xgrid <- seq(0,4,by=.1) EY <- beta0 + beta1*exp(beta2*xgrid) plot(xgrid, EY, type='l') # 3-parameter logistic model: beta0 = 3; beta1 = 4; beta2 = -2; xgrid <- seq(0,3,by=.1) EY <- beta0 / (1+beta1*exp(beta2*xgrid)) plot(xgrid, EY, type='l') beta0 = 3; beta1 = 4; beta2 = 2; xgrid <- seq(0,3,by=.1) EY <- beta0 / (1+beta1*exp(beta2*xgrid)) plot(xgrid, EY, type='l') # Michaelis-Menten Model: beta1 = 4; beta2 = 2; xgrid <- seq(0,10,by=.1) EY <- (beta1*xgrid)/(xgrid+beta2) plot(xgrid, EY, type='l') # Example 1: # The response is the prognosis for long-term recovery. # A larger value indicates a better prognosis. prognosis <- c(54,50,45,37,35,25,20,16,18,13,8,11,8,4,6) # the predictor is the number of days of hospitalization. days <- c(2,5,7,10,14,19,26,31,34,38,45,52,53,60,65) plot(days, prognosis) # We will fit the nonlinear model E(Y) = beta1*exp(beta2*X) nlm.1 <- nls(prognosis ~ beta1*exp(beta2*days), start = list(beta1 = 75, beta2 = -10)) summary(nlm.1) # Let's linearize to get a sense of the initial values: coef(lm(log(prognosis) ~ days)) # Try some better initial values: nlm.1 <- nls(prognosis ~ beta1*exp(beta2*days), start = list(beta1 = exp(4.037), beta2 = -0.038)) summary(nlm.1) # That worked... # Plot of fitted model: plot(days, prognosis); xgrid <- seq(min(days),max(days), length=200) lines(xgrid, coef(nlm.1)[1]*exp(coef(nlm.1)[2]*xgrid) ) # Example 2: # Response = proportion of sampled individuals in a city # who would be likely to purchase a computer propbuy <- c(.9,.8,.65,.46,.34,.26,.17,.15,.06,.04,.93,.77,.63,.5,.3,.24,.19,.12,.08,.05) # Predictor 1 = selling price of computer (in 100s of dollars) price <- c(1,2.5,5,10,20,30,40,50,75,100,1,2.5,5,10,20,30,40,50,75,100) # Predictor 2 = City (A or B) city <- c('A','A','A','A','A','A','A','A','A','A','B','B','B','B','B','B','B','B','B','B') # changing 'city' to 0 to city A and 1 for city B: city <- ifelse(city=='A',0,1) # A plot of the data: plot(price, propbuy, pch=city+21); # circle for city=0, square for city=1 # We will fit the nonlinear model E(Y) = beta0 + beta1*exp(beta2*X1) + beta3*X2 # linearizing to get initial values for beta1 and beta2: coef(lm(log(propbuy) ~ price)) nlm.2 <- nls(propbuy ~ beta0 + beta1*exp(beta2*price) + beta3*city, start = list(beta0=0, beta1 = exp(-0.347), beta2 = -0.03, beta3=0)) summary(nlm.2) # Plot of fitted model: plot(price, propbuy, pch=city+21); # circle for city=0, square for city=1 xgrid <- seq(min(price),max(price), length=200) lines(xgrid, coef(nlm.2)[1] + coef(nlm.2)[2]*exp(coef(nlm.2)[3]*xgrid), lty=1 ) # When city=0 lines(xgrid, coef(nlm.2)[1] + coef(nlm.2)[2]*exp(coef(nlm.2)[3]*xgrid) + coef(nlm.2)[4], lty=2 ) # When city=1 # We can see visually that the 'city' predictor has little to no effect on the mean response. ## Residual plot: plot(fitted(nlm.2),residuals(nlm.2)) # Seems to be a bit of a curved pattern -- do we need to consider a different nonlinear mean response function?