# STAT 824 sp 2023 Lec 06 in class # do a penalized spline regression on generated data rm(list=ls()) library(splines) n <- 100 X <- sort(runif(n)) m <- function(x){ 5*x*sin((1+x)^2*2*pi) } Y <- m(X) + rnorm(n) K <- 20 # number of intervals r <- 3 # people love cubic splines :) d <- K + r # number of basis functions u <- seq(0,1,length=K+1) # equally spaced knots u.all <- c(rep(u[1],r),u,rep(u[K+1],r)) # include boundary knots r + 1 times B <- splineDesign(knots = u.all, x = X, ord = r + 1) # obtain evaluations of the 2nd derivative matrix at interior knots ddB.u <- splineDesign(knots = u.all,x = u,derivs = 2,ord = r+1) # compute the Omega matrix Omega <- matrix(NA,K+3,K+3) for(l in 1:(K+3)) for(j in 1:(K+3)){ tk <- ddB.u[-(K+1),l] gk <- ddB.u[-(K+1),j] tkp1 <- ddB.u[-1,l] gkp1 <- ddB.u[-1,j] Omega[l,j] <- sum( (u[-1] - u[-(K+1)]) * ((1/2) * (gk*tkp1 + gkp1*tk) + (1/3)* (tkp1 - tk)*( gkp1 - gk) )) } lambda <- 0.0000 alpha.hat <- solve(t(B)%*% B + lambda * Omega) %*% t(B) %*% Y S <- B %*% solve(t(B)%*% B + lambda * Omega) %*% t(B) m.hat.X <- S %*% Y # the estimated function evaluated at the design points x <- seq(0,1,length=1000) B.x <- splineDesign(knots = u.all, x = x, ord = r + 1) m.hat.x <- B.x %*% alpha.hat par(mfrow=c(1,1)) plot(Y~X, col = "gray") lines(m.hat.x~x) lines(m(x)~x,lty = 2) points(m.hat.X ~ X) # Look at the eigenstructure of the smoothing matrix for penalized / unpenalized splines eigen.S <- eigen(S) U <- eigen.S$vectors lambda <- eigen.S$values plot(lambda) par(mfrow = c(2,3)) for(j in 1:6){ plot(U[,j]~X,type="l") } # crossvalidation for the choice of lambda in penalized splines lambdas <- seq(.0000001,.00005,by = .000001) CV <- numeric(length(lambdas)) for(j in 1:length(lambdas)){ lambda <- lambdas[j] S <- B %*% solve(t(B)%*% B + lambda * Omega) %*% t(B) m.hat.X <- S %*% Y # the estimated function evaluated at the design points CV[j] <- mean( (( Y - m.hat.X)/(1 - diag(S)))^2) } par(mfrow=c(1,1)) plot(CV) lambda_cv <- lambdas[which.min(CV)] alpha.hat <- solve(t(B)%*% B + lambda_cv * Omega) %*% t(B) %*% Y x <- seq(0,1,length=1000) B.x <- splineDesign(knots = u.all, x = x, ord = r + 1) m.hat.x <- B.x %*% alpha.hat par(mfrow=c(1,1)) plot(Y~X, col = "gray") lines(m.hat.x~x) lines(m(x)~x,lty = 2) # trend filtering rm(list=ls()) library(genlasso) n <- 100 X <- c(1:n)/n m <- function(x){ 5*x*sin((1+x)^2*2*pi) } Y <- m(X) + rnorm(n) D1 <- cbind(diag(n-1),rep(0,n-1)) + cbind(rep(0,n-1),-diag(n-1)) D2 <- (cbind(diag(n-2),rep(0,n-2)) + cbind(rep(0,n-2),-diag(n-2))) %*% D1 D3 <- (cbind(diag(n-3),rep(0,n-3)) + cbind(rep(0,n-3),-diag(n-3))) %*% D2 D4 <- (cbind(diag(n-4),rep(0,n-4)) + cbind(rep(0,n-4),-diag(n-4))) %*% D3 plot(Y~X) genlasso.out <- genlasso(y = Y, X = diag(n) , D = D4) m.hat.X <- genlasso.out$beta[,50] # each column of the beta matrix is for a different lambda value plot(Y~X, col="gray") lines(m.hat.X~X)