# kewl ~galaxy~ data with bivariate KDE n <- 500 a <- 1 b <- 1/2 theta <- runif(n,0,2*pi) s <- sample(c(-1,1),n,replace = TRUE) r <- s * a * exp( b * theta) X <- matrix(0,n,2) X[,1] <- r * cos( theta ) + rnorm(n,0,1) X[,2] <- r * sin( theta ) + rnorm(n,0,1) plot(X[,2] ~ X[,1], xlab = "X1", ylab = "X2") abline( v = 0, lty = 3) abline( h = 0, lty = 3) # define bivariate KDE biv_kde <- function(x,data,h){ val <- mean(dnorm(data[,1] - x[1],0,h) * dnorm(data[,2]- x[2],0,h)) return(val) } # plot bivariate KDE at multiple bandwidths par( mfrow = c(2,3), mar= c(2.1,2.1,1.1,1.1)) hh <- c(1/2,1,2,4,8) # make grid over which to evaluate the estimator so that # one can plot its contours gridsize <- 120 x1.seq <- seq(min(X[,1]),max(X[,1]),length = gridsize) x2.seq <- seq(min(X[,2]),max(X[,2]),length = gridsize) zz <- matrix(0,gridsize,gridsize) plot(X[,2] ~ X[,1], xlab = "", ylab = "") abline( v = 0, lty = 3) abline( h = 0, lty = 3) for( k in 1:length(hh)){ plot(X[,2] ~ X[,1], xlab = "", ylab = "", xaxt = "n", yaxt = "n", col = "gray") abline( v = 0, lty = 3) abline( h = 0, lty = 3) for( i in 1:gridsize) for( j in 1:gridsize){ zz[i,j] <- biv_kde(x = c(x1.seq[i],x2.seq[j]),data = X, h = hh[k]) } contour(x1.seq, x2.seq, zz, add = TRUE, nlevels = 20) }