## Nonparametric Density Estimation examples
##
## Demonstrates how the number of bins (histograms) and the bandwidth
## (kernel density estimates) change the look of a density estimate, using
## the built-in `faithful` and `airquality` data sets.

### Histograms ----

## Example 1: Old Faithful waiting times

wait.times <- faithful$waiting

# Choosing the number of bins with Scott's method (this is NOT hist()'s
# default, which is breaks = "Sturges").
# Scott's method uses the normal rule of thumb with s estimating sigma.
hist(wait.times, freq = FALSE, breaks = "Scott")

# Printing the number of bins K.
# plot = FALSE computes the same binning without drawing the histogram a
# second time; `freq` is omitted because it only affects plotting.
my.K <- length(hist(wait.times, breaks = "Scott", plot = FALSE)$counts)
print(my.K)

## Trying different numbers of bins:
# Note: seq() with length.out = m returns m break points, i.e. K = m - 1 bins.

# Smaller K --> larger bin width h
hist(wait.times, freq = FALSE,
     breaks = seq(from = 40, to = 100, length.out = 8))

# Larger K --> smaller bin width h
hist(wait.times, freq = FALSE,
     breaks = seq(from = 40, to = 100, length.out = 16))

# Huge K --> Tiny bin width h
hist(wait.times, freq = FALSE,
     breaks = seq(from = 40, to = 100, length.out = 40))

## Example 2: daily wind speed (mph)

wind.mph <- airquality$Wind

# Choosing the number of bins with Scott's method:
hist(wind.mph, freq = FALSE, breaks = "Scott")

# Printing the number of bins.
# NOTE: despite its name, `my.h` holds the number of bins K, not the bin
# width h. The name is kept so any later code that refers to it still works.
my.h <- length(hist(wind.mph, breaks = "Scott", plot = FALSE)$counts)
print(my.h)

## Trying different numbers of bins:

# Smaller K --> larger bin width h
hist(wind.mph, freq = FALSE,
     breaks = seq(from = 0, to = 22, length.out = 8))

# Larger K --> smaller bin width h
hist(wind.mph, freq = FALSE,
     breaks = seq(from = 0, to = 22, length.out = 16))

# Huge K --> Tiny bin width h
hist(wind.mph, freq = FALSE,
     breaks = seq(from = 0, to = 22, length.out = 40))

######################################################

### Kernel density estimates ----

## Example 1: Old Faithful waiting times

wait.times <- faithful$waiting

# Using the "nrd" rule-of-thumb bandwidth (density()'s own default is the
# closely related bw = "nrd0").
# The default kernel is normal ("gaussian").
plot(density(wait.times, bw = "nrd"), main = "kde: Waiting Times")

# We could specify another type of kernel:
plot(density(wait.times, kernel = "epanechnikov", bw = "nrd"),
     main = "kde: Waiting Times")

# Changing the bandwidth has more effect.
# We can multiply the "rule-of-thumb" bw by the value of "adjust":

# smaller bandwidth
plot(density(wait.times, bw = "nrd", adjust = 0.6),
     main = "kde: Waiting Times")

# larger bandwidth
plot(density(wait.times, bw = "nrd", adjust = 1.4),
     main = "kde: Waiting Times")

# MUCH smaller bandwidth -> NOT GOOD!
plot(density(wait.times, bw = "nrd", adjust = 0.2),
     main = "kde: Waiting Times")

# MUCH larger bandwidth -> NOT GOOD!
plot(density(wait.times, bw = "nrd", adjust = 5),
     main = "kde: Waiting Times")

## Example 2: daily wind speed (mph)

wind.mph <- airquality$Wind

# Using the "nrd" rule-of-thumb bandwidth with the default normal
# ("gaussian") kernel:
plot(density(wind.mph, bw = "nrd"), main = "kde: Wind")

# We could specify another type of kernel:
plot(density(wind.mph, kernel = "epanechnikov", bw = "nrd"),
     main = "kde: Wind")

# Changing the bandwidth has more effect.
# We can multiply the "rule-of-thumb" bw by the value of "adjust":

# smaller bandwidth
plot(density(wind.mph, bw = "nrd", adjust = 0.6), main = "kde: Wind")

# larger bandwidth
plot(density(wind.mph, bw = "nrd", adjust = 1.4), main = "kde: Wind")

# MUCH smaller bandwidth -> NOT GOOD!
plot(density(wind.mph, bw = "nrd", adjust = 0.2), main = "kde: Wind")

# MUCH larger bandwidth -> NOT GOOD!
plot(density(wind.mph, bw = "nrd", adjust = 5), main = "kde: Wind")