# Clustering demo: compares k-means, agglomerative hierarchical clustering
# (four linkages) and Gaussian mixture modelling (mclust) on two simulated
# two-group data sets in the plane:
#   1. two uniform discs of different radius and size,
#   2. two spherical bivariate normals of different spread and size.

library(mclust)

# Group sizes shared by both simulations; change these to alter how many
# points fall in each cluster.
n1 <- 200
n2 <- 800

# Draw `n` points uniformly over a disc of the given `radius` around `centre`.
# Returns an n x 2 numeric matrix (x coordinate, y coordinate).
runif_disc <- function(n, radius = 1, centre = c(0, 0)) {
  # The square root makes the density uniform over the disc's area rather
  # than concentrated near the centre.
  r <- radius * sqrt(runif(n))
  theta <- 2 * pi * runif(n)
  cbind(centre[1] + r * cos(theta), centre[2] + r * sin(theta))
}

# Draw `n` points whose two coordinates are independent N(mean, sd^2).
# Returns an n x 2 numeric matrix.
rnorm_circular <- function(n, mean = 0, sd = 1) {
  matrix(rnorm(2 * n, mean, sd), ncol = 2)
}

# For each of four linkage methods, cluster the rows of `x` hierarchically on
# Euclidean distances, cut the tree into `k` groups and scatter-plot `x` with
# the group index as plotting symbol. Draws one plot per linkage; the caller
# is responsible for the panel layout. Returns NULL invisibly.
plot_hclust_cuts <- function(x, k = 2) {
  d <- dist(x, method = "euclidean")  # distance matrix
  # Names are the panel titles, values the hclust() method names. "ward.D" is
  # what the legacy name "ward" maps to since R 3.1.0; using it directly keeps
  # the same clustering without the rename message. ("ward.D2" would be Ward's
  # criterion proper on unsquared distances, but would change the results.)
  linkages <- c(
    single = "single",
    complete = "complete",
    average = "average",
    ward = "ward.D"
  )
  for (label in names(linkages)) {
    tree <- hclust(d, method = linkages[[label]])
    plot(x, main = label, pch = cutree(tree, k = k))
  }
  invisible(NULL)
}

#######################
# two circular uniforms
#######################

# Small disc (radius 1) centred at (-1, -1); large disc (radius 2) at (1, 1).
x <- rbind(
  runif_disc(n1, radius = 1, centre = c(-1, -1)),
  runif_disc(n2, radius = 2, centre = c(1, 1))
)
plot(x)

# k-means with k=2
f <- kmeans(x, 2)
plot(x, pch = f$cluster)  # uh-oh!

# hierarchical methods
par(mfrow = c(2, 2))
plot_hclust_cuts(x, k = 2)

# model-based (finite mixture of normals)
f <- Mclust(x)
plot(f)  # Mclust automatically picks best
summary(f, parameters = TRUE)  # oops!

######################
# two circular normals
######################

# Tight group N(-1, 0.5^2) in each coordinate; diffuse group N(1, 1).
x <- rbind(
  rnorm_circular(n1, mean = -1, sd = 0.5),
  rnorm_circular(n2, mean = 1, sd = 1)
)
par(mfrow = c(1, 1))
plot(x)

# k-means, k=2
f <- kmeans(x, 2)
plot(x, pch = f$cluster)  # uh-oh!

# hierarchical methods
par(mfrow = c(2, 2))
plot_hclust_cuts(x, k = 2)

# model-based (finite mixture of normals)
f <- Mclust(x)
plot(f)  # Mclust automatically picks best
summary(f, parameters = TRUE)  # nice job!