########################################################
######################
#
#  R example code for cluster analysis
#
######################
########################################################

#################################################################
#################################################################
###########
###########   Hierarchical Clustering
###########
#################################################################
#################################################################

# This is the "foodstuffs" data set from HW 2:

###########################################
### Foodstuffs example
###########################################

food <- read.table("http://people.stat.sc.edu/hitchcock/foodstuffs.txt", header=T)

attach(food)

# The hclust function requires that a distance object be input.

# Let's first scale the data by dividing each variable by its standard deviation:

std <- apply(food[,-1], 2, sd)  # finding the standard deviation of each variable
food.std <- sweep(food[,-1], 2, std, FUN="/")

# Calculating pairwise Euclidean distances between the (standardized) objects:

dist.food <- dist(food.std)

# Single linkage:

food.single.link <- hclust(dist.food, method='single')

# Plotting the single linkage dendrogram:

plot(food.single.link, labels=Food, ylab="Distance")

dev.new()  # opening a new window while keeping the previous one open

# Complete linkage:

food.complete.link <- hclust(dist.food, method='complete')

# Plotting the complete linkage dendrogram:

plot(food.complete.link, labels=Food, ylab="Distance")

dev.new()  # opening a new window while keeping the previous one open

# Average linkage:

food.avg.link <- hclust(dist.food, method='average')

# Plotting the average linkage dendrogram:

plot(food.avg.link, labels=Food, ylab="Distance")

# Note that the complete linkage algorithm is slightly less prone to forming
# "outlier-only" clusters here.
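## A hedged aside (not part of the original handout): one quick way to compare
## the three linkage methods is the cophenetic correlation, i.e., the correlation
## between the original distances and the distances implied by each dendrogram.
## The cophenetic() function is in base R's stats package; a higher correlation
## suggests that tree distorts the original distances less.

cor(dist.food, cophenetic(food.single.link))
cor(dist.food, cophenetic(food.complete.link))
cor(dist.food, cophenetic(food.avg.link))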
# Cutting the complete-linkage dendrogram to form k=2 clusters here:

cut.2 <- cutree(food.complete.link, k=2)
cut.2  # printing the "clustering vector"

food.2.clust <- lapply(1:2, function(nc) Food[cut.2==nc])
food.2.clust  # printing the clusters in terms of the Food labels

# Suppose we preferred a 5-cluster solution:

cut.5 <- cutree(food.complete.link, k=5)
# Equivalently, in this case:
cut.5 <- cutree(food.complete.link, h=3.5)  # h specifies the height at which the dendrogram should be cut

cut.5  # printing the "clustering vector"

food.5.clust <- lapply(1:5, function(nc) Food[cut.5==nc])
food.5.clust  # printing the clusters in terms of the Food labels

#############  Visualization of Clusters:

### Via the scatterplot matrix:

pairs(food[,-1], panel=function(x,y) text(x,y,cut.5))

# Cluster 1 seems to be the high-fat, high-energy foods (beef, ham, pork).
# Cluster 2 foods seem to have low iron (more white meats than red meats).
# Cluster 4 foods have low protein (the clams).
# Cluster 5 is a high-calcium outlier (canned sardines).

### Via a plot of the scores on the first 2 principal components,
### with the clusters separated by color:

food.pc <- princomp(food[,-1], cor=T)

# Setting up the colors for the 5 clusters on the plot:

my.color.vector <- rep("green", times=nrow(food))
my.color.vector[cut.5==2] <- "blue"
my.color.vector[cut.5==3] <- "red"
my.color.vector[cut.5==4] <- "orange"
my.color.vector[cut.5==5] <- "brown"

# Plotting the PC scores:

par(pty="s")
plot(food.pc$scores[,1], food.pc$scores[,2], ylim=range(food.pc$scores[,1]),
     xlab="PC 1", ylab="PC 2", type='n', lwd=2)
text(food.pc$scores[,1], food.pc$scores[,2], labels=Food, cex=0.7, lwd=2,
     col=my.color.vector)

# What would this plot look like for the 2-cluster solution?
# For the 3-cluster solution?

###########################################
### Cars example
###########################################

# The mtcars data set is built into R:

help(mtcars)

# We will focus on the variables that are continuous in nature rather than discrete:

cars.data <- mtcars[,c(1,3,4,5,6,7)]

# Standardizing by dividing through by the sample range of each variable:

samp.range <- function(x){
  myrange <- diff(range(x))
  return(myrange)
}
my.ranges <- apply(cars.data, 2, samp.range)
cars.std <- sweep(cars.data, 2, my.ranges, FUN="/")

# Getting the distance matrix:

dist.cars <- dist(cars.std)

# Single linkage:

cars.single.link <- hclust(dist.cars, method='single')

# Plotting the single linkage dendrogram:

plot(cars.single.link, labels=row.names(cars.data), ylab="Distance")

dev.new()  # opening a new window while keeping the previous one open

# Complete linkage:

cars.complete.link <- hclust(dist.cars, method='complete')

# Plotting the complete linkage dendrogram:

plot(cars.complete.link, labels=row.names(cars.data), ylab="Distance")

dev.new()  # opening a new window while keeping the previous one open

# Average linkage:

cars.avg.link <- hclust(dist.cars, method='average')

# Plotting the average linkage dendrogram:

plot(cars.avg.link, labels=row.names(cars.data), ylab="Distance")

# The average linkage dendrogram seems to indicate two major clusters;
# the single linkage dendrogram may indicate three.
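## A hedged aside (not part of the original handout): rect.hclust() in base R
## draws boxes around a chosen number of clusters on the most recent dendrogram,
## which can help when deciding how many major clusters a tree suggests.

plot(cars.avg.link, labels=row.names(cars.data), ylab="Distance")
rect.hclust(cars.avg.link, k=2, border="red")      # the two major clusters

plot(cars.single.link, labels=row.names(cars.data), ylab="Distance")
rect.hclust(cars.single.link, k=3, border="blue")  # a possible three clusters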
# Single Linkage Solution:

cut.3 <- cutree(cars.single.link, k=3)
cut.3  # printing the "clustering vector"

cars.3.clust <- lapply(1:3, function(nc) row.names(cars.data)[cut.3==nc])
cars.3.clust  # printing the clusters in terms of the car names

# Cluster 1 seems to be mostly compact cars, Cluster 2 is sports cars,
# Cluster 3 is large luxury sedans.

#############  Visualization of Clusters:

### Via the scatterplot matrix:

pairs(cars.data, panel=function(x,y) text(x,y,cut.3))

# Cluster 1 cars tend to have high mileage, low displacement, low horsepower, and low weight.
# Cluster 3 cars tend to have low mileage and high weight.

### Via a plot of the scores on the first 2 principal components,
### with the clusters separated by color:

cars.pc <- princomp(cars.data, cor=T)
carnames <- abbreviate(row.names(cars.data))

# Setting up the colors for the 3 clusters on the plot:

my.color.vector <- rep("green", times=nrow(cars.data))
my.color.vector[cut.3==2] <- "blue"
my.color.vector[cut.3==3] <- "red"

# Plotting the PC scores:

par(pty="s")
plot(cars.pc$scores[,1], cars.pc$scores[,2], ylim=range(cars.pc$scores[,1]),
     xlab="PC 1", ylab="PC 2", type='n', lwd=2)
text(cars.pc$scores[,1], cars.pc$scores[,2], labels=carnames, cex=0.7, lwd=2,
     col=my.color.vector)

# It's clear that PC1 is more important in separating the clusters than PC2.

######################  Ward's method:  #####################################
##
##  To perform Ward's method of hierarchical clustering, alter the above code to:
##
##  cars.ward <- hclust(dist.cars, method='ward.D')
##
##  (in current versions of R, 'ward.D' replaces the deprecated method name
##  'ward'; 'ward.D2' is a variant based on squared dissimilarities)
##  and proceed as with the linkage methods.
##
###############################################################################

#################################################################
#################################################################
###########
###########   Partitioning Clustering
###########
#################################################################
#################################################################

##########################
##
##  K-means clustering
##
##########################

###########################################
### Foodstuffs example
###########################################

# Consider the food.std data frame created above.

# A K-means clustering with k = 5:

# Note that the stability of the result can be improved by increasing the
# maximum number of iterations and by using multiple random starts:

food.k5 <- kmeans(food.std, centers=5, iter.max=100, nstart=25)
food.k5

# Let's try k = 4:

food.k4 <- kmeans(food.std, centers=4, iter.max=100, nstart=25)
food.k4

# Printing the clustering vector for the 4-cluster solution:

food.k4$cluster

food.k4.clust <- lapply(1:4, function(nc) Food[food.k4$cluster==nc])
food.k4.clust  # printing the clusters in terms of the Food labels
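## A hedged aside (not part of the original handout): since both a hierarchical
## 5-cluster solution (cut.5) and this K-means 4-cluster solution are now
## available for the food data, a simple cross-tabulation shows how much the
## two partitions agree.  Cluster labels are arbitrary, so look for rows and
## columns that line up rather than for matching label numbers.

table(hierarchical=cut.5, kmeans=food.k4$cluster)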
#############  Visualization of Clusters:

### Via the scatterplot matrix:

pairs(food[,-1], panel=function(x,y) text(x,y,food.k4$cluster))

# Cluster 1 foods tend to be high in calcium.
# Cluster 4 foods tend to be high in fat.
#   (these comments may not hold for every run of the algorithm)

### Via a plot of the scores on the first 2 principal components,
### with the clusters separated by color:

food.pc <- princomp(food[,-1], cor=T)

# Setting up the colors for the 4 clusters on the plot:

my.color.vector <- rep("green", times=nrow(food))
my.color.vector[food.k4$cluster==2] <- "blue"
my.color.vector[food.k4$cluster==3] <- "red"
my.color.vector[food.k4$cluster==4] <- "orange"

# Plotting the PC scores:

par(pty="s")
plot(food.pc$scores[,1], food.pc$scores[,2], ylim=range(food.pc$scores[,1]),
     xlab="PC 1", ylab="PC 2", type='n', lwd=2)
text(food.pc$scores[,1], food.pc$scores[,2], labels=Food, cex=0.7, lwd=2,
     col=my.color.vector)

# Cluster 1 is the "canned seafood" cluster.
# Cluster 2 is the clams cluster.
#   (these comments may not hold for every run of the algorithm)

## NOTE: The default for the kmeans function in R is the Hartigan-Wong (1979)
## algorithm.  The MacQueen (1967) algorithm can be used by altering the code to, say:
##
## kmeans(food.std, centers=4, algorithm="MacQueen")
##
## You can try it in this case -- I don't think the MacQueen algorithm
## produces as good a result.

##########################
##
##  K-medoids clustering
##
##########################

###########################################
### Cars example
###########################################

# Consider the cars.data and cars.std data frames we created above.

# Let's cluster the cars into k groups using the K-medoids approach.
# The function "pam" is in the "cluster" package.

# Loading the "cluster" package:

library(cluster)

# K-medoids directly on the (standardized) data matrix:

cars.kmed.3 <- pam(cars.std, k=3, diss=F)

# Or you can do K-medoids by inputting the distance matrix:
# cars.kmed.3 <- pam(dist.cars, k=3, diss=T)

cars.kmed.3$clustering  # printing the "clustering vector"

cars.kmed.3$silinfo$avg.width  # printing the average silhouette width

### A little function to calculate the average silhouette width
### for a variety of choices of k:

my.k.choices <- 2:8
avg.sil.width <- rep(0, times=length(my.k.choices))
for (ii in (1:length(my.k.choices))) {
  avg.sil.width[ii] <- pam(cars.std, k=my.k.choices[ii])$silinfo$avg.width
}
print(cbind(my.k.choices, avg.sil.width))

# A LARGE average silhouette width suggests that the observations are well
# clustered.  Maybe k=2 is the best choice of k here?

cars.3.clust <- lapply(1:3, function(nc) row.names(cars.data)[cars.kmed.3$clustering==nc])
cars.3.clust  # printing the clusters in terms of the car names

# Cluster 1 seems to be mostly compact cars, Cluster 2 is sports cars,
# Cluster 3 is large luxury sedans.

#############  Visualization of Clusters:

## Built-in plots available with the pam function:

# The "clusplot":

plot(cars.kmed.3, which.plots=1)

# The clusplot (in the "cluster" package) can actually be used with any
# clustering partition by entering the data set and the clustering vector, e.g.:

clusplot(food[,-1], food.k4$cluster)

# The "silhouette plot":

plot(cars.kmed.3, which.plots=2)

# This shows which observations are "best clustered."
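## A hedged aside (not part of the original handout): pam() can also standardize
## the raw data internally via its stand=TRUE argument.  Note that it scales
## each variable by its mean absolute deviation rather than by the range used
## above, so the resulting clusters may differ slightly.

cars.kmed.alt <- pam(cars.data, k=3, stand=TRUE)  # hypothetical object name
table(internal=cars.kmed.alt$clustering, range.std=cars.kmed.3$clustering)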
####################################################
####################################################
#
# Choosing the number of clusters k using the
# average silhouette width criterion
#
####################################################
####################################################

# When using pam, the output gives you the average silhouette width (see the code above).

# We can also get the average silhouette width when using other algorithms:

### With a hierarchical method (complete linkage here):

dist.food <- dist(food.std)
food.complete.link <- hclust(dist.food, method='complete')

summary(silhouette(cutree(food.complete.link, k=2), dist(food.std)))$avg.width
summary(silhouette(cutree(food.complete.link, k=3), dist(food.std)))$avg.width
summary(silhouette(cutree(food.complete.link, k=4), dist(food.std)))$avg.width
summary(silhouette(cutree(food.complete.link, k=5), dist(food.std)))$avg.width

### With k-means:

summary(silhouette(kmeans(food.std, centers=2, iter.max=100, nstart=25)$cluster, dist(food.std)))$avg.width
summary(silhouette(kmeans(food.std, centers=3, iter.max=100, nstart=25)$cluster, dist(food.std)))$avg.width
summary(silhouette(kmeans(food.std, centers=4, iter.max=100, nstart=25)$cluster, dist(food.std)))$avg.width
summary(silhouette(kmeans(food.std, centers=5, iter.max=100, nstart=25)$cluster, dist(food.std)))$avg.width

# In each case, we might choose the value of k associated with the LARGEST
# average silhouette width.

####################################################
####################################################
#
# Choosing the number of clusters k using the Dunn index
#
####################################################
####################################################

install.packages("clValid")
library(clValid)

# A little function to convert a "dist" object into a full distance matrix:

dist2full <- function(ds) {
  n <- attr(ds, "Size")
  full <- matrix(0, n, n)
  full[lower.tri(full)] <- ds
  full + t(full)
}

### With a hierarchical method (complete linkage here):

dist.food <- dist(food.std)
food.complete.link <- hclust(dist.food, method='complete')
distmat.food <- dist2full(dist.food)

dunn(distance=distmat.food, cutree(food.complete.link, k=2))
dunn(distance=distmat.food, cutree(food.complete.link, k=3))
dunn(distance=distmat.food, cutree(food.complete.link, k=4))
dunn(distance=distmat.food, cutree(food.complete.link, k=5))

### With k-means:

dunn(distance=distmat.food, kmeans(food.std, centers=2, iter.max=100, nstart=25)$cluster)
dunn(distance=distmat.food, kmeans(food.std, centers=3, iter.max=100, nstart=25)$cluster)
dunn(distance=distmat.food, kmeans(food.std, centers=4, iter.max=100, nstart=25)$cluster)
dunn(distance=distmat.food, kmeans(food.std, centers=5, iter.max=100, nstart=25)$cluster)

# In each case, we might choose the value of k associated with the LARGEST Dunn index.

################################
################################
##
##  Plotting the WSS for several choices of k
##
################################
################################

# This is a recommended method for choosing k in K-means clustering.

# For the cars data, let's consider letting k vary up to 5.

### CODE FOR WSS PLOT BEGINS HERE ###
##
# Enter the name of the data matrix to be clustered here:
my.data.matrix <- cars.std

my.k.choices <- 2:5
n <- length(my.data.matrix[,1])
wss1 <- (n-1)*sum(apply(my.data.matrix, 2, var))
wss <- numeric(0)
for(i in my.k.choices) {
  W <- sum(kmeans(my.data.matrix, i)$withinss)
  wss <- c(wss, W)
}
wss <- c(wss1, wss)
plot(c(1, my.k.choices), wss, type='l', xlab='Number of clusters',
     ylab='Within-groups sum-of-squares', lwd=2)
##
### CODE FOR WSS PLOT ENDS HERE ###

# For what value of k does the elbow of the plot occur?
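## A hedged variation (not part of the original handout): the same WSS curve can
## be computed more compactly with sapply(), and using multiple random starts
## makes the WSS value for each k more stable:

wss.alt <- sapply(1:5, function(k)
  kmeans(my.data.matrix, centers=k, iter.max=100, nstart=25)$tot.withinss)
plot(1:5, wss.alt, type='b', xlab='Number of clusters',
     ylab='Within-groups sum-of-squares', lwd=2)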
#################################################################
#################################################################
###########
###########   Model-based Clustering
###########
#################################################################
#################################################################

# Consider the built-in state_stats data set in the 'usdata' package in R:

install.packages("usdata")
library(usdata)
help(state_stats)

# We will perform a model-based clustering of the 50 states (plus DC)
# based on these 5 variables:

state.crime <- as.data.frame(state_stats[,c('murder','robbery','agg_assault','larceny','motor_theft')])
rownames(state.crime) <- as.character(state_stats$abbr)

state.crime.comp <- state.crime[complete.cases(state.crime),]  # removing the rows with missing (NA) crime values

par(mfrow=c(2,2))
qqnorm(state.crime.comp[,1])
qqnorm(state.crime.comp[,2])
qqnorm(state.crime.comp[,3])
qqnorm(state.crime.comp[,4])

# The normality assumption looks wrong.  Try transforming the data first:

qqnorm(log(state.crime.comp[,1]))
qqnorm(log(state.crime.comp[,2]))
qqnorm(log(state.crime.comp[,3]))
qqnorm(log(state.crime.comp[,4]))
par(mfrow=c(1,1))

# Chi-square plot to assess multivariate normality:

###############
###
chisplot <- function(x) {
  if (!is.matrix(x)) stop("x is not a matrix")

  # determine dimensions
  n <- nrow(x)
  p <- ncol(x)

  xbar <- apply(x, 2, mean)
  S <- var(x)
  S <- solve(S)
  index <- (1:n)/(n+1)

  xcent <- t(t(x) - xbar)
  di <- apply(xcent, 1, function(x,S) x %*% S %*% x, S)

  quant <- qchisq(index, p)
  plot(quant, sort(di), ylab="Ordered distances",
       xlab="Chi-square quantile", lwd=2, pch=1)
}
###
###############

# Use the chisplot function:

chisplot(as.matrix(state.crime.comp))
chisplot(as.matrix(log(state.crime.comp)))

# We will use the log-transformed data:

log.state.crime.comp <- log(state.crime.comp)

# Loading the mclust package:
# You may need to install the mclust package first.  If so, type at the
# command line:  install.packages("mclust", dependencies=T)
# while connected to the internet.

library(mclust)

# The R function Mclust performs model-based clustering for a range of models
# and a variety of values of k:

crime.clus <- Mclust(log.state.crime.comp)

# By default, the models considered are:
# "EII": spherical, equal volume
# "VII": spherical, unequal volume
# "EEI": diagonal, equal volume and shape
# "VEI": diagonal, varying volume, equal shape
# "EVI": diagonal, equal volume, varying shape
# "VVI": diagonal, varying volume and shape
# "EEE": ellipsoidal, equal volume, shape, and orientation
# "VEE": ellipsoidal, equal shape and orientation (*)
# "EVE": ellipsoidal, equal volume and orientation (*)
# "VVE": ellipsoidal, equal orientation (*)
# "EEV": ellipsoidal, equal volume and equal shape
# "VEV": ellipsoidal, equal shape
# "EVV": ellipsoidal, equal volume (*)
# "VVV": ellipsoidal, varying volume, shape, and orientation

# Plotting the BIC values:

plot(crime.clus, data=log.state.crime.comp, what="BIC")  # Hit ENTER to see the BIC plot.

# The best solution is VVE with 2 clusters.

# The clustering vector:

clus.vec.2 <- crime.clus$classification
clus.vec.2

crime.2.clust <- lapply(1:2, function(nc) row.names(log.state.crime.comp)[clus.vec.2==nc])
crime.2.clust  # printing the clusters in terms of the state names

# This gives the probabilities of belonging to each cluster for every object:

round(crime.clus$z, 2)
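## A hedged aside (not part of the original handout): plot.Mclust can also
## display the classification uncertainty of each observation, which is a
## natural complement to the membership probabilities in crime.clus$z:

plot(crime.clus, what="uncertainty")  # larger symbols = more uncertain states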
## Note: We could also specifically request the best, say, 3-cluster solution
## (according to BIC) if we wanted to.
## For example, the 3-cluster solution is very close in terms of BIC and may
## be more practically interesting:

crime.clus.3 <- Mclust(log.state.crime.comp, G=3)

# The clustering vector:

clus.vec.3 <- crime.clus.3$classification
clus.vec.3

crime.3.clust <- lapply(1:3, function(nc) row.names(log.state.crime.comp)[clus.vec.3==nc])
crime.3.clust  # printing the clusters in terms of the state names

# This gives the probabilities of belonging to each cluster for every object:

round(crime.clus.3$z, 2)

# Visualizing the clusters (let's work with the 3-cluster solution here):

## Via a scatterplot matrix:

plot(crime.clus.3, what="classification")
# Hit ENTER to see a scatterplot matrix with the points separated by cluster.
# Blue is cluster 1, red is cluster 2, green is cluster 3 here.

### Via a plot of the scores on the first 2 principal components,
### with the clusters separated by color:

crime.pc <- princomp(log.state.crime.comp, cor=T)

# Setting up the colors for the 3 clusters on the plot:

my.color.vector <- rep("blue", times=nrow(log.state.crime.comp))
my.color.vector[crime.clus.3$classification==2] <- "red"
my.color.vector[crime.clus.3$classification==3] <- "green"

# Plotting the PC scores:

par(pty="s")
plot(crime.pc$scores[,1], crime.pc$scores[,2], ylim=range(crime.pc$scores[,1]),
     xlab="PC 1", ylab="PC 2", type='n', lwd=2)
text(crime.pc$scores[,1], crime.pc$scores[,2],
     labels=row.names(log.state.crime.comp), cex=0.7, lwd=2,
     col=my.color.vector)

# Reviewing the PCA:

summary(crime.pc, loadings=T)

# Note that PC1 is a "seriousness-of-crime" (?) index and PC2 is an
# "overall crime" index.

# If we wanted to plot the clusters of the 2-cluster solution in the space
# of the first two PCs:

# Setting up the colors for the 2 clusters on the plot:
# my.color.vector <- rep("blue", times=nrow(log.state.crime.comp))
# my.color.vector[crime.clus$classification==2] <- "red"

# Then run the PC-scores plotting code above...

## Note: Similarly, we could request a specific covariance structure
## if we wanted to, for example:
#
# crime.clus.EEI <- Mclust(log.state.crime.comp, modelNames="EEI")
#
# To see the resulting clustering vector, type:
# crime.clus.EEI$classification
#
# and all the analogous plots could be done similarly as above.
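## A hedged aside (not part of the original handout): mclust also provides
## adjustedRandIndex(), a chance-corrected agreement measure between two
## clusterings (1 = identical partitions; values near 0 = chance-level
## agreement).  For example, comparing the 2- and 3-cluster solutions:

adjustedRandIndex(crime.clus$classification, crime.clus.3$classification)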
#################################################################
#################################################################
###########
###########   Clustering Binary Data
###########
#################################################################
#################################################################

# We can read the ACT math test items data from the Internet:

ACTitems <- read.table("http://people.stat.sc.edu/hitchcock/ACTitems.txt", header=F)

# The first column is a set of labels for the 60 test items.
# The remaining columns are the scores (0=incorrect, 1=correct) of the
# 55 male students on each item.

# We wish to cluster the items into groups based on the students' scores on them.

# Just using squared Euclidean distance (counting total mismatches):

dist.items <- dist(ACTitems[,-1], method='euclidean')^2

dist.items.2 <- dist(ACTitems[,-1], method='binary')
# This distance measure ignores 0-0 matches altogether.

dist.items.3 <- dist(1 - ACTitems[,-1], method='binary')
# This distance measure ignores 1-1 matches altogether.

#### Complete linkage clustering of the 60 test items:

items.complete.link <- hclust(dist.items, method='complete')

# Plotting the complete linkage dendrogram:

plot(items.complete.link, labels=ACTitems[,1], ylab="Distance")

# Single linkage:

items.sing.link <- hclust(dist.items, method='single')
plot(items.sing.link, labels=ACTitems[,1], ylab="Distance")

# Single linkage really breaks down when there are a lot of ties among the distances!

#### K-medoids clustering of the 60 test items:

library(cluster)

### A little function to calculate the average silhouette width
### for a variety of choices of k:

my.k.choices <- 2:8
avg.sil.width <- rep(0, times=length(my.k.choices))
for (ii in (1:length(my.k.choices))) {
  avg.sil.width[ii] <- pam(dist.items, k=my.k.choices[ii])$silinfo$avg.width
}
print(cbind(my.k.choices, avg.sil.width))

# Maybe 2 clusters, or at most 3, should be used.

items.kmed.2 <- pam(dist.items, k=2, diss=T)
items.2.clust <- lapply(1:2, function(nc) ACTitems[,1][items.kmed.2$clustering==nc])
items.2.clust

items.kmed.3 <- pam(dist.items, k=3, diss=T)
items.3.clust <- lapply(1:3, function(nc) ACTitems[,1][items.kmed.3$clustering==nc])
items.3.clust
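## A hedged aside (not part of the original handout): since the squared
## Euclidean distance between two rows of binary data equals the number of
## mismatched students, dividing by the number of students gives the simple
## matching dissimilarity, which weights 0-0 and 1-1 matches equally:

dist.items.sm <- dist.items / ncol(ACTitems[,-1])  # proportion of mismatches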
#################################################################
#################################################################
###########
###########   Clustering Mixed Data
###########
#################################################################
#################################################################

heartdis.cols <- c("age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
                   "thalach", "exang", "oldpeak", "slope", "ca", "thal", "num")

# age: in years
# sex: 1=male, 0=female
# cp: chest pain type (1: typical angina, 2: atypical angina, 3: non-anginal pain, 4: asymptomatic)
# trestbps: resting blood pressure (in mm Hg on admission to the hospital)
# chol: serum cholesterol in mg/dl
# fbs: fasting blood sugar > 120 mg/dl (1 = true; 0 = false)
# restecg: resting electrocardiographic results (0: normal, 1: having ST-T wave abnormality,
#          2: showing probable or definite left ventricular hypertrophy by Estes' criteria)
# thalach: maximum heart rate achieved
# exang: exercise-induced angina (1 = yes; 0 = no)
# oldpeak: ST depression induced by exercise relative to rest
# slope: the slope of the peak exercise ST segment (1: upsloping, 2: flat, 3: downsloping)
# ca: number of major vessels (0-3) colored by fluoroscopy
# thal: 3 = normal; 6 = fixed defect; 7 = reversible defect
# num: diagnosis of heart disease (angiographic disease status)
#      (0: < 50% diameter narrowing, 1: > 50% diameter narrowing)
# num actually takes the values (0,1,2,3,4) here: five true clusters.

heartdis <- read.table("http://people.stat.sc.edu/hitchcock/heartdiseasedata.txt",
                       header=F, sep=',', col.names=heartdis.cols, na.strings="?")

heartdis.nm <- na.omit(heartdis)

# Treating several variables as ordinal:

cp <- as.ordered(heartdis.nm$cp)
ecg <- as.ordered(heartdis.nm$restecg)
slope <- as.ordered(heartdis.nm$slope)
vess.clred <- as.ordered(heartdis.nm$ca)
defect.cat <- as.ordered(heartdis.nm$thal)

# Treating 3 variables as nominal:

bld.sugar <- as.factor(heartdis.nm$fbs)
sex <- as.factor(heartdis.nm$sex)
exang <- as.factor(heartdis.nm$exang)

# Other variables (numerical):

bps <- heartdis.nm$trestbps
heartrate <- heartdis.nm$thalach
STdepress <- heartdis.nm$oldpeak
chol <- heartdis.nm$chol
age <- heartdis.nm$age

# Creating a clean data frame with the "mixed" data:

my.clean.data <- data.frame(bps, heartrate, STdepress, chol, age, cp, ecg, slope,
                            vess.clred, defect.cat, bld.sugar, sex, exang)

# Calculating the Gower dissimilarities:

library(cluster)
gower.diss.heart <- daisy(my.clean.data)

# Using K-medoids based on the Gower dissimilarities (using 5 clusters here):

k.med.clust <- pam(gower.diss.heart, k=5)
k.med.res <- k.med.clust$clustering

items.5.clust <- lapply(1:5, function(nc) (1:nrow(my.clean.data))[k.med.res==nc])
items.5.clust

# Or we could do average linkage:

avg.link.clust <- hclust(gower.diss.heart, method="average")
avg.link.res <- cutree(avg.link.clust, k=5)

items.5.clust.avg <- lapply(1:5, function(nc) (1:nrow(my.clean.data))[avg.link.res==nc])
items.5.clust.avg
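## A hedged aside (not part of the original handout): since num records the
## true diagnostic status (five classes), cross-tabulating it against each
## 5-cluster solution gives a rough sense of how well the unsupervised
## clusters recover the known classes:

table(kmedoids=k.med.res, truth=heartdis.nm$num)
table(avglink=avg.link.res, truth=heartdis.nm$num)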