# Example R code, Support Vector Classifier and Support Vector Machine:

# A baseball example, using ONLY two dimensions (OPS and WHIP) to predict winning status:

# Load the 2015 team batting and pitching tables and join them on the team
# identifier column "Tm", giving one row per team that appears in both files.
# header = TRUE is spelled out in full because the alias T can be reassigned.
bat2015 <- read.csv("http://www.stat.sc.edu/~hitchcock/baseball2015batting.txt", header = TRUE)
pitch2015 <- read.csv("http://www.stat.sc.edu/~hitchcock/baseball2015pitching.txt", header = TRUE)
baseball2015 <- merge(bat2015, pitch2015, by = "Tm")

# Binary response: TRUE when WLP exceeds 0.5
# (WLP is presumably the win-loss percentage -- confirm against the data file).
winning <- baseball2015$WLP > 0.5

# Keep only the columns used in the classification example.
team <- baseball2015$Tm
OPS2015 <- baseball2015$OPS
WHIP2015 <- baseball2015$WHIP
baseball2015class <- data.frame(team, winning, OPS2015, WHIP2015)

# The columns called OPS2015 and WHIP2015 are numerical measures of batting and pitching performance, respectively.
# The column called winning is an indicator of whether the team had a winning record that year.

# attach() is deliberately not used: the columns are read explicitly from
# baseball2015class, which keeps the search path clean and avoids the
# name-masking messages attach() produces when same-named globals exist.

library(e1071)

# Predictor matrix (columns named OPS2015 and WHIP2015) and logical response.
X.train <- cbind(
  OPS2015 = baseball2015class$OPS2015,
  WHIP2015 = baseball2015class$WHIP2015
)
y.train <- baseball2015class$winning

# Scatterplot of the training data.
# The 2:1 is just to switch which variable goes on which axis.
# 3 - y.train maps TRUE to colour 2 and FALSE to colour 3.
plot(X.train[, 2:1], col = 3 - y.train)

# Creating the SVC:

# Modelling data frame: the predictor matrix is passed as x, so its columns
# become x.OPS2015 and x.WHIP2015; the response y is a two-level factor.
dat <- data.frame(x = X.train, y = as.factor(y.train))

# Picking the cost parameter by cross-validation:
# tune() assigns observations to folds at random, so the seed is fixed to make
# the error table below repeatable from run to run.
set.seed(1)
tune.out <- tune(svm, y ~ ., data = dat, kernel = "linear", scale = TRUE,
                 probability = TRUE,
                 ranges = list(cost = c(.001, .01, .1, 1, 5, 10, 100, 1000)))
summary(tune.out)

# In the author's run, cost = 5, 10, 100, or 1000 performed well.
# Check the summary above: the ranking depends on the fold assignment.

# Fit the support vector classifier with the chosen cost.
# probability = TRUE fits the extra model needed for class-probability output.
svcfit <- svm(y ~ ., data = dat, kernel = "linear", cost = 10, scale = TRUE,
              probability = TRUE)

# scale = TRUE tells the function to scale the variables to have mean 0 and variance 1.

# Plot the fitted decision regions together with the training points.
plot(svcfit, dat)

# The points plotted as "x" are the support vectors.  The other points are plotted as "o".

# The points in the light tan region will be assigned to y=FALSE (not winning).
# The points in the maroon region will be assigned to y=TRUE (winning).

# We see three points in the training data are misclassified 
# (the black point in the maroon region and the red points in the tan region).

# Making predictions for several new individuals at once:

# New teams to classify, one row per team. The columns carry the same names as
# the training matrix, so data.frame(x = ...) produces the x.OPS2015 and
# x.WHIP2015 columns that the model formula was fitted on.
newobs <- cbind(
  OPS2015 = c(.760, .700, .760),
  WHIP2015 = c(1.30, 1.40, 1.25)
)
testdat <- data.frame(x = newobs)

# probability = TRUE additionally requests class-probability estimates.
predict(svcfit, newdata = testdat, probability = TRUE)

############################################
## Now using an SVM with a radial kernel:
#############################################

# Picking the cost and gamma parameters by cross-validation:

# Grid search over cost and gamma for the radial-kernel SVM.
# tune() assigns observations to folds at random, so the seed is fixed to make
# the error table below repeatable from run to run.
set.seed(1)
tune.out <- tune(svm, y ~ ., data = dat, kernel = "radial", scale = TRUE,
                 probability = TRUE,
                 ranges = list(cost = c(.001, .01, .1, 1, 5, 10, 100, 1000),
                               gamma = c(0.5, 1, 2, 3, 4)))
summary(tune.out)

# In the author's run, cost = 10 and gamma = 1 worked well.
# Check the summary above: the ranking depends on the fold assignment.

# Fit the radial-kernel SVM with the chosen cost and gamma.
svmfit <- svm(y ~ ., data = dat, kernel = "radial", scale = TRUE,
              probability = TRUE, cost = 10, gamma = 1)

# Plot the fitted decision regions together with the training points.
plot(svmfit, dat)

# The points plotted as "x" are the support vectors.  The other points are plotted as "o".

# The points in the light tan region will be assigned to y=FALSE (not winning).
# The points in the maroon region will be assigned to y=TRUE (winning).

# We see three points in the training data are misclassified 
# (the black point in the maroon region and the red points in the tan region).

# Making predictions for several new individuals at once:

# Each row is one new team, given as (OPS2015, WHIP2015).
newobs <- matrix(
  c(.760, 1.30,
    .700, 1.40,
    .760, 1.25),
  ncol = 2, byrow = TRUE,
  dimnames = list(NULL, c("OPS2015", "WHIP2015"))
)

# Wrapping the matrix as x reproduces the x.-prefixed column names of dat.
testdat <- data.frame(x = newobs)

# Predicted classes from the radial-kernel fit, with probability estimates.
predict(svmfit, newdata = testdat, probability = TRUE)



####################################
# SVM with more than 2 groups:
####################################

# With the Egyptian skulls data from Table 5.8:

# Read the skull measurements; the first line of the file holds column names.
# header = TRUE is spelled out in full because the alias T can be reassigned.
skulls <- read.table("http://www.stat.sc.edu/~hitchcock/skullschap7.txt", header = TRUE)

# attach() is deliberately not used: with() and $ read the columns straight
# from the data frame, so nothing is added to the search path.

# Predictor matrix of the four measurements, and the epoch label as response.
X.train <- with(skulls, cbind(MB, BH, BL, NH))
y.train <- skulls$EPOCH

# Modelling data frame: predictors become x.MB, x.BH, x.BL, x.NH and the
# response y is a factor with one level per distinct EPOCH value.
dat <- data.frame(x = X.train, y = as.factor(y.train))

# Picking the cost and gamma parameters by cross-validation:

# Grid search over cost and gamma for the multi-class radial-kernel SVM.
# tune() assigns observations to folds at random, so the seed is fixed to make
# the error table below repeatable from run to run.
set.seed(1)
tune.out <- tune(svm, y ~ ., data = dat, kernel = "radial", scale = TRUE,
                 probability = TRUE,
                 ranges = list(cost = c(.001, .01, .1, 1, 5, 10, 100, 1000),
                               gamma = c(0.5, 1, 2, 3, 4)))
summary(tune.out)

# Details of the model refitted at the best parameter combination.
summary(tune.out$best.model)

# In the author's run, cost = 100 and gamma = 0.5 worked best.
# Check the summaries above: the ranking depends on the fold assignment.

# Fit the final model with the chosen cost and gamma.
svmfit.skull <- svm(y ~ ., data = dat, kernel = "radial", scale = TRUE,
                    probability = TRUE, cost = 100, gamma = 0.5)


# Let's predict the epoch of a new skull with 
# MB = 135, BH = 144, BL = 97, NH = 53:

# A single new skull as a one-row matrix whose columns are named like the
# training predictors.
newobs <- matrix(
  c(135, 144, 97, 53),
  nrow = 1,
  dimnames = list(NULL, c("MB", "BH", "BL", "NH"))
)

# Wrapping the matrix as x reproduces the x.-prefixed column names of dat.
testdat <- data.frame(x = newobs)

# Predicted epoch, with estimated class probabilities.
predict(svmfit.skull, newdata = testdat, probability = TRUE)