# R scripts for the lecture course "Introduction to machine learning and
# pattern recognition", MPIA Feb./March 2008
#   Coryn Bailer-Jones
#
# Lecture 4


########## Support Vector Machines

# You need to download the data file baspeg.dat from the course web site:
# http://www.mpia-hd.mpg.de/homes/calj/ps1/baspeg.dat.gz
# See http://www.mpia.de/homes/calj/ps1/PS1-CBJ-001.pdf for more details 

library(e1071)
# Application with fixed gamma and cost.
#
# Read the data. svm() only performs classification when the response is a
# factor. Since R 4.0.0 read.table() no longer converts text columns to
# factors by default, so the conversion is requested explicitly here.
# NOTE(review): assumes astroclass is stored as text labels in baspeg.dat --
# confirm against the data file.
dat.baspeg <- read.table("baspeg.dat", header = TRUE, stringsAsFactors = TRUE)
dim(dat.baspeg)
nobj <- nrow(dat.baspeg)
# Draw a random half of the rows as the training set; the remaining rows
# (selected below with -train.cl) serve as the test set.
# Call set.seed() beforehand if a reproducible split is wanted.
train.cl <- sample(seq_len(nobj), floor(0.5 * nobj))
# Fit an RBF-kernel SVM predicting astroclass from all other columns.
svm.cl <- svm(astroclass ~ ., data = dat.baspeg[train.cl, ],
              kernel = "radial", cost = 1, gamma = 1)
svm.cl
length(train.cl)
attributes(svm.cl)
# Apply to the test set and examine the confusion matrix
# (rows: true class, columns: predicted class).
# NOTE(review): columns 2:5 are taken to be the input features, with
# astroclass in column 1 -- confirm against the data file.
pred.svm.cl <- predict(svm.cl, dat.baspeg[-train.cl, 2:5])
table(dat.baspeg[-train.cl, ]$astroclass, pred.svm.cl)
# Plot the training data and fitted classes in the (ri, gr) plane and mark
# the SVs; the other two inputs, iz and zy, are held fixed at 0.2.
plot(svm.cl, dat.baspeg[train.cl, ], gr ~ ri, slice = list(iz = 0.2, zy = 0.2))


# Next, experiment: see how the number of SVs and the train and test errors
# change with cost, gamma and the choice of kernel function.
# tune() refits the SVM for every (gamma, cost) pair in the grid, using one
# fixed split: fit on the train.cl rows, score on the remaining rows.
svm.cl.tune <- tune(
  svm,
  train.x      = dat.baspeg[train.cl, 2:5],
  train.y      = dat.baspeg[train.cl, 1],
  validation.x = dat.baspeg[-train.cl, 2:5],
  validation.y = dat.baspeg[-train.cl, 1],
  ranges       = list(
    gamma = c(0.1, 1, 10),
    cost  = c(1, 5, 10)
  ),
  tunecontrol  = tune.control(sampling = "fix")
)
# Inspect the performance table (one row per parameter combination)
svm.cl.tune$performances
# Predict the held-out rows with the best model found by the search
astroclass2.predict.svm.cl <- predict(
  svm.cl.tune$best.model,
  dat.baspeg[-train.cl, 2:5]
)
# etc.