# R scripts for the lecture course
# "Introduction to machine learning and pattern recognition",
# MPIA Feb./March 2008
# Coryn Bailer-Jones
#
# Lecture 4

########## Support Vector Machines

# You need to download the data file baspeg.dat from the course web site:
# http://www.mpia-hd.mpg.de/homes/calj/ps1/baspeg.dat.gz
# See http://www.mpia.de/homes/calj/ps1/PS1-CBJ-001.pdf for more details
#
# The script expects the file under the name "baspeg.dat" in the current
# working directory.

library(e1071)

# ---- Application with fixed gamma and cost ----

dat.baspeg <- read.table("baspeg.dat", header = TRUE)

# svm() only performs classification when the response is a factor. Since
# R 4.0.0 read.table() no longer converts strings to factors by default, so
# convert explicitly (this is a no-op if the column already is a factor).
dat.baspeg$astroclass <- as.factor(dat.baspeg$astroclass)

dim(dat.baspeg)
nobj <- nrow(dat.baspeg)

# Random half of the row indices is used for training; the remaining rows
# form the test set. Call set.seed() beforehand if you need a reproducible
# split.
train.cl <- sample(seq_len(nobj), floor(0.5 * nobj))

# Train on every column other than astroclass as a predictor.
svm.cl <- svm(
  astroclass ~ .,
  data = dat.baspeg[train.cl, ],
  kernel = "radial",
  cost = 1,
  gamma = 1
)
svm.cl
length(train.cl)
attributes(svm.cl)

# Apply to test set and examine confusion matrix.
# NOTE(review): the column indices below assume astroclass is column 1 and
# the predictors are columns 2:5 -- confirm against the data file.
pred.svm.cl <- predict(svm.cl, dat.baspeg[-train.cl, 2:5])
table(
  dat.baspeg$astroclass[-train.cl],
  pred.svm.cl,
  dnn = c("true", "predicted")
)

# Plot data, fitted classes and mark SVs in the (ri, gr) plane; the two
# predictors not shown (iz, zy) are held fixed at 0.2 for the slice.
plot(
  svm.cl,
  dat.baspeg[train.cl, ],
  gr ~ ri,
  slice = list(iz = 0.2, zy = 0.2)
)

# Now play around and see how number of SVs and train and test errors vary
# with cost, gamma and the kernel function used

# ---- Grid search over gamma and cost ----

# Every (gamma, cost) combination in `ranges` is evaluated; the held-out
# rows are supplied as the validation set.
svm.cl.tune <- tune(
  svm,
  train.x = dat.baspeg[train.cl, 2:5],
  train.y = dat.baspeg[train.cl, 1],
  validation.x = dat.baspeg[-train.cl, 2:5],
  validation.y = dat.baspeg[-train.cl, 1],
  ranges = list(gamma = c(0.1, 1, 10), cost = c(1, 5, 10)),
  tunecontrol = tune.control(sampling = "fix")
)

# analyse
svm.cl.tune$performances

# apply best model
astroclass2.predict.svm.cl <- predict(
  svm.cl.tune$best.model,
  dat.baspeg[-train.cl, 2:5]
)

# etc.