SVM

The document outlines a process for building and evaluating machine learning models using Support Vector Machines (SVM) and linear regression in R. It includes data preparation, model training, cross-validation, hyperparameter tuning, and performance evaluation using metrics such as AUC and RMSE. The document also demonstrates the visualization of model predictions against actual data.
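As background for the evaluation metrics named above, the following minimal sketch shows how AUC (via the ROCR package) and RMSE are typically computed in R. The toy scores, labels, and residuals are made-up values for illustration only, not taken from the document's data.

# Illustrative sketch only: the two metrics used in this document.
library(ROCR)

# Toy predicted probabilities and true class labels (assumed values)
scores <- c(0.9, 0.8, 0.35, 0.6, 0.2, 0.1)
labels <- c(1, 1, 0, 1, 0, 0)

# AUC: area under the ROC curve
pred_obj <- prediction(scores, labels)
as.numeric(performance(pred_obj, "auc")@y.values)

# RMSE: root mean squared error of residuals (assumed values)
residuals <- c(0.5, -1.2, 0.3, 0.8)
sqrt(mean(residuals^2))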


library(caret)

library(kernlab)
library(ROCR)
data(segmentationData)
help(segmentationData)
head(segmentationData)
# Number of rows and columns
dim(segmentationData)
str(segmentationData)

# Distribution of the target variable
table(segmentationData$Class)
table(segmentationData$Class) / length(segmentationData$Class)

# Split data into training (70%) and validation (30%) sets, stratified on Class
Index <- createDataPartition(segmentationData$Class, p = .7, list = FALSE)
svm.train <- segmentationData[Index, ]
names(svm.train)
svm.validate <- segmentationData[-Index, ]

# Keep Class plus the cell-measurement features (drop the Cell and Case columns)
svm.train1 <- svm.train[, 3:61]
names(svm.train1)

# Baseline: linear-kernel SVM and its accuracy on the validation set
library(e1071)
model <- svm(Class ~ ., kernel = "linear", data = svm.train1)
predict1 <- predict(model, svm.validate)
mean(predict1 == svm.validate$Class)
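
# Added sketch (not in the original script): beyond raw accuracy, caret's
# confusionMatrix() also reports sensitivity, specificity and related
# statistics for the baseline model, assuming the predict1 and svm.validate
# objects created above.
confusionMatrix(predict1, svm.validate$Class)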

# Set up 2-fold cross-validation; classProbs = TRUE is required for the ROC metric
set.seed(123)
ctrl <- trainControl(method = "cv",
                     number = 2,
                     summaryFunction = twoClassSummary,
                     classProbs = TRUE)

# Grid search to fine-tune the SVM hyperparameters
grid <- expand.grid(sigma = c(.01, .015, 0.2),
                    C = c(0.75, 0.9, 1, 1.1, 1.25))

# Train a radial-kernel SVM, selecting the grid point that maximizes ROC AUC.
# Use only the feature columns (4:61) as predictors; column 3 is the Class label itself.
svm.tune <- train(x = svm.train[, 4:61],
                  y = svm.train$Class,
                  method = "svmRadial",
                  metric = "ROC",
                  tuneGrid = grid,
                  trControl = ctrl)

svm.tune

# Predict class probabilities for the validation set
valX <- svm.validate[, 4:61]
pred <- predict(svm.tune, valX, type = "prob")[2]  # keep the probability of the second class level
head(pred)

# Model performance statistics (ROCR)
pred_val <- prediction(pred, svm.validate$Class)

# Area under the ROC curve
perf_val <- performance(pred_val, "auc")
auc <- as.numeric(perf_val@y.values)
auc

# True positive and false positive rates
perf_val <- performance(pred_val, "tpr", "fpr")

# Plot the ROC curve with a diagonal reference line
plot(perf_val, col = "green", lwd = 1.5)
abline(a = 0, b = 1, lty = 2)

# Kolmogorov-Smirnov statistic: maximum gap between the TPR and FPR curves
# ks <- max(attr(perf_val, "y.values")[[1]] - attr(perf_val, "x.values")[[1]])
# ks

############################################################################

# Load the data from the CSV file
# dataDirectory <- "D:/" # put your own folder here
# data <- read.csv(paste(dataDirectory, 'regression.csv', sep=""), header = TRUE)
data <- read.csv('E:\\DataScience\\ML\\regression.csv', sep = ",", header = TRUE)
head(data)

# Plot the data
plot(data, pch = 16)

# Fit a simple linear regression model
model <- lm(Y ~ X, data)
# Add the fitted regression line to the plot
abline(model)

# Make a prediction for each X using the linear model
LpredictedY <- predict(model, data)

# Display the predictions
plot(data, pch = 16)
points(data$X, LpredictedY, col = "blue", pch = 4)

# RMSE helper: root mean squared error of a vector of errors
rmse <- function(error) {
  sqrt(mean(error^2))
}

error <- model$residuals  # same as data$Y - LpredictedY
predictionRMSE <- rmse(error)
mean(data$Y)
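
# Added illustration (not part of the original script): express the RMSE
# relative to the scale of the target to make it easier to interpret.
predictionRMSE / mean(data$Y)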

# Support Vector Regression

library(e1071)

# SVR with a linear kernel
model <- svm(Y ~ X, kernel = "linear", data)

plot(data, pch = 16)
predictedY <- predict(model, data)
points(data$X, predictedY, col = "red", pch = 4)

# SVR with the default radial kernel
model <- svm(Y ~ X, data)

predictedY <- predict(model, data)
plot(data, pch = 16)
points(data$X, predictedY, col = "red", pch = 4)

error <- data$Y - predictedY
svrPredictionRMSE <- rmse(error)

# Perform a grid search over epsilon and cost
tuneResult <- tune(svm, Y ~ X, data = data,
                   ranges = list(epsilon = seq(0, 1, 0.1), cost = 2^(2:9)))
print(tuneResult)
# Draw the tuning graph
plot(tuneResult)

# Refine the search around the best region found above
tuneResult <- tune(svm, Y ~ X, data = data,
                   ranges = list(epsilon = seq(0, 0.2, 0.01), cost = 2^(2:9)))
print(tuneResult)
plot(tuneResult)

# Use the best model found by the grid search
tunedModel <- tuneResult$best.model
tunedModelY <- predict(tunedModel, data)

error <- data$Y - tunedModelY

# This value can differ on your computer because the tune method
# randomly shuffles the data
tunedModelRMSE <- rmse(error)
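
# Added sketch (not in the original script): because tune() shuffles the data
# for its internal cross-validation, setting a seed immediately before the call
# makes the selected model, and hence tunedModelRMSE, reproducible across runs.
# The seed value is arbitrary.
set.seed(123)
tuneResult <- tune(svm, Y ~ X, data = data,
                   ranges = list(epsilon = seq(0, 0.2, 0.01), cost = 2^(2:9)))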

# Overlay the three fits on the original data
plot(data, pch = 16)

# Linear regression fit (green)
points(data$X, LpredictedY, col = "green", pch = 4)
lines(data$X, LpredictedY, col = "green")

# Default SVR fit (red)
points(data$X, predictedY, col = "red", pch = 4)
lines(data$X, predictedY, col = "red")

# Tuned SVR fit (blue)
points(data$X, tunedModelY, col = "blue", pch = 4)
lines(data$X, tunedModelY, col = "blue")
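
# Added sketch (not in the original script): a legend makes the three overlaid
# fits identifiable; the position "topleft" is an arbitrary choice.
legend("topleft",
       legend = c("Linear regression", "Default SVR", "Tuned SVR"),
       col = c("green", "red", "blue"),
       lty = 1, pch = 4)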
