K-Nearest Neighbour
import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.neighbors import KNeighborsClassifier
dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML
LAB/Data/Logistic_car_data.csv")
dataset
Out[]:
     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]
# input
x = dataset.iloc[:, [2,3]].values
x
Out[]:
array([[ 35, 20000],
[ 40, 43500],
[ 49, 74000],
...,
[ 28, 138500],
[ 48, 134000],
[ 44, 73500]])
# output
y = dataset.iloc[:, 4].values
y
Out[]:
array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       ...,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0])
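Only Age and AnnualSalary are used as inputs below; Gender is left out. If it were to be included, the LabelEncoder imported above could map it to numbers first. A minimal sketch (the encoded column is illustrative and not used in the steps that follow):
# Hypothetical: encode the Gender column as integers (e.g. Female -> 0, Male -> 1)
le = LabelEncoder()
gender_encoded = le.fit_transform(dataset['Gender'])
x_with_gender = nm.column_stack((gender_encoded, x))  # 3-feature variant, unused below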
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)
knn = KNeighborsClassifier(n_neighbors=7)
knn.fit(xtrain, ytrain)
Out[]: KNeighborsClassifier(n_neighbors=7)
ypred = knn.predict(xtest)
print(ypred)
[1 0 1 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 1]
knn.score(xtest, ytest)
Out[]: 0.924
print ("\n\nAccuracy : ", accuracy_score(ytest, ypred)*100)
Accuracy : 92.4
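The setting n_neighbors=7 is one reasonable choice; a quick sweep over candidate values of k (a sketch reusing the scaled xtrain/xtest splits above) shows how the test accuracy depends on it:
# Sketch: test accuracy of KNN for k = 1..20 on the splits defined above
scores = []
for k in range(1, 21):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(xtrain, ytrain)
    scores.append(model.score(xtest, ytest))
plt.plot(range(1, 21), scores, marker='o')
plt.xlabel('k (n_neighbors)')
plt.ylabel('Test accuracy')
plt.show()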
cm = confusion_matrix(ytest, ypred)
print ("Confusion Matrix : \n", cm)
Confusion Matrix :
[[142 10]
[ 9 89]]
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car ', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()
# Visualising the full data set (unscaled features)
X_set = x
y_set = y
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.xlim(0, 80)
plt.ylim(0, 180000)
plt.title('KNN input data (full data set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the test set (standardised features)
X_set = xtest
y_set = ytest
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('Classifier (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
Support Vector Machine for Binary Classification
import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
# Importing the dataset
dataset = pd.read_csv('/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_car_data.csv')
dataset
Out[]:
     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]
# input
x = dataset.iloc[:, [2,3]].values
x
Out[]:
array([[ 35, 20000],
[ 40, 43500],
[ 49, 74000],
...,
[ 28, 138500],
[ 48, 134000],
[ 44, 73500]])
# output
y = dataset.iloc[:, 4].values
y
Out[]:
array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       ...,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0])
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)
classifier = SVC(kernel = 'linear', random_state = 0)
classifier.fit(xtrain, ytrain)
Out[]: SVC(kernel='linear', random_state=0)
ypred = classifier.predict(xtest)
print(ypred)
[1 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 1 0 1 0 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0]
classifier.score(xtest, ytest)
Out[]: 0.84
print ("\n\nAccuracy : ", accuracy_score(ytest, ypred)*100)
Accuracy : 84.0
cm = confusion_matrix(ytest, ypred)
print ("Confusion Matrix : \n", cm)
Confusion Matrix :
[[138 14]
[ 26 72]]
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car ', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()
# Visualising the full data set (unscaled features)
X_set = x
y_set = y
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.xlim(0, 80)
plt.ylim(0, 180000)
plt.title('SVM input data (full data set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the Test set results
X_set, y_set = xtest, ytest
X1, X2 = nm.meshgrid(nm.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     nm.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('white', 'black'))(i), label=j)
plt.title('SVM with linear kernel (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Using the RBF kernel for SVM
classifier = SVC(kernel = 'rbf', random_state = 0)
classifier.fit(xtrain, ytrain)
Out[]: SVC(random_state=0)
ypred = classifier.predict(xtest)
print(ypred)
[1 0 1 0 0 0 1 1 1 0 1 0 0 0 1 1 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 0 1 0 1 1 0 0 0 1 1 0]
classifier.score(xtest, ytest)
Out[]: 0.9
print ("\n\nAccuracy : ", accuracy_score(ytest, ypred)*100)
Accuracy : 90.0
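The RBF kernel improves on the linear kernel here (90.0% vs 84.0%), since the purchase decision is not linearly separable in the Age/salary plane. Its C and gamma hyperparameters can be tuned further with a grid search; a minimal sketch over an illustrative parameter grid, reusing the scaled xtrain/ytrain above:
# Sketch: small grid search over C and gamma for the RBF kernel
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.1, 1]}
grid = GridSearchCV(SVC(kernel='rbf', random_state=0), param_grid, cv=5)
grid.fit(xtrain, ytrain)
print(grid.best_params_, grid.best_score_)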
cm = confusion_matrix(ytest, ypred)
print ("Confusion Matrix : \n", cm)
Confusion Matrix :
[[138 14]
[ 11 87]]
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car ', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()
# Visualising the full data set (unscaled features)
X_set = x
y_set = y
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.xlim(0, 80)
plt.ylim(0, 180000)
plt.title('SVM (RBF) input data (full data set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the Test set results
X_set, y_set = xtest, ytest
X1, X2 = nm.meshgrid(nm.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01),
                     nm.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('white', 'black'))(i), label=j)
plt.title('SVM with RBF kernel (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
Naïve Bayes
import numpy as nm
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.naive_bayes import GaussianNB
# Importing the dataset
dataset = pd.read_csv('/Users/dianamoses/Documents/MCET/Course Files/ML/ML LAB/Data/Logistic_car_data.csv')
dataset
Out[]:
     User ID  Gender  Age  AnnualSalary  Purchased
0        385    Male   35         20000          0
1        681    Male   40         43500          0
2        353    Male   49         74000          0
3        895    Male   40        107500          1
4        661    Male   25         79000          0
..       ...     ...  ...           ...        ...
995      863    Male   38         59000          0
996      800  Female   47         23500          0
997      407  Female   28        138500          1
998      299  Female   48        134000          1
999      687  Female   44         73500          0

[1000 rows x 5 columns]
# input
x = dataset.iloc[:, [2, 3]].values
x
Out[]:
array([[ 35, 20000],
[ 40, 43500],
[ 49, 74000],
...,
[ 28, 138500],
[ 48, 134000],
[ 44, 73500]])
# Target
y = dataset.iloc[:, 4].values
y
Out[]:
array([0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       ...,
       0, 0, 0, 0, 1, 0, 0, 1, 1, 0])
x2 = dataset.iloc[:, [2]].values
plt.scatter(x2,y)
plt.xlabel("Age")
plt.ylabel("Purchased")
plt.show()
x3 = dataset.iloc[:, [3]].values
plt.scatter(x3,y)
plt.xlabel("Salary")
plt.ylabel("Purchased")
plt.show()
# Splitting the dataset into the Training set and Test set
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 0)
# Feature Scaling
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
# Fitting Naive Bayes to the Training set
classifier = GaussianNB()
classifier.fit(x_train, y_train)
Out[]: GaussianNB()
# Predicting the Test set results
y_pred = classifier.predict(x_test)
print("Predicted values:")
print(y_pred)
Predicted values:
[1 0 1 0 0 0 0 1 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0
0 0 0 0 0 0 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 1 0 0 0 1 1 0]
acc= accuracy_score(y_test,y_pred)*100
print ("\n\nAccuracy of Naïve Bayes Classifier: ", acc)
Accuracy of Naïve Bayes Classifier: 88.0
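GaussianNB also exposes class-membership probabilities through predict_proba, and the per-class feature means it estimated are stored in theta_ (attribute name per scikit-learn's GaussianNB). A short sketch on the scaled test set above:
# Sketch: inspect predicted probabilities for the first few test samples
proba = classifier.predict_proba(x_test[:5])
print(classifier.classes_)   # column order of proba
print(proba)
print(classifier.theta_)     # per-class feature means learned from x_train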
# Making the Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
cm
Out[]:
array([[140, 12],
[ 18, 80]])
fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1), ticklabels=('Predicted Does not Buy car', 'Predicted Buys Car'))
ax.yaxis.set(ticks=(0, 1), ticklabels=('Actual Does not Buy car ', 'Actual Buys Car'))
ax.set_ylim(1.5, -0.5)
for i in range(2):
    for j in range(2):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='black')
plt.show()
# Visualising the Training set results
x_set, y_set = x_train, y_train
X1, X2 = nm.meshgrid(nm.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     nm.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('white', 'black')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c=ListedColormap(('purple', 'green'))(i), label=j)
plt.title('Naive Bayes (Training set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
# Visualising the Test set results
x_set, y_set = x_test, y_test
X1, X2 = nm.meshgrid(nm.arange(start=x_set[:, 0].min() - 1, stop=x_set[:, 0].max() + 1, step=0.01),
                     nm.arange(start=x_set[:, 1].min() - 1, stop=x_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2, classifier.predict(nm.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('white', 'black')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(nm.unique(y_set)):
    plt.scatter(x_set[y_set == j, 0], x_set[y_set == j, 1],
                c=ListedColormap(('purple', 'green'))(i), label=j)
plt.title('Naive Bayes (Test set)')
plt.xlabel('Age')
plt.ylabel('Annual Salary')
plt.legend()
plt.show()
Demonstration of Clustering using Hierarchical Algorithms (Agglomerative Clustering)
import numpy as nm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering
from scipy.cluster.hierarchy import fcluster, linkage, dendrogram
import warnings
warnings.filterwarnings('ignore')
dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML
LAB/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Peatal Width         Species
0             5.1          3.5           1.4           0.2     Iris-setosa
1             4.9          3.0           1.4           0.2     Iris-setosa
2             4.7          3.2           1.3           0.2     Iris-setosa
3             4.6          3.1           1.5           0.2     Iris-setosa
4             5.0          3.6           1.4           0.2     Iris-setosa
..            ...          ...           ...           ...             ...
145           6.7          3.0           5.2           2.3  Iris-virginica
146           6.3          2.5           5.0           1.9  Iris-virginica
147           6.5          3.0           5.2           2.0  Iris-virginica
148           6.2          3.4           5.4           2.3  Iris-virginica
149           5.9          3.0           5.1           1.8  Iris-virginica

[150 rows x 5 columns]
# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
       ...])
sns.pairplot(dataset)
# Finding the optimum number of clusters for k-means clustering
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300, n_init=10, random_state=0)
    kmeans.fit(x)
    wcss.append(kmeans.inertia_)   # within-cluster sum of squares
# Plotting the results as a line graph to observe 'the elbow'
plt.plot(range(1, 11), wcss, marker='o')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS (within-cluster sum of squares)')
plt.show()
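The silhouette coefficient offers a second opinion on the cluster count; a sketch computing the average silhouette for k-means solutions with k = 2..10 (higher is better):
# Sketch: average silhouette score for increasing k
from sklearn.metrics import silhouette_score
for k in range(2, 11):
    km = KMeans(n_clusters=k, n_init=10, random_state=0)
    print(k, silhouette_score(x, km.fit_predict(x)))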
distance_matrix = linkage(x, method = 'ward', metric = 'euclidean')
# Create a dendrogram
dn = dendrogram(distance_matrix)
# Display the dendrogram
plt.show()
dn = dendrogram(distance_matrix)
plt.axhline(y=13, color='r', linestyle='--')
plt.ylim(0,15)
plt.show()
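Cutting the dendrogram at height 13 leaves two branches; the same flat cluster labels can be obtained programmatically with the fcluster function imported above (sketch):
# Sketch: cut the linkage at distance 13 to get flat (1-based) cluster labels
labels_2 = fcluster(distance_matrix, t=13, criterion='distance')
print(nm.unique(labels_2))   # expect two cluster ids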
cluster = AgglomerativeClustering(n_clusters=2, affinity='euclidean', linkage='ward')
cluster.fit_predict(x)
plt.title('Agglomerative Clustering – 2 Clusters')
plt.scatter(x[:, 2], x[:, 3], c=cluster.labels_)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
dn = dendrogram(distance_matrix)
plt.axhline(y=8, color='r', linestyle='--')
plt.ylim(0,15)
plt.show()
cluster = AgglomerativeClustering(n_clusters=3, affinity='euclidean', linkage='ward')
cluster.fit_predict(x)
plt.title('Agglomerative Clustering – 3 Clusters')
plt.scatter(x[:, 2], x[:, 3], c=cluster.labels_)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
dn = dendrogram(distance_matrix)
plt.axhline(y=4, color='r', linestyle='--')
plt.ylim(0,15)
plt.show()
cluster = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='ward')
cluster.fit_predict(x)
plt.title('Agglomerative Clustering – 5 Clusters')
plt.scatter(x[:, 2], x[:, 3], c=cluster.labels_)
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')
plt.show()
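Because the Iris species labels are available in the dataset, the agreement between a clustering and the true species can be checked. A sketch that refits the 3-cluster solution and cross-tabulates its labels against the Species column shown in the head output above:
# Sketch: compare 3 agglomerative clusters with the true species
cluster3 = AgglomerativeClustering(n_clusters=3, linkage='ward')
labels3 = cluster3.fit_predict(x)
print(pd.crosstab(labels3, dataset['Species']))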
Build a classifier and compare its performance with an ensemble technique like Random Forest.
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import warnings
warnings.filterwarnings('ignore')
dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML
LAB/Data/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Peatal Width         Species
0             5.1          3.5           1.4           0.2     Iris-setosa
1             4.9          3.0           1.4           0.2     Iris-setosa
2             4.7          3.2           1.3           0.2     Iris-setosa
3             4.6          3.1           1.5           0.2     Iris-setosa
4             5.0          3.6           1.4           0.2     Iris-setosa
..            ...          ...           ...           ...             ...
145           6.7          3.0           5.2           2.3  Iris-virginica
146           6.3          2.5           5.0           1.9  Iris-virginica
147           6.5          3.0           5.2           2.0  Iris-virginica
148           6.2          3.4           5.4           2.3  Iris-virginica
149           5.9          3.0           5.1           1.8  Iris-virginica

[150 rows x 5 columns]
# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
       ...])
# target
y = dataset.iloc[:, 4].values
y
Out[]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       ...,
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       ...,
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       ...], dtype=object)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)
dtree= DecisionTreeClassifier()
dtree.fit(xtrain, ytrain)
Out[]: DecisionTreeClassifier()
y_pred1 = dtree.predict(xtest)
print("Predicted values:")
y_pred1
Predicted values:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-virginica'], dtype=object)
acc_dtree= accuracy_score(ytest,y_pred1)*100
print ("\n\nAccuracy using Single Decision Tree: ", acc_dtree)
Accuracy using Single Decision Tree: 97.36842105263158
cm = confusion_matrix(ytest, y_pred1)
print ("\n\n Confusion Matrix for Single Decision Tree: \n", cm)
Confusion Matrix for Single Decision Tree:
[[13 0 0]
[ 0 15 1]
[ 0 0 9]]
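The fitted tree can also be drawn with scikit-learn's plot_tree, which makes the split rules behind the single-tree predictions visible; a sketch (the feature names follow the column order used for x and are illustrative):
# Sketch: visualise the fitted decision tree
from sklearn.tree import plot_tree
plt.figure(figsize=(12, 8))
plot_tree(dtree, feature_names=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'],
          class_names=list(dtree.classes_), filled=True)
plt.show()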
# Create a Random forest Classifier
RF = RandomForestClassifier(n_estimators = 100)
# Train the model using the training sets
RF.fit(xtrain, ytrain)
Out[]: RandomForestClassifier()
y_pred2 = RF.predict(xtest)
print("Predicted values:")
y_pred2
Predicted values:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-virginica'], dtype=object)
acc_rf= accuracy_score(ytest,y_pred2)*100
print ("\n\nAccuracy using Random Forest: ", acc_rf)
Accuracy using Random Forest: 97.36842105263158
cm = confusion_matrix(ytest, y_pred2)
print ("\n\n Confusion Matrix for Random Forest Classifier: \n", cm)
Confusion Matrix for Random Forest Classifier:
[[13 0 0]
[ 0 15 1]
[ 0 0 9]]
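Both models reach the same test accuracy on this split, so the comparison is a tie here; one practical advantage of the forest is its feature-importance estimate averaged over the 100 trees, which can be inspected directly (sketch, feature names illustrative):
# Sketch: per-feature importance averaged over the trees in the forest
for name, imp in zip(['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'],
                     RF.feature_importances_):
    print(name, round(imp, 3))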
Bagging
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import model_selection
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
import warnings
warnings.filterwarnings('ignore')
dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML
LAB/Data/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Peatal Width         Species
0             5.1          3.5           1.4           0.2     Iris-setosa
1             4.9          3.0           1.4           0.2     Iris-setosa
2             4.7          3.2           1.3           0.2     Iris-setosa
3             4.6          3.1           1.5           0.2     Iris-setosa
4             5.0          3.6           1.4           0.2     Iris-setosa
..            ...          ...           ...           ...             ...
145           6.7          3.0           5.2           2.3  Iris-virginica
146           6.3          2.5           5.0           1.9  Iris-virginica
147           6.5          3.0           5.2           2.0  Iris-virginica
148           6.2          3.4           5.4           2.3  Iris-virginica
149           5.9          3.0           5.1           1.8  Iris-virginica

[150 rows x 5 columns]
# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
       ...])
# target
y = dataset.iloc[:, 4].values
y
Out[]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       ...,
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       ...,
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       ...], dtype=object)
# Splitting the dataset into the Training set and Test set
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)
Single = GaussianNB()
Single.fit(xtrain, ytrain)
Out[]: GaussianNB()
y_pred = Single.predict(xtest)
print("Predicted values for single Naïve Bayes Classifier:")
y_pred
Predicted values for single Naïve Bayes Classifier:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-versicolor'], dtype='<U15')
Acc_Single= accuracy_score(ytest,y_pred)*100
print ("\n\nAccuracy using single Naïve Bayes Classifier: ",Acc_Single)
Accuracy using single Naïve Bayes Classifier: 100.0
cm = confusion_matrix(ytest, y_pred)
print ("\n\n Confusion Matrix -using single Naïve Bayes Classifier: \n", cm)
Confusion Matrix -using single Naïve Bayes Classifier:
[[13 0 0]
[ 0 16 0]
[ 0 0 9]]
# initialize the base classifier
base_cls = GaussianNB()
# number of base classifiers in the ensemble
num_class = 100
# bagging classifier
Bag = BaggingClassifier(base_estimator=base_cls, n_estimators=num_class, random_state=0)
Bag.fit(xtrain, ytrain)
Out[]: BaggingClassifier(base_estimator=GaussianNB(), n_estimators=100, random_state=0)
results = model_selection.cross_val_score(Bag, xtest, ytest, cv = 10)
print("\n\nAccuracy using Bagged Set of Naïve Bayes Classifiers :", results.mean()*100)
Accuracy using Bagged Set of Naïve Bayes Classifiers : 94.16666666666667
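Bagging also supports an out-of-bag estimate: each training sample is scored only by the base classifiers whose bootstrap sample did not contain it, giving a validation-like accuracy without a separate hold-out set. A sketch with oob_score enabled (parameter names as used above):
# Sketch: out-of-bag accuracy estimate from the training data alone
Bag_oob = BaggingClassifier(base_estimator=GaussianNB(), n_estimators=100,
                            oob_score=True, random_state=0)
Bag_oob.fit(xtrain, ytrain)
print("OOB accuracy:", Bag_oob.oob_score_)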
Boosting
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import AdaBoostClassifier
import warnings
warnings.filterwarnings('ignore')
dataset = pd.read_csv("/Users/dianamoses/Documents/MCET/Course Files/ML/ML
LAB/Data/Logistic_Iris.csv")
dataset
Out[]:
     Sepal Length  Sepal Width  Petal Length  Peatal Width         Species
0             5.1          3.5           1.4           0.2     Iris-setosa
1             4.9          3.0           1.4           0.2     Iris-setosa
2             4.7          3.2           1.3           0.2     Iris-setosa
3             4.6          3.1           1.5           0.2     Iris-setosa
4             5.0          3.6           1.4           0.2     Iris-setosa
..            ...          ...           ...           ...             ...
145           6.7          3.0           5.2           2.3  Iris-virginica
146           6.3          2.5           5.0           1.9  Iris-virginica
147           6.5          3.0           5.2           2.0  Iris-virginica
148           6.2          3.4           5.4           2.3  Iris-virginica
149           5.9          3.0           5.1           1.8  Iris-virginica

[150 rows x 5 columns]
# input
x = dataset.iloc[:, [0,1,2,3]].values
x
Out[]:
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2],
       ...])
# target
y = dataset.iloc[:, 4].values
y
Out[]:
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       ...,
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       ...,
       'Iris-virginica', 'Iris-virginica', 'Iris-virginica',
       ...], dtype=object)
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.25, random_state=0)
sc = StandardScaler()
xtrain = sc.fit_transform(xtrain)
xtest = sc.transform(xtest)
adaboost = AdaBoostClassifier(n_estimators = 50, learning_rate = 0.2)
adaboost.fit(xtrain, ytrain)
Out[]: AdaBoostClassifier(learning_rate=0.2)
adaboost.score(xtest, ytest)
Out[]: 0.8947368421052632
y_pred = adaboost.predict(xtest)
print("Predicted values for AdaBoost Classifier:")
y_pred
Predicted values for AdaBoost Classifier:
Out[]:
array(['Iris-virginica', 'Iris-versicolor', 'Iris-setosa',
       ...,
       'Iris-virginica'], dtype=object)
Acc_adaboost= accuracy_score(ytest,y_pred)*100
print ("\n\nTest Accuracy using AdaBoost Classifier: ", Acc_adaboost)
Test Accuracy using AdaBoost Classifier: 89.47368421052632
cm = confusion_matrix(ytest, y_pred)
print ("\n\n Confusion Matrix for AdaBoost Classifier: \n", cm)
Confusion Matrix for AdaBoost Classifier:
[[13 0 0]
[ 0 15 1]
[ 0 3 6]]
fig, ax = plt.subplots(figsize=(6, 6))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=(0, 1, 2), ticklabels=('Predicted Setosa', 'Predicted Versicolor', 'Predicted Virginica'))
ax.yaxis.set(ticks=(0, 1, 2), ticklabels=('Actual Setosa', 'Actual Versicolor', 'Actual Virginica'))
ax.set_ylim(2.5, -0.5)
for i in range(3):
    for j in range(3):
        ax.text(j, i, cm[i, j], ha='center', va='center', color='white')
plt.show()
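AdaBoost builds its ensemble one weak learner at a time, so its accuracy can be traced as a function of the number of boosting rounds with staged_score, which evaluates the partial ensemble after each estimator is added (sketch, reusing the scaled test split above):
# Sketch: test accuracy after each boosting round
staged = list(adaboost.staged_score(xtest, ytest))
plt.plot(range(1, len(staged) + 1), staged)
plt.xlabel('Number of boosting rounds')
plt.ylabel('Test accuracy')
plt.show()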