Practical 1
a. Design a simple machine learning model to train the training instances and test the same.
from random import randint
TRAIN_SET_LIMIT=1000
TRAIN_SET_COUNT=100
TRAIN_INPUT=list()
TRAIN_OUTPUT=list()
for i in range(TRAIN_SET_COUNT):
    a = randint(0, TRAIN_SET_LIMIT)
    b = randint(0, TRAIN_SET_LIMIT)
    c = randint(0, TRAIN_SET_LIMIT)
    op = a + (2 * b) + (3 * c)
    TRAIN_INPUT.append([a, b, c])
    TRAIN_OUTPUT.append(op)
from sklearn.linear_model import LinearRegression
predictor=LinearRegression(n_jobs=-1)
predictor.fit(X=TRAIN_INPUT, y=TRAIN_OUTPUT)
X_TEST=[[10, 20, 30]]
outcome=predictor.predict(X=X_TEST)
coefficients=predictor.coef_
print('Outcome : {}\nCoefficients : {}'.format(outcome,coefficients))
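Because the targets are generated by the exact rule op = a + (2*b) + (3*c), the fitted model should recover coefficients close to 1, 2 and 3 and predict roughly 10 + 2*20 + 3*30 = 140 for X_TEST. A minimal sanity check, assuming the predictor above has already been fitted:
import numpy as np
# Hedged check: with noise-free linear training data the fit should be (near) exact.
print("Coefficients close to [1, 2, 3]:", np.allclose(predictor.coef_, [1, 2, 3]))
print("Prediction close to 140:", np.allclose(predictor.predict([[10, 20, 30]]), [140]))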
Practical 2
a. Implement feature selection on a given data set using univariate selection, principal component analysis, and feature importance.
• Univariate Selection
from pandas import read_csv
from numpy import set_printoptions
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
path = 'diabetes.csv';
names = ['Pregnancies', 'Glucose', 'BloodPressure',
'SkinThickness','Insulin','BMI','DiabetesPedigreeFunction', 'Age', 'Outcome']
dataframe = read_csv(path, names=names, skiprows=1)  # skiprows=1 skips the header row already present in the file
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
#select the best features from dataset
test = SelectKBest(score_func=chi2, k=4)
fit = test.fit(X,Y)
# summarize the output: set the numpy print precision, show the chi-squared
# score of each attribute, and then the 4 best-scoring attributes
set_printoptions(precision=6)
print(fit.scores_)
featured_data = fit.transform(X)
print ('Featured data:\n', featured_data[0:4])
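To see which four attributes the selector kept, SelectKBest's get_support() can map the scores back to column names; a short sketch reusing the names list above (not part of the original listing):
# Hedged addition: list the attributes retained by SelectKBest (k=4).
selected_names = [name for name, keep in zip(names[:8], fit.get_support()) if keep]
print('Selected attributes:', selected_names)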
• Principal Component Analysis
# Feature Extraction with PCA
import numpy
from pandas import read_csv
from sklearn.decomposition import PCA
# load data
filename = 'diabetes.csv'
names = ['Pregnancies', 'Glucose', 'BloodPressure',
'SkinThickness','Insulin','BMI','DiabetesPedigreeFunction', 'Age', 'Outcome']
dataframe = read_csv(filename, names=names, skiprows=1)  # skip the header row already present in the file
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
pca = PCA(n_components=3)
fit = pca.fit(X)
# summarize components
print('Explained Variance: %s' % fit.explained_variance_ratio_)
print(fit.components_)
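The fitted PCA can also project the data onto the three extracted components; a minimal sketch reusing the objects above (not part of the original listing):
# Hedged addition: transform X onto the 3 principal components.
X_reduced = fit.transform(X)
print('Reduced shape:', X_reduced.shape)  # expected: (number_of_rows, 3)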
• Feature Importance
# Feature Importance with Extra Trees Classifier
from pandas import read_csv
from sklearn.ensemble import ExtraTreesClassifier
# load data
filename = 'diabetes.csv'
names = ['Pregnancies', 'Glucose', 'BloodPressure',
'SkinThickness','Insulin','BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome']
dataframe = read_csv(filename, names=names, skiprows=1)  # skip the header row already present in the file
array = dataframe.values
X = array[:,0:8]
Y = array[:,8]
# feature extraction
model = ExtraTreesClassifier(n_estimators=10)
model.fit(X, Y)
print(model.feature_importances_)
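Pairing the importances with the attribute names makes the ranking easier to read; a short sketch reusing the names list above (not part of the original listing):
# Hedged addition: rank attributes by importance, highest first.
ranked = sorted(zip(names[:8], model.feature_importances_), key=lambda t: t[1], reverse=True)
for name, score in ranked:
    print(name, round(score, 4))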
b. For a given set of training data examples stored in a .CSV file, implement and demonstrate
the Candidate-Elimination algorithm to output a description of the set of all hypotheses
consistent with the training examples
import numpy as np
import pandas as pd
data = pd.DataFrame(data=pd.read_csv('ENJOYSPORT.csv'))
print(data)
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)
target = np.array(data.iloc[:, -1])
print(target)
def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("Initialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    for i, h in enumerate(concepts):
        if target[i] == 1:
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == 0:
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Steps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h
s_final, g_final = learn(concepts, target)
print("Final Specific_h:", s_final, sep="\n")
print("Final General_h:", g_final, sep="\n")
Sky AirTemp Humidity Wind Water Forecast EnjoySport
0 Sunny Warm Normal Strong Warm Same 1
1 Sunny Warm High Strong Warm Same 1
2 Rainy Cold High Strong Warm Change 0
3 Sunny Warm High Strong Cool Change 1
[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
[1 1 0 1]
Initialization of specific_h and general_h
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Steps of Candidate Elimination Algorithm 1
['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Steps of Candidate Elimination Algorithm 2
['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
[['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Steps of Candidate Elimination Algorithm 3
['Sunny' 'Warm' '?' 'Strong' 'Warm' 'Same']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?',
'?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', 'Same']]
Steps of Candidate Elimination Algorithm 4
['Sunny' 'Warm' '?' 'Strong' '?' '?']
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?',
'?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?']]
Final Specific_h:
['Sunny' 'Warm' '?' 'Strong' '?' '?']
Final General_h:
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
Practical 3
a. Write a program to implement the naïve Bayesian classifier for a sample training data set
stored as a .CSV file. Compute the accuracy of the classifier, considering a few test data
sets.
import pandas as pd
from sklearn import tree
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
# load data from CSV
data = pd.read_csv('tennisdata.csv')
print("THe first 5 values of data is :\n",data.head())
# obtain Train data and Train output
X = data.iloc[:,:-1]
print("\nThe First 5 values of train data is\n",X.head())
y = data.iloc[:,-1]
print("\nThe first 5 values of Train output is\n",y.head())
# Convert them to numbers
le_outlook = LabelEncoder()
X.Outlook = le_outlook.fit_transform(X.Outlook)
le_Temperature = LabelEncoder()
X.Temperature = le_Temperature.fit_transform(X.Temperature)
le_Humidity = LabelEncoder()
X.Humidity = le_Humidity.fit_transform(X.Humidity)
le_Windy = LabelEncoder()
X.Windy = le_Windy.fit_transform(X.Windy)
print("\nNow the Train data is :\n",X.head())
le_PlayTennis = LabelEncoder()
y = le_PlayTennis.fit_transform(y)
print("\nNow the Train output is\n",y)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20)
classifier = GaussianNB()
classifier.fit(X_train,y_train)
from sklearn.metrics import accuracy_score
print("Accuracy is:",accuracy_score(classifier.predict(X_test),y_test))
b. Write a program to implement Decision Tree and Random forest with Prediction, Test
Score and Confusion Matrix.
from sklearn.datasets import fetch_20newsgroups
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import numpy as np
categories = ['alt.atheism', 'soc.religion.christian','comp.graphics',
'sci.med']
twenty_train = fetch_20newsgroups(subset='train', categories=categories, shuffle=True)
twenty_test = fetch_20newsgroups(subset='test',categories=categories,shuffle=True)
print(len(twenty_train.data))
print(len(twenty_test.data))
print(twenty_train.target_names)
print("\n".join(twenty_train.data[0].split("\n")))
print(twenty_train.target[0])
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
X_train_tf = count_vect.fit_transform(twenty_train.data)
from sklearn.feature_extraction.text import TfidfTransformer
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_tf)
X_train_tfidf.shape
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn import metrics
mod = MultinomialNB()
mod.fit(X_train_tfidf, twenty_train.target)
X_test_tf = count_vect.transform(twenty_test.data)
X_test_tfidf = tfidf_transformer.transform(X_test_tf)
predicted = mod.predict(X_test_tfidf)
print("Accuracy:", accuracy_score(twenty_test.target, predicted))
print(classification_report(twenty_test.target, predicted, target_names=twenty_test.target_names))
print("confusion matrix is \n", metrics.confusion_matrix(twenty_test.target, predicted))
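The listing above classifies the 20 newsgroups data with Multinomial Naive Bayes; the Decision Tree and Random Forest models named in the task can be trained on the same TF-IDF features. A minimal sketch reusing the matrices built above (the hyperparameters are illustrative assumptions):
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
for name, clf in [("Decision Tree", DecisionTreeClassifier(random_state=0)),
                  ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=0))]:
    clf.fit(X_train_tfidf, twenty_train.target)   # train on the TF-IDF features
    pred = clf.predict(X_test_tfidf)              # predict the test documents
    print(name, "accuracy:", accuracy_score(twenty_test.target, pred))
    print(name, "confusion matrix:\n", confusion_matrix(twenty_test.target, pred))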
Practical 4
a. For a given set of training data examples stored in a .CSV file implement Least Square
Regression algorithm.
# Making imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (12.0, 9.0)
# Preprocessing Input data
data = pd.read_csv('data.csv')
X = data.iloc[:, 0]
Y = data.iloc[:, 1]
plt.scatter(X, Y)
plt.show()
# Building the model
X_mean = np.mean(X)
Y_mean = np.mean(Y)
num = 0
den = 0
for i in range(len(X)):
    num += (X[i] - X_mean) * (Y[i] - Y_mean)
    den += (X[i] - X_mean) ** 2
m = num / den
c = Y_mean - m*X_mean
print (m, c)
# Making predictions
Y_pred = m*X + c
plt.scatter(X, Y) # actual
# plt.scatter(X, Y_pred, color='red')
plt.plot([min(X), max(X)], [min(Y_pred), max(Y_pred)], color='red')  # predicted
plt.show()
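As a cross-check on the hand-computed slope m and intercept c (an optional addition, not in the original listing), numpy's polyfit fits the same least-squares line:
# Hedged check: a degree-1 polyfit returns [slope, intercept] for the same data.
m_np, c_np = np.polyfit(X, Y, 1)
print('polyfit slope and intercept:', m_np, c_np)  # should match m and c above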
b. For a given set of training data examples stored in a .CSV file implement Logistic
Regression algorithm.
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
# Load the diabetes dataset
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target
# Convert the target variable to binary (1 for diabetes, 0 for no diabetes)
y_binary = (y > np.median(y)).astype(int)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
X, y_binary, test_size=0.2, random_state=42)
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Train the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)
# Evaluate the model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: {:.2f}%".format(accuracy * 100))
# Evaluate the model
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
# Visualize the decision boundary with accuracy information
plt.figure(figsize=(8, 6))
sns.scatterplot(x=X_test[:, 2], y=X_test[:, 0], hue=y_test,
                palette={0: 'blue', 1: 'red'}, marker='o')  # column 2 = BMI, column 0 = age in load_diabetes
plt.xlabel("BMI")
plt.ylabel("Age")
plt.title("Logistic Regression Decision Boundary\nAccuracy:
{:.2f}%".format(accuracy * 100))
plt.legend(title="Diabetes", loc="upper right")
plt.show()
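roc_curve and auc are imported above but never used; a minimal sketch that plots the ROC curve for the fitted model, reusing model, X_test and y_test (an assumed addition, not in the original listing):
# Hedged addition: ROC curve from the predicted probability of the positive class.
y_prob = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_prob)
plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, label='ROC curve (AUC = {:.2f})'.format(auc(fpr, tpr)))
plt.plot([0, 1], [0, 1], linestyle='--', color='grey')  # chance line
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Logistic Regression ROC Curve')
plt.legend(loc='lower right')
plt.show()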
Practical 5
a. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use
an appropriate data set for building the decision tree and apply this knowledge to classify
a new sample.
import numpy as np
import math
import csv
def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile, delimiter=',')
        headers = next(datareader)
        metadata = []
        traindata = []
        for name in headers:
            metadata.append(name)
        for row in datareader:
            traindata.append(row)
    return (metadata, traindata)

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

    def __str__(self):
        return self.attribute

def subtables(data, col, delete):
    dict = {}
    items = np.unique(data[:, col])
    count = np.zeros((items.shape[0], 1), dtype=np.int32)
    for x in range(items.shape[0]):
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                count[x] += 1
    for x in range(items.shape[0]):
        dict[items[x]] = np.empty((count[x][0], data.shape[1]), dtype="|S32")
        pos = 0
        for y in range(data.shape[0]):
            if data[y, col] == items[x]:
                dict[items[x]][pos] = data[y]
                pos += 1
        if delete:
            dict[items[x]] = np.delete(dict[items[x]], col, 1)
    return items, dict

def entropy(S):
    items = np.unique(S)
    if items.size == 1:
        return 0
    counts = np.zeros((items.shape[0], 1))
    sums = 0
    for x in range(items.shape[0]):
        counts[x] = sum(S == items[x]) / (S.size * 1.0)
    for count in counts:
        sums += -1 * count[0] * math.log(count[0], 2)
    return sums

def gain_ratio(data, col):
    items, dict = subtables(data, col, delete=False)
    total_size = data.shape[0]
    entropies = np.zeros((items.shape[0], 1))
    intrinsic = np.zeros((items.shape[0], 1))
    for x in range(items.shape[0]):
        ratio = dict[items[x]].shape[0] / (total_size * 1.0)
        entropies[x] = ratio * entropy(dict[items[x]][:, -1])
        intrinsic[x] = ratio * math.log(ratio, 2)
    total_entropy = entropy(data[:, -1])
    iv = -1 * sum(intrinsic)
    for x in range(entropies.shape[0]):
        total_entropy -= entropies[x]
    return total_entropy / iv

def create_node(data, metadata):
    if (np.unique(data[:, -1])).shape[0] == 1:
        node = Node("")
        node.answer = np.unique(data[:, -1])[0]
        return node
    gains = np.zeros((data.shape[1] - 1, 1))
    for col in range(data.shape[1] - 1):
        gains[col] = gain_ratio(data, col)
    split = np.argmax(gains)
    node = Node(metadata[split])
    metadata = np.delete(metadata, split, 0)
    items, dict = subtables(data, split, delete=True)
    for x in range(items.shape[0]):
        child = create_node(dict[items[x]], metadata)
        node.children.append((items[x], child))
    return node

def empty(size):
    s = ""
    for x in range(size):
        s += " "
    return s

def print_tree(node, level):
    if node.answer != "":
        print(empty(level), node.answer)
        return
    print(empty(level), node.attribute)
    for value, n in node.children:
        print(empty(level + 1), value)
        print_tree(n, level + 2)
metadata, traindata = read_data("tennisdata.csv")
data = np.array(traindata)
node = create_node(data, metadata)
print_tree(node, 0)
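The task also asks to classify a new sample, which the listing above stops short of; a minimal sketch of a classifier that walks the tree built by create_node. The attribute names and values in new_sample are assumptions based on the tennisdata.csv columns used in Practical 3, and the helper decodes the byte strings produced by the |S32 subtables:
def to_str(v):
    # Subtable values are stored as byte strings (dtype "|S32"); normalise to str.
    return v.decode() if isinstance(v, bytes) else str(v)
def classify(tree_node, sample):
    # Walk the tree until a leaf node (one carrying an answer) is reached.
    if tree_node.answer != "":
        return to_str(tree_node.answer)
    value = sample.get(str(tree_node.attribute))
    for branch_value, child in tree_node.children:
        if to_str(branch_value) == value:
            return classify(child, sample)
    return None  # attribute value never seen during training
new_sample = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'Normal', 'Windy': 'False'}  # assumed values
print("Prediction for", new_sample, "->", classify(node, new_sample))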
b. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
dataset=load_iris()
#print(dataset)
X_train, X_test, y_train, y_test = train_test_split(dataset["data"], dataset["target"], random_state=0)
kn=KNeighborsClassifier(n_neighbors=1)
kn.fit(X_train,y_train)
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])
    prediction = kn.predict(x_new)
    print("TARGET=", y_test[i], dataset["target_names"][y_test[i]],
          "PREDICTED=", prediction, dataset["target_names"][prediction])
print(kn.score(X_test,y_test))
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 2 virginica PREDICTED= [2] ['virginica']
TARGET= 1 versicolor PREDICTED= [1] ['versicolor']
TARGET= 0 setosa PREDICTED= [0] ['setosa']
TARGET= 1 versicolor PREDICTED= [2] ['virginica']
0.9736842105263158
Practical 6
a. Implement the different Distance methods (Euclidean) with Prediction, Test Score and
Confusion Matrix.
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
# Load the dataset
df = pd.read_csv('Iris.csv')
# Quick look into the data
df.head(5)
# Separate data and label
x = df.iloc[:, 1:5] # Selecting the feature columns (1 to 4)
y = df.iloc[:, 5] # Selecting the target column (5)
# Prepare data for classification process
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3,
random_state=0)
# Create a model
KNN_Classifier = KNeighborsClassifier(n_neighbors=6, p=2, metric='minkowski')
# Train the model
KNN_Classifier.fit(x_train, y_train)
# Let's predict the classes for test data
pred_test = KNN_Classifier.predict(x_test)
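The task also calls for a test score and confusion matrix, which the listing stops short of; a minimal sketch reusing pred_test and y_test from above:
from sklearn.metrics import confusion_matrix
# Hedged addition: overall test score and confusion matrix for the kNN predictions.
print('Test score:', accuracy_score(y_test, pred_test))
print('Confusion matrix:\n', confusion_matrix(y_test, pred_test))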
Minkowski Distance
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Load the dataset
df = pd.read_csv('Iris.csv')
#quick look into the data
df.head(5)
#Separate data and label
x = df.iloc[:,1:4].values
#Creating the kmeans classifier
KMeans_Cluster = KMeans(n_clusters = 3)
y_class = KMeans_Cluster.fit_predict(x)
#visualing the clusters
plt.scatter(x[y_class==0,0], x[y_class==0,1], c='yellow',label='Iris-setosa')
plt.scatter(x[y_class==1,0], x[y_class==1,1], c='green',label='Iris-versicolour')
plt.scatter(x[y_class==2,0], x[y_class==2,1], c='red',label='Iris-virginica')
#Plotting the clusters and centroids
plt.scatter(KMeans_Cluster.cluster_centers_[:,0],
KMeans_Cluster.cluster_centers_[:,1], s=100, c='black',label='Centroids')
plt.legend()
plt.show()
Practical 7
a. Implement the classification model using clustering for the following techniques with
K-means clustering with Prediction, Test Score and Confusion Matrix.
# importing libraries
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
x = dataset.iloc[:, [3, 4]].values
#finding optimal number of clusters using the elbow method
from sklearn.cluster import KMeans
wcss_list= [] #Initializing the list for the values of WCSS
#Using for loop for iterations from 1 to 10.
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(x)
    wcss_list.append(kmeans.inertia_)
mtp.plot(range(1, 11), wcss_list)
mtp.title('The Elbow Method Graph')
mtp.xlabel('Number of clusters(k)')
mtp.ylabel('wcss_list')
mtp.show()
#training the K-means model on a dataset
kmeans = KMeans(n_clusters=5, init='k-means++', random_state= 42)
y_predict= kmeans.fit_predict(x)
#visulaizing the clusters
mtp.scatter(x[y_predict == 0, 0], x[y_predict == 0, 1], s=100, c='blue', label='Cluster 1')     # first cluster
mtp.scatter(x[y_predict == 1, 0], x[y_predict == 1, 1], s=100, c='green', label='Cluster 2')    # second cluster
mtp.scatter(x[y_predict == 2, 0], x[y_predict == 2, 1], s=100, c='red', label='Cluster 3')      # third cluster
mtp.scatter(x[y_predict == 3, 0], x[y_predict == 3, 1], s=100, c='cyan', label='Cluster 4')     # fourth cluster
mtp.scatter(x[y_predict == 4, 0], x[y_predict == 4, 1], s=100, c='magenta', label='Cluster 5')  # fifth cluster
mtp.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=300, c='yellow', label='Centroid')
mtp.title('Clusters of customers')
mtp.xlabel('Annual Income (k$)')
mtp.ylabel('Spending Score (1-100)')
mtp.legend()
mtp.show()
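The mall data carries no ground-truth labels, so a test score and confusion matrix cannot be computed for it directly. A minimal sketch of the classification-via-clustering idea on the labelled Iris data (an assumed example, reusing the KMeans class imported above): each cluster is mapped to the majority class of its training members and the mapping is evaluated on a held-out split.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
iris = load_iris()
X_tr, X_te, y_tr, y_te = train_test_split(iris.data, iris.target, random_state=0)
km = KMeans(n_clusters=3, init='k-means++', random_state=42).fit(X_tr)
# Map each cluster to the majority true class of its training members.
cluster_to_class = {c: np.bincount(y_tr[km.labels_ == c]).argmax() for c in range(3)}
y_pred = np.array([cluster_to_class[c] for c in km.predict(X_te)])
print('Test score:', accuracy_score(y_te, y_pred))
print('Confusion matrix:\n', confusion_matrix(y_te, y_pred))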
Practical 8
a. Write a program to construct a Bayesian network considering medical data. Use this
model to demonstrate the diagnosis of heart patients using standard Heart Disease Data
Set.
import pandas as pd
data=pd.read_csv("heartdisease.csv")
heart_disease=pd.DataFrame(data)
print(heart_disease)
from pgmpy.models import BayesianNetwork
model=BayesianNetwork([
    ('age', 'Lifestyle'),
    ('Gender', 'Lifestyle'),
    ('Family', 'heartdisease'),
    ('diet', 'cholestrol'),
    ('Lifestyle', 'diet'),
    ('cholestrol', 'heartdisease')
])
from pgmpy.estimators import MaximumLikelihoodEstimator
model.fit(heart_disease, estimator=MaximumLikelihoodEstimator)
from pgmpy.inference import VariableElimination
HeartDisease_infer = VariableElimination(model)
print('For age Enter { SuperSeniorCitizen:0, SeniorCitizen:1, MiddleAged:2, Youth:3, Teen:4 }')
print('For Gender Enter { Male:0, Female:1 }')
print('For Family History Enter { yes:1, No:0 }')
print('For diet Enter { High:0, Medium:1 }')
print('For lifeStyle Enter { Athlete:0, Active:1, Moderate:2, Sedentary:3}')
print('For cholesterol Enter { High:0, BorderLine:1, Normal:2 }')
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={
    'age': int(input('Enter age :')),
    'Gender': int(input('Enter Gender :')),
    'Family': int(input('Enter Family history :')),
    'diet': int(input('Enter diet :')),
    'Lifestyle': int(input('Enter Lifestyle :')),
    'cholestrol': int(input('Enter cholestrol :'))
})
print(q)
For age Enter { SuperSeniorCitizen:0, SeniorCitizen:1,MiddleAged:2, Youth:3, Teen:4 }
For Gender Enter { Male:0, Female:1 }
For Family History Enter { yes:1, No:0 }
For diet Enter { High:0, Medium:1 }
For lifeStyle Enter { Athlete:0, Active:1, Moderate:2, Sedentary:3}
For cholesterol Enter { High:0, BorderLine:1, Normal:2 }
Enter age :2
Enter Gender :0
Enter Family history :0
Enter diet :1
Enter Lifestyle :1
Enter cholestrol :2
+-----------------+---------------------+
| heartdisease | phi(heartdisease) |
+=================+=====================+
| heartdisease(0) | 0.0000 |
+-----------------+---------------------+
| heartdisease(1) | 1.0000 |
+-----------------+---------------------+
b. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data
points. Select an appropriate data set for your experiment and draw graphs.
from math import ceil
import numpy as np
from scipy import linalg
import math
import matplotlib.pyplot as plt
def lowess(x, y, f, iterations):
    n = len(x)
    r = int(ceil(f * n))
    h = [np.sort(np.abs(x - x[i]))[r] for i in range(n)]
    w = np.clip(np.abs((x[:, None] - x[None, :]) / h), 0.0, 1.0)
    w = (1 - w ** 3) ** 3
    yest = np.zeros(n)
    delta = np.ones(n)
    for iteration in range(iterations):
        for i in range(n):
            weights = delta * w[:, i]
            b = np.array([np.sum(weights * y), np.sum(weights * y * x)])
            A = np.array([[np.sum(weights), np.sum(weights * x)],
                          [np.sum(weights * x), np.sum(weights * x * x)]])
            beta = linalg.solve(A, b)
            yest[i] = beta[0] + beta[1] * x[i]
        residuals = y - yest
        s = np.median(np.abs(residuals))
        delta = np.clip(residuals / (6.0 * s), -1, 1)
        delta = (1 - delta ** 2) ** 2
    return yest
n = 100
x = np.linspace(0, 2 * math.pi, n)
y = np.sin(x) + 0.3 * np.random.randn(n)
f = 0.25
iterations = 3
yest = lowess(x, y, f, iterations)
plt.plot(x, y, "r.")
plt.plot(x, yest, "b-")
plt.show()
Practical 9
a. Build an Artificial Neural Network by implementing the Backpropagation algorithm
and test the same using appropriate data sets.
import numpy as np
# Input data
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0) # maximum of X array longitudinally
y = y / 100
# Sigmoid Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of Sigmoid Function
def derivatives_sigmoid(x):
    return x * (1 - x)
# Variable initialization
epoch = 5 # Setting training iterations
lr = 0.1 # Setting learning rate
inputlayer_neurons = 2 # number of features in data set
hiddenlayer_neurons = 3 # number of hidden layers neurons
output_neurons = 1 # number of neurons at output layer
# Weight and bias initialization
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))
# Training loop
for i in range(epoch):
    # Forward Propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much hidden layer weights contributed to error
    d_hiddenlayer = EH * hiddengrad
    # Updating weights and biases
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
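    # Note (hedged addition, not in the original listing): only the weights are
    # updated above; standard backpropagation would also update the biases, e.g.
    # bout += np.sum(d_output, axis=0, keepdims=True) * lr
    # bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr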
    # Print progress
    print("-----------Epoch-", i + 1, "Starts----------")
    print("Input: \n" + str(X))
    print("Actual Output: \n" + str(y))
    print("Predicted Output: \n", output)
    print("-----------Epoch-", i + 1, "Ends----------\n")
# Final result
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
-----------Epoch- 1 Starts----------
Input:
[[0.666667 1. ]
[0.333333 0.555556]
[1. 0.666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.911329]
[0.902473]
[0.909884]]
-----------Epoch- 1 Ends----------
-----------Epoch- 2 Starts----------
Input:
[[0.666667 1. ]
[0.333333 0.555556]
[1. 0.666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.911257]
[0.902401]
[0.909811]]
-----------Epoch- 2 Ends----------
-----------Epoch- 3 Starts----------
Input:
[[0.666667 1. ]
[0.333333 0.555556]
[1. 0.666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.911185]
[0.902328]
[0.909739]]
-----------Epoch- 3 Ends----------
-----------Epoch- 4 Starts----------
Input:
[[0.666667 1. ]
[0.333333 0.555556]
[1. 0.666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.911113]
[0.902256]
[0.909667]]
-----------Epoch- 4 Ends----------
-----------Epoch- 5 Starts----------
Input:
[[0.666667 1. ]
[0.333333 0.555556]
[1. 0.666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.911042]
[0.902184]
[0.909595]]
-----------Epoch- 5 Ends----------
Input:
[[0.666667 1. ]
[0.333333 0.555556]
[1. 0.666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.911042]
[0.902184]
[0.909595]]
Practical 10
a. Assuming a set of documents that need to be classified, use the naïve Bayesian Classifier
model to perform this task. Built-in Java classes/API can be used to write the program.
Calculate the accuracy, precision, and recall for your data set.
import pandas as pd
msg = pd.read_csv('document.csv', names=['message', 'label'])
print("Total Instances of Dataset: ", msg.shape[0])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y)
from sklearn.feature_extraction.text import CountVectorizer
count_v = CountVectorizer()
Xtrain_dm = count_v.fit_transform(Xtrain)
Xtest_dm = count_v.transform(Xtest)
df = pd.DataFrame(Xtrain_dm.toarray(),columns=count_v.get_feature_names_out())
print(df[0:5])
about amazing an awesome bad beers best boss do enemy ... to \
0 0 0 0 0 0 0 0 0 0 0 ... 0
1 0 0 0 0 0 0 0 0 1 0 ... 0
2 0 0 0 0 0 0 0 0 0 0 ... 0
3 0 0 1 1 0 0 0 0 0 0 ... 0
4 0 1 1 0 0 0 0 0 0 0 ... 0
today tomorrow very view we went what will work
0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
2 0 1 0 0 1 0 0 1 0
3 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0
[5 rows x 45 columns]
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB()
clf.fit(Xtrain_dm, ytrain)
pred = clf.predict(Xtest_dm)
for doc, p in zip(Xtest, pred):  # pair each test document with its prediction
    p = 'pos' if p == 1 else 'neg'
    print("%s -> %s" % (doc, p))
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
print('Accuracy Metrics: \n')
print('Accuracy: ', accuracy_score(ytest, pred))
print('Recall: ', recall_score(ytest, pred))
print('Precision: ', precision_score(ytest, pred))
print('Confusion Matrix: \n', confusion_matrix(ytest, pred))