1. Write a program to implement the k-Nearest Neighbour algorithm to classify the Iris data set
and print both correct and wrong predictions.
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
# Read dataset to pandas dataframe
dataset = pd.read_csv("9-dataset.csv", names=names)
X = dataset.iloc[:, :-1]
y = dataset.iloc[:, -1]
print(X.head())
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.10)
classifier = KNeighborsClassifier(n_neighbors=5).fit(Xtrain, ytrain)
ypred = classifier.predict(Xtest)
i=0
print ("\n-------------------------------------------------------------------------")
print ('%-25s %-25s %-25s' % ('Original Label', 'Predicted Label', 'Correct/Wrong'))
print ("-------------------------------------------------------------------------")
for label in ytest:
    print('%-25s %-25s' % (label, ypred[i]), end="")
    if label == ypred[i]:
        print(' %-25s' % 'Correct')
    else:
        print(' %-25s' % 'Wrong')
    i = i + 1
print ("-------------------------------------------------------------------------")
print("\nConfusion Matrix:\n",metrics.confusion_matrix(ytest, ypred))
print ("-------------------------------------------------------------------------")
print("\nClassification Report:\n",metrics.classification_report(ytest, ypred))
print ("-------------------------------------------------------------------------")
print('Accuracy of the classifier is %0.2f' % metrics.accuracy_score(ytest, ypred))
print ("-------------------------------------------------------------------------")
Output:
2. Develop a program to apply the K-means algorithm to cluster a set of data stored in a .CSV file.
Use the same data set for clustering using the EM algorithm. Compare the results of these two
algorithms and comment on the quality of clustering.
from sklearn.cluster import KMeans
from sklearn import preprocessing
from sklearn.mixture import GaussianMixture
from sklearn.datasets import load_iris
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
dataset=load_iris()
X=pd.DataFrame(dataset.data)
X.columns=['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y=pd.DataFrame(dataset.target)
y.columns=['Targets']
# print(X)
plt.figure(figsize=(14,7))
colormap=np.array(['red','lime','black'])
# REAL PLOT
plt.subplot(1,3,1)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y.Targets],s=40)
plt.title('Real')
# K-PLOT
plt.subplot(1,3,2)
model=KMeans(n_clusters=3)
model.fit(X)
# KMeans labels are arbitrary cluster ids; np.choose maps them through
# [0, 1, 2] (an identity here - permute the list if the cluster numbering
# does not line up with the class numbering)
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[predY],s=40)
plt.title('KMeans')
# GMM PLOT
scaler=preprocessing.StandardScaler()
scaler.fit(X)
xsa=scaler.transform(X)
xs=pd.DataFrame(xsa,columns=X.columns)
gmm=GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm=gmm.predict(xs)
plt.subplot(1,3,3)
plt.scatter(X.Petal_Length,X.Petal_Width,c=colormap[y_cluster_gmm],s=40)
plt.title('GMM Classification')
plt.show()
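To compare the two clusterings quantitatively, as the question asks, one option (a minimal sketch, assuming the variables above) is the adjusted Rand index from sklearn.metrics, which measures agreement with the true labels regardless of how the cluster numbers are permuted:

# Hypothetical comparison: adjusted Rand index against the true labels
# (1.0 = perfect agreement, values near 0.0 = random labelling).
print('K-Means ARI :', sm.adjusted_rand_score(y.Targets, model.labels_))
print('GMM ARI     :', sm.adjusted_rand_score(y.Targets, y_cluster_gmm))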
Output:
3. Implement the non-parametric Locally Weighted Regression algorithm in order to fit data
points. Select an appropriate data set for your experiment and draw graphs.
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k**2))
    return weights

def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    # closed-form weighted least-squares solution for this query point
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W

def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred
# load data points
data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
# add a column of ones (bias term) in front of bill
mbill = np.mat(bill)
mtip = np.mat(tip)
m= np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T,mbill.T))
#set k here
ypred = localWeightRegression(X,mtip,0.5)
# sort the points by bill value so the fitted curve plots smoothly
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.scatter(bill,tip, color='green')
ax.plot(xsort[:,1],ypred[SortIndex], color = 'red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
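For reference, each predicted value above is the solution of a weighted least-squares problem: for a query point x, localWeight computes theta(x) = (X^T W X)^(-1) X^T W y, where W is the diagonal matrix built by kernel with W[j][j] = exp(-(x - x_j)(x - x_j)^T / (2 k^2)), and the prediction is x * theta(x). A smaller k weights only nearby points and gives a wigglier fit; a larger k approaches ordinary least squares.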
Output:
[Plots: locally weighted regression fits with parameter k = 0.5, k = 3, and k = 9]
4. Build an Artificial Neural Network by implementing the Backpropagation algorithm and test the
same using appropriate data sets.
import random
from math import exp
from random import seed
# Initialize a network
def initialize_network(n_inputs, n_hidden, n_outputs):
    network = list()
    hidden_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_inputs + 1)]}
                    for i in range(n_hidden)]
    network.append(hidden_layer)
    output_layer = [{'weights': [random.uniform(-0.5, 0.5) for i in range(n_hidden + 1)]}
                    for i in range(n_outputs)]
    network.append(output_layer)
    i = 1
    print("\n The initialised Neural Network:\n")
    for layer in network:
        j = 1
        for sub in layer:
            print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
            j = j + 1
        i = i + 1
    return network
# Calculate neuron activation (net) for an input
def activate(weights, inputs):
    activation = weights[-1]
    for i in range(len(weights) - 1):
        activation += weights[i] * inputs[i]
    return activation
# Transfer neuron activation to sigmoid function
def transfer(activation):
    return 1.0 / (1.0 + exp(-activation))
# Forward propagate input to a network output
def forward_propagate(network, row):
    inputs = row
    for layer in network:
        new_inputs = []
        for neuron in layer:
            activation = activate(neuron['weights'], inputs)
            neuron['output'] = transfer(activation)
            new_inputs.append(neuron['output'])
        inputs = new_inputs
    return inputs
# Calculate the derivative of a neuron output
def transfer_derivative(output):
    return output * (1.0 - output)
# Backpropagate error and store in neurons
def backward_propagate_error(network, expected):
    for i in reversed(range(len(network))):
        layer = network[i]
        errors = list()
        if i != len(network) - 1:
            for j in range(len(layer)):
                error = 0.0
                for neuron in network[i + 1]:
                    error += (neuron['weights'][j] * neuron['delta'])
                errors.append(error)
        else:
            for j in range(len(layer)):
                neuron = layer[j]
                errors.append(expected[j] - neuron['output'])
        for j in range(len(layer)):
            neuron = layer[j]
            neuron['delta'] = errors[j] * transfer_derivative(neuron['output'])
# Update network weights with error
def update_weights(network, row, l_rate):
    for i in range(len(network)):
        inputs = row[:-1]
        if i != 0:
            inputs = [neuron['output'] for neuron in network[i - 1]]
        for neuron in network[i]:
            for j in range(len(inputs)):
                neuron['weights'][j] += l_rate * neuron['delta'] * inputs[j]
            neuron['weights'][-1] += l_rate * neuron['delta']
# Train a network for a fixed number of epochs
def train_network(network, train, l_rate, n_epoch, n_outputs):
    print("\n Network Training Begins:\n")
    for epoch in range(n_epoch):
        sum_error = 0
        for row in train:
            outputs = forward_propagate(network, row)
            expected = [0 for i in range(n_outputs)]
            expected[row[-1]] = 1
            sum_error += sum([(expected[i] - outputs[i])**2 for i in range(len(expected))])
            backward_propagate_error(network, expected)
            update_weights(network, row, l_rate)
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, l_rate, sum_error))
    print("\n Network Training Ends:\n")
#Test training backprop algorithm
seed(2)
dataset = [[2.7810836, 2.550537003, 0],
           [1.465489372, 2.362125076, 0],
           [3.396561688, 4.400293529, 0],
           [1.38807019, 1.850220317, 0],
           [3.06407232, 3.005305973, 0],
           [7.627531214, 2.759262235, 1],
           [5.332441248, 2.088626775, 1],
           [6.922596716, 1.77106367, 1],
           [8.675418651, -0.242068655, 1],
           [7.673756466, 3.508563011, 1]]
print("\n The input Data Set :\n",dataset)
n_inputs = len(dataset[0]) - 1
print("\n Number of Inputs :\n",n_inputs)
n_outputs = len(set([row[-1] for row in dataset]))
print("\n Number of Outputs :\n",n_outputs)
#Network Initialization
network = initialize_network(n_inputs, 2, n_outputs)
# Training the Network
train_network(network, dataset, 0.5, 20, n_outputs)
print("\n Final Neural Network :")
i= 1
for layer in network:
    j = 1
    for sub in layer:
        print("\n Layer[%d] Node[%d]:\n" % (i, j), sub)
        j = j + 1
    i = i + 1
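Once trained, the network can be used for classification by reusing forward_propagate and taking the index of the largest output. A minimal sketch (the predict helper below is an addition, not part of the original listing):

# Hypothetical helper: classify a row with the trained network.
def predict(network, row):
    outputs = forward_propagate(network, row)
    return outputs.index(max(outputs))

for row in dataset:
    print('expected=%d, predicted=%d' % (row[-1], predict(network, row)))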
Output:
The input Data Set :
[[2.7810836, 2.550537003, 0], [1.465489372, 2.362125076, 0],
[3.396561688, 4.400293529, 0], [1.38807019, 1.850220317, 0],
[3.06407232, 3.005305973, 0], [7.627531214, 2.759262235, 1],
[5.332441248, 2.088626775, 1], [6.922596716, 1.77106367, 1],
[8.675418651, -0.242068655, 1], [7.673756466, 3.508563011, 1]]
Number of Inputs :
2
Number of Outputs :
2
The initialised Neural Network:
Layer[1] Node[1]:
{'weights': [0.4560342718892494, 0.4478274870593494,
-0.4434486322731913]}
Layer[1] Node[2]:
{'weights': [-0.41512800484107837, 0.33549887812944956,
0.2359699890685233]}
Layer[2] Node[1]:
{'weights': [0.1697304014402209, -0.1918635424108558,
0.10594416567846243]}
Layer[2] Node[2]:
{'weights': [0.10680173364083789, 0.08120401711200309,
-0.3416171297451944]}
Network Training Begins:
>epoch=0, lrate=0.500, error=5.278
>epoch=1, lrate=0.500, error=5.122
>epoch=2, lrate=0.500, error=5.006
>epoch=3, lrate=0.500, error=4.875
>epoch=4, lrate=0.500, error=4.700
>epoch=5, lrate=0.500, error=4.466
>epoch=6, lrate=0.500, error=4.176
>epoch=7, lrate=0.500, error=3.838
>epoch=8, lrate=0.500, error=3.469
>epoch=9, lrate=0.500, error=3.089
>epoch=10, lrate=0.500, error=2.716
>epoch=11, lrate=0.500, error=2.367
>epoch=12, lrate=0.500, error=2.054
>epoch=13, lrate=0.500, error=1.780
>epoch=14, lrate=0.500, error=1.546
>epoch=15, lrate=0.500, error=1.349
>epoch=16, lrate=0.500, error=1.184
>epoch=17, lrate=0.500, error=1.045
>epoch=18, lrate=0.500, error=0.929
>epoch=19, lrate=0.500, error=0.831
Network Training Ends:
Final Neural Network :
Layer[1] Node[1]:
{'weights': [0.8642508164347664, -0.8497601716670761,
-0.8668929014392035], 'output': 0.9295587965836384, 'delta':
0.005645382825629247}
Layer[1] Node[2]:
{'weights': [-1.2934302410111027, 1.7109363237151511,
0.7125327507327331], 'output': 0.04760703296164143, 'delta':
-0.005928559978815065}
Layer[2] Node[1]:
{'weights': [-1.3098359335096292, 2.16462207144596,
-0.3079052288835877], 'output': 0.1989556395205846, 'delta':
-0.03170801648036036}
Layer[2] Node[2]:
{'weights': [1.5506793402414165, -2.11315950446121,
0.1333585709422027], 'output': 0.8095042653312078, 'delta':
0.029375796661413225}
5. Demonstrate the Genetic Algorithm by taking suitable data for any simple application.
# Python program to create target string, starting from
# random string using Genetic Algorithm
import random
# Number of individuals in each generation
POPULATION_SIZE = 100
# Valid genes
GENES = '''abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ 1234567890, .-;:_!"#%&/()=?@${[]}'''
# Target string to be generated
TARGET = "I love Bangalore Institute of Technology"
class Individual(object):
    '''
    Class representing an individual in the population
    '''
    def __init__(self, chromosome):
        self.chromosome = chromosome
        self.fitness = self.cal_fitness()

    @classmethod
    def mutated_genes(cls):
        '''
        create a random gene for mutation
        '''
        global GENES
        gene = random.choice(GENES)
        return gene

    @classmethod
    def create_gnome(cls):
        '''
        create chromosome or string of genes
        '''
        global TARGET
        gnome_len = len(TARGET)
        return [cls.mutated_genes() for _ in range(gnome_len)]
    def mate(self, par2):
        '''
        Perform mating and produce new offspring
        '''
        # chromosome for offspring
        child_chromosome = []
        for gp1, gp2 in zip(self.chromosome, par2.chromosome):
            # random probability
            prob = random.random()
            # if prob is less than 0.45, insert gene from parent 1
            if prob < 0.45:
                child_chromosome.append(gp1)
            # if prob is between 0.45 and 0.90, insert gene from parent 2
            elif prob < 0.90:
                child_chromosome.append(gp2)
            # otherwise insert a random gene (mutation) to maintain diversity
            else:
                child_chromosome.append(self.mutated_genes())
        # create a new Individual (offspring) from the generated chromosome
        return Individual(child_chromosome)
    def cal_fitness(self):
        '''
        Calculate fitness score: the number of characters
        in the string that differ from the target string.
        '''
        global TARGET
        fitness = 0
        for gs, gt in zip(self.chromosome, TARGET):
            if gs != gt:
                fitness += 1
        return fitness
# Driver code
def main():
    global POPULATION_SIZE
    # current generation
    generation = 1
    found = False
    population = []
    # create initial population
    for _ in range(POPULATION_SIZE):
        gnome = Individual.create_gnome()
        population.append(Individual(gnome))
    while not found:
        # sort the population in increasing order of fitness score
        population = sorted(population, key=lambda x: x.fitness)
        # if the individual with the lowest fitness score is 0,
        # the target has been reached; break the loop
        if population[0].fitness <= 0:
            found = True
            break
        # otherwise generate new offspring for the next generation
        new_generation = []
        # perform elitism: 10% of the fittest population
        # goes straight to the next generation
        s = int((10 * POPULATION_SIZE) / 100)
        new_generation.extend(population[:s])
        # the remaining 90% are produced by mating individuals
        # drawn from the fittest 50% of the population
        s = int((90 * POPULATION_SIZE) / 100)
        for _ in range(s):
            parent1 = random.choice(population[:50])
            parent2 = random.choice(population[:50])
            child = parent1.mate(parent2)
            new_generation.append(child)
        population = new_generation
        print("Generation: {}\tString: {}\tFitness: {}".
              format(generation,
                     "".join(population[0].chromosome),
                     population[0].fitness))
        generation += 1
    print("Generation: {}\tString: {}\tFitness: {}".
          format(generation,
                 "".join(population[0].chromosome),
                 population[0].fitness))

if __name__ == '__main__':
    main()
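Because the search is stochastic, the number of generations needed differs from run to run; calling random.seed(...) near the top of the script makes runs reproducible. As a quick sanity check of the fitness function (a hypothetical snippet, not part of the original listing):

# Sanity check: a chromosome identical to TARGET scores fitness 0.
perfect = Individual(list(TARGET))
print(perfect.fitness)   # prints 0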
Output: