Program 1: Find-S algorithm
import csv
def read_data(filename):
  with open(filename, 'r') as csvfile:
    datareader = csv.reader(csvfile, delimiter=',')
    traindata = []
    for row in datareader:
      traindata.append(row)
  return traindata

def findS():
  dataarr = read_data('enjoysport.csv')
  h = dataarr[0]  # initialise the hypothesis with the first training example
  print("the value of H is", h)
  rows = len(dataarr)  # number of rows
  print('the value of rows is', rows)
  columns = len(dataarr[0])  # attribute columns plus the target column
  for x in range(1, rows):
    t = dataarr[x]
    print("the value of t is", t)
    if t[columns-1] == '1':  # learn only from positive examples
      for y in range(columns):
        if h[y] != t[y]:
          h[y] = '?'  # generalise any attribute that disagrees
  print("Maximally Specific set")
  print('<', end=' ')
  for i in range(0, len(h)-1):
    print(h[i], end=' ')
  print('>')
findS()
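findS() looks for enjoysport.csv in the working directory and treats the last column as the target, with '1' marking a positive example. A hypothetical file in that layout (values in the style of Mitchell's EnjoySport data):

sunny,warm,normal,strong,warm,same,1
sunny,warm,high,strong,warm,same,1
rainy,cold,high,strong,warm,change,0
sunny,warm,high,strong,cool,change,1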
Program 2: Candidate Elimination algorithm
import csv
def g_0(n):
  return ("?",)*n
def s_0(n):
  return ('0',)*n
def more_general(h1, h2):
  more_general_parts = []
  for x, y in zip(h1, h2):
     mg = x == "?" or (x != "0" and (x == y or y == "0"))
     more_general_parts.append(mg)
  return all(more_general_parts)
# min_generalizations
def fulfills(example, hypothesis):
  ### the implementation is the same as for hypotheses:
  return more_general(hypothesis, example)
def min_generalizations(h, x):
  h_new = list(h)
  for i in range(len(h)):
     if not fulfills(x[i:i+1], h[i:i+1]):
       h_new[i] = '?' if h[i] != '0' else x[i]
  return [tuple(h_new)]
def min_specializations(h, domains, x):
  results = []
  for i in range(len(h)):
     if h[i] == "?":
       for val in domains[i]:
          if x[i] != val:
             h_new = h[:i] + (val,) + h[i+1:]
             results.append(h_new)
     elif h[i] != "0":
       h_new = h[:i] + ('0',) + h[i+1:]
       results.append(h_new)
  return results
with open('enjoysport.csv') as csvFile:
    examples = [tuple(line) for line in csv.reader(csvFile)]
def get_domains(examples):
  d = [set() for i in examples[0]]
  for x in examples:
    for i, xi in enumerate(x):
       d[i].add(xi)
  return [list(sorted(x)) for x in d]
get_domains(examples)
def generalize_S(x, G, S):
  S_prev = list(S)
  for s in S_prev:
    if s not in S:
       continue
    if not fulfills(x, s):
       S.remove(s)
       Splus = min_generalizations(s, x)
       ## keep only generalizations that have a counterpart in G
       S.update([h for h in Splus if any([more_general(g,h) for g in G])])
       ## remove hypotheses less specific than any other in S
       S.difference_update([h for h in S if any([more_general(h, h1) for h1 in S if h != h1])])
  return S
def specialize_G(x, domains, G, S):
  G_prev = list(G)
  for g in G_prev:
    if g not in G:
        continue
    if fulfills(x, g):
        G.remove(g)
        Gminus = min_specializations(g, domains, x)
        ## keep only specializations that have a counterpart in S
        G.update([h for h in Gminus if any([more_general(h, s) for s in S])])
        ## remove hypotheses less general than any other in G
        G.difference_update([h for h in G if any([more_general(g1, h) for g1 in G if h != g1])])
  return G
def candidate_elimination(examples):
  domains = get_domains(examples)[:-1]
  G = set([g_0(len(domains))])
  S = set([s_0(len(domains))])
  i=0
  print("\n G[{0}]:".format(i),G)
  print("\n S[{0}]:".format(i),S)
  for instance in examples:
    i = i + 1
    x, label = instance[:-1], instance[-1]  # split into attributes and decision
    if label == 'yes':  # x is a positive example
      G = {g for g in G if fulfills(x, g)}
      S = generalize_S(x, G, S)
    else:  # x is a negative example
      S = {s for s in S if not fulfills(x, s)}
      G = specialize_G(x, domains, G, S)
    print("\n G[{0}]:".format(i), G)
    print("\n S[{0}]:".format(i), S)
  return
candidate_elimination(examples)
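Note that candidate_elimination() tests label == 'yes', so the enjoysport.csv used here must carry yes/no targets rather than the 1/0 targets program 1 expects. A hypothetical file in that layout:

sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes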
Program 3: ID3 decision tree
import math
import csv
def load_csv(filename):
  lines = csv.reader(open(filename, "r"))
  dataset = list(lines)
  headers = dataset.pop(0)
  return dataset, headers
class Node:
  def __init__(self,attribute):
     self.attribute=attribute
     self.children=[]
     self.answer=""
def subtables(data,col,delete):
  dic={}
  coldata=[row[col] for row in data]
  attr=list(set(coldata))
  counts=[0]*len(attr)
  r=len(data)
  c=len(data[0])
  for x in range(len(attr)):
    for y in range(r):
       if data[y][col]==attr[x]:
          counts[x]+=1
  for x in range(len(attr)):
    dic[attr[x]]=[[0 for i in range(c)] for j in range(counts[x])]
    pos=0
    for y in range(r):
       if data[y][col]==attr[x]:
          if delete:
             del data[y][col]
          dic[attr[x]][pos]=data[y]
          pos+=1
  return attr,dic
def entropy(S):
  # assumes a binary target attribute; a pure split has zero entropy
  attr = list(set(S))
  if len(attr) == 1:
    return 0
  counts = [0, 0]
  for i in range(2):
    counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)
  sums = 0
  for cnt in counts:
    sums += -1 * cnt * math.log(cnt, 2)
  return sums
def compute_gain(data,col):
  attr,dic = subtables(data,col,delete=False)
  total_size=len(data)
  entropies=[0]*len(attr)
  ratio=[0]*len(attr)
  total_entropy=entropy([row[-1] for row in data])
  for x in range(len(attr)):
     ratio[x]=len(dic[attr[x]])/(total_size*1.0)
     entropies[x]=entropy([row[-1] for row in dic[attr[x]]])
     total_entropy-=ratio[x]*entropies[x]
  return total_entropy
def build_tree(data,features):
  lastcol=[row[-1] for row in data]
  if(len(set(lastcol)))==1:
     node=Node("")
     node.answer=lastcol[0]
     return node
  n=len(data[0])-1
  gains=[0]*n
  for col in range(n):
     gains[col]=compute_gain(data,col)
  split=gains.index(max(gains))
  node=Node(features[split])
  fea = features[:split]+features[split+1:]
  attr,dic=subtables(data,split,delete=True)
  for x in range(len(attr)):
     child=build_tree(dic[attr[x]],fea)
     node.children.append((attr[x],child))
  return node
def print_tree(node,level):
  if node.answer!="":
     print(" "*level,node.answer)
     return
  print(" "*level,node.attribute)
  for value,n in node.children:
     print(" "*(level+1),value)
     print_tree(n,level+2)
def classify(node,x_test,features):
  if node.answer!="":
     print(node.answer)
     return
  pos=features.index(node.attribute)
  for value, n in node.children:
     if x_test[pos]==value:
        classify(n,x_test,features)
'''Main program'''
dataset,features=load_csv("id3.csv")
node1=build_tree(dataset,features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1,0)
testdata,features=load_csv("id3_test.csv")
for xtest in testdata:
  print("The test instance:",xtest)
  print("The label for test instance:",end=" ")
  classify(node1,xtest,features)
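load_csv() pops the first row as headers, so id3.csv needs a header line, the attribute columns, and the class label in the last column; id3_test.csv uses the same header and attribute columns. A hypothetical excerpt in the style of the PlayTennis data:

Outlook,Temperature,Humidity,Wind,PlayTennis
Sunny,Hot,High,Weak,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Weak,Yes
Rain,Cool,Normal,Strong,No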
Program 4: Backpropagation neural network
import numpy as np
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X/np.amax(X, axis=0)  # normalise each feature by its column-wise maximum
y = y/100
#Sigmoid Function
def sigmoid (x):
  return 1/(1 + np.exp(-x))
# Derivative of the sigmoid (x is assumed to already be a sigmoid output)
def derivatives_sigmoid(x):
  return x * (1 - x)
#Variable initialization
epoch=5000      #Setting training iterations
lr=0.1          #Setting learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1       # number of neurons in the output layer
#weight and bias initialization
wh=np.random.uniform(size=(inputlayer_neurons,hiddenlayer_neurons))
bh=np.random.uniform(size=(1,hiddenlayer_neurons))
wout=np.random.uniform(size=(hiddenlayer_neurons,output_neurons))
bout=np.random.uniform(size=(1,output_neurons))
#draws a random range of numbers uniformly of dim x*y
for i in range(epoch):
  # Forward propagation
  hinp1 = np.dot(X, wh)
  hinp = hinp1 + bh
  hlayer_act = sigmoid(hinp)
  outinp1 = np.dot(hlayer_act, wout)
  outinp = outinp1 + bout
  output = sigmoid(outinp)
  # Backpropagation
  EO = y - output
  outgrad = derivatives_sigmoid(output)
  d_output = EO * outgrad
  EH = d_output.dot(wout.T)
  # how much the hidden layer weights contributed to the error
  hiddengrad = derivatives_sigmoid(hlayer_act)
  d_hiddenlayer = EH * hiddengrad
  # weight updates: dot product of next-layer error and current-layer output
  # (these must sit inside the loop, or training happens only once)
  wout += hlayer_act.T.dot(d_output) * lr
  wh += X.T.dot(d_hiddenlayer) * lr
print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n" ,output)
Program 5: Naive Bayes classifier
import csv
import random
import math
def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # convert strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset
def splitdataset(dataset, splitratio):
    # e.g. splitratio = 0.67 gives a 67% training split
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        # pick random indices from the dataset copy for the training data
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]
def separatebyclass(dataset):
    # dictionary mapping each class value (1 or 0) to its instances
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if (vector[-1] not in separated):
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # (mean, stdev) for each attribute column
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # exclude the class label column
    return summaries
def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    # summaries is a dict of (mean, stdev) tuples for each class value
    summaries = {}
    for classvalue, instances in separated.items():
        summaries[classvalue] = summarize(instances)
    return summaries
def calculateprobability(x, mean, stdev):
    # Gaussian (normal) probability density
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    # probabilities holds the probability of the test vector under every class
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            # mean and stdev of each attribute for class 0 and 1 separately
            mean, stdev = classsummaries[i]
            x = inputvector[i]
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)
    return probabilities
def predict(summaries, inputvector):
    # assign the class with the highest probability
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel
def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        result = predict(summaries, testset[i])
        predictions.append(result)
    return predictions

def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0
def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)
    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))
    # prepare the model
    summaries = summarizebyclass(trainingset)
    # test the model
    predictions = getpredictions(summaries, testset)
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
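loadcsv() converts every field to float, so naivedata.csv must be purely numeric, with the class label (0 or 1) in the last column and no header row. Hypothetical rows in the style of the Pima Indians Diabetes data:

6,148,72,35,0,33.6,0.627,50,1
1,85,66,29,0,26.6,0.351,31,0
8,183,64,0,0,23.3,0.672,32,1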
Program 6: Naive Bayes text classifier (scikit-learn)
import pandas as pd
msg=pd.read_csv('naivetext.csv',names=['message','label'])
print('The dimensions of the dataset',msg.shape)
msg['labelnum']=msg.label.map({'pos':1,'neg':0})
X=msg.message
y=msg.labelnum
print(X)
print(y)
#splitting the dataset into train and test data
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,y)
print ('\n the total number of Training Data :',ytrain.shape)
print ('\n the total number of Test Data :',ytest.shape)
#output of the words or Tokens in the text documents
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm=count_vect.transform(xtest)
print('\n The words or Tokens in the text documents \n')
print(count_vect.get_feature_names_out())  # use get_feature_names() on scikit-learn < 1.0
df = pd.DataFrame(xtrain_dtm.toarray(), columns=count_vect.get_feature_names_out())
# Training Naive Bayes (NB) classifier on training data.
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(xtrain_dtm,ytrain)
predicted = clf.predict(xtest_dtm)
#printing accuracy, Confusion matrix, Precision and Recall
from sklearn import metrics
print('\n Accuracy of the classifier is',metrics.accuracy_score(ytest,predicted))
print('\n Confusion matrix')
print(metrics.confusion_matrix(ytest,predicted))
print('\n The value of Precision', metrics.precision_score(ytest,predicted))
print('\n The value of Recall', metrics.recall_score(ytest,predicted))
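pd.read_csv is called with explicit column names, so naivetext.csv should have no header row: each line is a message followed by a pos/neg label. A hypothetical file:

I love this sandwich,pos
This is an amazing place,pos
I do not like this restaurant,neg
He is my sworn enemy,neg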
Program 7: Bayesian network (pgmpy)
import numpy as np
import pandas as pd
import csv
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination
lines = list(csv.reader(open('heart_names.csv', 'r')))
attributes=lines[0]
heartDisease = pd.read_csv('heart.csv',names=attributes)
heartDisease = heartDisease.replace('?',np.nan)
print('Few examples from the dataset are given below')
print(heartDisease.head())
print('\n Attributes and datatypes')
print(heartDisease.dtypes)
model = BayesianModel([('age','trestbps'),('age','fbs'),('sex','trestbps'),('exang','trestbps'),('trestbps','heartdisease'),('fbs','heartdisease'),('heartdisease','restecg'),('heartdisease','thalach'),('heartdisease','chol')])
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease,estimator=MaximumLikelihoodEstimator)
print('\n Inferencing with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)
print('\n 1. Probability of HeartDisease given Age=28')
q=HeartDisease_infer.query(variables=['heartdisease'],evidence={'age':28})
print(q['heartdisease'])
print('\n 2. Probability of HeartDisease given cholesterol=100')
q=HeartDisease_infer.query(variables=['heartdisease'],evidence={'chol':100})
print(q['heartdisease'])
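This program targets an older pgmpy API. In recent pgmpy releases BayesianModel is renamed BayesianNetwork and query() returns a DiscreteFactor rather than a dict, so (as an assumption about your installed version) the affected lines would become roughly:

# from pgmpy.models import BayesianNetwork   # replaces the BayesianModel import
# model = BayesianNetwork([...same edge list as above...])
# q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 28})
# print(q)   # print the DiscreteFactor directly instead of q['heartdisease']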
Program 8: K-Means vs. EM (GMM) clustering on Iris
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import sklearn.metrics as sm
import pandas as pd
import numpy as np
# %matplotlib inline  (enable inline plots when running in a Jupyter notebook)
iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length','Sepal_Width','Petal_Length','Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']
# K Means Cluster
model = KMeans(n_clusters=3)
model.fit(X)
# The cluster labels KMeans assigned
print(model.labels_)
# View the results
# Set the size of the plot
plt.figure(figsize=(14,7))
# Create a colormap
colormap = np.array(['red', 'lime', 'black'])
# Plot the Original Classifications
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
# Plot the Models Classifications
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K Mean Classification')
# View the results with the relabelled predictions
# Set the size of the plot
plt.figure(figsize=(14,7))
# np.choose relabels the clusters; [0, 1, 2] is the identity mapping and may need
# permuting on a given run, since KMeans numbers its clusters arbitrarily
predY = np.choose(model.labels_, [0, 1, 2]).astype(np.int64)
print(predY)
# Plot Orginal
plt.subplot(1, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Classification')
# Plot Predicted with corrected values
plt.subplot(1, 2, 2)
plt.scatter(X.Petal_Length,X.Petal_Width, c=colormap[predY], s=40)
plt.title('K Mean Classification')
print('The accuracy score of K-Mean: ',sm.accuracy_score(y, model.labels_))
print('The Confusion matrix of K-Mean: ',sm.confusion_matrix(y, model.labels_))
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns = X.columns)
#xs.sample(5)
from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
y_cluster_gmm = gmm.predict(xs)
#y_cluster_gmm
plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y_cluster_gmm], s=40)
plt.title('GMM Classification')
print('The accuracy score of EM: ',sm.accuracy_score(y, y_cluster_gmm))
print('The Confusion matrix of EM: ',sm.confusion_matrix(y, y_cluster_gmm))
plt.show()  # display the figures when running outside a notebook
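The accuracy scores above compare raw cluster numbers against the true classes, so they depend on the arbitrary numbering K-Means and the GMM happen to choose. A small sketch (hypothetical helper remap_labels, assuming the variables above) that maps each cluster to its majority true class before scoring:

def remap_labels(true_y, cluster_labels):
    # relabel each cluster with the most frequent true class inside it
    mapped = np.zeros_like(cluster_labels)
    for c in np.unique(cluster_labels):
        mask = cluster_labels == c
        mapped[mask] = np.bincount(true_y[mask]).argmax()
    return mapped

print('Remapped K-Means accuracy: ',
      sm.accuracy_score(y, remap_labels(y.Targets.values, model.labels_)))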
Program 10: Locally weighted regression
import numpy as np
from bokeh.plotting import figure, show, output_notebook
from bokeh.layouts import gridplot
from bokeh.io import push_notebook
def local_regression(x0, X, Y, tau):
    # add the bias term: prepend 1 so the intercept is learned
    x0 = np.r_[1, x0]
    X = np.c_[np.ones(len(X)), X]
    # fit model: normal equations with kernel weights
    xw = X.T * radial_kernel(x0, X, tau)  # X transpose * W
    beta = np.linalg.pinv(xw @ X) @ xw @ Y  # @ is matrix multiplication
    # predict the value at x0
    return x0 @ beta

def radial_kernel(x0, X, tau):
    # weight function: radial (Gaussian) kernel centred at x0 with bandwidth tau
    return np.exp(np.sum((X - x0) ** 2, axis=1) / (-2 * tau * tau))
n = 1000
# generate dataset
X = np.linspace(-3, 3, num=n)
print("The Data Set ( 10 Samples) X :\n",X[1:10])
Y = np.log(np.abs(X ** 2 - 1) + .5)
print("The Fitting Curve Data Set (10 Samples) Y :\n",Y[1:10])
# jitter X with Gaussian noise
X += np.random.normal(scale=.1, size=n)
print("Jittered (10 Samples) X :\n", X[1:10])
domain = np.linspace(-3, 3, num=300)
print(" Xo Domain Space(10 Samples) :\n",domain[1:10])
def plot_lwr(tau):
    # prediction through regression over the whole domain
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    plot = figure(plot_width=400, plot_height=400)
    plot.title.text = 'tau=%g' % tau
    plot.scatter(X, Y, alpha=.3)
    plot.line(domain, prediction, line_width=2, color='red')
    return plot
show(gridplot([
    [plot_lwr(10.), plot_lwr(1.)],
    [plot_lwr(0.1), plot_lwr(0.01)]
]))
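The closed form implemented in local_regression is the kernel-weighted normal equation beta = (X^T W X)^-1 X^T W y, with weights w_i = exp(-(x_i - x0)^2 / (2 tau^2)). If bokeh is unavailable (or its figure() no longer accepts plot_width, as in bokeh 3.x), a rough matplotlib equivalent of the 2x2 grid, assuming the X, Y, domain and local_regression defined above:

import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(8, 8))
for ax, tau in zip(axes.ravel(), [10., 1., 0.1, 0.01]):
    prediction = [local_regression(x0, X, Y, tau) for x0 in domain]
    ax.scatter(X, Y, alpha=.3)
    ax.plot(domain, prediction, color='red', linewidth=2)
    ax.set_title('tau=%g' % tau)
plt.show()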