import numpy as np
import pandas as pd
# Load the data
data = pd.read_csv('finds1.csv')
concepts = np.array(data.iloc[:, :-1]) # All columns except the last
target = np.array(data.iloc[:, -1]) # Last column as target
def learn(concepts, target):
    # Initialize specific and general hypotheses
    specific_h = concepts[0].copy()
    print("Initialization of specific_h and general_h:")
    print(f"Specific_h: {specific_h}")
    # Initialize general_h with '?' placeholders
    general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    print(f"General_h: {general_h}")
    for i, h in enumerate(concepts):
        if target[i] == "yes":
            # Positive example: generalize specific_h and relax general_h
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            # Negative example: specialize general_h
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print(f"Step {i + 1}")
        print(f"Specific_h: {specific_h}")
        print(f"General_h: {general_h}")
    # Remove any general hypotheses that are still completely '?'
    placeholder = ['?' for _ in range(len(specific_h))]
    indices = [i for i, val in enumerate(general_h) if val == placeholder]
    for i in indices:
        general_h.remove(placeholder)
    return specific_h, general_h
# Run the learning algorithm
specific_h, general_h_final = learn(concepts, target)
# Print final results
print("\nFinal Specific_h:")
print(specific_h)
print("\nFinal General_h:")
print(general_h_final)
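The program expects a finds1.csv file with the class label in the last column; since that file is not shown here, the snippet below is only an illustrative way to exercise learn() on a small in-memory, EnjoySport-style table (the attribute values and rows are assumptions, not the actual data).
# Illustrative only: a small EnjoySport-style table, not the contents of finds1.csv
sample = pd.DataFrame([
    ['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes'],
    ['sunny', 'warm', 'high',   'strong', 'warm', 'same', 'yes'],
    ['rainy', 'cold', 'high',   'strong', 'warm', 'change', 'no'],
    ['sunny', 'warm', 'high',   'strong', 'cool', 'change', 'yes'],
])
s_demo, g_demo = learn(np.array(sample.iloc[:, :-1]), np.array(sample.iloc[:, -1]))
print(s_demo, g_demo)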
2)
import pandas as pd
import numpy as np
# Load dataset
dataset = pd.read_csv('playtennis.csv', names=['outlook', 'temperature', 'humidity', 'wind', 'class'])
# Entropy function
def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    entropy_value = np.sum([-(counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                            for i in range(len(elements))])
    return entropy_value
# Information Gain function
def InfoGain(data, split_attribute_name, target_name="class"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_Entropy = np.sum([(counts[i] / np.sum(counts)) *
                               entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
                               for i in range(len(vals))])
    Information_Gain = total_entropy - Weighted_Entropy
    return Information_Gain
# ID3 Algorithm
def ID3(data, Originaldata, features, target_attribute_name="class", parent_node_class=None):
    # If all remaining examples share one class, return that class
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # If the subset is empty, return the majority class of the original data
    elif len(data) == 0:
        return np.unique(Originaldata[target_attribute_name])[
            np.argmax(np.unique(Originaldata[target_attribute_name], return_counts=True)[1])]
    # If no features are left, return the parent node's majority class
    elif len(features) == 0:
        return parent_node_class
    else:
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature_index = np.argmax(item_values)
        best_feature = features[best_feature_index]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            subtree = ID3(sub_data, Originaldata, features, target_attribute_name, parent_node_class)
            tree[best_feature][value] = subtree
        return tree
# Run ID3
tree = ID3(dataset, dataset, dataset.columns[:-1].tolist())
print("\nDecision Tree:\n", tree)
3)
import numpy as np
# Input and Output Data
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
# Normalize Input and Output Data
X = X / np.amax(X, axis=0) # Feature scaling
y = y / 100 # Normalizing output values
# Sigmoid Activation Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# Derivative of Sigmoid Function
def derivative_sigmoid(x):
    return x * (1 - x)
# Initialize Neural Network Parameters
epochs = 7000 # Number of training iterations
lr = 0.1 # Learning rate
input_neurons = 2 # Input layer neurons
hidden_neurons = 3 # Hidden layer neurons
output_neurons = 1 # Output layer neurons
# Initialize Weights and Biases
wh = np.random.uniform(size=(input_neurons, hidden_neurons))     # Weights from Input to Hidden layer
bh = np.random.uniform(size=(1, hidden_neurons))                 # Bias for Hidden layer
wout = np.random.uniform(size=(hidden_neurons, output_neurons))  # Weights from Hidden to Output layer
bout = np.random.uniform(size=(1, output_neurons))               # Bias for Output layer
# Training the Neural Network
for i in range(epochs):
    # Forward Propagation
    hinp = np.dot(X, wh) + bh                 # Hidden layer weighted sum
    hlayer_act = sigmoid(hinp)                # Activation function at hidden layer
    outinp = np.dot(hlayer_act, wout) + bout  # Output layer weighted sum
    output = sigmoid(outinp)                  # Activation function at output layer
    # Backpropagation
    EO = y - output                           # Error at output layer
    outgrad = derivative_sigmoid(output)      # Output layer gradient
    d_output = EO * outgrad                   # Delta at output
    EH = d_output.dot(wout.T)                 # Error propagated to hidden layer
    hiddengrad = derivative_sigmoid(hlayer_act)  # Hidden layer gradient
    d_hiddenlayer = EH * hiddengrad           # Delta at hidden layer
    # Updating Weights and Biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr
# Print Final Results
print("Input:\n", X)
print("Actual Output:\n", y)
print("Predicted Output:\n", output)
4)
import numpy as np
# Input and Output Data
X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
# Normalize Input and Output Data
X = X / np.amax(X, axis=0) # Feature scaling
y = y / 100 # Normalizing output values
# Sigmoid Activation Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# Derivative of Sigmoid Function
def derivative_sigmoid(x):
    return x * (1 - x)
# Initialize Neural Network Parameters
epochs = 7000 # Number of training iterations
lr = 0.1 # Learning rate
input_neurons = 2 # Input layer neurons
hidden_neurons = 3 # Hidden layer neurons
output_neurons = 1 # Output layer neurons
# Initialize Weights and Biases
wh = np.random.uniform(size=(input_neurons, hidden_neurons))     # Weights from Input to Hidden layer
bh = np.random.uniform(size=(1, hidden_neurons))                 # Bias for Hidden layer
wout = np.random.uniform(size=(hidden_neurons, output_neurons))  # Weights from Hidden to Output layer
bout = np.random.uniform(size=(1, output_neurons))               # Bias for Output layer
# Training the Neural Network
for i in range(epochs):
    # Forward Propagation
    hinp = np.dot(X, wh) + bh                 # Hidden layer weighted sum
    hlayer_act = sigmoid(hinp)                # Activation function at hidden layer
    outinp = np.dot(hlayer_act, wout) + bout  # Output layer weighted sum
    output = sigmoid(outinp)                  # Activation function at output layer
    # Backpropagation
    EO = y - output                           # Error at output layer
    outgrad = derivative_sigmoid(output)      # Output layer gradient
    d_output = EO * outgrad                   # Delta at output
    EH = d_output.dot(wout.T)                 # Error propagated to hidden layer
    hiddengrad = derivative_sigmoid(hlayer_act)  # Hidden layer gradient
    d_hiddenlayer = EH * hiddengrad           # Delta at hidden layer
    # Updating Weights and Biases
    wout += hlayer_act.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hiddenlayer) * lr
    bh += np.sum(d_hiddenlayer, axis=0, keepdims=True) * lr
# Print Final Results
print("Input:\n", X)
print("Actual Output:\n", y)
print("Predicted Output:\n", output)
5)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Load dataset
msg = pd.read_csv('naivetext1.csv', names=['message', 'label'])
# Print dataset dimensions
print('The dimensions of the dataset:', msg.shape)
# Map labels to numerical values
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
# Split into input (X) and output (y)
X = msg.message
y = msg.labelnum
# Train-test split (with fixed random state for reproducibility)
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)
# Print shapes of splits
print("xtest.shape:", xtest.shape)
print("xtrain.shape:", xtrain.shape)
print("ytest.shape:", ytest.shape)
print("ytrain.shape:", ytrain.shape)
# Convert text to numerical features using CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
# Train Naïve Bayes model
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
# Predictions on test data
predicted_test = clf.predict(xtest_dtm)
# Accuracy metrics
print("Accuracy of the classifier:", metrics.accuracy_score(ytest, predicted_test))
print("Confusion Matrix:\n", metrics.confusion_matrix(ytest, predicted_test))
print("Recall:", metrics.recall_score(ytest, predicted_test))
print("Precision:", metrics.precision_score(ytest, predicted_test))
6)
import csv
import random
import math
def loadCsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitDataset(dataset, splitRatio):
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    copy = list(dataset)
    while len(trainSet) < trainSize:
        index = random.randrange(len(copy))
        trainSet.append(copy.pop(index))
    return [trainSet, copy]

def separateByClass(dataset):
    # Group the instances belonging to each class
    separated = {}
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # Drop the summary of the class column
    return summaries

def summarizeByClass(dataset):
    separated = separateByClass(dataset)
    summaries = {}
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries

def calculateProbability(x, mean, stdev):
    # Gaussian (normal) probability density function
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateClassProbabilities(summaries, inputVector):
    probabilities = {}
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        for i in range(len(classSummaries)):
            mean, stdev = classSummaries[i]
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, mean, stdev)
    return probabilities

def predict(summaries, inputVector):
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    for classValue, probability in probabilities.items():
        # Keep the class with the highest probability
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel

def getPredictions(summaries, testSet):
    predictions = []
    for i in range(len(testSet)):
        result = predict(summaries, testSet[i])
        predictions.append(result)
    return predictions

def getAccuracy(testSet, predictions):
    correct = 0
    for i in range(len(testSet)):
        if testSet[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0

def main():
    filename = '5data.csv'
    splitRatio = 0.67
    dataset = loadCsv(filename)
    trainingSet, testSet = splitDataset(dataset, splitRatio)
    print('Split {0} rows into train={1} and test={2} rows'.format(
        len(dataset), len(trainingSet), len(testSet)))
    summaries = summarizeByClass(trainingSet)
    predictions = getPredictions(summaries, testSet)
    accuracy = getAccuracy(testSet, predictions)
    print('Accuracy of the classifier is: {0}%'.format(accuracy))

main()
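calculateProbability implements the Gaussian density used by this naive Bayes classifier, P(x) = 1 / (sqrt(2*pi)*stdev) * exp(-(x - mean)^2 / (2*stdev^2)); the quick check below is an addition with made-up numbers.
# Sanity check with made-up numbers: at x == mean the density peaks at 1/(sqrt(2*pi)*stdev)
print(calculateProbability(73.0, 73.0, 6.2), 1 / (math.sqrt(2 * math.pi) * 6.2))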
7)
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
# Load dataset
msg = pd.read_csv('naivetext1.csv', names=['message', 'label'])
print('The dimensions of the dataset:', msg.shape)
# Convert labels to numerical values
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
# Split into training and testing sets
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)
# Convert text data to numerical features using CountVectorizer
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
# Train Naive Bayes model
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
# Predict on test data
predicted = clf.predict(xtest_dtm)
# Accuracy metrics
print('Accuracy of the classifier is:', metrics.accuracy_score(ytest, predicted))
print('Confusion matrix:')
print(metrics.confusion_matrix(ytest, predicted))
print('Recall:', metrics.recall_score(ytest, predicted))
print('Precision:', metrics.precision_score(ytest, predicted))
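naivetext1.csv is expected to have no header and one message,label pair per line; the rows below only illustrate that assumed layout, and the extra metric is an optional addition.
# Assumed layout of naivetext1.csv (illustrative rows, not the actual file):
#   I love this sandwich,pos
#   This is an amazing place,pos
#   I am tired of this stuff,neg
print('F1-score:', metrics.f1_score(ytest, predicted))  # Added metric, not in the original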
8)
import numpy as np
import pandas as pd
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
# Read Cleveland Heart Disease data
heartDisease = pd.read_csv('heart.csv')
heartDisease = heartDisease.replace('?', np.nan)
# Display the data
print('Few examples from the dataset are given below:')
print(heartDisease.head())
# Define Bayesian Network Model
model = BayesianNetwork([
('age', 'trestbps'),
('age', 'fbs'),
('sex', 'trestbps'),
('exang', 'trestbps'),
('trestbps', 'heartdisease'),
('fbs', 'heartdisease'),
('heartdisease', 'restecg'),
('heartdisease', 'thalach'),
('heartdisease', 'chol')
])
print('\nLearning CPD using Maximum Likelihood Estimators...')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
print('\nInference with Bayesian Network:')
HeartDisease_infer = VariableElimination(model)
# Query 1: Probability of Heart Disease given Age=30
print('\n1. Probability of HeartDisease given Age=30')
q1 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 30})
print(q1)
# Query 2: Probability of Heart Disease given Cholesterol=100
print('\n2. Probability of HeartDisease given Cholesterol=100')
q2 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q2)
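Evidence passed to VariableElimination.query can also combine several observed variables in one call; the values below are illustrative and must correspond to states that actually occur in heart.csv.
# Illustrative combined-evidence query (added, not in the original listing)
q3 = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 30, 'chol': 100})
print(q3)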
9)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
# Load Data
X = pd.read_csv("kmeansdata.csv")
# Extract features
x1 = X['Distance_Feature'].values
x2 = X['Speeding_Feature'].values
X = np.column_stack((x1, x2))
# Plot dataset
plt.figure()
plt.xlim([0, 100])
plt.ylim([1, 50])
plt.title('Dataset')
plt.scatter(x1, x2)
plt.show()
# Gaussian Mixture Model (GMM)
gmm = GaussianMixture(n_components=3)
gmm.fit(X)
em_predictions = gmm.predict(X)
print("EM Predictions:\n", em_predictions)
print("Means:\n", gmm.means_)
print("\nCovariances:\n", gmm.covariances_)
# Plot GMM Clusters
plt.title('Expectation Maximization')
plt.scatter(X[:, 0], X[:, 1], c=em_predictions, s=50)
plt.show()
# K-Means Clustering
kmeans = KMeans(n_clusters=3, n_init=10)
kmeans.fit(X)
print("Cluster Centers:\n", kmeans.cluster_centers_)
print("Cluster Labels:\n", kmeans.labels_)
# Plot K-Means Clusters
plt.title('K-Means Clustering')
plt.scatter(X[:, 0], X[:, 1], c=kmeans.labels_, s=50)
plt.show()
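A simple way to compare the two clusterings numerically is the silhouette score (higher is better); these lines are an addition to the listing.
# Added comparison, not in the original program
from sklearn.metrics import silhouette_score
print("Silhouette (EM):", silhouette_score(X, em_predictions))
print("Silhouette (K-Means):", silhouette_score(X, kmeans.labels_))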
10)
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.datasets import load_iris
# Load dataset
iris = load_iris()
df = pd.DataFrame(iris['data'], columns=iris['feature_names'])
X = df
y = iris['target']
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize and train KNN classifier
classifier = KNeighborsClassifier(n_neighbors=3)
classifier.fit(X_train, y_train)
# Make predictions
y_pred = classifier.predict(X_test)
# Print first few predictions alongside actual values
print("\n-----------------------------------")
print(f"{'Actual Label':<25}{'Predicted Label':<25}{'Result':<25}")
print("-----------------------------------")
for i in range(len(y_test)):
    result = "Correct" if y_test[i] == y_pred[i] else "Wrong"
    print(f"{y_test[i]:<25}{y_pred[i]:<25}{result:<25}")
print("--------------------------------------")
# Confusion Matrix
print("\nConfusion Matrix:\n", metrics.confusion_matrix(y_test, y_pred))
print("---------------------------------------")
# Classification Report
print("\nClassification Report:\n", metrics.classification_report(y_test, y_pred))
print("---------------------------------------")
# Accuracy of the classifier
accuracy = metrics.accuracy_score(y_test, y_pred)
print(f'Accuracy of the Classifier is {accuracy * 100:.2f}%')
print("------------------------------")
11)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Kernel: diagonal weight matrix giving nearby points higher influence
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2 * k**2))
    return weights
# Locally weighted regression coefficients for one query point
def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat.T))
    return W
# Predict every training point with its own local fit
def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred
# Load dataset
data = pd.read_csv('10-dataset.csv')
bill = np.array(data.total_bill)
tip = np.array(data.tip)
# Build the design matrix X with a column of ones for the intercept
mbill = np.mat(bill)
mtip = np.mat(tip)
m = np.shape(mbill)[1]
one = np.mat(np.ones(m))
X = np.hstack((one.T, mbill.T))
# Locally weighted regression with bandwidth k = 0.5
ypred = localWeightRegression(X, mtip, 0.5)
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green')
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=5)
plt.xlabel('Total bill')
plt.ylabel('Tip')
plt.show()
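10-dataset.csv is assumed to be the usual restaurant tips data with total_bill and tip columns; the tiny in-memory table below is an illustrative substitute for trying the same pipeline without that file (its numbers are made up).
# Illustrative only: run localWeightRegression on a tiny in-memory table
demo = pd.DataFrame({'total_bill': [10.0, 15.0, 20.0, 25.0, 30.0],
                     'tip': [1.5, 2.0, 3.0, 3.5, 4.5]})
Xd = np.hstack((np.mat(np.ones(len(demo))).T, np.mat(demo.total_bill.values).T))
print(localWeightRegression(Xd, np.mat(demo.tip.values), 2.0))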
12)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Function to calculate kernel weights
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))  # Identity matrix
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2 * k**2))
    return weights
# Function to compute locally weighted regression coefficients
def localWeight(point, xmat, ymat, k):
    wei = kernel(point, xmat, k)
    # ymat is already a column vector here, so no transpose is needed
    W = (xmat.T * (wei * xmat)).I * (xmat.T * (wei * ymat))
    return W
# Function to perform local weighted regression
def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred
# Load dataset
data = pd.read_csv("10-dataset.csv") # Ensure this file exists in the working directory
bill = np.array(data.total_bill)
tip = np.array(data.tip)
# Convert to matrices
mbill = np.mat(bill).T
mtip = np.mat(tip).T
# Construct X matrix with an additional ones column
m = np.shape(mbill)[0]
one = np.mat(np.ones(m)).T
X = np.hstack((one, mbill))
# Apply locally weighted regression
ypred = localWeightRegression(X, mtip, 0.5)
# Sorting for visualization
SortIndex = X[:, 1].argsort(0)
xsort = X[SortIndex][:, 0]
# Plot the data points and regression line
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(bill, tip, color='green', label="Data Points")
ax.plot(xsort[:, 1], ypred[SortIndex], color='red', linewidth=2, label="LOWESS Fit")
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()
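The bandwidth k in localWeightRegression controls how far the smoothing reaches: small k follows the data closely, large k gives a flatter curve. The short addition below overlays fits for a few illustrative k values on the same data.
# Added bandwidth comparison (k values are illustrative)
plt.scatter(bill, tip, color='green', label="Data Points")
for k in (0.5, 2.0, 10.0):
    yk = localWeightRegression(X, mtip, k)
    plt.plot(xsort[:, 1], yk[SortIndex], label=f"k={k}")
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()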