LAB NO 01
import pandas as pd
import numpy as np
from pandas import Series
s1 = pd.Series([3, -4.5, 7, 8.19, 13])
s2 = pd.Series([5, 7, 31, 3.5, -8.7], index=['A', 'B', 'C', 'D', 'E'])
print(s1)
print(s2)
print('VALUE',s2.values)
print('INDEX',s2.index)
s3= Series(np.random.randn(6))
print(s3)
capital = ['MI', 'LANSING', 'CA', 'SACRAMENTO', 'TX', 'AUSTIN', 'MN', 'ST.PAUL']  # changed set to a list
s4=Series(capital)
print(s4)
print('VALUE',s4.values)
print('INDEX',s4.index)
print("\n s1[1]", s1[1])
print("\n s2['A']", s2['A'])
print("\n s1[1:3]", s1[1:3])
print("\n s2['A':'E']", s2['A':'E'])
print(s4.iloc[1:3])
print('s3 shape:', s3.shape, 'size:', s3.size)
print('s4 shape:', s4.shape, 'size:', s4.size)
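A Series can also be built straight from a dict, in which case the keys become the index labels. A minimal sketch (the prices data and the names prices and s5 are hypothetical, assuming pandas is imported as above):
prices = {'APPLE': 1.20, 'BANANA': 0.50, 'CHERRY': 3.00}  # hypothetical data
s5 = pd.Series(prices)   # dict keys become the index labels
print(s5)
print(s5['BANANA'])      # label-based lookup, as with s2 above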
LAB NO 03
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
from pandas import Series
import numpy as np
s3 = Series([1.2, -0.8, 0, 1.7, -3.2, 2], index=['JAN1', 'JAN2', 'JAN3', 'JAN4', 'JAN5', 'JAN6'])
print(s3[s3>0])
print(s3+4)
print(s3/2)
print(np.log(s3+4))
s3.plot(kind='hist',title='Histogram')
plt.show()
s3.plot(kind='bar',title='Barchart')
plt.show()
s3.plot(kind='line',title='Linechart')
plt.show()
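Summary statistics follow the same element-wise conventions as the arithmetic above. A minimal sketch, reusing the s3 defined in this lab:
print('mean:', s3.mean(), 'std:', s3.std())   # aggregate statistics over the Series
print(s3.abs().sort_values())                 # element-wise absolute value, then sort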
PROGRAMME 02
import pandas as pd
from pandas import Series
import numpy as np
cars = {'MAKE': ['FORD', 'HONDA', 'TOYOTA', 'TESLA'],
        'MODEL': ['TAURUS', 'ACCORD', 'CAMRY', 'MODEL S'],
        'MSRP': [27595, 23570, 23495, 68000]}
cardata = pd.DataFrame(cars)                      # default integer index 0..3
cardata = pd.DataFrame(cars, index=[1, 2, 3, 4])  # rebuilt with an explicit 1-based index
print(cardata)
cardata['YEAR']=2016
cardata['DEALERSHIP'] = ['COURTESY FORD', 'CAPITAL HONDA', 'SPARTAN TOYOTA', 'N/A']
print(cardata)
npdata=np.random.randn(5,3)
column=['X1','X2','X3']
data=pd.DataFrame(npdata,columns=column)
print(data['X2'])
print(data.iloc[2])
print(cardata.iloc[2])
print(cardata.iloc[1,2])
print(cardata.loc[1,'MODEL'])
print(cardata.iloc[1:3,1:3])
print(cardata.loc[1:3,'MODEL':'MSRP'])
print(cardata[cardata.MSRP>25000])
print(data.T)
print(data+4)
print(data.sort_values(by='X2'))
print(data.sort_values(by='X2',ascending=False))
print(data.max())
print(data.min(axis=1))
print(data.mean(axis=1))
print(data.median())
print(data.std())
print('Calculate max_min per column')
print(data.apply(lambda x:x.max()-x.min()))
print(data.describe())
print('Calculate max_min per row')
print(data.apply(lambda x:x.max()-x.min(),axis=1))
print(data.describe())
OUTPUT:
MAKE MODEL MSRP
1 FORD TAURUS 27595
2 HONDA ACCORD 23570
3 TOYOTA CAMRY 23495
4 TESLA MODEL S 68000
MAKE MODEL MSRP YEAR DEALERSHIP
1 FORD TAURUS 27595 2016 COURTESY FORD
2 HONDA ACCORD 23570 2016 CAPITAL HONDA
3 TOYOTA CAMRY 23495 2016 SPARTAN TOYOTA
4 TESLA MODEL S 68000 2016 N/A
0 -0.539041
1 -0.352970
2 1.232906
3 -1.993466
4 0.634204
Name: X2, dtype: float64
X1 -0.298207
X2 1.232906
X3 0.694186
Name: 2, dtype: float64
MAKE TOYOTA
MODEL CAMRY
MSRP 23495
YEAR 2016
DEALERSHIP SPARTAN TOYOTA
Name: 3, dtype: object
23570
TAURUS
MODEL MSRP
2 ACCORD 23570
3 CAMRY 23495
MODEL MSRP
1 TAURUS 27595
2 ACCORD 23570
3 CAMRY 23495
MAKE MODEL MSRP YEAR DEALERSHIP
1 FORD TAURUS 27595 2016 COURTESY FORD
4 TESLA MODEL S 68000 2016 N/A
0 1 2 3 4
X1 0.947945 -0.021360 -0.298207 -0.376367 -1.581967
X2 -0.539041 -0.352970 1.232906 -1.993466 0.634204
X3 0.721995 1.843104 0.694186 -0.139815 -0.655503
X1 X2 X3
0 4.947945 3.460959 4.721995
1 3.978640 3.647030 5.843104
2 3.701793 5.232906 4.694186
3 3.623633 2.006534 3.860185
4 2.418033 4.634204 3.344497
X1 X2 X3
3 -0.376367 -1.993466 -0.139815
0 0.947945 -0.539041 0.721995
1 -0.021360 -0.352970 1.843104
4 -1.581967 0.634204 -0.655503
2 -0.298207 1.232906 0.694186
X1 X2 X3
2 -0.298207 1.232906 0.694186
4 -1.581967 0.634204 -0.655503
1 -0.021360 -0.352970 1.843104
0 0.947945 -0.539041 0.721995
3 -0.376367 -1.993466 -0.139815
X1 0.947945
X2 1.232906
X3 1.843104
dtype: float64
0 -0.539041
1 -0.352970
2 -0.298207
3 -1.993466
4 -1.581967
dtype: float64
0 0.376966
1 0.489592
2 0.542962
3 -0.836549
4 -0.534422
dtype: float64
X1 -0.298207
X2 -0.352970
X3 0.694186
dtype: float64
X1 0.905331
X2 1.235306
X3 0.953311
dtype: float64
Calculate max_min per column
X1 2.529912
X2 3.226372
X3 2.498607
dtype: float64
X1 X2 X3
count 5.000000 5.000000 5.000000
mean -0.265991 -0.203673 0.492793
std 0.905331 1.235306 0.953311
min -1.581967 -1.993466 -0.655503
25% -0.376367 -0.539041 -0.139815
50% -0.298207 -0.352970 0.694186
75% -0.021360 0.634204 0.721995
max 0.947945 1.232906 1.843104
Calculate max_min per row
0 1.486986
1 2.196074
2 1.531113
3 1.853652
4 2.216171
dtype: float64
X1 X2 X3
count 5.000000 5.000000 5.000000
mean -0.265991 -0.203673 0.492793
std 0.905331 1.235306 0.953311
min -1.581967 -1.993466 -0.655503
25% -0.376367 -0.539041 -0.139815
50% -0.298207 -0.352970 0.694186
75% -0.021360 0.634204 0.721995
max 0.947945 1.232906 1.843104
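Boolean filters like the MSRP test above can also be combined; each condition needs its own parentheses when joined with &. A minimal sketch, reusing cardata from this programme (the price band is hypothetical):
mid_priced = cardata[(cardata.MSRP > 23000) & (cardata.MSRP < 30000)]
print(mid_priced[['MAKE', 'MODEL', 'MSRP']])
print(cardata.sort_values(by='MSRP'))   # rows ordered by sticker price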
LAB NO 04
import numpy as np
import matplotlib.pyplot as plt
seed = 1
numinstance = 200
np.random.seed(seed)
X=np.random.rand(numinstance,1).reshape(-1,1)
Y_true= -3*X+1
Y=Y_true+np.random.normal(size=numinstance).reshape(-1,1)
plt.scatter(X, Y, color='black')
plt.scatter(X, Y_true, color='blue', linewidth=3)
plt.title('True Function Y = -3X + 1')
plt.xlabel('X_axis')
plt.ylabel('Y_axis')
plt.show()
OUTPUT: (figure: scatter plot of the noisy data and the true function)
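One way to sanity-check the generated data is a quick least-squares fit with NumPy, which should land near the true slope of -3 and intercept of 1. A minimal sketch, reusing X and Y from above:
slope, intercept = np.polyfit(X.ravel(), Y.ravel(), 1)   # degree-1 polynomial fit
print('Least-squares slope = %.3f, intercept = %.3f' % (slope, intercept))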
LAB NO 05
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error,r2_score
seed = 1
numinstance = 200
np.random.seed(seed)
X=np.random.rand(numinstance,1).reshape(-1,1)
Y_true= -3*X+1
Y=Y_true+np.random.normal(size=numinstance).reshape(-1,1)
plt.scatter(X, Y, color='black')
plt.scatter(X, Y_true, color='blue', linewidth=3)
plt.title('True Function Y = -3X + 1')
plt.xlabel('X_axis')
plt.ylabel('Y_axis')
plt.show()
numtrain=20
numtest=numinstance-numtrain
X_train=X[:-numtest]
X_test=X[-numtest:]
Y_train= Y[:-numtest]
Y_test=Y[-numtest:]
regr=linear_model.LinearRegression()
regr.fit(X_train,Y_train)
Y_pred_test=regr.predict(X_test)
plt.scatter(Y_test, Y_pred_test, color='black')
plt.title('Comparing true and predicted values for the test set')
plt.xlabel('True value of Y')
plt.ylabel('Predicted value of Y')
plt.show()
print('Root mean squared error = %f' % np.sqrt(mean_squared_error(Y_test, Y_pred_test)))
print('R-squared = %f' % r2_score(Y_test, Y_pred_test))
print('Slope=',regr.coef_[0])
print('Intercept=',regr.intercept_[0])
plt.scatter(X_test,Y_test,color='black')
plt.plot(X_test,Y_pred_test,color='blue',linewidth=3)
titlelab = 'Predicted Function Y = %.2fX + %.2f' % (regr.coef_[0][0], regr.intercept_[0])
plt.title(titlelab)
plt.xlabel('X')
plt.ylabel('Y')
plt.show()
OUTPUT:
Root mean squared error = 1.047626
R-squared = 0.444334
Slope= [-3.24235454]
Intercept= 1.0805993038584842
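The slicing above simply takes the first 20 points for training and the remaining 180 for testing. scikit-learn's train_test_split does the same job with shuffling built in; a minimal sketch under that substitution:
from sklearn.model_selection import train_test_split
# Shuffled split: 20 training instances out of 200, seeded for reproducibility
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=20, random_state=seed)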
LAB NO 06
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
# Load dataset (using Iris dataset as an example)
data = load_iris()
X = data.data # Features
Y = data.target # Labels
# Split dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
# Hyperparameter tuning
c = [0.01, 0.1, 0.2, 0.5, 0.8, 1, 5, 10, 20, 50]
SVMtrainAcc = []
SVMtestAcc = []
for param in c:
    clf = SVC(C=param, kernel='rbf', gamma='auto')
    clf.fit(X_train, Y_train)
    Y_predTrain = clf.predict(X_train)
    Y_predTest = clf.predict(X_test)
    SVMtrainAcc.append(accuracy_score(Y_train, Y_predTrain))
    SVMtestAcc.append(accuracy_score(Y_test, Y_predTest))
# Plot results
plt.plot(c, SVMtrainAcc, 'ro-', label='Training Accuracy')
plt.plot(c, SVMtestAcc, 'bv--', label='Test Accuracy')
plt.legend()
plt.xlabel('C')
plt.xscale('log')
plt.ylabel('Accuracy')
plt.title('SVM Accuracy vs C Parameter')
plt.show()
OUTPUT: (figure: training and test accuracy versus C)
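The recorded accuracies make it easy to read off which C did best on the test curve. A minimal sketch, reusing c and SVMtestAcc from above:
import numpy as np
best_idx = int(np.argmax(SVMtestAcc))   # index of the highest test accuracy
print('Best C = %s (test accuracy %.3f)' % (c[best_idx], SVMtestAcc[best_idx]))
Note that choosing C on the test set leaks information; a separate validation split or cross-validation is the principled way to tune it.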
LAB NO 07
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
# Movie features: [movie length, genre (0 = Comedy, 1 = Action)]
movie_feature = np.array([[ 23, 0], [104, 0], [ 57, 0], [ 24, 1], [199, 0],
                          [ 49, 1], [ 91, 0], [120, 1], [ 62, 0], [ 33, 0],
                          [163, 1], [197, 1], [133, 1], [ 50, 0], [166, 1],
                          [ 24, 0], [ 54, 0], [ 74, 1], [ 98, 1], [ 80, 0],
                          [ 18, 0], [ 97, 1], [138, 1], [145, 1], [ 72, 0],
                          [148, 0], [ 90, 0], [145, 0], [172, 0], [172, 0]])
# Movie likes (1 = like, 0 = dislike)
movie_likes = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1,
                        1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0])
# Initialize Gaussian Naive Bayes model
# Using the priors argument to balance the 'like' and 'dislike' classes
model = GaussianNB(priors=[0.5, 0.5])  # equal prior probabilities for both classes
# Fit the model to the training data
model.fit(movie_feature, movie_likes)
# Predict whether a new movie will be liked or disliked
newmovie = np.array([[125, 1]])  # [movie length, genre (1 = Action)]
# Make a prediction
predicted = model.predict(newmovie)
# Visualize the data
plt.scatter(movie_feature[:, 0], movie_feature[:, 1], c=movie_likes, cmap='viridis', marker='o', label='Training data')
plt.scatter(newmovie[:, 0], newmovie[:, 1], c='red', marker='x', label='New movie (to predict)')
plt.title('Movie Likes Prediction Based on Length and Genre')
plt.xlabel('Movie Length (minutes)')
plt.ylabel('Genre Code (0 = Comedy, 1 = Action)')
plt.legend()
plt.show()
# Print out the prediction result
print(f"User {'likes' if predicted[0] == 1 else 'dislikes'} a
{newmovie[0,0]} min long movie of genre type {'Comedy' if newmovie[0,1]==0
else 'Action'}.")
OUTPUT: (figure: training data with the new movie marked, followed by the printed prediction)
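GaussianNB also exposes class probabilities, which indicate how confident the prediction is. A minimal sketch, reusing model and newmovie from above:
proba = model.predict_proba(newmovie)   # columns follow model.classes_, here [0, 1]
print('P(dislike) = %.3f, P(like) = %.3f' % (proba[0, 0], proba[0, 1]))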
LAB NO 08
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
import pydotplus
from IPython.display import Image
import numpy as np
import matplotlib.pyplot as plt
# Read data
data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/ML DATA/Veretebrates', header='infer')
data['Class'] = data['Class'].replace(['fishes', 'birds', 'amphibians', 'reptiles'], 'non-mammals')
# Decision Tree Classification
Y = data['Class']
X = data.drop(['Name', 'Class'], axis=1)
# Train decision tree classifier
clf = tree.DecisionTreeClassifier(criterion='entropy', max_depth=3)
clf = clf.fit(X, Y)
# Visualize the decision tree using graphviz
dot_data = tree.export_graphviz(clf, feature_names=X.columns,
                                class_names=['mammals', 'non-mammals'],
                                filled=True, out_file=None)
graph = pydotplus.graph_from_dot_data(dot_data)
# Display the decision tree graph
from IPython.display import display
display(Image(graph.create_png()))
# Test data
testData = [['gila monster', 0, 0, 0, 0, 1, 1, 'non-mammals'],
['platypus', 1, 0, 0, 0, 1, 1, 'mammals'],
['owl', 1, 0, 0, 1, 1, 0, 'non-mammals'],
['dolphin', 1, 1, 1, 0, 0, 0, 'mammals']]
testData = pd.DataFrame(testData, columns=data.columns)
testY = testData['Class']
testX = testData.drop(['Name', 'Class'], axis=1)
# Predict with trained model
predY = clf.predict(testX)
predictions = pd.concat([testData['Name'], pd.Series(predY, name='Predicted Class')], axis=1)
# Print predictions
print(predictions)
# Accuracy on test data
print('Accuracy on test data is %.2f' % (accuracy_score(testY, predY)))
# Scatter plot generation
N = 1500
mean1 = [6, 14]
mean2 = [10, 6]
mean3 = [14, 14]
cov = [[3.5, 0], [0, 3.5]] # diagonal covariance
np.random.seed(50)
X = np.random.multivariate_normal(mean1, cov, int(N/6))
X = np.concatenate((X, np.random.multivariate_normal(mean2, cov, int(N/6))))
X = np.concatenate((X, np.random.multivariate_normal(mean3, cov, int(N/6))))
X = np.concatenate((X, 20*np.random.rand(int(N/2), 2)))
Y = np.concatenate((np.ones(int(N/2)), np.zeros(int(N/2))))
# Scatter plot
plt.plot(X[:int(N/2), 0], X[:int(N/2), 1], 'r+', X[int(N/2):, 0], X[int(N/2):, 1], 'k.', ms=4)
plt.title('Scatter plot of generated data')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.show()
OUTPUT:
Name Predicted Class
0 gila monster non-mammals
1 platypus non-mammals
2 owl non-mammals
3 dolphin mammals
Accuracy on test data is 0.75
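If pydotplus or Graphviz is unavailable, the fitted tree can be drawn with scikit-learn's built-in plot_tree instead. A minimal sketch, reusing clf from above (X has been reassigned by the scatter-plot code, so the feature names are read back from the fitted classifier; this assumes scikit-learn >= 1.0, where feature_names_in_ exists):
import matplotlib.pyplot as plt
from sklearn import tree
plt.figure(figsize=(10, 6))
tree.plot_tree(clf, feature_names=list(clf.feature_names_in_), class_names=['mammals', 'non-mammals'], filled=True)
plt.show()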
LAB NO 09
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
# Assumes X_train, X_test, Y_train, Y_test already exist (e.g., the train_test_split from Lab 06)
numNeighbors = [1, 5, 10, 15, 20, 25, 30]
trainAcc = []
testAcc = []
for k in numNeighbors:
    clf = KNeighborsClassifier(n_neighbors=k, metric='minkowski', p=2)
    clf.fit(X_train, Y_train)
    Y_predTrain = clf.predict(X_train)
    Y_predTest = clf.predict(X_test)
    trainAcc.append(accuracy_score(Y_train, Y_predTrain))
    testAcc.append(accuracy_score(Y_test, Y_predTest))
plt.plot(numNeighbors, trainAcc, 'ro-', numNeighbors, testAcc,'bv--')
plt.legend(['Training Accuracy','Test Accuracy'])
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
plt.show()
OUTPUT: (figure: training and test accuracy versus number of neighbors)
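As in the SVM lab, the stored accuracies let you read off the best k. A minimal sketch, reusing numNeighbors and testAcc from above:
import numpy as np
best_k = numNeighbors[int(np.argmax(testAcc))]   # k with the highest test accuracy
print('Best number of neighbors:', best_k)
The same caveat applies: picking k on the test set is a quick check, not proper model selection.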