Assignment 7
In [1]: # Load packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sn
In [2]: #Load the csv file
# Read the marks table from the CSV file (relative path, resolved against
# the notebook's working directory).
MARKS_CSV = 'Marks.csv'
marks = pd.read_csv(MARKS_CSV)
In [3]: # Understanding the data
# Print a concise summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
marks.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--------------
0 Roll no 10 non-null int64
1 Science 10 non-null int64
2 Accounting 10 non-null int64
3 Maths 10 non-null int64
dtypes: int64(4)
memory usage: 448.0 bytes
In [4]: # Scaling the data
# Standardise the three subject columns with scikit-learn's StandardScaler
# (per-column z-scores). "Roll no" is an identifier and is left out.
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

subject_columns = ["Science", "Accounting", "Maths"]
scale = StandardScaler()
# fit_transform learns each column's statistics and returns the scaled
# values as a NumPy array (not a DataFrame).
scaled_marks = scale.fit_transform(marks[subject_columns])
scaled_marks
Out[4]: array([[ 0.84964017, -0.14463921, -0.44396005],
[-0.20508556, 0.43391764, 0.89824474],
[ 0.08789381, 0.72319607, -0.75369961],
[ 0.79104429, -2.31422742, -0.96019266],
[ 0.3222773 , -0.43391764, -1.99265788],
[-1.6699824 , 0.28927843, 1.10473779],
[-1.37700303, -0.86783528, 1.41447736],
[-1.08402366, 1.59103135, 0.48525865],
[ 1.25981128, 0.57855685, -0.03097396],
[ 1.02542779, 0.14463921, 0.27876561]])
In [5]: # Creating a dataframe of the scaled data
# Wrap the scaled NumPy array in a DataFrame so rows and columns are labelled.
# The index uses string labels '1'..'10' (one per student row) and the
# columns are the three subjects that were scaled, in the same order.
scaledmarks = pd.DataFrame(
    scaled_marks,
    index=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'],
    columns=['Science', 'Accounting', 'Maths'],
)
scaledmarks
localhost:8889/nbconvert/html/Downloads/lDS/Chp 7 Clustering Marks dataset.ipynb?download=false 1/4
3/14/24, 2:13 PM Chp 7 Clustering Marks dataset
# Fit the clustering model on the scaled marks and get one integer cluster
# label per row.
# NOTE(review): `kmeansclusters` is created in a cell that is not visible
# here - presumably a scikit-learn KMeans with 3 clusters; confirm.
# NOTE(review): despite its name, `kmeans` holds the label array returned by
# fit_predict, not the model object.
kmeans=kmeansclusters.fit_predict(scaledmarks)
kmeans
Out[10]: array([2, 0, 2, 1, 1, 0, 0, 0, 2, 2])
In [11]: # Scatter plot of the clusters
# Split the scaled marks into one DataFrame per cluster label.
# `kmeans` is the array of labels from fit_predict, so `kmeans == k` is a
# boolean row mask that selects the rows assigned to cluster k.
kmeans0 = scaledmarks[kmeans == 0]
kmeans1 = scaledmarks[kmeans == 1]
kmeans2 = scaledmarks[kmeans == 2]

# Scatter plot of each cluster: first column on x, second column on y.
fig, ax = plt.subplots()
ax.scatter(kmeans0.iloc[:, 0], kmeans0.iloc[:, 1], color='blue', label='Cluster 0')
ax.scatter(kmeans1.iloc[:, 0], kmeans1.iloc[:, 1], color='red', label='Cluster 1')
ax.scatter(kmeans2.iloc[:, 0], kmeans2.iloc[:, 1], color='green', label='Cluster 2')
# Label the axes from the frame's own column names so the figure stands alone.
ax.set_xlabel(scaledmarks.columns[0])
ax.set_ylabel(scaledmarks.columns[1])
ax.set_title('Clusters of scaled marks')
ax.legend()
plt.show()
1.5 •
• •
1.0
• •
••
0.5
0.0
•
•
-0.5
-1.0
-1.5
-2.0
-2.5 '-----.----,-------.----.-------.----.-----'
-1.5 -1.0 -0.5 0.0 0.5 1.0
In [ ]:
4/4