Import As Import As Import As Import As Import As From Import From Import From Import
Import As Import As Import As Import As Import As From Import From Import From Import
In [1]: import pandas as pd #Python library used for working with data sets
import numpy as np #Python library used for working with arrays
import seaborn as sn # library for making statistical graphics in Python
import random as rn
import matplotlib.pyplot as mat #used to create 2D graphs and plots by using python
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB,MultinomialNB
from sklearn.metrics import accuracy_score
Out[3]: age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
... ... ... ... ... ... ... ... ... ... ... ... ...
1 of 12 18/04/24, 14:50
2B about:srcdoc
Out[4]:
Date Time CO(GT) PT08.S1(CO) NMHC(GT) C6H6(GT) PT08.S2(NMHC)
Out[5]: 0
In [6]: DataFrame1.dtypes
In [7]: DataFrame2.dtypes
2 of 12 18/04/24, 14:50
2B about:srcdoc
DATA CLEANING
In [8]: DataFrame3=DataFrame2.iloc[:,:15] #iloc stands for “integer location”.
#It is used to select rows and columns from a Pandas DataFrame or a Series using in
In [9]: DataFrame3
In [10]: DataFrame3.isna().sum().sum()
Out[10]: 1710
In [11]: DataFrame4=DataFrame3.dropna()
In [12]: DataFrame4
3 of 12 18/04/24, 14:50
2B about:srcdoc
C:\Users\rutur\AppData\Local\Temp/ipykernel_6896/3779120835.py:2: SettingW
ithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
4 of 12 18/04/24, 14:50
2B about:srcdoc
In [14]: DataFrame4
C:\Users\rutur\anaconda3\lib\site-packages\pandas\core\frame.py:5238: Sett
ingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
5 of 12 18/04/24, 14:50
2B about:srcdoc
In [16]: DataFrame4.drop_duplicates(inplace=True)
DataFrame4 #Drop Duplicates
C:\Users\rutur\anaconda3\lib\site-packages\pandas\util\_decorators.py:311:
SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
In [17]: DataFrame1
Out[17]: age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
... ... ... ... ... ... ... ... ... ... ... ... ...
6 of 12 18/04/24, 14:50
2B about:srcdoc
In [18]: DataFrame4
DATA INTEGRATION
In [19]: DataSet1=DataFrame4[['Date','Time','T','RH','AH']].loc[0:50]
DataSet1.head()
In [20]: DataSet2=DataFrame4[['Date','Time','T','RH','AH']].loc[51:100]
DataSet2.head()
In [21]: DataSet3=DataFrame1[['age','sex','cp','ca','target']].loc[50:100]
7 of 12 18/04/24, 14:50
2B about:srcdoc
DataSet3.head()
50 58 0 3 0 1
51 57 0 0 0 0
52 38 1 2 4 1
53 49 1 2 3 0
54 55 1 0 0 0
In [22]: Merged=pd.concat([DataSet1,DataSet2])
Merged
Data Transformation
In [23]: DataFrame1.loc[DataFrame1['sex']==1,'sex']='M' #Replacing 1 with M
8 of 12 18/04/24, 14:50
2B about:srcdoc
In [25]: DataFrame1.head()
Out[25]: age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
Out[26]: age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
... ... ... ... ... ... ... ... ... ... ... ... ...
Error Correction
In [27]: DataFrame1[DataFrame1['ca']==4]
9 of 12 18/04/24, 14:50
2B about:srcdoc
Out[27]: age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
In [30]: DataFrame1.isnull().sum()
Out[30]: age 0
sex 0
cp 0
trestbps 0
chol 0
fbs 0
restecg 0
thalach 0
exang 0
oldpeak 0
slope 0
ca 0
thal 0
target 0
dtype: int64
In [31]: DataFrame1
10 of 12 18/04/24, 14:50
2B about:srcdoc
Out[31]: age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca
... ... ... ... ... ... ... ... ... ... ... ... ...
Model Building
In [32]: X_train, X_test, y_train, y_test = train_test_split(DataFrame1.iloc[:,:-1
Out[35]: GaussianNB()
11 of 12 18/04/24, 14:50
2B about:srcdoc
Out[36]: array([1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0,
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0,
1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,
1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0,
0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1,
0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0,
0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
0],
dtype=int64)
Data Visualization
In [39]: dataplot = sn.heatmap(DataFrame4.corr(), cmap="Blues", annot=True)
mat.figure(figsize=(20,15))
mat.show() #Heatmap Plot provide a 2D array or a correlation matrix.
In [ ]:
12 of 12 18/04/24, 14:50