In [49]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import export_graphviz
from IPython.display import Image
import warnings
warnings.filterwarnings('ignore')
data = pd.read_csv("/home/iselab-17/Desktop/ML DATA SETS/Breast Cancer Dataset.csv")
pd.set_option('display.max_columns', None)
data.head()
Out[49]: id diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave_points_mean symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se area_se smoothness_se compactness_se ...
0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 1.0950 0.9053 8.589 153.40 0.006399 0.04904 ...
1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 0.5435 0.7339 3.398 74.08 0.005225 0.01308 ...
2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 0.7456 0.7869 4.585 94.03 0.006150 0.04006 ...
3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.09744 0.4956 1.1560 3.445 27.23 0.009110 0.07458 ...
4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.05883 0.7572 0.7813 5.438 94.44 0.011490 0.02461 ...
[5 rows × 32 columns]
In [50]: data.shape
Out[50]: (569, 32)
In [51]: data['diagnosis'].unique()
Out[51]: array(['M', 'B'], dtype=object)
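Before modeling, it helps to see how the two diagnosis classes are balanced; a minimal check that was not run above could be:
In [ ]: # class distribution; WDBC is moderately imbalanced (357 benign vs. 212 malignant)
data['diagnosis'].value_counts()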
In [52]: df = data.drop(['id'], axis=1)
In [53]: df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})
In [54]: X = df.drop('diagnosis', axis=1)
y = df['diagnosis']
In [55]: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_test
Out[55]: radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave_points_mean symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se area_se smoothness_se compactness_se concavity_se ...
204 12.47 18.60 81.09 481.9 0.09965 0.10580 0.08005 0.03821 0.1925 0.06373 0.3961 1.0440 2.497 30.29 0.006953 0.01911 0.02701 ...
70 18.94 21.31 123.60 1130.0 0.09009 0.10290 0.10800 0.07951 0.1582 0.05461 0.7888 0.7975 5.486 96.05 0.004444 0.01652 0.02269 ...
131 15.46 19.48 101.70 748.9 0.10920 0.12230 0.14660 0.08087 0.1931 0.05796 0.4743 0.7859 3.094 48.31 0.006240 0.01484 0.02813 ...
431 12.40 17.68 81.47 467.8 0.10540 0.13160 0.07741 0.02799 0.1811 0.07102 0.1767 1.4600 2.204 15.43 0.010000 0.03295 0.04861 ...
540 11.54 14.44 74.65 402.9 0.09984 0.11200 0.06737 0.02594 0.1818 0.06782 0.2784 1.7680 1.628 20.86 0.012150 0.04112 0.05553 ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
486 14.64 16.85 94.21 666.0 0.08641 0.06698 0.05192 0.02791 0.1409 0.05355 0.2204 1.0060 1.471 19.98 0.003535 0.01393 0.01800 ...
75 16.07 19.65 104.10 817.7 0.09168 0.08424 0.09769 0.06638 0.1798 0.05391 0.7474 1.0160 5.029 79.25 0.010820 0.02203 0.03500 ...
249 11.52 14.93 73.87 406.3 0.10130 0.07808 0.04328 0.02929 0.1883 0.06168 0.2562 1.0380 1.686 18.62 0.006662 0.01228 0.02105 ...
238 14.22 27.85 92.55 623.9 0.08223 0.10390 0.11030 0.04408 0.1342 0.06129 0.3354 2.3240 2.105 29.96 0.006307 0.02845 0.03850 ...
265 20.73 31.12 135.70 1419.0 0.09469 0.11430 0.13670 0.08646 0.1769 0.05674 1.1720 1.6170 7.749 199.70 0.004551 0.01478 0.02143 ...
114 rows × 30 columns
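Because the classes are imbalanced, a stratified split keeps the benign/malignant ratio the same in both partitions; a sketch of that variant of the split above:
In [ ]: # stratify=y preserves the class proportions in the train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)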
In [56]: model = DecisionTreeClassifier(criterion='entropy')
model.fit(X_train, y_train)
model
Out[56]: DecisionTreeClassifier(criterion='entropy')
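An unconstrained tree grows until every leaf is pure and can overfit the training set; a sketch of taming it with a depth cap and 5-fold cross-validation (max_depth=4 is an illustrative value, not tuned here):
In [ ]: from sklearn.model_selection import cross_val_score
# limit depth so the tree cannot memorize the training data
pruned = DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=42)
scores = cross_val_score(pruned, X_train, y_train, cv=5)
print(scores.mean(), scores.std())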
In [57]: import math

def entropy(column):
    # H(X) = -sum(p * log2(p)) over the value frequencies of the column
    counts = column.value_counts()
    probabilities = counts / len(column)
    return sum(-p * math.log2(p) for p in probabilities)

def conditional_entropy(data, X, target):
    # weighted average of the target's entropy within each value of the feature
    feature_values = data[X].unique()
    weighted_entropy = 0
    for value in feature_values:
        subset = data[data[X] == value]
        weighted_entropy += (len(subset) / len(data)) * entropy(subset[target])
    return weighted_entropy

def information_gain(data, X, target):
    # IG(target; feature) = H(target) - H(target | feature)
    total_entropy = entropy(data[target])
    feature_conditional_entropy = conditional_entropy(data, X, target)
    return total_entropy - feature_conditional_entropy

# Note: this treats each continuous measurement as categorical. Most values
# occur only once, so the conditional entropy collapses toward zero and the
# gains below crowd the H(diagnosis) ceiling; sklearn's tree instead searches
# for binary thresholds on continuous features.
for feature in X:  # iterating a DataFrame yields its column names
    ig = information_gain(df, feature, 'diagnosis')
    print(f"Information Gain[{feature}] : {ig}")
Information Gain[radius_mean] : 0.8607815854835991
Information Gain[texture_mean] : 0.8357118798482908
Information Gain[perimeter_mean] : 0.9267038614138748
Information Gain[area_mean] : 0.9280305529818247
Information Gain[smoothness_mean] : 0.7761788341876101
Information Gain[compactness_mean] : 0.9091291689709926
Information Gain[concavity_mean] : 0.9350604299589776
Information Gain[concave_points_mean] : 0.9420903069361305
Information Gain[symmetry_mean] : 0.735036638169654
Information Gain[fractal_dimension_mean] : 0.8361770160635639
Information Gain[radius_se] : 0.9337337383910278
Information Gain[texture_se] : 0.8642965239721755
Information Gain[perimeter_se] : 0.9315454914704012
Information Gain[area_se] : 0.925377169845925
Information Gain[smoothness_se] : 0.9350604299589776
Information Gain[compactness_se] : 0.9231889229252984
Information Gain[concavity_se] : 0.9280305529818247
Information Gain[concave_points_se] : 0.8585933385629725
Information Gain[symmetry_se] : 0.8181371874054084
Information Gain[fractal_dimension_se] : 0.9174857375160954
Information Gain[radius_worst] : 0.9003074642106167
Information Gain[texture_worst] : 0.8634349686194988
Information Gain[perimeter_worst] : 0.8985843535052632
Information Gain[area_worst] : 0.9350604299589776
Information Gain[smoothness_worst] : 0.7197189097252679
Information Gain[compactness_worst] : 0.9183472928687721
Information Gain[concavity_worst] : 0.9302187999024514
Information Gain[concave_points_worst] : 0.9148323543801957
Information Gain[symmetry_worst] : 0.8453951399613433
Information Gain[fractal_dimension_worst] : 0.8915544765281104
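For context, the gains above are bounded by the entropy of the target itself; with continuous features treated as categorical, most gains sit just under that ceiling. A quick check of the bound:
In [ ]: # upper bound on information gain: H(diagnosis) ≈ 0.953 bits for the 357/212 split
entropy(df['diagnosis'])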
In [58]: # plot_tree and plt were already imported in In [49]
plt.figure(figsize=(20, 8))
plot_tree(model, feature_names=X.columns, filled=True, rounded=True, fontsize=8)
plt.show()
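export_graphviz and Image are imported at the top but never used; for a higher-resolution rendering, the Graphviz route would look roughly like this (it assumes the Graphviz `dot` binary is installed on the machine):
In [ ]: export_graphviz(model, out_file='tree.dot', feature_names=X.columns,
                class_names=['Benign', 'Malignant'], filled=True, rounded=True)
!dot -Tpng tree.dot -o tree.png   # shell call; requires Graphviz on the PATH
Image('tree.png')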
In [59]: y_pred = model.predict(X_test)
print(y_pred)
[0 1 1 0 0 1 1 1 1 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1
0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0 0 1 1 0 1
0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 0 1 1 0 0 0 1 0 0 1
0 0 1]
In [60]: accuracy = accuracy_score(y_test, y_pred) * 100
classification_rep = classification_report(y_test, y_pred)
print("Accuracy:\n", accuracy)
print("Classification Report:\n", classification_rep)
Accuracy:
94.73684210526315
Classification Report:
precision recall f1-score support
0 0.95 0.97 0.96 71
1 0.95 0.91 0.93 43
accuracy 0.95 114
macro avg 0.95 0.94 0.94 114
weighted avg 0.95 0.95 0.95 114
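confusion_matrix and seaborn are also imported at the top but never used; a sketch that closes that loop with a heatmap of the test-set errors:
In [ ]: cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Benign', 'Malignant'], yticklabels=['Benign', 'Malignant'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()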
In [61]: # wrap the sample in a DataFrame with the training columns so the
# feature order is explicit (and sklearn's feature-name check is satisfied)
new = pd.DataFrame([[12.5, 19.2, 80.0, 500.0, 0.035, 0.1, 0.05, 0.02, 0.17, 0.06, 0.4, 1.0, 2.5, 40.0, 0.006, 0.02, 0.03, 0.01, 0.02, 0.003,
                     16.0, 25.0, 105.0, 900.0, 0.13, 0.25, 0.28, 0.12, 0.29, 0.08]], columns=X.columns)
y_pred = model.predict(new)
if y_pred[0] == 0:
    print("Prediction: Benign")
else:
    print("Prediction: Malignant")
Prediction: Benign
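For a measure of confidence alongside the label, predict_proba reports the class fractions in the leaf the sample lands in (for a fully grown tree these are often hard 0/1 values):
In [ ]: # column 0 = P(benign), column 1 = P(malignant)
model.predict_proba(new)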