0% found this document useful (0 votes)
3 views2 pages

Lab 8

Hi

Uploaded by

harshab845384
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views2 pages

Lab 8

Hi

Uploaded by

harshab845384
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 2

In [49]: import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split


from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

from sklearn.tree import export_graphviz


from IPython.display import Image

import warnings
warnings.filterwarnings('ignore')

data = pd.read_csv("/home/iselab-17/Desktop/ML DATA SETS/Breast Cancer Dataset.csv")

pd.set_option('display.max_columns', None)

data.head()

Out[49]: id diagnosis radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave_points_mean symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se area_se smoothness_se compactness_se conc

0 842302 M 17.99 10.38 122.80 1001.0 0.11840 0.27760 0.3001 0.14710 0.2419 0.07871 1.0950 0.9053 8.589 153.40 0.006399 0.04904

1 842517 M 20.57 17.77 132.90 1326.0 0.08474 0.07864 0.0869 0.07017 0.1812 0.05667 0.5435 0.7339 3.398 74.08 0.005225 0.01308

2 84300903 M 19.69 21.25 130.00 1203.0 0.10960 0.15990 0.1974 0.12790 0.2069 0.05999 0.7456 0.7869 4.585 94.03 0.006150 0.04006

3 84348301 M 11.42 20.38 77.58 386.1 0.14250 0.28390 0.2414 0.10520 0.2597 0.09744 0.4956 1.1560 3.445 27.23 0.009110 0.07458

4 84358402 M 20.29 14.34 135.10 1297.0 0.10030 0.13280 0.1980 0.10430 0.1809 0.05883 0.7572 0.7813 5.438 94.44 0.011490 0.02461

In [50]: data.shape

Out[50]: (569, 32)

In [51]: data['diagnosis'].unique()

Out[51]: array(['M', 'B'], dtype=object)

In [52]: df = data.drop(['id'], axis=1)

In [53]: df['diagnosis'] = df['diagnosis'].map({'M': 1, 'B': 0})

In [54]: X=df.drop('diagnosis',axis=1)
y=df['diagnosis']

In [55]: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


X_test

Out[55]: radius_mean texture_mean perimeter_mean area_mean smoothness_mean compactness_mean concavity_mean concave_points_mean symmetry_mean fractal_dimension_mean radius_se texture_se perimeter_se area_se smoothness_se compactness_se concavity_se concave_p

204 12.47 18.60 81.09 481.9 0.09965 0.10580 0.08005 0.03821 0.1925 0.06373 0.3961 1.0440 2.497 30.29 0.006953 0.01911 0.02701 0

70 18.94 21.31 123.60 1130.0 0.09009 0.10290 0.10800 0.07951 0.1582 0.05461 0.7888 0.7975 5.486 96.05 0.004444 0.01652 0.02269 0

131 15.46 19.48 101.70 748.9 0.10920 0.12230 0.14660 0.08087 0.1931 0.05796 0.4743 0.7859 3.094 48.31 0.006240 0.01484 0.02813 0

431 12.40 17.68 81.47 467.8 0.10540 0.13160 0.07741 0.02799 0.1811 0.07102 0.1767 1.4600 2.204 15.43 0.010000 0.03295 0.04861 0

540 11.54 14.44 74.65 402.9 0.09984 0.11200 0.06737 0.02594 0.1818 0.06782 0.2784 1.7680 1.628 20.86 0.012150 0.04112 0.05553 0

... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...

486 14.64 16.85 94.21 666.0 0.08641 0.06698 0.05192 0.02791 0.1409 0.05355 0.2204 1.0060 1.471 19.98 0.003535 0.01393 0.01800 0

75 16.07 19.65 104.10 817.7 0.09168 0.08424 0.09769 0.06638 0.1798 0.05391 0.7474 1.0160 5.029 79.25 0.010820 0.02203 0.03500 0

249 11.52 14.93 73.87 406.3 0.10130 0.07808 0.04328 0.02929 0.1883 0.06168 0.2562 1.0380 1.686 18.62 0.006662 0.01228 0.02105 0

238 14.22 27.85 92.55 623.9 0.08223 0.10390 0.11030 0.04408 0.1342 0.06129 0.3354 2.3240 2.105 29.96 0.006307 0.02845 0.03850 0

265 20.73 31.12 135.70 1419.0 0.09469 0.11430 0.13670 0.08646 0.1769 0.05674 1.1720 1.6170 7.749 199.70 0.004551 0.01478 0.02143 0

114 rows × 30 columns

In [56]: model = DecisionTreeClassifier(criterion='entropy')


model.fit(X_train, y_train)
model

Out[56]: DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='entropy',


max_depth=None, max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort='deprecated',
random_state=None, splitter='best')

In [57]: import math

def entropy(column):
counts = column.value_counts()
probabilities = counts / len(column)
return sum([-p * math.log2(p) for p in probabilities])

def conditional_entropy(data, X, target):


feature_values = data[X].unique()
weighted_entropy = 0
for value in feature_values:
subset = data[data[X] == value]
weighted_entropy += (len(subset) / len(data)) * entropy(subset[target])
return weighted_entropy

def information_gain(data, X, target):


total_entropy = entropy(data[target])
feature_conditional_entropy = conditional_entropy(data, X, target)
return total_entropy - feature_conditional_entropy
for feature in X:
ig = information_gain(df, feature, 'diagnosis')
print(f"Information Gain[{feature}] : {ig}")

Information Gain[radius_mean] : 0.8607815854835991


Information Gain[texture_mean] : 0.8357118798482908
Information Gain[perimeter_mean] : 0.9267038614138748
Information Gain[area_mean] : 0.9280305529818247
Information Gain[smoothness_mean] : 0.7761788341876101
Information Gain[compactness_mean] : 0.9091291689709926
Information Gain[concavity_mean] : 0.9350604299589776
Information Gain[concave_points_mean] : 0.9420903069361305
Information Gain[symmetry_mean] : 0.735036638169654
Information Gain[fractal_dimension_mean] : 0.8361770160635639
Information Gain[radius_se] : 0.9337337383910278
Information Gain[texture_se] : 0.8642965239721755
Information Gain[perimeter_se] : 0.9315454914704012
Information Gain[area_se] : 0.925377169845925
Information Gain[smoothness_se] : 0.9350604299589776
Information Gain[compactness_se] : 0.9231889229252984
Information Gain[concavity_se] : 0.9280305529818247
Information Gain[concave_points_se] : 0.8585933385629725
Information Gain[symmetry_se] : 0.8181371874054084
Information Gain[fractal_dimension_se] : 0.9174857375160954
Information Gain[radius_worst] : 0.9003074642106167
Information Gain[texture_worst] : 0.8634349686194988
Information Gain[perimeter_worst] : 0.8985843535052632
Information Gain[area_worst] : 0.9350604299589776
Information Gain[smoothness_worst] : 0.7197189097252679
Information Gain[compactness_worst] : 0.9183472928687721
Information Gain[concavity_worst] : 0.9302187999024514
Information Gain[concave_points_worst] : 0.9148323543801957
Information Gain[symmetry_worst] : 0.8453951399613433
Information Gain[fractal_dimension_worst] : 0.8915544765281104

In [58]: import matplotlib.pyplot as plt


from sklearn import tree

plt.figure(figsize=(20, 8))
tree.plot_tree(model, feature_names=X.columns, filled=True, rounded=True, fontsize=8)
plt.show()

In [59]: y_pred = model.predict(X_test)


print(y_pred)

[0 1 1 0 0 1 1 1 1 0 0 1 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1
0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 1 1 0 0 0 1 1 0 0 1 1 0 1
0 0 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 1 1 0 1 1 0 1 1 0 0 0 1 0 0 1
0 0 1]

In [60]: accuracy = accuracy_score(y_test, y_pred) * 100


classification_rep = classification_report(y_test, y_pred)
print("Accuracy:\n", accuracy)
print("Classification Report:\n", classification_rep)

Accuracy:
94.73684210526315
Classification Report:
precision recall f1-score support

0 0.95 0.97 0.96 71


1 0.95 0.91 0.93 43

accuracy 0.95 114


macro avg 0.95 0.94 0.94 114
weighted avg 0.95 0.95 0.95 114

In [61]: new = [[12.5, 19.2, 80.0, 500.0,0.035,0.1,0.05,0.02,0.17,0.06,0.4,1.0,2.5,40.0,0.006,0.02,0.03,0.01,0.02,0.003,


16.0,25.0,105.0,900.0,0.13,0.25,0.28,0.12,0.29,0.08]]
y_pred = model.predict(new)

if y_pred[0] == 0:
print("Prediction: Benign")
else:
print("Prediction: Malignant")

Prediction: Benign

In [ ]:

In [ ]:

You might also like