0% found this document useful (0 votes)
14 views7 pages

Compute3

The document discusses two tasks related to analyzing mushroom data using machine learning algorithms. Task 1 involves preprocessing mushroom data and building a decision tree classifier. Task 2 involves calculating the information gain of features to determine their importance for classification.

Uploaded by

lil Aady
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views7 pages

Compute3

The document discusses two tasks related to analyzing mushroom data using machine learning algorithms. Task 1 involves preprocessing mushroom data and building a decision tree classifier. Task 2 involves calculating the information gain of features to determine their importance for classification.

Uploaded by

lil Aady
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 7

Task 1:

import pandas
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import matplotlib.pyplot as plt

# Load the mushroom data set; the path is kept in a named variable so it is
# easy to spot and change on another machine.
csv_path = "F:\\4th sem\\Data\\agaricus-lepiota.csv"
df = pandas.read_csv(csv_path)

# Integer encodings for the categorical columns. Each table maps a
# one-character category code to its position in the code string, so the
# first character encodes to 0, the second to 1, and so on.
att1 = {code: index for index, code in enumerate('ep')}             # classes
att2 = {code: index for index, code in enumerate('bcxfks')}         # cap-shape
att3 = {code: index for index, code in enumerate('fgys')}           # cap-surface
att4 = {code: index for index, code in enumerate('nbcgrpuewy')}     # cap-color
att5 = {code: index for index, code in enumerate('tf')}             # bruises
att6 = {code: index for index, code in enumerate('alcyfmnps')}      # odor
att7 = {code: index for index, code in enumerate('adfn')}           # gill-attachment
att8 = {code: index for index, code in enumerate('cwd')}            # gill-spacing
att9 = {code: index for index, code in enumerate('bn')}             # gill-size
att10 = {code: index for index, code in enumerate('knbhgropuewy')}  # gill-color
att11 = {code: index for index, code in enumerate('et')}            # stalk-shape
att12 = {code: index for index, code in enumerate('bcuezr?')}       # stalk-root
att13 = {code: index for index, code in enumerate('fyks')}          # stalk-surface-above-ring
att14 = {code: index for index, code in enumerate('fyks')}          # stalk-surface-below-ring
att15 = {code: index for index, code in enumerate('nbcgopewy')}     # stalk-color-above-ring
att16 = {code: index for index, code in enumerate('nbcgopewy')}     # stalk-color-below-ring
att17 = {code: index for index, code in enumerate('pu')}            # veil-type
att18 = {code: index for index, code in enumerate('nowy')}          # veil-color
att19 = {code: index for index, code in enumerate('not')}           # ring-number
att20 = {code: index for index, code in enumerate('ceflnpsz')}      # ring-type
att21 = {code: index for index, code in enumerate('knbhrouwy')}     # spore-print-color
att22 = {code: index for index, code in enumerate('acnsvy')}        # population
att23 = {code: index for index, code in enumerate('glmpuwd')}       # habitat
# Replace every categorical column with its integer codes, in place.
# Each (column, table) pair is applied in the order listed; a value missing
# from its table becomes NaN, which is how Series.map treats unmapped keys.
for column, encoding in (
    ('cap-shape', att2),
    ('cap-surface', att3),
    ('cap-color', att4),
    ('bruises', att5),
    ('odor', att6),
    ('gill-attachment', att7),
    ('gill-spacing', att8),
    ('gill-size', att9),
    ('gill-color', att10),
    ('stalk-shape', att11),
    ('stalk-root', att12),
    ('stalk-surface-above-ring', att13),
    ('stalk-surface-below-ring', att14),
    ('stalk-color-above-ring', att15),
    ('stalk-color-below-ring', att16),
    ('veil-type', att17),
    ('veil-color', att18),
    ('ring-number', att19),
    ('ring-type', att20),
    ('spore-print-color', att21),
    ('population', att22),
    ('habitat', att23),
    ('classes', att1),
):
    df[column] = df[column].map(encoding)

# Names of the 22 predictor columns (every column except the 'classes'
# target). Each name is a single unbroken string literal: a literal split
# across physical lines is a syntax error and would not match any column.
features = [
    'cap-shape',
    'cap-surface',
    'cap-color',
    'bruises',
    'odor',
    'gill-attachment',
    'gill-spacing',
    'gill-size',
    'gill-color',
    'stalk-shape',
    'stalk-root',
    'stalk-surface-above-ring',
    'stalk-surface-below-ring',
    'stalk-color-above-ring',
    'stalk-color-below-ring',
    'veil-type',
    'veil-color',
    'ring-number',
    'ring-type',
    'spore-print-color',
    'population',
    'habitat',
]

# Predictors are the encoded feature columns; the target is the encoded
# 'classes' column (0 for 'e', 1 for 'p').
X = df[features]
y = df['classes']

# A fixed random_state makes the fitted tree (and so the plot) reproducible:
# scikit-learn permutes the candidate features at every split, so an
# unseeded tree can differ between runs when splits tie.
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X, y)

fig, ax = plt.subplots(figsize=(10, 5))

# Label nodes with the real column names and the original class codes
# instead of the default x[i] placeholders. class_names follows the
# ascending order of the encoded targets: 0 -> 'e', 1 -> 'p'.
tree.plot_tree(clf, feature_names=features, class_names=['e', 'p'], ax=ax)
plt.show()

Output: (the decision-tree figure shown by plt.show() is not reproduced in this text)
Task 2:

import pandas

import numpy as np

def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in ``target_col``.

    Args:
        target_col: Array-like of labels (anything ``np.unique`` accepts).

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct value. An empty input or a single distinct value yields
        ``0.0``.
    """
    _, counts = np.unique(target_col, return_counts=True)
    total = counts.sum()
    if total == 0:
        # No observations: define the entropy as zero.
        return 0.0

    probabilities = counts / total
    # Adding 0.0 turns the -0.0 produced by a pure column into a plain 0.0,
    # so callers never see a negative-zero entropy.
    return -np.sum(probabilities * np.log2(probabilities)) + 0.0

def info_gain(data, split_attribute_name, target_name):
    """Return the information gain from splitting ``data`` on one column.

    Args:
        data: pandas DataFrame holding both columns.
        split_attribute_name: Name of the column to split on.
        target_name: Name of the target (label) column.

    Returns:
        Entropy of the target column minus the weighted sum of the target
        entropies within each distinct value of the split column, each
        weighted by that value's share of the rows.
    """
    total_entropy = entropy(data[target_name])

    split_column = data[split_attribute_name]
    vals, counts = np.unique(split_column, return_counts=True)
    weights = counts / np.sum(counts)

    # Select each subset with a boolean mask. Masking with
    # DataFrame.where(...).dropna() would also discard matching rows that
    # have a NaN in any unrelated column, skewing the subset entropy.
    weighted_entropy = np.sum(
        [weight * entropy(data.loc[split_column == val, target_name])
         for val, weight in zip(vals, weights)])

    return total_entropy - weighted_entropy

# Load the mushroom data set; the path is kept in a named variable so it is
# easy to spot and change on another machine.
csv_path = "F:\\4th sem\\Data\\agaricus-lepiota.csv"
df = pandas.read_csv(csv_path)

# Integer encodings for the categorical columns. Each table maps a
# one-character category code to its position in the code string, so the
# first character encodes to 0, the second to 1, and so on.
att1 = {code: index for index, code in enumerate('ep')}             # classes
att2 = {code: index for index, code in enumerate('bcxfks')}         # cap-shape
att3 = {code: index for index, code in enumerate('fgys')}           # cap-surface
att4 = {code: index for index, code in enumerate('nbcgrpuewy')}     # cap-color
att5 = {code: index for index, code in enumerate('tf')}             # bruises
att6 = {code: index for index, code in enumerate('alcyfmnps')}      # odor
att7 = {code: index for index, code in enumerate('adfn')}           # gill-attachment
att8 = {code: index for index, code in enumerate('cwd')}            # gill-spacing
att9 = {code: index for index, code in enumerate('bn')}             # gill-size
att10 = {code: index for index, code in enumerate('knbhgropuewy')}  # gill-color
att11 = {code: index for index, code in enumerate('et')}            # stalk-shape
att12 = {code: index for index, code in enumerate('bcuezr?')}       # stalk-root
att13 = {code: index for index, code in enumerate('fyks')}          # stalk-surface-above-ring
att14 = {code: index for index, code in enumerate('fyks')}          # stalk-surface-below-ring
att15 = {code: index for index, code in enumerate('nbcgopewy')}     # stalk-color-above-ring
att16 = {code: index for index, code in enumerate('nbcgopewy')}     # stalk-color-below-ring
att17 = {code: index for index, code in enumerate('pu')}            # veil-type
att18 = {code: index for index, code in enumerate('nowy')}          # veil-color
att19 = {code: index for index, code in enumerate('not')}           # ring-number
att20 = {code: index for index, code in enumerate('ceflnpsz')}      # ring-type
att21 = {code: index for index, code in enumerate('knbhrouwy')}     # spore-print-color
att22 = {code: index for index, code in enumerate('acnsvy')}        # population
att23 = {code: index for index, code in enumerate('glmpuwd')}       # habitat

# Replace every categorical column with its integer codes, in place.
# Each (column, table) pair is applied in the order listed; a value missing
# from its table becomes NaN, which is how Series.map treats unmapped keys.
for column, encoding in (
    ('cap-shape', att2),
    ('cap-surface', att3),
    ('cap-color', att4),
    ('bruises', att5),
    ('odor', att6),
    ('gill-attachment', att7),
    ('gill-spacing', att8),
    ('gill-size', att9),
    ('gill-color', att10),
    ('stalk-shape', att11),
    ('stalk-root', att12),
    ('stalk-surface-above-ring', att13),
    ('stalk-surface-below-ring', att14),
    ('stalk-color-above-ring', att15),
    ('stalk-color-below-ring', att16),
    ('veil-type', att17),
    ('veil-color', att18),
    ('ring-number', att19),
    ('ring-type', att20),
    ('spore-print-color', att21),
    ('population', att22),
    ('habitat', att23),
    ('classes', att1),
):
    df[column] = df[column].map(encoding)

# Names of the 22 predictor columns (every column except the 'classes'
# target). Each name is a single unbroken string literal: a literal split
# across physical lines is a syntax error and would not match any column.
features = [
    'cap-shape',
    'cap-surface',
    'cap-color',
    'bruises',
    'odor',
    'gill-attachment',
    'gill-spacing',
    'gill-size',
    'gill-color',
    'stalk-shape',
    'stalk-root',
    'stalk-surface-above-ring',
    'stalk-surface-below-ring',
    'stalk-color-above-ring',
    'stalk-color-below-ring',
    'veil-type',
    'veil-color',
    'ring-number',
    'ring-type',
    'spore-print-color',
    'population',
    'habitat',
]

# Every column except the target is treated as a candidate feature.
X = df.drop('classes', axis=1)

# Target column; kept for parity with the classifier script, although the
# loop below reads the target from df by name instead.
y = df['classes']

info_gain_list = []

# Loop through each feature and calculate the information gain.
# The three statements below form the loop body: one gain is computed,
# stored and reported per feature, in column order.
for feature in X.columns:
    info_gain_val = info_gain(df, feature, 'classes')
    info_gain_list.append(info_gain_val)
    print("Information Gain for", feature, "=", info_gain_val)

# Printed once, after the loop, with the gains in the same order as X.columns.
print("Information Gain List:", info_gain_list)

Output:
Information Gain for cap-shape = 0.048796701935373
Information Gain for cap-surface = 0.028590232773772706
Information Gain for cap-color = 0.0360492829762038
Information Gain for bruises = 0.19237948576121955
Information Gain for odor = 0.9060749773839998
Information Gain for gill-attachment = 0.014165027250616191
Information Gain for gill-spacing = 0.10088318399657026
Information Gain for gill-size = 0.23015437514804604
Information Gain for gill-color = 0.41697752341613137
Information Gain for stalk-shape = 0.00751677256966421
Information Gain for stalk-root = 0.1348176376272756
Information Gain for stalk-surface-above-ring = 0.2847255992184844
Information Gain for stalk-surface-below-ring = 0.2718944733927463
Information Gain for stalk-color-above-ring = 0.2538451734622398
Information Gain for stalk-color-below-ring = 0.24141556652756646
Information Gain for veil-type = 0.0
Information Gain for veil-color = 0.02381701612091669
Information Gain for ring-number = 0.03845266924309043
Information Gain for ring-type = 0.3180215107935376
Information Gain for spore-print-color = 0.4807049176849153
Information Gain for population = 0.2019580190668523
Information Gain for habitat = 0.156833604605092
Information Gain List: [0.048796701935373, 0.028590232773772706,
0.0360492829762038, 0.19237948576121955, 0.9060749773839998,
0.014165027250616191, 0.10088318399657026, 0.23015437514804604,
0.41697752341613137, 0.00751677256966421, 0.1348176376272756,
0.2847255992184844, 0.2718944733927463, 0.2538451734622398,
0.24141556652756646, 0.0, 0.02381701612091669, 0.03845266924309043,
0.3180215107935376, 0.4807049176849153, 0.2019580190668523,
0.156833604605082]

You might also like