import pandas as pd
import math
df = pd.read_csv('1d3.csv')
print("\n Input Data Set is:\n", df)
t = df.keys()[-1]
print('Target Attribute is: ', t)
attribute_names = list(df.keys())
attribute_names.remove(t)
print('Predicting Attributes:', attribute_names)
def entropy(probs):
    return sum([-prob * math.log(prob, 2) for prob in probs])
def entropy_of_list(ls, value):
    from collections import Counter
    cnt = Counter(x for x in ls)
    print('Target attribute class count(Yes/No)', dict(cnt))
    total_instances = len(ls)
    print("Total no of instances/records associated with {0} is: {1}".format(value, total_instances))
    probs = [x / total_instances for x in cnt.values()]
    return entropy(probs)
def information_gain(df, split_attribute, target_attribute, battr):
    print("\n\n-----Information Gain Calculation of ", split_attribute, "--------")
    df_split = df.groupby(split_attribute)  # group records by the values of the split attribute
    glist = []
    for gname, group in df_split:
        print('Grouped Attribute Values \n', group)
        glist.append(gname)
    glist.reverse()
    nobs = len(df.index)  # total number of records in this subset
    df_agg1 = df_split.agg({target_attribute: lambda x: entropy_of_list(x, glist.pop())})
    df_agg1.columns = ['Entropy']
    df_agg2 = df_split.agg({target_attribute: lambda x: len(x) / nobs})
    df_agg2.columns = ['Proportion']
    new_entropy = sum(df_agg1['Entropy'] * df_agg2['Proportion'])  # weighted entropy after the split
    if battr != 'S':
        old_entropy = entropy_of_list(df[target_attribute], 'S-' + df.iloc[0, df.columns.get_loc(battr)])
    else:
        old_entropy = entropy_of_list(df[target_attribute], battr)
    return old_entropy - new_entropy
def id3(df, target_attribute, attribute_names, default_class=None, default_attr='S'):
    from collections import Counter
    cnt = Counter(x for x in df[target_attribute])  # class counts (e.g. Yes/No)
    if len(cnt) == 1:  # all records share one class
        return next(iter(cnt))
    elif df.empty or (not attribute_names):  # nothing left to split on
        return default_class
    else:
        default_class = max(cnt, key=cnt.get)  # majority class of this subset
        gainz = []
        for attr in attribute_names:
            ig = information_gain(df, attr, target_attribute, default_attr)
            gainz.append(ig)
            print("Information gain of ", attr, " is: ", ig)
        index_of_max = gainz.index(max(gainz))
        best_attr = attribute_names[index_of_max]
        print("\nAttribute with the maximum gain is: ", best_attr)
        tree = {best_attr: {}}
        remaining_attribute_names = [i for i in attribute_names if i != best_attr]
        for attr_val, data_subset in df.groupby(best_attr):
            subtree = id3(data_subset, target_attribute, remaining_attribute_names,
                          default_class, best_attr)
            tree[best_attr][attr_val] = subtree
        return tree
from pprint import pprint
tree = id3(df, t, attribute_names)
print("\nThe Resultant Decision Tree is:")
pprint(tree)
The code you've shared implements a decision tree classifier using the ID3 (Iterative Dichotomiser 3)
algorithm. Let's walk through the process and explain what the code is doing:
1. Reading the Dataset:
The dataset is read using pandas.read_csv():
df = pd.read_csv('1d3.csv')
print("\n Input Data Set is:\n", df)
This loads the data from the CSV file 1d3.csv into a DataFrame, and prints the dataset.
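The contents of 1d3.csv are not shown here. As a minimal sketch, assuming a PlayTennis-style dataset (the column names and rows below are illustrative, not the actual file), you could generate a compatible CSV like this:
import pandas as pd
# Hypothetical stand-in for 1d3.csv, modelled on the classic PlayTennis example
sample = pd.DataFrame({
    'Outlook':     ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain'],
    'Temperature': ['Hot',   'Hot',   'Hot',      'Mild', 'Cool'],
    'Humidity':    ['High',  'High',  'High',     'High', 'Normal'],
    'Wind':        ['Weak',  'Strong', 'Weak',    'Weak', 'Weak'],
    'PlayTennis':  ['No',    'No',    'Yes',      'Yes',  'Yes'],
})
sample.to_csv('1d3.csv', index=False)  # writes a file the script above can read
The last column is treated as the target, which matches how the script picks t in the next step.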
2. Extracting Target and Predicting Attributes:
The target attribute (i.e., the attribute to be predicted) is the last column of the dataset:
t = df.keys()[-1]
print('Target Attribute is: ', t)
The remaining columns are the attributes that are used to predict the target:
attribute_names = list(df.keys())
attribute_names.remove(t)
print('Predicting Attributes:', attribute_names)
3. Entropy Calculation:
The function entropy(probs) calculates the entropy of a set of class probabilities. Entropy measures
the uncertainty (impurity) of a set of class labels: the lower the entropy, the purer the set.
def entropy(probs):
    return sum([-prob * math.log(prob, 2) for prob in probs])
The function entropy_of_list(ls, value) calculates the entropy of a list of target values for a
particular attribute value.
def entropy_of_list(ls, value):
    from collections import Counter
    cnt = Counter(x for x in ls)
    print('Target attribute class count(Yes/No)', dict(cnt))
    total_instances = len(ls)
    print("Total no of instances/records associated with {0} is: {1}".format(value, total_instances))
    probs = [x / total_instances for x in cnt.values()]
    return entropy(probs)
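For instance, with 9 "Yes" and 5 "No" records (counts chosen purely for illustration), the entropy comes out to roughly 0.940 bits, and both helpers agree:
# Illustrative only: a 9-positive / 5-negative class split
print(entropy([9/14, 5/14]))                           # about 0.940
print(entropy_of_list(['Yes'] * 9 + ['No'] * 5, 'S'))  # same value via the list helper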
4. Information Gain:
The Information Gain (IG) is used to determine which attribute to split on at each step of the tree.
It measures the reduction in entropy when splitting the data based on a particular attribute. The
function information_gain() calculates the information gain of splitting the dataset on a specific
attribute:
def information_gain(df, split_attribute, target_attribute, battr):
    print("\n\n-----Information Gain Calculation of ", split_attribute, "--------")
    df_split = df.groupby(split_attribute)  # group records by the values of the split attribute
    glist = []
    for gname, group in df_split:
        print('Grouped Attribute Values \n', group)
        glist.append(gname)
    glist.reverse()
    nobs = len(df.index)  # total number of records in this subset
    df_agg1 = df_split.agg({target_attribute: lambda x: entropy_of_list(x, glist.pop())})
    df_agg1.columns = ['Entropy']
    df_agg2 = df_split.agg({target_attribute: lambda x: len(x) / nobs})
    df_agg2.columns = ['Proportion']
    new_entropy = sum(df_agg1['Entropy'] * df_agg2['Proportion'])  # weighted entropy after the split
    if battr != 'S':
        old_entropy = entropy_of_list(df[target_attribute], 'S-' + df.iloc[0, df.columns.get_loc(battr)])
    else:
        old_entropy = entropy_of_list(df[target_attribute], battr)
    return old_entropy - new_entropy
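As a quick usage sketch, the gain of a single attribute over the whole dataset can be inspected directly; passing 'S' as battr makes the function compute the parent entropy over the full table. The attribute name 'Outlook' here is an assumption carried over from the sample data above:
# Hypothetical root-level check; 'Outlook' is an assumed column name
gain_outlook = information_gain(df, 'Outlook', t, 'S')
print('Gain(S, Outlook) =', gain_outlook)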
5. ID3 Algorithm:
The main function, id3(), implements the ID3 algorithm for building a decision tree. This function
works recursively to split the data based on the attribute that maximizes the information gain.
def id3(df, target_attribute, attribute_names, default_class=None, default_attr='S'):
    from collections import Counter
    cnt = Counter(x for x in df[target_attribute])  # class counts (e.g. Yes/No)
    if len(cnt) == 1:  # all records share one class
        return next(iter(cnt))
    elif df.empty or (not attribute_names):  # nothing left to split on
        return default_class
    else:
        default_class = max(cnt, key=cnt.get)  # majority class of this subset
        gainz = []
        for attr in attribute_names:
            ig = information_gain(df, attr, target_attribute, default_attr)
            gainz.append(ig)
            print("Information gain of ", attr, " is: ", ig)
        index_of_max = gainz.index(max(gainz))
        best_attr = attribute_names[index_of_max]
        print("\nAttribute with the maximum gain is: ", best_attr)
        tree = {best_attr: {}}
        remaining_attribute_names = [i for i in attribute_names if i != best_attr]
        for attr_val, data_subset in df.groupby(best_attr):
            subtree = id3(data_subset, target_attribute, remaining_attribute_names,
                          default_class, best_attr)
            tree[best_attr][attr_val] = subtree
        return tree
Steps in the id3() Function:
1. Base Case: If all the instances in the dataset belong to the same class (e.g., all "YES" or all
"NO"), return that class.
2. Base Case: If the dataset is empty or there are no attributes left to split on, return the
default class (the majority class carried down from the parent call).
3. Choose the Best Attribute: Calculate the information gain for each attribute, and choose
the attribute with the highest information gain to split on.
4. Recursive Case: Split the dataset based on the chosen attribute and recursively build
subtrees for each subset of data.
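To make the recursion concrete, the returned tree is a nested dictionary: the outer key is the chosen attribute, each inner key is one of its values, and each inner value is either a class label (a leaf) or another subtree. For the classic 14-row PlayTennis table the result conventionally looks roughly like the sketch below; your CSV may of course produce a different tree:
# Illustrative shape only (depends entirely on the data in 1d3.csv)
example_tree = {'Outlook': {'Overcast': 'Yes',
                            'Rain':  {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},
                            'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}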
6. Printing the Decision Tree:
After running the id3() function, the decision tree is printed using the pprint() function for a better
visual representation:
from pprint import pprint
tree = id3(df, t, attribute_names)
print("\nThe Resultant Decision Tree is:")
pprint(tree)
Conclusion:
The code will generate a decision tree based on the ID3 algorithm. The tree will be constructed
step-by-step by evaluating which attribute (from the available ones) provides the highest
information gain, and then recursively applying this process to each subset of the data.
The result is a hierarchical decision tree structure that can be used to make predictions based on
the values of the attributes.
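The script stops at printing the tree. If you also want to use it for prediction, a small helper along the following lines (not part of the original code; the attribute names and values in the example query are assumptions) can walk the nested dictionary for a new record:
def classify(instance, tree, default=None):
    # instance is a dict mapping attribute name -> value, e.g. {'Outlook': 'Sunny', ...}
    if not isinstance(tree, dict):    # the tree is already a bare class label
        return tree
    attribute = next(iter(tree))      # attribute this node splits on
    value = instance.get(attribute)
    if value not in tree[attribute]:  # attribute value never seen during training
        return default
    return classify(instance, tree[attribute][value], default)

# Hypothetical query using the assumed PlayTennis-style attribute names
print(classify({'Outlook': 'Sunny', 'Humidity': 'High', 'Wind': 'Strong'}, tree))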