15/10/2024, 16:18 week10 - Colab
Loading the data
# Block 1: Decision Tree Classifier
# Import necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
# Fetch the Iris dataset and pull out the feature matrix, the class labels,
# and the class names used to label the reports further down.
iris = load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
DecisionTreeClassifier
# Hyper-parameter combinations tried for the decision tree (3 x 3 = 9 candidates).
param_grid_dt = {
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}
dt_classifier = DecisionTreeClassifier(random_state=42)
# Exhaustive search over the grid with 5-fold cross-validation on the training split.
grid_search_dt = GridSearchCV(estimator=dt_classifier, param_grid=param_grid_dt, cv=5, n_jobs=-1)
grid_search_dt.fit(X_train, y_train)
# Evaluate the best model found by the search on the held-out test split.
best_dt_classifier = grid_search_dt.best_estimator_
y_pred_dt = best_dt_classifier.predict(X_test)
print("\nClassification Report for Decision Tree Classifier:")
print(classification_report(y_test, y_pred_dt, target_names=target_names))
conf_matrix_dt = confusion_matrix(y_test, y_pred_dt)
print(conf_matrix_dt)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
# accuracy_score returns a fraction in [0, 1]; the "%" format type scales it by
# 100, so a perfect score prints as "100.00%" instead of the misleading "1.00%".
print(f"Accuracy of Decision Tree Classifier: {accuracy_dt:.2%}")
Classification Report for Decision Tree Classifier:
precision recall f1-score support
setosa 1.00 1.00 1.00 10
versicolor 1.00 1.00 1.00 9
virginica 1.00 1.00 1.00 11
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
Accuracy of Decision Tree Classifier: 1.00%
RandomForestClassifier
# Candidate settings for the random forest: 3 x 3 x 3 = 27 combinations.
param_grid_rf = {
    "n_estimators": [10, 50, 100],
    "max_depth": [None, 5, 10],
    "min_samples_split": [2, 5, 10],
}
rf_classifier = RandomForestClassifier(random_state=42)
# Cross-validated (5 folds) exhaustive search over the grid above.
grid_search_rf = GridSearchCV(
    estimator=rf_classifier,
    param_grid=param_grid_rf,
    cv=5,
    n_jobs=-1,
)
grid_search_rf.fit(X_train, y_train)
# Keep the winning model and predict the held-out test samples with it.
best_rf_classifier = grid_search_rf.best_estimator_
y_pred_rf = best_rf_classifier.predict(X_test)
https://colab.research.google.com/drive/1mcEppbuL3E5LEkCmYd6osgwfnL_KCjT8#scrollTo=O3j1q3bTZBX_&printMode=true 1/3
15/10/2024, 16:18 week10 - Colab
# Report how the tuned random forest performs on the held-out test split.
print("\nClassification Report for Random Forest Classifier:")
print(classification_report(y_test, y_pred_rf, target_names=target_names))
# Results are stored under *_rf names so the decision-tree results
# (conf_matrix_dt / accuracy_dt) are not overwritten.
conf_matrix_rf = confusion_matrix(y_test, y_pred_rf)
print(conf_matrix_rf)
# Score the random forest's own predictions (the original scored y_pred_dt,
# which silently re-reported the decision tree's accuracy).
accuracy_rf = accuracy_score(y_test, y_pred_rf)
# accuracy_score returns a fraction in [0, 1]; the "%" format type scales it by 100.
print(f"Accuracy of Random Forest Classifier: {accuracy_rf:.2%}")
Classification Report for Random Forest Classifier:
precision recall f1-score support
setosa 1.00 1.00 1.00 10
versicolor 1.00 1.00 1.00 9
virginica 1.00 1.00 1.00 11
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
Accuracy of Random Forest Classifier: 1.00%
BaggingClassifier
# Hyper-parameter combinations tried for the bagging ensemble (3 x 2 x 2 = 12
# candidates): number of base estimators, and the share of samples / features
# passed as max_samples / max_features.
param_grid_bagging = {
    'n_estimators': [10, 50, 100],
    'max_samples': [0.5, 1.0],
    'max_features': [0.5, 1.0],
}
bagging_classifier = BaggingClassifier(random_state=42)
# Exhaustive search over the grid with 5-fold cross-validation on the training split.
grid_search_bagging = GridSearchCV(estimator=bagging_classifier, param_grid=param_grid_bagging, cv=5, n_jobs=-1)
grid_search_bagging.fit(X_train, y_train)
# Best parameters and predictions
best_bagging_classifier = grid_search_bagging.best_estimator_
y_pred_bagging = best_bagging_classifier.predict(X_test)
print("\nClassification Report for Bagging Classifier:")
print(classification_report(y_test, y_pred_bagging, target_names=target_names))
# Results are stored under *_bagging names so the decision-tree results
# (conf_matrix_dt / accuracy_dt) are not overwritten.
conf_matrix_bagging = confusion_matrix(y_test, y_pred_bagging)
print(conf_matrix_bagging)
# Score the bagging model's own predictions (the original scored y_pred_dt,
# which silently re-reported the decision tree's accuracy).
accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
# accuracy_score returns a fraction in [0, 1]; the "%" format type scales it by 100.
print(f"Accuracy of Bagging Classifier: {accuracy_bagging:.2%}")
Classification Report for Bagging Classifier:
precision recall f1-score support
setosa 1.00 1.00 1.00 10
versicolor 1.00 1.00 1.00 9
virginica 1.00 1.00 1.00 11
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
Accuracy of Bagging Classifier: 1.00%
GradientBoostingClassifier
# Candidate settings for gradient boosting: 2 x 3 x 3 = 18 combinations.
param_grid_gb = {
    "n_estimators": [50, 100],
    "learning_rate": [0.01, 0.1, 0.2],
    "max_depth": [3, 5, 7],
}
gb_classifier = GradientBoostingClassifier(random_state=42)
# Cross-validated (5 folds) exhaustive search over the grid above.
grid_search_gb = GridSearchCV(
    estimator=gb_classifier,
    param_grid=param_grid_gb,
    cv=5,
    n_jobs=-1,
)
https://colab.research.google.com/drive/1mcEppbuL3E5LEkCmYd6osgwfnL_KCjT8#scrollTo=O3j1q3bTZBX_&printMode=true 2/3
15/10/2024, 16:18 week10 - Colab
# Run the gradient boosting grid search on the training split.
grid_search_gb.fit(X_train, y_train)
# Take the best model from the search and predict the held-out test samples.
best_gb_classifier = grid_search_gb.best_estimator_
y_pred_gb = best_gb_classifier.predict(X_test)
# Each report is printed as a heading line followed by the metric itself;
# sep="\n" puts the metric on its own line, exactly as two print calls would.
print(
    "\nClassification Report for Gradient Boosting Classifier:",
    classification_report(y_test, y_pred_gb, target_names=target_names),
    sep="\n",
)
print(
    "\nConfusion Matrix for Gradient Boosting Classifier:",
    confusion_matrix(y_test, y_pred_gb),
    sep="\n",
)
print(
    "\nAccuracy Score for Gradient Boosting Classifier:",
    accuracy_score(y_test, y_pred_gb),
    sep="\n",
)
Classification Report for Gradient Boosting Classifier:
precision recall f1-score support
setosa 1.00 1.00 1.00 10
versicolor 1.00 1.00 1.00 9
virginica 1.00 1.00 1.00 11
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30
Confusion Matrix for Gradient Boosting Classifier:
[[10 0 0]
[ 0 9 0]
[ 0 0 11]]
Accuracy Score for Gradient Boosting Classifier:
1.0
https://colab.research.google.com/drive/1mcEppbuL3E5LEkCmYd6osgwfnL_KCjT8#scrollTo=O3j1q3bTZBX_&printMode=true 3/3