miniproject-2
August 6, 2024
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Location of the breast-cancer CSV. Set the BREAST_CANCER_CSV environment
# variable to run the notebook on another machine; the fallback is the path
# the notebook was originally written against.
DATA_PATH = os.environ.get(
    "BREAST_CANCER_CSV",
    "C:\\Users\\SRIRAM\\OneDrive\\Documents\\WIN-SEM 2022-23"
    "\\B2_ML\\LAB\\PROJECT\\Breast_cancer.csv",
)

# Column names applied to the dataset (they replace whatever header the file has).
# 'Uniformity_of_Cell_Size' previously contained a space instead of an
# underscore, unlike every other column name.
column_names = ['Sample_code_number', 'Clump_Thickness', 'Uniformity_of_Cell_Size',
                'Uniformity_of_Cell_Shape', 'Marginal_Adhesion',
                'Single_Epithelial_Cell_Size', 'Bare_Nuclei', 'Bland_Chromatin',
                'Normal_Nucleoli', 'Mitoses', 'Class']

# Load the breast_cancer_dataset
raw_df = pd.read_csv(DATA_PATH)
raw_df.columns = column_names

# Build the cleaned frame in one chain instead of mutating in place, so that
# re-running the cell always starts from the freshly loaded data.
df = (
    raw_df
    # The sample code number is an identifier, not a feature.
    .drop(columns='Sample_code_number')
    # Missing values are denoted by '?' in the file; mark them as NaN ...
    .replace('?', np.nan)
    # ... and drop the affected rows.
    .dropna()
    # Recode the target from [2, 4] to [0, 1].
    .assign(Class=lambda frame: np.where(frame['Class'] == 2, 0, 1))
    # 'Bare_Nuclei' held the '?' markers, so it needs an explicit numeric cast.
    .astype({'Bare_Nuclei': 'int64', 'Class': 'int64'})
)

# Sanity check: the cleaning step must leave no missing values behind.
assert not df.isna().any().any(), "unexpected missing values after cleaning"

# Show the updated data set (pandas truncates the printout to a preview).
print("Updated data set without sample_code_number \n", df)

# Split the dataset into features (every column but the last) and target (last column)
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Standardize the features: fit on the training split only, then apply the
# same transformation to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
Updated data set without sample_code_number
      Clump_Thickness Uniformity_of_Cell Size       Uniformity_of_Cell_Shape   \
0                  5                         1                            1
1                  5                         4                            4
2                  3                         1                            1
3                  6                         8                            8
4                  4                         1                            1
..               …                      …                           …
694                3                         1                            1
695                2                         1                            1
696                5                        10                           10
697                4                         8                            6
698                4                         8                            8
      Marginal_Adhesion   Single_Epithelial_Cell_Size    Bare_Nuclei   \
0                     1                             2              1
1                     5                             7             10
2                     1                             2              2
3                     1                             3              4
4                     3                             2              1
..                  …                           …            …
694                   1                             3              2
695                   1                             2              1
696                   3                             7              3
697                   4                             3              4
698                   5                             4              5
      Bland_Chromatin   Normal_Nucleoli   Mitoses   Class
0                   3                 1         1       0
1                   3                 2         1       0
2                   3                 1         1       0
3                   3                 7         1       0
                                           2
     4                    3               1               1   0
     ..               …              …        …       …
     694                1                 1               1   0
     695                1                 1               1   0
     696                8                10               2   1
     697               10                 6               1   1
     698               10                 4               1   1
     [683 rows x 10 columns]
# Naive Bayes
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Fit a Gaussian Naive Bayes classifier on the training split
nb = GaussianNB().fit(X_train, y_train)

# Predict labels for the held-out test split
y_pred = nb.predict(X_test)

# Confusion-matrix heatmap for the Naive Bayes predictions
nb_cm = confusion_matrix(y_test, y_pred)
ax = sns.heatmap(nb_cm, annot=True, cmap='Blues')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion matrix for testing set using Naive bayes')
plt.show()
print("\n")

# Accuracy, precision and recall on the test split
nb_accuracy = accuracy_score(y_test, y_pred)
nb_precision = precision_score(y_test, y_pred)
nb_recall = recall_score(y_test, y_pred)

# Report each score followed by a blank separator
for caption, score in (
    ("Naïve Bayesian Classifier accuracy:", nb_accuracy),
    ("Naive Bayes precision:", nb_precision),
    ("Naive Bayes recall:", nb_recall),
):
    print(caption, score)
    print("\n")
                                                  3
     Naïve Bayesian Classifier accuracy: 0.9562043795620438
     Naive Bayes precision: 0.9482758620689655
     Naive Bayes recall: 0.9482758620689655
# ANN
from sklearn.neural_network import MLPClassifier

# Train the model: one hidden layer of 100 units, at most 500 iterations.
# random_state pins the weight initialisation and shuffling so the reported
# metrics are reproducible; 42 matches the seed used for the train/test split.
ann = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
ann.fit(X_train, y_train)

# Make predictions on the test split
y_pred = ann.predict(X_test)

# Compute confusion matrix for ANN
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion matrix for testing set using ANN')
plt.show()
print("\n")

# Evaluate performance (accuracy)
ann_accuracy = accuracy_score(y_test, y_pred)
print("ANN accuracy:", ann_accuracy)
print("\n")

# Compute the precision
ann_precision = precision_score(y_test, y_pred)
print("ANN precision:", ann_precision)
print("\n")

# Compute the recall
ann_recall = recall_score(y_test, y_pred)
print("ANN recall:", ann_recall)
print("\n")
                                       5
     ANN accuracy: 0.9781021897810219
     ANN precision: 0.9661016949152542
     ANN recall: 0.9827586206896551
# KNN
from sklearn.neighbors import KNeighborsClassifier

# Train the model: each prediction is a vote among the 5 nearest training points
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Make predictions on the test split
y_pred = knn.predict(X_test)

# Compute confusion matrix for KNN
# (the title previously said "ANN", a leftover from the cell this was copied from)
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion matrix for testing set using KNN')
plt.show()
print("\n")

# Evaluate performance (accuracy)
knn_accuracy = accuracy_score(y_test, y_pred)
print("KNN accuracy:", knn_accuracy)
print("\n")

# Compute the precision
knn_precision = precision_score(y_test, y_pred)
print("KNN precision:", knn_precision)
print("\n")

# Compute the recall
knn_recall = recall_score(y_test, y_pred)
print("KNN recall:", knn_recall)
print("\n")
C:\Users\SRIRAM\anaconda3\lib\site-
packages\sklearn\neighbors\_classification.py:228: FutureWarning: Unlike other
reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode`
typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will
change: the default value of `keepdims` will become False, the `axis` over which
the statistic is taken will be eliminated, and the value None will no longer be
accepted. Set `keepdims` to True or False to avoid this warning.
  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
                                        7
     KNN accuracy: 0.9635036496350365
     KNN precision: 0.9818181818181818
     KNN recall: 0.9310344827586207
# Grouped bar chart comparing the three classifiers on the test split.
# Each tick position is one metric; each colour is one model.
x = np.arange(3)

# Bar width. It was never defined in this notebook (the cell relied on a
# leftover kernel variable); 0.3 matches the +/-0.3 offsets used for the
# groups, so the bars sit side by side.
# NOTE(review): confirm 0.3 is the width that was originally intended.
width = 0.3

# One series per model, ordered [accuracy, precision, recall]
y1 = [nb_accuracy, nb_precision, nb_recall]
y2 = [ann_accuracy, ann_precision, ann_recall]
y3 = [knn_accuracy, knn_precision, knn_recall]

# Plot data in grouped manner of bar type; the labels feed the legend, so the
# legend names the models and cannot drift out of sync with the series.
plt.bar(x - width, y1, width, color='violet', label='Naive Bayes')
plt.bar(x, y2, width, color='yellow', label='ANN')
plt.bar(x + width, y3, width, color='green', label='KNN')

# The x positions correspond to the metrics (previously mislabelled with the
# model names, while the legend listed metrics plus an unplotted "F1_score").
plt.xticks(x, ['Accuracy', 'Precision', 'Recall'])
plt.xlabel("Metrics")
plt.ylabel("Values")
plt.title("Model comparison on the test set")
plt.legend()
plt.show()
       [0 1 2]
[ ]: