import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
# IPython magic carried over from the notebook export; `get_ipython` is only
# defined when this file runs under IPython/Jupyter.
get_ipython().run_line_magic('matplotlib', 'inline')

# Get the data
from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()
cancer.keys()  # notebook cell: shows the keys available on the dataset object
print(cancer['DESCR'])
cancer['feature_names']

# Set up DataFrame: one column per feature, named after cancer['feature_names'].
df_feat = pd.DataFrame(cancer['data'], columns=cancer['feature_names'])
df_feat.info()
# In [9]:
cancer['target']
# In [10]:
# Single-column frame holding the labels, stored under the column 'Cancer'.
df_target = pd.DataFrame(cancer['target'], columns=['Cancer'])
df_feat.head()
# Standardizing the variables
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# NOTE(review): the scaler is fit on every row of df_feat, i.e. before the
# train/test split performed later in this script, so held-out rows contribute
# to the scaling statistics. Confirm this is acceptable for the analysis.
scaler.fit(df_feat)
# Out: StandardScaler(copy=True, with_mean=True, with_std=True)
scaled_features = scaler.transform(df_feat)
# In [25]:
# Wrap the scaled array in a DataFrame that reuses the original column names.
df_feat_scaled = pd.DataFrame(scaled_features, columns=df_feat.columns)
# In [26]:
df_feat_scaled.head()
# In [27]:
# Train Test Split
from sklearn.model_selection import train_test_split

# np.ravel flattens df_target into a 1-D label array; 30% of the rows are held
# out for testing, and random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    scaled_features,
    np.ravel(df_target),
    test_size=0.30,
    random_state=105,
)
# In [20]:
# Using KNN. Remember that we are trying to come up with a model to predict
# TARGET as cancerous or not. We'll start with k=1.
from sklearn.neighbors import KNeighborsClassifier
# In [29]:
knn = KNeighborsClassifier(n_neighbors=1)
# In [30]:
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

# The evaluation helpers live in sklearn.metrics.
from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
# Test-set error rate for every k from 1 to 39: error_rate[j] holds the
# fraction of X_test rows misclassified by a classifier with n_neighbors=j+1.
error_rate = []
for i in range(1, 40):
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    # Mean of the element-wise mismatch mask == share of wrong predictions.
    error_rate.append(np.mean(pred_i != y_test))
# In [36]:
# Plot the error rate against k; the x values match the range(1, 40) used
# when error_rate was filled in.
plt.figure(figsize=(10, 6))
plt.plot(
    range(1, 40),
    error_rate,
    color='blue',
    linestyle='dashed',
    marker='o',
    markerfacecolor='red',
    markersize=10,
)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')
# WITH K=1
# Refit with a single neighbour and print the confusion matrix and
# classification report for the test set.
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
print('WITH K=1')
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))
# In [39]:
# NOW WITH K=21
# Refit with 21 neighbours and print the same two reports for comparison.
knn = KNeighborsClassifier(n_neighbors=21)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
print('WITH k=21')
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))