# Question 1
def answer_one():
    """Assemble the cancer dataset into a single DataFrame.

    Returns:
        A DataFrame built from ``cancer.data`` with ``cancer.target``
        appended as the last column. Columns are named after
        ``cancer.feature_names`` followed by ``'target'``.
    """
    # Append the target vector as one extra column to the feature matrix.
    table = np.c_[cancer.data, cancer.target]
    column_names = list(cancer.feature_names) + ['target']
    return pd.DataFrame(data=table, columns=column_names)
# Question 2
def answer_two():
    """Count how many samples belong to each diagnosis class.

    Returns:
        The ``value_counts()`` Series of the ``'target'`` column, with each
        numeric class value replaced by its name (``'malignant'`` for 0,
        ``'benign'`` for 1). Row order is whatever ``value_counts()`` yields.
    """
    # Same encoding accuracy_plot relies on: 0 -> malignant, 1 -> benign.
    class_names = {0: 'malignant', 1: 'benign'}

    cancerdf = answer_one()
    counts = cancerdf['target'].value_counts()
    # Label each row from its own class value rather than by position:
    # value_counts() orders rows by frequency, so a fixed positional list
    # would mislabel the classes if the class balance ever flipped.
    counts.index = [class_names[value] for value in counts.index]
    return counts
# Question 3
def answer_three():
    """Split the cancer DataFrame into features and labels.

    Returns:
        A ``(X, y)`` tuple: ``X`` is the first 30 columns of the frame
        returned by ``answer_one`` and ``y`` is its ``'target'`` column.
    """
    frame = answer_one()
    return frame.iloc[:, :30], frame['target']
# Question 4
from sklearn.model_selection import train_test_split
def answer_four():
    """Split the features and labels into training and test sets.

    Uses ``train_test_split`` with ``random_state=0`` and otherwise default
    settings.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``.
    """
    features, labels = answer_three()
    # train_test_split returns a list; callers receive it as a 4-tuple.
    return tuple(train_test_split(features, labels, random_state=0))
# Question 5
from sklearn.neighbors import KNeighborsClassifier
def answer_five():
    """Fit a 1-nearest-neighbour classifier on the training split.

    Returns:
        The ``KNeighborsClassifier`` (``n_neighbors=1``) after fitting it on
        ``X_train`` / ``y_train`` from ``answer_four``.
    """
    # Only the training half of the split is needed here.
    train_features, _, train_labels, _ = answer_four()
    # fit() returns the fitted estimator itself, so the call can be chained.
    return KNeighborsClassifier(n_neighbors=1).fit(train_features, train_labels)
# Question 6
def answer_six():
    """Predict the class of a sample made of the per-column means.

    Returns:
        The output of ``predict`` for a single row holding the mean of every
        column of the cancer DataFrame except the last one.
    """
    frame = answer_one()
    column_means = frame.mean()
    # Drop the final entry (the 'target' column, which answer_one places
    # last) and shape the rest as one sample, i.e. a single row.
    mean_sample = column_means[:-1].values.reshape(1, -1)
    classifier = answer_five()
    return classifier.predict(mean_sample)
# Question 7
def answer_seven():
    """Predict class labels for the test split.

    Returns:
        The output of ``predict`` on ``X_test`` from ``answer_four``, using
        the classifier returned by ``answer_five``.
    """
    # Only the test features are needed from the split.
    _, test_features, _, _ = answer_four()
    classifier = answer_five()
    return classifier.predict(test_features)
# Question 8
def answer_eight():
    """Score the classifier on the test split.

    Returns:
        The value of ``score(X_test, y_test)`` for the classifier returned by
        ``answer_five``, with the test split taken from ``answer_four``.
    """
    # The training half of the split is not used for scoring.
    _, test_features, _, test_labels = answer_four()
    classifier = answer_five()
    return classifier.score(test_features, test_labels)
# Optional Plot
def accuracy_plot():
    """Plot the classifier's accuracy per class on the training and test sets.

    Draws a four-bar chart: accuracy on malignant (target 0) and benign
    (target 1) samples, first for the training split and then for the test
    split. The splits come from ``answer_four`` and the classifier from
    ``answer_five``. Each bar is annotated with its score to two decimals.

    Returns:
        None. The figure is created through ``matplotlib.pyplot``.
    """
    import matplotlib.pyplot as plt

    # Plain-Python spelling of the ``%matplotlib notebook`` magic: the magic
    # syntax itself is only valid inside an IPython cell. Outside IPython
    # ``get_ipython`` is undefined, so the current backend is left untouched.
    try:
        get_ipython().run_line_magic('matplotlib', 'notebook')
    except NameError:
        pass

    X_train, X_test, y_train, y_test = answer_four()

    # Separate each split by class so accuracy can be scored per class.
    mal_train_X = X_train[y_train == 0]
    mal_train_y = y_train[y_train == 0]
    ben_train_X = X_train[y_train == 1]
    ben_train_y = y_train[y_train == 1]

    mal_test_X = X_test[y_test == 0]
    mal_test_y = y_test[y_test == 0]
    ben_test_X = X_test[y_test == 1]
    ben_test_y = y_test[y_test == 1]

    knn = answer_five()

    # Order matches the x tick labels set further down.
    scores = [
        knn.score(mal_train_X, mal_train_y),
        knn.score(ben_train_X, ben_train_y),
        knn.score(mal_test_X, mal_test_y),
        knn.score(ben_test_X, ben_test_y),
    ]

    plt.figure()

    # One colour for the training bars, another for the test bars.
    bars = plt.bar(np.arange(4), scores,
                   color=['#4c72b0', '#4c72b0', '#55a868', '#55a868'])

    # Write each score inside its bar, just below the top edge.
    for bar in bars:
        height = bar.get_height()
        plt.gca().text(bar.get_x() + bar.get_width() / 2, height * .90,
                       '{0:.{1}f}'.format(height, 2),
                       ha='center', color='w', fontsize=11)

    # Hide every tick mark and the y labels; keep only the x labels.
    # Booleans are required here: a string such as 'off' is truthy.
    plt.tick_params(top=False, bottom=False, left=False, right=False,
                    labelleft=False, labelbottom=True)

    # Remove the frame around the plot.
    for spine in plt.gca().spines.values():
        spine.set_visible(False)

    plt.xticks([0, 1, 2, 3],
               ['Malignant\nTraining', 'Benign\nTraining',
                'Malignant\nTest', 'Benign\nTest'],
               alpha=0.8)
    plt.title('Training and Test Accuracies for Malignant and Benign Cells',
              alpha=0.8)