9/7/2018                                                     komal_knn1_minMaxScalar
In [36]: import numpy as np
                    import pandas as pd
                     import matplotlib.pyplot as plt
                     from sklearn.preprocessing import MinMaxScaler
                     from sklearn.model_selection import train_test_split
                     from sklearn.neighbors import KNeighborsClassifier
                     from sklearn import metrics
                     import seaborn as sns
                     # Configure seaborn in ONE call: every sns.set() call resets the arguments
                     # it is not given, so a separate second call would put font_scale back to 1.
                     sns.set(style='white', font_scale=1.5, color_codes=True)
           In [2]: location = r"D:\komal\SIMPLILEARN\MY COURSES\IN PROGRESS\DATA SCIENCE WITH PYT
                   HON\Live class downloads\Aug 11 Sat - Sep 15 Sat - Attending\datasets\iris.cs
                   v"
           In [3]: # Load the Iris data set: four flower measurements plus the species label in 'class'
                   df_iris = pd.read_csv(location)
                   df_iris.head()
           Out[3]:
                        sepal_length sepal_width petal_length petal_width                   class
                     0 5.1              3.5            1.4               0.2           Iris-setosa
                     1 4.9              3.0            1.4               0.2           Iris-setosa
                     2 4.7              3.2            1.3               0.2           Iris-setosa
                     3 4.6              3.1            1.5               0.2           Iris-setosa
                     4 5.0              3.6            1.4               0.2           Iris-setosa
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   1/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [4]: # List the matplotlib style sheets available in this installation
                   plt.style.available
           Out[4]: ['bmh',
                    'classic',
                    'dark_background',
                    'fast',
                    'fivethirtyeight',
                    'ggplot',
                    'grayscale',
                    'seaborn-bright',
                    'seaborn-colorblind',
                    'seaborn-dark-palette',
                    'seaborn-dark',
                    'seaborn-darkgrid',
                    'seaborn-deep',
                    'seaborn-muted',
                    'seaborn-notebook',
                    'seaborn-paper',
                    'seaborn-pastel',
                    'seaborn-poster',
                    'seaborn-talk',
                    'seaborn-ticks',
                    'seaborn-white',
                    'seaborn-whitegrid',
                    'seaborn',
                    'Solarize_Light2',
                    'tableau-colorblind10',
                    '_classic_test']
           In [5]: # Use the 'ggplot' style sheet for the figures drawn below
                   plt.style.use('ggplot')
           In [6]: # Summary statistics of the raw features. The means are all within the
                   # same order of magnitude, so scaling might not be beneficial here.
                   # If the features were on very different scales, scaling could
                   # significantly improve the accuracy of a distance-based classifier.
                     df_iris.describe()
           Out[6]:
                             sepal_length sepal_width petal_length petal_width
                     count 150.000000        150.000000     150.000000      150.000000
                     mean 5.843333           3.054000       3.758667        1.198667
                     std     0.828066        0.433594       1.764420        0.763161
                     min     4.300000        2.000000       1.000000        0.100000
                     25%     5.100000        2.800000       1.600000        0.300000
                     50%     5.800000        3.000000       4.350000        1.300000
                     75%     6.400000        3.300000       5.100000        1.800000
                     max     7.900000        4.400000       6.900000        2.500000
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   2/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [7]: X = df_iris.drop('class' , 1).values # drop target variable
                   y1 = df_iris['class'].values
                   y = df_iris['class']
           In [8]: # Min-max scaler with default settings (feature_range=(0, 1), see the repr below)
                   scaler = MinMaxScaler()
                   scaler
           Out[8]: MinMaxScaler(copy=True, feature_range=(0, 1))
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   3/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [9]: # Fit the scaler on the complete feature matrix and rescale every column.
                   # NOTE(review): the scaler sees all rows here, including the rows that later
                   # end up in the test split - fine for exploration, not for model evaluation.
                   X_scaled = scaler.fit_transform(X)
                    print('X_scaled type is', type(X_scaled))
                    X_scaled
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   4/13
9/7/2018                                                   komal_knn1_minMaxScalar
                    X_scaled type is <class 'numpy.ndarray'>
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   5/13
9/7/2018                                                   komal_knn1_minMaxScalar
           Out[9]: array([[0.22222222,        0.625     ,     0.06779661,     0.04166667],
                          [0.16666667,        0.41666667,     0.06779661,     0.04166667],
                          [0.11111111,        0.5       ,     0.05084746,     0.04166667],
                          [0.08333333,        0.45833333,     0.08474576,     0.04166667],
                          [0.19444444,        0.66666667,     0.06779661,     0.04166667],
                          [0.30555556,        0.79166667,     0.11864407,     0.125     ],
                          [0.08333333,        0.58333333,     0.06779661,     0.08333333],
                          [0.19444444,        0.58333333,     0.08474576,     0.04166667],
                          [0.02777778,        0.375     ,     0.06779661,     0.04166667],
                          [0.16666667,        0.45833333,     0.08474576,     0.        ],
                          [0.30555556,        0.70833333,     0.08474576,     0.04166667],
                          [0.13888889,        0.58333333,     0.10169492,     0.04166667],
                          [0.13888889,        0.41666667,     0.06779661,     0.        ],
                          [0.        ,        0.41666667,     0.01694915,     0.        ],
                          [0.41666667,        0.83333333,     0.03389831,     0.04166667],
                          [0.38888889,        1.        ,     0.08474576,     0.125     ],
                          [0.30555556,        0.79166667,     0.05084746,     0.125     ],
                          [0.22222222,        0.625     ,     0.06779661,     0.08333333],
                          [0.38888889,        0.75      ,     0.11864407,     0.08333333],
                          [0.22222222,        0.75      ,     0.08474576,     0.08333333],
                          [0.30555556,        0.58333333,     0.11864407,     0.04166667],
                          [0.22222222,        0.70833333,     0.08474576,     0.125     ],
                          [0.08333333,        0.66666667,     0.        ,     0.04166667],
                          [0.22222222,        0.54166667,     0.11864407,     0.16666667],
                          [0.13888889,        0.58333333,     0.15254237,     0.04166667],
                          [0.19444444,        0.41666667,     0.10169492,     0.04166667],
                          [0.19444444,        0.58333333,     0.10169492,     0.125     ],
                          [0.25      ,        0.625     ,     0.08474576,     0.04166667],
                          [0.25      ,        0.58333333,     0.06779661,     0.04166667],
                          [0.11111111,        0.5       ,     0.10169492,     0.04166667],
                          [0.13888889,        0.45833333,     0.10169492,     0.04166667],
                          [0.30555556,        0.58333333,     0.08474576,     0.125     ],
                          [0.25      ,        0.875     ,     0.08474576,     0.        ],
                          [0.33333333,        0.91666667,     0.06779661,     0.04166667],
                          [0.16666667,        0.45833333,     0.08474576,     0.        ],
                          [0.19444444,        0.5       ,     0.03389831,     0.04166667],
                          [0.33333333,        0.625     ,     0.05084746,     0.04166667],
                          [0.16666667,        0.45833333,     0.08474576,     0.        ],
                          [0.02777778,        0.41666667,     0.05084746,     0.04166667],
                          [0.22222222,        0.58333333,     0.08474576,     0.04166667],
                          [0.19444444,        0.625     ,     0.05084746,     0.08333333],
                          [0.05555556,        0.125     ,     0.05084746,     0.08333333],
                          [0.02777778,        0.5       ,     0.05084746,     0.04166667],
                          [0.19444444,        0.625     ,     0.10169492,     0.20833333],
                          [0.22222222,        0.75      ,     0.15254237,     0.125     ],
                          [0.13888889,        0.41666667,     0.06779661,     0.08333333],
                          [0.22222222,        0.75      ,     0.10169492,     0.04166667],
                          [0.08333333,        0.5       ,     0.06779661,     0.04166667],
                          [0.27777778,        0.70833333,     0.08474576,     0.04166667],
                          [0.19444444,        0.54166667,     0.06779661,     0.04166667],
                          [0.75      ,        0.5       ,     0.62711864,     0.54166667],
                          [0.58333333,        0.5       ,     0.59322034,     0.58333333],
                          [0.72222222,        0.45833333,     0.66101695,     0.58333333],
                          [0.33333333,        0.125     ,     0.50847458,     0.5       ],
                          [0.61111111,        0.33333333,     0.61016949,     0.58333333],
                          [0.38888889,        0.33333333,     0.59322034,     0.5       ],
                          [0.55555556,        0.54166667,     0.62711864,     0.625     ],
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   6/13
9/7/2018                                                   komal_knn1_minMaxScalar
                             [0.16666667,     0.16666667,     0.38983051,     0.375     ],
                             [0.63888889,     0.375     ,     0.61016949,     0.5       ],
                             [0.25      ,     0.29166667,     0.49152542,     0.54166667],
                             [0.19444444,     0.        ,     0.42372881,     0.375     ],
                             [0.44444444,     0.41666667,     0.54237288,     0.58333333],
                             [0.47222222,     0.08333333,     0.50847458,     0.375     ],
                             [0.5       ,     0.375     ,     0.62711864,     0.54166667],
                             [0.36111111,     0.375     ,     0.44067797,     0.5       ],
                             [0.66666667,     0.45833333,     0.57627119,     0.54166667],
                             [0.36111111,     0.41666667,     0.59322034,     0.58333333],
                             [0.41666667,     0.29166667,     0.52542373,     0.375     ],
                             [0.52777778,     0.08333333,     0.59322034,     0.58333333],
                             [0.36111111,     0.20833333,     0.49152542,     0.41666667],
                             [0.44444444,     0.5       ,     0.6440678 ,     0.70833333],
                             [0.5       ,     0.33333333,     0.50847458,     0.5       ],
                             [0.55555556,     0.20833333,     0.66101695,     0.58333333],
                             [0.5       ,     0.33333333,     0.62711864,     0.45833333],
                             [0.58333333,     0.375     ,     0.55932203,     0.5       ],
                             [0.63888889,     0.41666667,     0.57627119,     0.54166667],
                             [0.69444444,     0.33333333,     0.6440678 ,     0.54166667],
                             [0.66666667,     0.41666667,     0.6779661 ,     0.66666667],
                             [0.47222222,     0.375     ,     0.59322034,     0.58333333],
                             [0.38888889,     0.25      ,     0.42372881,     0.375     ],
                             [0.33333333,     0.16666667,     0.47457627,     0.41666667],
                             [0.33333333,     0.16666667,     0.45762712,     0.375     ],
                             [0.41666667,     0.29166667,     0.49152542,     0.45833333],
                             [0.47222222,     0.29166667,     0.69491525,     0.625     ],
                             [0.30555556,     0.41666667,     0.59322034,     0.58333333],
                             [0.47222222,     0.58333333,     0.59322034,     0.625     ],
                             [0.66666667,     0.45833333,     0.62711864,     0.58333333],
                             [0.55555556,     0.125     ,     0.57627119,     0.5       ],
                             [0.36111111,     0.41666667,     0.52542373,     0.5       ],
                             [0.33333333,     0.20833333,     0.50847458,     0.5       ],
                             [0.33333333,     0.25      ,     0.57627119,     0.45833333],
                             [0.5       ,     0.41666667,     0.61016949,     0.54166667],
                             [0.41666667,     0.25      ,     0.50847458,     0.45833333],
                             [0.19444444,     0.125     ,     0.38983051,     0.375     ],
                             [0.36111111,     0.29166667,     0.54237288,     0.5       ],
                             [0.38888889,     0.41666667,     0.54237288,     0.45833333],
                             [0.38888889,     0.375     ,     0.54237288,     0.5       ],
                             [0.52777778,     0.375     ,     0.55932203,     0.5       ],
                             [0.22222222,     0.20833333,     0.33898305,     0.41666667],
                             [0.38888889,     0.33333333,     0.52542373,     0.5       ],
                             [0.55555556,     0.54166667,     0.84745763,     1.        ],
                             [0.41666667,     0.29166667,     0.69491525,     0.75      ],
                             [0.77777778,     0.41666667,     0.83050847,     0.83333333],
                             [0.55555556,     0.375     ,     0.77966102,     0.70833333],
                             [0.61111111,     0.41666667,     0.81355932,     0.875     ],
                             [0.91666667,     0.41666667,     0.94915254,     0.83333333],
                             [0.16666667,     0.20833333,     0.59322034,     0.66666667],
                             [0.83333333,     0.375     ,     0.89830508,     0.70833333],
                             [0.66666667,     0.20833333,     0.81355932,     0.70833333],
                             [0.80555556,     0.66666667,     0.86440678,     1.        ],
                             [0.61111111,     0.5       ,     0.69491525,     0.79166667],
                             [0.58333333,     0.29166667,     0.72881356,     0.75      ],
                             [0.69444444,     0.41666667,     0.76271186,     0.83333333],
                             [0.38888889,     0.20833333,     0.6779661 ,     0.79166667],
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   7/13
9/7/2018                                                   komal_knn1_minMaxScalar
                             [0.41666667,     0.33333333,     0.69491525,     0.95833333],
                             [0.58333333,     0.5       ,     0.72881356,     0.91666667],
                             [0.61111111,     0.41666667,     0.76271186,     0.70833333],
                             [0.94444444,     0.75      ,     0.96610169,     0.875     ],
                             [0.94444444,     0.25      ,     1.        ,     0.91666667],
                             [0.47222222,     0.08333333,     0.6779661 ,     0.58333333],
                             [0.72222222,     0.5       ,     0.79661017,     0.91666667],
                             [0.36111111,     0.33333333,     0.66101695,     0.79166667],
                             [0.94444444,     0.33333333,     0.96610169,     0.79166667],
                             [0.55555556,     0.29166667,     0.66101695,     0.70833333],
                             [0.66666667,     0.54166667,     0.79661017,     0.83333333],
                             [0.80555556,     0.5       ,     0.84745763,     0.70833333],
                             [0.52777778,     0.33333333,     0.6440678 ,     0.70833333],
                             [0.5       ,     0.41666667,     0.66101695,     0.70833333],
                             [0.58333333,     0.33333333,     0.77966102,     0.83333333],
                             [0.80555556,     0.41666667,     0.81355932,     0.625     ],
                             [0.86111111,     0.33333333,     0.86440678,     0.75      ],
                             [1.        ,     0.75      ,     0.91525424,     0.79166667],
                             [0.58333333,     0.33333333,     0.77966102,     0.875     ],
                             [0.55555556,     0.33333333,     0.69491525,     0.58333333],
                             [0.5       ,     0.25      ,     0.77966102,     0.54166667],
                             [0.94444444,     0.41666667,     0.86440678,     0.91666667],
                             [0.55555556,     0.58333333,     0.77966102,     0.95833333],
                             [0.58333333,     0.45833333,     0.76271186,     0.70833333],
                             [0.47222222,     0.41666667,     0.6440678 ,     0.70833333],
                             [0.72222222,     0.45833333,     0.74576271,     0.83333333],
                             [0.66666667,     0.45833333,     0.77966102,     0.95833333],
                             [0.72222222,     0.45833333,     0.69491525,     0.91666667],
                             [0.41666667,     0.29166667,     0.69491525,     0.75      ],
                             [0.69444444,     0.5       ,     0.83050847,     0.91666667],
                             [0.66666667,     0.54166667,     0.79661017,     1.        ],
                             [0.66666667,     0.41666667,     0.71186441,     0.91666667],
                             [0.55555556,     0.20833333,     0.6779661 ,     0.75      ],
                             [0.61111111,     0.41666667,     0.71186441,     0.79166667],
                             [0.52777778,     0.58333333,     0.74576271,     0.91666667],
                             [0.44444444,     0.41666667,     0.69491525,     0.70833333]])
           In [10]: # transform back to df for easier exploration/plotting (output of scaler)
                    X_scaled_df = pd.DataFrame(X_scaled, columns=['s_SepalLength','s_SepalWidth',
                                                                  's_PetalLength','s_PetalWidth'])
                      X_scaled_df.head()
           Out[10]:
                        s_SepalLength s_SepalWidth s_PetalLength s_PetalWidth
                      0 0.222222          0.625000         0.067797          0.041667
                      1 0.166667          0.416667         0.067797          0.041667
                      2 0.111111          0.500000         0.050847          0.041667
                      3 0.083333          0.458333         0.084746          0.041667
                      4 0.194444          0.666667         0.067797          0.041667
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   8/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [11]: df_iris_scaled = pd.concat([X_scaled_df,y],axis=1)
                    df_iris_scaled.head()
           Out[11]:
                        s_SepalLength s_SepalWidth s_PetalLength s_PetalWidth                      class
                      0 0.222222          0.625000         0.067797          0.041667         Iris-setosa
                      1 0.166667          0.416667         0.067797          0.041667         Iris-setosa
                      2 0.111111          0.500000         0.050847          0.041667         Iris-setosa
                      3 0.083333          0.458333         0.084746          0.041667         Iris-setosa
                      4 0.194444          0.666667         0.067797          0.041667         Iris-setosa
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   9/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [12]: # Notice x-axis on subplots are all the same for all features (0 to 1)
                    # after scaling.
                    fig = plt.figure(figsize=(14,9))
                    fig.suptitle('Frequency Distribution of Features by Species ',fontsize=20)
                    ax1 = fig.add_subplot(221)
                    df_iris_scaled.groupby("class").s_PetalLength.plot(kind='hist',
                                                                       alpha=0.8,
                                                                       legend=True,
                                                                       title='s_PetalLength')
                    ax2 = fig.add_subplot(222,sharey=ax1)
                    df_iris_scaled.groupby("class").s_PetalWidth.plot(kind='hist',
                                                                      alpha=0.8,
                                                                      legend=True,
                                                                      title='s_PetalWidth')
                    ax3 = fig.add_subplot(223,sharey=ax1)
                    df_iris_scaled.groupby("class").s_SepalLength.plot(kind='hist',
                                                                       alpha=0.8,
                                                                       legend=True,
                                                                       title='s_SepalLength')
                    ax4 = fig.add_subplot(224,sharey=ax1)
                    df_iris_scaled.groupby("class").s_SepalWidth.plot(kind='hist',
                                                                      alpha=0.8,
                                                                      legend=True,
                                                                      title='s_SepalWidth');
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   10/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [13]: # Summary statistics after scaling: every feature now has min 0 and max 1
                    X_scaled_df.describe()
           Out[13]:
                             s_SepalLength s_SepalWidth s_PetalLength s_PetalWidth
                      count 150.000000         150.000000       150.000000           150.000000
                      mean 0.428704            0.439167         0.467571             0.457778
                      std    0.230018          0.180664         0.299054             0.317984
                      min    0.000000          0.000000         0.000000             0.000000
                      25%    0.222222          0.333333         0.101695             0.083333
                      50%    0.416667          0.416667         0.567797             0.500000
                      75%    0.583333          0.541667         0.694915             0.708333
                      max    1.000000          1.000000         1.000000             1.000000
           In [18]: # train and test split
                      X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, rando
                      m_state = 0)
           In [19]: print("train sample size",X_train.shape, type(X_train))
                    print("test sample size",X_test.shape, type(X_test))
                      train sample size (105, 4) <class 'numpy.ndarray'>
                      test sample size (45, 4) <class 'numpy.ndarray'>
           In [23]: clf = KNeighborsClassifier(n_neighbors=5)
                    clf.fit(X_train, y_train)
           Out[23]: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                               metric_params=None, n_jobs=1, n_neighbors=5, p=2,
                               weights='uniform')
           In [24]: # Predict the species of the held-out test samples
                    y_pred = clf.predict(X_test)
           In [28]: # Confusion matrix of true labels (first argument) against predictions (second argument)
                    cm = metrics.confusion_matrix(y_test, y_pred)
           In [29]: cm
           Out[29]: array([[16, 0, 0],
                           [ 0, 17, 1],
                           [ 0, 0, 11]], dtype=int64)
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   11/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [32]: CT=pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], marg
                    ins=True)
                    CT
           Out[32]:
                            Predicted Iris-setosa Iris-versicolor Iris-virginica All
                                 True
                      Iris-setosa       16          0                0               16
                      Iris-versicolor 0             17               1               18
                      Iris-virginica    0           0                11              11
                      All               16          17               12              45
           In [38]: from sklearn.metrics import accuracy_score
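    An insight we can get from the matrix is that every sample predicted as setosa or versicolor was
    correct (precision = True Positive / All predicted = 1.0). Precision for virginica was lower
    (11/12 = 0.917) because one versicolor sample was predicted as virginica, which also lowers the
    recall for versicolor to 17/18 = 0.944.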
           In [39]: plt.figure(figsize=(6,4))
                    sns.heatmap(CT, annot=True)
                    plt.title('KNN classification model \nAccuracy:{0:.3f}'.format(accuracy_score(
                    y_test, y_pred)))
                    plt.ylabel('True label')
                    plt.xlabel('Predicted label')
           Out[39]: Text(0.5,16,'Predicted label')
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   12/13
9/7/2018                                                   komal_knn1_minMaxScalar
           In [42]: # Text report of precision, recall, F1 score and support for each class
                    from sklearn.metrics import classification_report
                    print(classification_report(y_test,y_pred))
                                          precision        recall     f1-score       support
                        Iris-setosa              1.00         1.00         1.00           16
                    Iris-versicolor              1.00         0.94         0.97           18
                     Iris-virginica              0.92         1.00         0.96           11
                         avg / total             0.98         0.98         0.98           45
           In [43]: # Classification accuracy : Overall how often is the classifier correct?
                    print(metrics.accuracy_score(y_test, y_pred))
                    # classification error : Overall how often is the classifier incorrect?
                    print(1-metrics.accuracy_score(y_test, y_pred))
                    0.9777777777777777
                    0.022222222222222254
           In [45]: # Sensitivity: when the actual value is +ve, how often is the prediction correct?
                    # Also known as "True Positive Rate" or "Recall"
                    # should be MAXIMIZED
                    # print(metrics.recall_score(y_test, y_pred, average=None))
                    # Specificity: when the actual value is -ve, how often is the prediction correct?
                    # Also known as "Selectivity" or "True Negative Rate"
                    # should be MAXIMIZED
                    # False Positive Rate: when the actual value is -ve, how often is the
                    # prediction incorrect?
                    # 1 - Specificity
                    # Precision: when a +ve value is predicted, how often is the prediction correct?
                    # print(metrics.precision_score(y_test, y_pred, average=None))
file:///D:/komal/SIMPLILEARN/MY%20COURSES/IN%20PROGRESS/My%20Codes_ML_DS/codes%20in%20pdf/komal_knn1_minMaxScalar.html   13/13