.. code:: ipython3
import numpy as np
import pandas as pd
import plotly.express as px
.. code:: ipython3
# Load the raw loan dataset from the local download folder.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\DELL\Downloads\archive\loan_data.csv"
)
.. code:: ipython3
# Work on a separate frame without the two nominal columns; `df` itself is
# left unchanged because drop() returns a new DataFrame.
ds = df.drop(columns=['person_home_ownership', 'loan_intent'])
.. code:: ipython3
# Shuffle every row with a fixed seed, then renumber the index from 0.
ds = (
    ds.sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
ds
.. raw:: html
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>person_age</th>
<th>person_gender</th>
<th>person_education</th>
<th>person_income</th>
<th>person_emp_exp</th>
<th>loan_amnt</th>
<th>loan_int_rate</th>
<th>loan_percent_income</th>
<th>cb_person_cred_hist_length</th>
<th>credit_score</th>
<th>previous_loan_defaults_on_file</th>
<th>loan_status</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>32.0</td>
<td>male</td>
<td>Associate</td>
<td>96865.0</td>
<td>10</td>
<td>7500.0</td>
<td>6.04</td>
<td>0.08</td>
<td>10.0</td>
<td>601</td>
<td>No</td>
<td>0</td>
</tr>
<tr>
<th>1</th>
<td>24.0</td>
<td>male</td>
<td>Associate</td>
<td>56838.0</td>
<td>6</td>
<td>9000.0</td>
<td>11.49</td>
<td>0.16</td>
<td>4.0</td>
<td>647</td>
<td>Yes</td>
<td>0</td>
</tr>
<tr>
<th>2</th>
<td>22.0</td>
<td>female</td>
<td>Master</td>
<td>37298.0</td>
<td>0</td>
<td>5000.0</td>
<td>14.88</td>
<td>0.13</td>
<td>4.0</td>
<td>711</td>
<td>No</td>
<td>1</td>
</tr>
<tr>
<th>3</th>
<td>23.0</td>
<td>female</td>
<td>Bachelor</td>
<td>39944.0</td>
<td>1</td>
<td>5000.0</td>
<td>13.99</td>
<td>0.13</td>
<td>3.0</td>
<td>597</td>
<td>Yes</td>
<td>0</td>
</tr>
<tr>
<th>4</th>
<td>42.0</td>
<td>male</td>
<td>High School</td>
<td>67974.0</td>
<td>20</td>
<td>10000.0</td>
<td>15.41</td>
<td>0.15</td>
<td>11.0</td>
<td>586</td>
<td>No</td>
<td>1</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>44995</th>
<td>26.0</td>
<td>male</td>
<td>High School</td>
<td>88451.0</td>
<td>6</td>
<td>10625.0</td>
<td>6.03</td>
<td>0.12</td>
<td>4.0</td>
<td>559</td>
<td>Yes</td>
<td>0</td>
</tr>
<tr>
<th>44996</th>
<td>25.0</td>
<td>female</td>
<td>High School</td>
<td>34772.0</td>
<td>3</td>
<td>5876.0</td>
<td>11.69</td>
<td>0.17</td>
<td>4.0</td>
<td>647</td>
<td>No</td>
<td>1</td>
</tr>
<tr>
<th>44997</th>
<td>33.0</td>
<td>female</td>
<td>Associate</td>
<td>58317.0</td>
<td>9</td>
<td>10707.0</td>
<td>10.74</td>
<td>0.18</td>
<td>9.0</td>
<td>652</td>
<td>Yes</td>
<td>0</td>
</tr>
<tr>
<th>44998</th>
<td>26.0</td>
<td>male</td>
<td>Master</td>
<td>178602.0</td>
<td>6</td>
<td>20000.0</td>
<td>17.99</td>
<td>0.11</td>
<td>3.0</td>
<td>604</td>
<td>No</td>
<td>1</td>
</tr>
<tr>
<th>44999</th>
<td>26.0</td>
<td>female</td>
<td>High School</td>
<td>210894.0</td>
<td>4</td>
<td>15000.0</td>
<td>11.01</td>
<td>0.07</td>
<td>3.0</td>
<td>664</td>
<td>Yes</td>
<td>0</td>
</tr>
</tbody>
</table>
<p>45000 rows × 12 columns</p>
</div>
.. code:: ipython3
# Encode the two two-valued text columns as integer codes. pd.factorize
# numbers categories in order of first appearance, so the codes follow the
# current row order of `ds`.
ds['person_gender'] = pd.factorize(ds['person_gender'])[0]
ds['previous_loan_defaults_on_file'] = pd.factorize(
    ds['previous_loan_defaults_on_file']
)[0]

# Education is ordered, so it gets an explicit rank instead of factorize codes.
education_hierarchy = {
    'High School': 0,
    'Associate': 1,
    'Bachelor': 2,
    'Master': 3,
    'Doctorate': 4,
}
ds['person_education'] = ds['person_education'].map(education_hierarchy)
.. code:: ipython3
# X takes columns 0-11 by position as a NumPy array; Y is the last column.
X = ds.iloc[:, list(range(12))].values
Y = ds.iloc[:, -1].values
.. code:: ipython3
# Echo the frame after encoding.
ds
.. raw:: html
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>person_age</th>
<th>person_gender</th>
<th>person_education</th>
<th>person_income</th>
<th>person_emp_exp</th>
<th>loan_amnt</th>
<th>loan_int_rate</th>
<th>loan_percent_income</th>
<th>cb_person_cred_hist_length</th>
<th>credit_score</th>
<th>previous_loan_defaults_on_file</th>
<th>loan_status</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>32.0</td>
<td>0</td>
<td>1</td>
<td>96865.0</td>
<td>10</td>
<td>7500.0</td>
<td>6.04</td>
<td>0.08</td>
<td>10.0</td>
<td>601</td>
<td>0</td>
<td>0</td>
</tr>
<tr>
<th>1</th>
<td>24.0</td>
<td>0</td>
<td>1</td>
<td>56838.0</td>
<td>6</td>
<td>9000.0</td>
<td>11.49</td>
<td>0.16</td>
<td>4.0</td>
<td>647</td>
<td>1</td>
<td>0</td>
</tr>
<tr>
<th>2</th>
<td>22.0</td>
<td>1</td>
<td>3</td>
<td>37298.0</td>
<td>0</td>
<td>5000.0</td>
<td>14.88</td>
<td>0.13</td>
<td>4.0</td>
<td>711</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<th>3</th>
<td>23.0</td>
<td>1</td>
<td>2</td>
<td>39944.0</td>
<td>1</td>
<td>5000.0</td>
<td>13.99</td>
<td>0.13</td>
<td>3.0</td>
<td>597</td>
<td>1</td>
<td>0</td>
</tr>
<tr>
<th>4</th>
<td>42.0</td>
<td>0</td>
<td>0</td>
<td>67974.0</td>
<td>20</td>
<td>10000.0</td>
<td>15.41</td>
<td>0.15</td>
<td>11.0</td>
<td>586</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<th>...</th>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
<td>...</td>
</tr>
<tr>
<th>44995</th>
<td>26.0</td>
<td>0</td>
<td>0</td>
<td>88451.0</td>
<td>6</td>
<td>10625.0</td>
<td>6.03</td>
<td>0.12</td>
<td>4.0</td>
<td>559</td>
<td>1</td>
<td>0</td>
</tr>
<tr>
<th>44996</th>
<td>25.0</td>
<td>1</td>
<td>0</td>
<td>34772.0</td>
<td>3</td>
<td>5876.0</td>
<td>11.69</td>
<td>0.17</td>
<td>4.0</td>
<td>647</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<th>44997</th>
<td>33.0</td>
<td>1</td>
<td>1</td>
<td>58317.0</td>
<td>9</td>
<td>10707.0</td>
<td>10.74</td>
<td>0.18</td>
<td>9.0</td>
<td>652</td>
<td>1</td>
<td>0</td>
</tr>
<tr>
<th>44998</th>
<td>26.0</td>
<td>0</td>
<td>3</td>
<td>178602.0</td>
<td>6</td>
<td>20000.0</td>
<td>17.99</td>
<td>0.11</td>
<td>3.0</td>
<td>604</td>
<td>0</td>
<td>1</td>
</tr>
<tr>
<th>44999</th>
<td>26.0</td>
<td>1</td>
<td>0</td>
<td>210894.0</td>
<td>4</td>
<td>15000.0</td>
<td>11.01</td>
<td>0.07</td>
<td>3.0</td>
<td>664</td>
<td>1</td>
<td>0</td>
</tr>
</tbody>
</table>
<p>45000 rows × 12 columns</p>
</div>
.. code:: ipython3
# Echo the dtype of every column.
ds.dtypes
.. parsed-literal::
person_age float64
person_gender int64
person_education int64
person_income float64
person_emp_exp int64
loan_amnt float64
loan_int_rate float64
loan_percent_income float64
cb_person_cred_hist_length float64
credit_score int64
previous_loan_defaults_on_file int64
loan_status int64
dtype: object
.. code:: ipython3
def feature_scaling(X):
    """Min-max scale every column of a 2-D array to the range [0, 1].

    Parameters
    ----------
    X : numpy.ndarray
        2-D array of numeric values, one column per feature.

    Returns
    -------
    numpy.ndarray
        The scaled array. A floating-point ``X`` is modified in place and
        returned; any other dtype is first converted to a float copy, because
        fractional values cannot be stored in an integer array.

    Notes
    -----
    Columns with a single distinct value are left unchanged (their range is
    zero, so scaling would divide by zero). Columns that already span exactly
    0..1 are skipped since scaling them is a no-op.
    """
    if X.dtype.kind != "f":
        X = X.astype(float)
    if X.size == 0:
        return X

    for col in range(X.shape[1]):
        lo = X[:, col].min()
        hi = X[:, col].max()
        span = hi - lo
        if span == 0 or (lo == 0 and hi == 1):
            continue
        X[:, col] = (X[:, col] - lo) / span
    return X
# Unused vectorised variant of feature_scaling, kept as a bare string literal
# rather than live code; as the last expression in the cell its repr is what
# the cell echoes.
'''def feature_scaling(X):
X = X.astype(float) # Ensure float for scaling
col_max = X.max(axis=0)
col_min = X.min(axis=0)
# Boolean mask: True for binary columns (only 0 and 1)
binary_cols = (col_min == 0) & (col_max == 1)
# Avoid division by zero
range_vals = col_max - col_min
range_vals[range_vals == 0] = 1
# Perform min-max scaling
X_scaled = (X - col_min) / range_vals
# Put back the binary columns without scaling
X_scaled[:, binary_cols] = X[:, binary_cols]
return X_scaled
'''
.. parsed-literal::
'def feature_scaling(X):\n X = X.astype(float) # Ensure float for scaling\
n\n col_max = X.max(axis=0)\n col_min = X.min(axis=0)\n\n # Boolean mask:
True for binary columns (only 0 and 1)\n binary_cols = (col_min == 0) & (col_max
== 1)\n\n # Avoid division by zero\n range_vals = col_max - col_min\n
range_vals[range_vals == 0] = 1\n\n # Perform min-max scaling\n X_scaled = (X
- col_min) / range_vals\n\n # Put back the binary columns without scaling\n
X_scaled[:, binary_cols] = X[:, binary_cols]\n\n return X_scaled\n'
.. code:: ipython3
# Scale a copy so the unscaled X stays available.
X1 = X.copy()
.. code:: ipython3
# Min-max scale the copied matrix.
X1 = feature_scaling(X1)
.. code:: ipython3
# Number of leading (already shuffled) rows used for fitting.
TRAIN_ROWS = 3600

training = X1[:TRAIN_ROWS]
# The last column of X1 is the label; everything before it is a feature.
x = training[:, :-1]
y = training[:, -1].reshape(-1, 1)
# Hold out every remaining row. Slicing from TRAIN_ROWS (not TRAIN_ROWS + 1)
# keeps the first held-out row from being silently dropped.
test = X1[TRAIN_ROWS:]
.. code:: ipython3
def sigmoid(f):
    """Return the logistic function 1 / (1 + exp(-f)), element-wise.

    Parameters
    ----------
    f : float or numpy.ndarray
        Linear score(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        Values in [0, 1] with the same shape as ``f``.
    """
    # log(1 + exp(-f)) is computed by logaddexp without ever forming exp(-f),
    # so very negative scores no longer overflow.
    return np.exp(-np.logaddexp(0, -f))
.. code:: ipython3
# Starting parameters: an 11x1 column of ones for the weights and a zero bias.
w = np.ones((11, 1))
b = 0
# Number of training rows.
m = len(x)
# Forward pass with the starting parameters.
f = np.dot(x, w) + b
s = sigmoid(f)
.. code:: ipython3
def cost_calc(y, s):
    """Return the mean binary cross-entropy of predictions against labels.

    Parameters
    ----------
    y : array-like
        True labels (0 or 1), one row per sample.
    s : array-like
        Predicted probabilities, same shape as ``y``.

    Returns
    -------
    numpy.float64
        ``-mean(y * log(s) + (1 - y) * log(1 - s))``, averaged over the number
        of rows in ``y``.
    """
    y = np.asarray(y, dtype=float)
    # Keep probabilities strictly inside (0, 1) so log() never receives 0,
    # which would turn the cost into NaN/inf for saturated predictions.
    eps = 1e-15
    s = np.clip(np.asarray(s, dtype=float), eps, 1 - eps)
    # The second term is weighted by (1 - y): it is the penalty for the
    # negative class.
    loss = -(y * np.log(s) + (1 - y) * np.log(1 - s))
    return (1 / y.shape[0]) * np.sum(loss)
.. code:: ipython3
def derivative_calc(x, y, s):
    """Return the gradients of the mean cross-entropy cost.

    Parameters
    ----------
    x : numpy.ndarray
        Feature matrix, one row per sample.
    y : numpy.ndarray
        True labels as a column, one row per sample.
    s : numpy.ndarray
        Predicted probabilities, same shape as ``y``.

    Returns
    -------
    list
        ``[dj_dw, dj_db]``: the gradient with respect to the weights (one row
        per feature) and the scalar gradient with respect to the bias.
    """
    # Average over the rows actually passed in, so the result is correct for
    # any batch and not only for the one whose length is stored globally.
    n_samples = x.shape[0]
    residual = s - y
    dj_dw = (1 / n_samples) * np.dot(x.T, residual)
    dj_db = (1 / n_samples) * np.sum(residual)
    return [dj_dw, dj_db]
.. code:: ipython3
# Echo the gradients for the training data and the current predictions `s`.
derivative_calc(x, y, s)
.. parsed-literal::
[array([[0.0452881 ],
[0.31779622],
[0.23901949],
[0.00751574],
[0.0321477 ],
[0.18264943],
[0.23971088],
[0.13128926],
[0.10121809],
[0.37012421],
[0.47815916]]),
np.float64(0.6954458548817282)]
.. code:: ipython3
def gradient_descent(w, b, a, features=None, labels=None, n_iters=10000):
    """Fit logistic-regression parameters by batch gradient descent.

    Parameters
    ----------
    w : numpy.ndarray
        Initial weights as a column, one row per feature.
    b : float
        Initial bias.
    a : float
        Learning rate.
    features : numpy.ndarray, optional
        Feature matrix, one row per sample. Defaults to the module-level ``x``.
    labels : numpy.ndarray, optional
        Labels as a column. Defaults to the module-level ``y``.
    n_iters : int, optional
        Number of update steps (default 10000).

    Returns
    -------
    tuple
        ``(w, b)`` after the final update.
    """
    features = x if features is None else features
    labels = y if labels is None else labels
    n_samples = features.shape[0]

    for _ in range(n_iters):
        # Predictions and gradients must be recomputed from the *current*
        # parameters on every step; reusing one gradient for all steps only
        # moves along a straight line from the starting point.
        preds = 1 / (1 + np.exp(-(np.dot(features, w) + b)))
        residual = preds - labels
        w = w - a * (1 / n_samples) * np.dot(features.T, residual)
        b = b - a * (1 / n_samples) * np.sum(residual)
    return (w, b)
.. code:: ipython3
# Fit from the starting w and b with learning rate 0.001 and echo the result.
gradient_descent(w, b, 0.001)
.. parsed-literal::
(array([[ 0.54711902],
[-2.17796218],
[-1.3901949 ],
[ 0.9248426 ],
[ 0.67852302],
[-0.8264943 ],
[-1.39710879],
[-0.31289259],
[-0.01218094],
[-2.70124215],
[-3.78159158]]),
np.float64(-6.954458548815924))