Telecom Churn Project

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

# Load dataset
df = pd.read_csv('/content/WA_Fn-UseC_-Telco-Customer-Churn.csv')
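Before any cleaning, a quick structural check confirms the file parsed as expected (a minimal sketch; the public IBM Telco CSV is expected to have 7,043 rows and 21 columns):

# Sketch: sanity-check the raw frame (the public IBM Telco file
# is expected to be 7043 rows x 21 columns)
print(df.shape)
print(df.head())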

# Drop customerID
df.drop('customerID', axis=1, inplace=True)

# Handle TotalCharges (stored as strings; blanks become NaN and are dropped)
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')
df.dropna(inplace=True)

# Encode target variable
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0})
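Churn is imbalanced in this dataset (roughly a quarter of customers churn), which is what motivates the SMOTE step further down; a quick check:

# Sketch: class balance — the minority (churn) class is roughly 27%,
# which motivates the SMOTE oversampling step below
print(df['Churn'].value_counts(normalize=True))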

# Convert binary categorical features
binary_cols = ['gender', 'Partner', 'Dependents', 'PhoneService',
               'PaperlessBilling']
for col in binary_cols:
    df[col] = df[col].map({'Yes': 1, 'No': 0, 'Male': 1, 'Female': 0})

# One-hot encode remaining categorical variables
df = pd.get_dummies(df, drop_first=True)
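One-hot encoding expands the frame considerably; a quick check (a sketch) confirms everything is now numeric before modeling:

# Sketch: confirm the frame is fully numeric after encoding
print(df.shape)
print(df.select_dtypes(include='object').columns.tolist())  # expect []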

# Features and target
X = df.drop('Churn', axis=1)
y = df['Churn']

# Scale numerical features
scaler = StandardScaler()
num_cols = ['tenure', 'MonthlyCharges', 'TotalCharges']
X[num_cols] = scaler.fit_transform(X[num_cols])

# Balance the dataset using SMOTE
smote = SMOTE(random_state=42)
X_bal, y_bal = smote.fit_resample(X, y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X_bal, y_bal, test_size=0.2, random_state=42)
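One caveat with this ordering: both the scaler and SMOTE are fit on the full dataset before the split, so synthetic test points are interpolated from (and the scaler has seen) data that ends up in the test partition, which tends to inflate the reported metrics. The results below were produced with the ordering above; a leakage-free variant would split first and oversample only the training portion, as in this sketch:

# Sketch (alternative ordering): split first, then oversample the
# training split only, so the test set contains only real customers.
# Running this instead of the cell above would change the metrics below.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)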

# --- 1. Logistic Regression ---
logreg = LogisticRegression(max_iter=1000)
logreg.fit(X_train, y_train)
y_pred_log = logreg.predict(X_test)
print("Logistic Regression:")
print(classification_report(y_test, y_pred_log))
print("ROC-AUC:", roc_auc_score(y_test, logreg.predict_proba(X_test)[:, 1]))
print("-" * 60)

Logistic Regression:
              precision    recall  f1-score   support

           0       0.81      0.78      0.80      1037
           1       0.79      0.82      0.80      1029

    accuracy                           0.80      2066
   macro avg       0.80      0.80      0.80      2066
weighted avg       0.80      0.80      0.80      2066

ROC-AUC: 0.880777135210056
------------------------------------------------------------
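cross_val_score is imported above but never used; as a sketch, a 5-fold cross-validated ROC-AUC gives a more stable estimate than the single split reported here:

# Sketch: 5-fold cross-validated ROC-AUC for the logistic model
cv_auc = cross_val_score(logreg, X_bal, y_bal, cv=5, scoring='roc_auc')
print(f"CV ROC-AUC: {cv_auc.mean():.3f} +/- {cv_auc.std():.3f}")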

# --- 2. Random Forest ---
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print("Random Forest:")
print(classification_report(y_test, y_pred_rf))
print("ROC-AUC:", roc_auc_score(y_test, rf.predict_proba(X_test)[:, 1]))
print("-" * 60)

Random Forest:
              precision    recall  f1-score   support

           0       0.84      0.82      0.83      1037
           1       0.82      0.85      0.83      1029

    accuracy                           0.83      2066
   macro avg       0.83      0.83      0.83      2066
weighted avg       0.83      0.83      0.83      2066

ROC-AUC: 0.9135555861688939
------------------------------------------------------------
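confusion_matrix is imported above but never called; a quick heatmap of the Random Forest errors (a sketch) shows where the misclassifications fall:

# Sketch: confusion matrix heatmap for the Random Forest predictions
cm = confusion_matrix(y_test, y_pred_rf)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Random Forest Confusion Matrix")
plt.show()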

# --- 3. XGBoost ---
# Note: use_label_encoder was removed from recent XGBoost releases
# (the original run emitted a 'Parameters: { "use_label_encoder" } are
# not used' warning), so it is dropped here.
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)
print("XGBoost Classifier:")
print(classification_report(y_test, y_pred_xgb))
print("ROC-AUC:", roc_auc_score(y_test, xgb.predict_proba(X_test)[:, 1]))
print("-" * 60)


XGBoost Classifier:
              precision    recall  f1-score   support

           0       0.84      0.81      0.82      1037
           1       0.81      0.84      0.83      1029

    accuracy                           0.83      2066
   macro avg       0.83      0.83      0.83      2066
weighted avg       0.83      0.83      0.83      2066

ROC-AUC: 0.9048410933460034
------------------------------------------------------------
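With all three models fitted, a compact side-by-side of the test-set ROC-AUC scores makes the ranking explicit (a sketch using the models already in scope):

# Sketch: side-by-side ROC-AUC comparison of the three fitted models
for name, model in [("Logistic Regression", logreg),
                    ("Random Forest", rf),
                    ("XGBoost", xgb)]:
    auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    print(f"{name:20s} ROC-AUC: {auc:.3f}")

On this split, Random Forest edges out XGBoost (0.914 vs 0.905), with logistic regression a few points behind at 0.881.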

# Feature Importance from Random Forest
importances = rf.feature_importances_
indices = np.argsort(importances)[-10:]   # indices of the 10 largest
features = X.columns[indices]

plt.figure(figsize=(10, 6))
plt.title("Top 10 Feature Importances (Random Forest)")
plt.barh(range(len(indices)), importances[indices], align="center")
plt.yticks(range(len(indices)), features)
plt.xlabel("Relative Importance")
plt.show()
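For a non-graphical view, the same importances can be read off as a sorted table (a sketch using pandas, which is already imported):

# Sketch: the same top-10 importances as a sorted Series
top10 = pd.Series(importances, index=X.columns).nlargest(10)
print(top10)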
