3/6/24, 9:59 AM                                                 Untitled27.ipynb - Colaboratory
      import numpy as np
      import matplotlib.pyplot as plt
      import pandas as pd
      import seaborn as sns
      %matplotlib inline
      # Load the salary dataset from a CSV file in the working directory into a DataFrame.
      # Later cells read the 'YearsExperience' and 'Salary' columns from this frame.
      Data = pd.read_csv('Salary_Data - Salary_Data.csv')
      # Display basic info about the dataset: row count, column names, non-null counts
      # and dtypes. info() prints its report directly rather than returning it.
      Data.info()
              <class 'pandas.core.frame.DataFrame'>
              RangeIndex: 30 entries, 0 to 29
              Data columns (total 2 columns):
               #   Column           Non-Null Count                 Dtype
              --- ------            --------------                 -----
               0   YearsExperience 30 non-null                     float64
               1   Salary           30 non-null                    int64
              dtypes: float64(1), int64(1)
              memory usage: 608.0 bytes
      # Display descriptive statistics of the dataset: count, mean, std, min, quartiles
      # and max for each numeric column. As the last expression in the cell, the
      # resulting table is rendered as the cell output.
      Data.describe()
                          YearsExperience                Salary
                  count          30.000000           30.000000
                  mean            5.313333       76003.000000
                   std            2.837888       27414.429785
                  min             1.100000       37731.000000
                  25%             3.200000       56720.750000
                  50%             4.700000       65237.000000
                  75%             7.700000      100544.750000
                  max            10.500000      122391.000000
      # Plot pairwise relationships in the dataset: one panel per pair of columns in
      # Data. The call returns a seaborn PairGrid, which the notebook echoes as output.
      sns.pairplot(Data)
https://colab.research.google.com/drive/1kvXbQLsxeB40qzEbiBRoPPq-tqCGiUU3#scrollTo=Haca6ahBBeB3&printMode=true   1/5
3/6/24, 9:59 AM                                                 Untitled27.ipynb - Colaboratory
              <seaborn.axisgrid.PairGrid at 0x78cced27a410>
      # Compute the pairwise correlation matrix of the columns first, then draw it as
      # an annotated heatmap so each cell shows its correlation coefficient.
      correlation_matrix = Data.corr()
      sns.heatmap(correlation_matrix, annot=True)
https://colab.research.google.com/drive/1kvXbQLsxeB40qzEbiBRoPPq-tqCGiUU3#scrollTo=Haca6ahBBeB3&printMode=true   2/5
3/6/24, 9:59 AM                                                 Untitled27.ipynb - Colaboratory
              <Axes: >
      # Plot distribution of Salary as a density-normalised histogram with a KDE overlay.
      # `sns.distplot` is deprecated and scheduled for removal in seaborn v0.14.0;
      # `histplot` with kde=True and stat="density" is the axes-level replacement that
      # reproduces the same kind of figure (histogram + smooth density curve).
      # kde_kws=dict(cut=3) extends the KDE curve past the data range, as distplot did.
      sns.histplot(Data["Salary"], kde=True, stat="density", kde_kws=dict(cut=3))
https://colab.research.google.com/drive/1kvXbQLsxeB40qzEbiBRoPPq-tqCGiUU3#scrollTo=Haca6ahBBeB3&printMode=true   3/5
3/6/24, 9:59 AM                                                 Untitled27.ipynb - Colaboratory
              <ipython-input-7-f50739657602>:2: UserWarning:
              `distplot` is a deprecated function and will be removed in seaborn v0.14.0.
              Please adapt your code to use either `displot` (a figure-level function with
              similar flexibility) or `histplot` (an axes-level function for histograms).
              For a guide to updating your code to use the new functions, please see
              https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
                sns.distplot(Data["Salary"])
              <Axes: xlabel='Salary', ylabel='Density'>
      # Salary cut-off used to turn the continuous Salary column into a binary label.
      threshold = 70000  # Example threshold
      # Binary target: 1 when Salary is strictly greater than the threshold, else 0.
      Data['AboveThreshold'] = (Data['Salary'] > threshold).astype(int)
      # Feature matrix (double brackets keep it a one-column DataFrame) and label vector.
      X = Data[['YearsExperience']]
      y = Data['AboveThreshold']
      from sklearn.model_selection import train_test_split
      # Hold out 40% of the rows as the test set; the fixed random_state makes the
      # shuffle, and therefore the split, reproducible between runs.
      # NOTE(review): the original line ended at "random_state=21" with no closing
      # parenthesis (cut off at the page edge). The call is closed here; confirm the
      # seed value itself was not truncated as well.
      X_train, X_test, y_train, y_test = train_test_split(
          X, y, test_size=0.4, random_state=21
      )
      # Training the logistic regression model
      from sklearn.linear_model import LogisticRegression
      # Estimator is created with scikit-learn's default settings (no arguments passed).
      log_reg = LogisticRegression()
      # Fit on the training split only. fit() is the last expression in the cell, so the
      # notebook displays the fitted estimator as the cell output.
      log_reg.fit(X_train, y_train)
https://colab.research.google.com/drive/1kvXbQLsxeB40qzEbiBRoPPq-tqCGiUU3#scrollTo=Haca6ahBBeB3&printMode=true   4/5
3/6/24, 9:59 AM                                                 Untitled27.ipynb - Colaboratory
                  ▾ LogisticRegression
              LogisticRegression()
      # Making predictions: predicted class labels (0/1) for the held-out test rows.
      predictions = log_reg.predict(X_test)
      # Evaluating the model
      from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
      # Fraction of test rows whose predicted label matches the true label.
      accuracy = accuracy_score(y_test, predictions)
      # Counts of true vs. predicted labels. The original line had lost its underscores
      # and the argument comma ("conf matrix = confusion matrix(y test predictions)"),
      # which is not valid Python; the identifiers are restored here.
      # NOTE(review): classification_report is imported but not used in the visible
      # lines; the cell may continue past what is shown.
      conf_matrix = confusion_matrix(y_test, predictions)
https://colab.research.google.com/drive/1kvXbQLsxeB40qzEbiBRoPPq-tqCGiUU3#scrollTo=Haca6ahBBeB3&printMode=true   5/5