Brain Weight Prediction - Colab2 (Kaggle)

Avinash Shukla(27)

LABSHEET 2
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input), THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT, SO THERE MAY BE MISSING LIBRARIES USED BY YOUR NOTEBOOK.

import os
import sys
import shutil
import tarfile
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile

CHUNK_SIZE = 40960
# The full download URL is truncated in the source and is left as-is here.
DATA_SOURCE_MAPPING = 'headbrain:htt...'

KAGGLE_INPUT_PATH = '/kaggle/input'
KAGGLE_WORKING_PATH = '/kaggle/working'
KAGGLE_SYMLINK = 'kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
    os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
    pass
try:
    os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
    pass

# Download each mapped data source and extract it under /kaggle/input
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50 - done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                with tarfile.open(tfile.name) as tfile_tar:
                    tfile_tar.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')

Downloading headbrain, 1362 bytes compressed
[==================================================] 1362 bytes downloaded
Downloaded and uncompressed: headbrain
Data source import complete.

Implement and demonstrate Multiple Linear Regression for Brain Weight Prediction using sklearn. Read the training data from a HeadBrain.CSV file.
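Multiple linear regression models the target as a weighted sum of the input features plus an intercept. For this dataset the model takes the general form

Brain Weight = b0 + b1 * Head Size + b2 * Age Range + b3 * Gender

where b0 is the intercept and b1, b2, b3 are coefficients learned from the data (the coefficient symbols here are illustrative; their fitted values appear in Question 4).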

Question 1

1. Import the necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

2. Load the dataset using pandas.read_csv()

data = pd.read_csv('/kaggle/input/headbrain/headbrain.csv')  # path truncated in the source; the 'headbrain' dataset directory is known, the CSV filename is assumed

3. Display the first few rows of the dataframe

print("First few rows of the datafraprint(data.head())


First few rows of the dataframe:
   Gender  Age Range  Head Size(cm^3)  Brain Weight(grams)
0       1          1             4512                 1530
1       1          1             3738                 1297
2       1          1             4261                 1335
3       1          1             3777                 1282
4       1          1             4177                 1590

4. Check for any missing values in the dataset


print("\nMissing values in the dataset:")
print(data.isnull().sum())

Missing values in the dataset:
Gender                 0
Age Range              0
Head Size(cm^3)        0
Brain Weight(grams)    0
dtype: int64

5. Display the summary statistics of the dataset

print("\nSummary statistics of the dprint(data.describe())

Summary statistics of the dataset:
           Gender   Age Range  Head Size(cm^3)  Brain Weight(grams)
count  237.000000  237.000000       237.000000           237.000000
mean     1.434599    1.535865      3633.991561          1282.873418
std      0.496753    0.499768       365.261422           120.340446
min      1.000000    1.000000      2720.000000           955.000000
25%      1.000000    1.000000      3389.000000          1207.000000
50%      1.000000    2.000000      3614.000000          1280.000000
75%      2.000000    2.000000      3876.000000          1350.000000
max      2.000000    2.000000      4747.000000          1635.000000

6. Visualize the distribution of the target variable (Brain Weight)

plt.figure(figsize=(8, 6))
sns.histplot(data['Brain Weight(grams)'])
plt.title('Distribution of Brain Weight')
plt.xlabel('Brain Weight (grams)')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

Question 2

1. Import the necessary libraries

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

2. Load the dataset

data = pd.read_csv('/kaggle/input/headbrain/headbrain.csv')  # same path as in Question 1 (CSV filename assumed)
3. Calculate the correlation matrix using df.corr()

Display the correlation matrix

correlation_matrix = data.corr()

print("Correlation Matrix:")print(correlation_matrix)

Correlation Matrix:
                       Gender  Age Range  Head Size(cm^3)  Brain Weight(grams)
Gender               1.000000  -0.088652        -0.514050            -0.465266
Age Range           -0.088652   1.000000        -0.105428            -0.169438
Head Size(cm^3)     -0.514050  -0.105428         1.000000             0.799570
Brain Weight(grams) -0.465266  -0.169438         0.799570             1.000000

4. Visualize the correlation matrix using a heatmap

plt.figure(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True)
plt.title('Correlation Heatmap')
plt.show()

5. Identify and list the features with the highest positive and negative correlation with the target variable

target_variable = 'Brain Weight(grams)'

# The exact expressions are truncated in the source; this reconstruction excludes
# the target's correlation with itself before taking the max and min.
highest_positive_corr = correlation_matrix[target_variable].drop(target_variable).max()
highest_negative_corr = correlation_matrix[target_variable].drop(target_variable).min()

print(f"Highest Positive Correlation with {target_variable}: {highest_positive_corr}")
print(f"Highest Negative Correlation with {target_variable}: {highest_negative_corr}")

Highest Positive Correlation with Brain Weight(grams): 0.7995697092542966
Highest Negative Correlation with Brain Weight(grams): -0.46526630736561253
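The same information can be read off by ranking every feature's correlation with the target; corr_with_target below is an assumed helper name, not part of the original notebook.

corr_with_target = correlation_matrix['Brain Weight(grams)'].drop('Brain Weight(grams)')
print(corr_with_target.sort_values(ascending=False))
# Expected order from the matrix above: Head Size(cm^3) (+0.80), Age Range (-0.17), Gender (-0.47)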

Question 3

1. Import necessary libraries


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

2. Load the dataset

data = pd.read_csv('/kaggle/input/headbrain/headbrain.csv')  # same path as in Question 1 (CSV filename assumed)

3. Select the features and the target variable

Assuming the target variable is 'Brain Weight(grams)'

X = data[['Head Size(cm^3)', 'Age Range', 'Gender']]  # 'Gender' inferred as the third feature (the fitted model in Question 4 has three coefficients)
y = data['Brain Weight(grams)']

4. Split the dataset into training and testing sets

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # the split arguments are truncated in the source; an 80/20 split with a fixed random_state is assumed
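A quick shape check (not part of the original lab sheet) confirms how the 237 rows are divided between the two sets; the exact counts depend on the test_size actually used, e.g. an 80/20 split gives 189 training and 48 test rows.

print("Training set:", X_train.shape, "Test set:", X_test.shape)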

5. Standardize the feature variables

Display the result

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)  # scale the test set with the training-set statistics

print("X_train_scaled:", X_train_scaled)
print("X_test_scaled:", X_test_scaled)
... (printed scaled arrays truncated in the source; last two rows shown)
 [ 7.84507453e-01  9.81980506e-01 -8.79882690e-01]
 [ 1.52632995e+00 -1.01835015e+00 -8.79882690e-01]]
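StandardScaler transforms each feature to z = (x - mean) / std, using the mean and standard deviation estimated from the training data only, which is why fit_transform is called on X_train and plain transform on X_test. The fitted statistics can be inspected as below (a small added sketch; the values it prints were not recorded in the original output).

print("Feature means:", scaler.mean_)    # per-feature training means
print("Feature scales:", scaler.scale_)  # per-feature training standard deviations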

Question 4

1. Import necessary libraries

from sklearn.linear_model import LinearRegression

2. Instantiate the Linear Regression model

model = LinearRegression()

3. Train the model using the training data

model.fit(X_train_scaled, y_train)

LinearRegression()
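Under the hood, fit() solves the ordinary least squares problem: it chooses the intercept and coefficients that minimize the sum of squared errors, which is equivalent to solving the normal equations beta = (X^T X)^(-1) X^T y for a design matrix X with a leading column of ones. The numpy sketch below reproduces the same computation; the names X_design and beta are illustrative and not part of the original notebook.

import numpy as np

# Design matrix: a column of ones (intercept term) followed by the scaled features
X_design = np.column_stack([np.ones(len(X_train_scaled)), X_train_scaled])
beta, *_ = np.linalg.lstsq(X_design, y_train, rcond=None)
print("Intercept:", beta[0])       # should match model.intercept_
print("Coefficients:", beta[1:])   # should match model.coef_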

4. Display the model's coefficients and intercept

print("Coefficients:", model.coef_) print("Intercept:",


model.intercept_

Coefficients: [ 86.93015022 -12.1008346 -8.38590218]


Intercept: 1284.3030303030303
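Because the features were standardized, these numbers read as follows: the intercept (about 1284.3 grams, the mean brain weight of the training set, since the scaled features have zero mean there) is the prediction for an average observation, and each coefficient is the change in predicted brain weight for a one-standard-deviation increase in that feature. With the column order used for X, the fitted equation is approximately

predicted Brain Weight = 1284.30 + 86.93 * z(Head Size) - 12.10 * z(Age Range) - 8.39 * z(Gender)

where z(.) denotes the standardized value of a feature.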

Question 5

1. Predict the brain weights using the testing data



y_pred = model.predict(X_test_scaled)

2. Calculate and display performance metrics

from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Mean Absolute Error (MAE)

mae = mean_absolute_error(y_test, y_pred)

# Mean Squared Error (MSE)

mse = mean_squared_error(y_test, y_pred)

# Root Mean Squared Error (RMSE)
rmse = np.sqrt(mse)

print("Mean Absolute Error (MAE):", print("Mean Squared Error


(MSE):", mprint("Root Mean Squared Error (RMSE

Mean Absolute Error (MAE): 54.9596474300944
Mean Squared Error (MSE): 4394.40353991182
Root Mean Squared Error (RMSE): 66.29029747943375
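Here MAE is the average absolute prediction error in grams, MSE is the average squared error, and RMSE is simply the square root of MSE: sqrt(4394.4035) ≈ 66.2903, which matches the printed value and amounts to roughly 5% of the mean brain weight (about 1283 g). The snippet below is a small added sanity check, not part of the original notebook.

import numpy as np
assert np.isclose(np.sqrt(4394.40353991182), 66.29029747943375)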

3. Plot the predicted vs actual brain weights

import matplotlib.pyplot as plt

# Plot actual vs predicted


plt.scatter(y_test, y_pred)
# Reference line; assumed to be the y = x identity line, since the exact call is truncated in the source
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--')
plt.xlabel('Actual Brain Weights')
plt.ylabel('Predicted Brain Weights')
plt.title('Predicted vs Actual Brain Weights')
plt.show()
