Avinash Shukla(27)
LABSHEET 2
# IMPORTANT: RUN THIS CELL IN ORDER
# TO THE CORRECT LOCATION (/kaggle/i
# THEN FEEL FREE TO DELETE THIS CELL
# NOTE: THIS NOTEBOOK ENVIRONMENT DI
# ENVIRONMENT SO THERE MAY BE MISSIN
# NOTEBOOK.
import os import sys
from tempfile import NamedTemporaryFfrom urllib.request import
urlopen
from urllib.parse import unquote, urfrom urllib.error import
HTTPError
from zipfile import ZipFileimport tarfile
import shutil
CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'headbrain:htt
KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/workingKAGGLE_SYMLINK='kaggle'
!umount /kaggle/input/ 2> /dev/null
Avinash Shukla(27)
shutil.rmtree('/kaggle/input', ignor os.makedirs(KAGGLE_INPUT_PATH,
0o777os.makedirs(KAGGLE_WORKING_PATH, 0o7
try:
os.symlink(KAGGLE_INPUT_PATH, os.pexcept FileExistsError:
pass
try:
os.symlink(KAGGLE_WORKING_PATH, osexcept FileExistsError:
pass
# Download every configured data source and unpack it under
# KAGGLE_INPUT_PATH/<directory>. A source that fails to download is reported
# and skipped so the remaining sources are still attempted.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry has the form "<directory>:<url-encoded download URL>".
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header can be absent (e.g. chunked transfer); fall back to
            # an empty progress bar instead of crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / total_bytes) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # tarfile reopens the temp file by name, so buffered bytes must be
            # on disk before extraction starts.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here
                # would shadow the module and break the next tar source.
                # NOTE(review): extractall() trusts member paths inside the
                # archive; only use with trusted download URLs.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue
print('Data source import complete.'
Downloading headbrain, 1362 bytes compressed
[==================================================] 1362 bytes downloaded
Downloaded and uncompressed: headbrain
Data source import complete.
Implement and demonstrate Multiple Linear Regression for Brain Weights
Prediction using sklearn. Read the training data
Avinash Shukla(27)
from a HeadBrain.CSV file.
Question 1
1. Import the necessary libraries
import pandas as pdimport numpy as np
import matplotlib.pyplot as pltimport seaborn as sns
2. Load the dataset using pandas.read_csv()
data = pd.read_csv('/kaggle/input/he
3. Display the first few rows of the dataframe
# Preview the first rows (DataFrame.head() defaults to five) to confirm the
# CSV loaded with the expected columns.
print("First few rows of the dataframe:")
print(data.head())
First few rows of the dataframe:
Gender Age Range Head Size(cm^3) Brain Weight(grams)
0 1 1 4512 1530
1 1 1 3738 1297
2 1 1 4261 1335
3 1 1 3777 1282
4 1 1 4177 1590
4. Check for any missing values in the dataset
Avinash Shukla(27)
# Count the missing (null) entries in each column.
print("\nMissing values in the dataset:")
print(data.isnull().sum())
Missing values in the dataset:
Gender 0
Age Range 0
Head Size(cm^3) 0
Brain Weight(grams) 0
dtype: int64
5. Display the summary statistics of the dataset
# Per-column count, mean, std, min, quartiles and max.
print("\nSummary statistics of the dataset:")
print(data.describe())
Summary statistics of the dataset:
           Gender   Age Range  Head Size(cm^3)  Brain Weight(grams)
count  237.000000  237.000000       237.000000           237.000000
mean     1.434599    1.535865      3633.991561          1282.873418
std      0.496753    0.499768       365.261422           120.340446
min      1.000000    1.000000      2720.000000           955.000000
25%      1.000000    1.000000      3389.000000          1207.000000
50%      1.000000    2.000000      3614.000000          1280.000000
75%      2.000000    2.000000      3876.000000          1350.000000
max      2.000000    2.000000      4747.000000          1635.000000
6. Visualize the distribution of the target variable (Brain
Weight)
plt.figure(figsize=(8,6))
sns.histplot(data['Brain Weight(gramplt.title('Distribution of
Brain Weiplt.xlabel('Brain Weight (grams)') plt.ylabel('Frequency')
plt.grid(True)
Avinash Shukla(27)
plt.show()
Avinash Shukla(27)
Question 2
1. Import the necessary libraries
import pandas as pd
import matplotlib.pyplot as pltimport seaborn as sns
2. Load the dataset
data = pd.read_csv('/kaggle/input/he
Avinash Shukla(27)
3. Calculate the correlation matrix using df.corr()
Display the correlation matrix
# Pairwise correlation between every pair of columns in the dataframe.
correlation_matrix = data.corr()
print("Correlation Matrix:")
print(correlation_matrix)
Correlation Matrix:
                       Gender  Age Range  Head Size(cm^3)  Brain Weight(grams)
Gender               1.000000  -0.088652        -0.514050            -0.465266
Age Range           -0.088652   1.000000        -0.105428            -0.169438
Head Size(cm^3)     -0.514050  -0.105428         1.000000             0.799570
Brain Weight(grams) -0.465266  -0.169438         0.799570             1.000000
4. Visualize the correlation matrix using a heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(correlation_matrix, annoplt.title('Correlation
Heatmap')
plt.show()
Avinash Shukla(27)
5. Identify and list the features with the highest positive
and negative correlation with the target variable
target_variable = 'Brain Weight(gram
highest_positive_corr = correlation_
highest_negative_corr = correlation_
print(f"Highest Positive Correlation
print(f"Highest Negative Correlation
Highest Positive Correlation with Brain Weight(grams): 0.7995697092542966
Highest Negative Correlation with Brain Weight(grams): -0.46526630736561253
Question 3
Avinash Shukla(27)
1. Import necessary libraries
Avinash Shukla(27)
import pandas as pd
from sklearn.model_selection import from sklearn.preprocessing
import St
2. Load the dataset
data = pd.read_csv('/kaggle/input/he
3. Select the features and the target variable
Assuming the target variable is 'Brain Weight(grams)'
# Feature matrix and target vector for the regression.
# NOTE(review): the third feature name was cut off in this copy. 'Gender' is
# the only remaining non-target column and the fitted model reports three
# coefficients; confirm against the original notebook.
X = data[['Head Size(cm^3)', 'Age Range', 'Gender']]
y = data['Brain Weight(grams)']
4. Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = t
5. Standardize the feature variables
Display the result
# Fit the scaler on the training split only, then apply the same training
# mean/std to the test split so no test-set statistics leak into the model.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("X_train_scaled:", X_train_scaled)
print("X_test_scaled:", X_test_scaled)
Avinash Shukla(27)
[ 7.84507453e-01 9.81980506e-01 -8.79882690e-01]
[ 1.52632995e+00 -1.01835015e+00 -8.79882690e-01]]
Question 4
1. Import necessary libraries
from sklearn.linear_model import Lin
2. Instantiate the Linear Regression model
# Create the regression estimator with its default settings (no arguments passed).
model = LinearRegression()
3. Train the model using the training data
# Fit on the standardized training features against the training targets.
model.fit(X_train_scaled, y_train)
▾ LinearRegression
4. Display the model's coefficients and intercept
# One coefficient per (standardized) feature column, plus the intercept.
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)
Coefficients: [ 86.93015022 -12.1008346 -8.38590218]
Intercept: 1284.3030303030303
Question 5
1. Predict the brain weights using the testing data
Avinash Shukla(27)
# Predict on the standardized test features (same scaling the model was fit on).
y_pred = model.predict(X_test_scaled)
2. Calculate and display performance metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)

# Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)

# Root Mean Squared Error (RMSE): square root of the MSE, which puts the
# error back in the same units as the target.
rmse = np.sqrt(mse)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)
Mean Absolute Error (MAE): 54.9596474300944
Mean Squared Error (MSE): 4394.40353991182
Root Mean Squared Error (RMSE): 66.29029747943375
3. Plot the predicted vs actual brain weights
import matplotlib.pyplot as plt
# Plot actual vs predicted
Avinash Shukla(27)
plt.scatter(y_test, y_pred)
plt.plot([min(y_test), max(y_test)], plt.xlabel('Actual Brain
Weights')
plt.ylabel('Predicted Brain Weights' plt.title('Predicted vs Actual
Brainplt.show()