1. Create NumPy arrays from Python Data Structures, Intrinsic NumPy Objects and Random Functions.
import numpy as np
# From Python Data Structures
list_data = np.array([1, 2, 3, 4, 5])
tuple_data = np.array((1, 2, 3, 4, 5))
dict_data = np.array(list({'a': 1, 'b': 2}.values()))  # build the array from the dict's values
# Intrinsic NumPy Objects
zeros = np.zeros((3, 3))
ones = np.ones((2, 2))
identity = np.eye(4)
arange = np.arange(0, 10, 2)
linspace = np.linspace(0, 1, 5)
# Random Functions
rand = np.random.rand(3, 3)
randint = np.random.randint(1, 10, size=(2, 3))
normal = np.random.normal(0, 1, 5)
# Output
print("From Python Data Structures:")
print("List to Array:", list_data)
print("Tuple to Array:", tuple_data)
print("Dict to Array:", dict_data)
print("\nIntrinsic NumPy Objects:")
print("Zeros Array:", zeros)
print("Ones Array:", ones)
print("Identity Matrix:", identity)
print("Arange:", arange)
print("Linspace:", linspace)
print("\nRandom Functions:")
print("Random Array (Uniform):", rand)
print("Random Integers:", randint)
print("Random Normal Distribution:", normal)
2. Create Pandas Series and DataFrame from various inputs.
import pandas as pd
import numpy as np
# Pandas Series from different inputs
series_from_list = pd.Series([1, 2, 3, 4, 5])
series_from_dict = pd.Series({'a': 1, 'b': 2, 'c': 3})
series_from_numpy = pd.Series(np.array([10, 20, 30, 40]))
# Pandas DataFrame from various inputs
df_from_dict = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]})
df_from_list = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=['X', 'Y'])
df_from_numpy = pd.DataFrame(np.random.rand(3, 4), columns=['W', 'X', 'Y', 'Z'])
# Outputs
print("Series from List:", series_from_list)
print("Series from Dict:", series_from_dict)
print("Series from Numpy Array:", series_from_numpy)
print("\nDataFrame from Dict:")
print(df_from_dict)
print("\nDataFrame from List:")
print(df_from_list)
print("\nDataFrame from Numpy Array:")
print(df_from_numpy)
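# Optional extension (a small sketch beyond the inputs shown above): a DataFrame can also
# be built from a list of dictionaries or from a dictionary of Series.
df_from_records = pd.DataFrame([{'A': 1, 'B': 4}, {'A': 2, 'B': 5}, {'A': 3, 'B': 6}])
df_from_series = pd.DataFrame({'col1': pd.Series([1, 2, 3]), 'col2': pd.Series([4.0, 5.0, 6.0])})
print("\nDataFrame from List of Dicts:")
print(df_from_records)
print("\nDataFrame from Dict of Series:")
print(df_from_series)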
3, 4. Develop a simple linear regression model and perform residual analysis.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing # Use California housing dataset
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# Load California housing dataset
california = fetch_california_housing()
X = california.data[:, california.feature_names.index('AveRooms')].reshape(-1, 1)  # 'AveRooms' (average rooms per household)
y = california.target
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Fit the simple linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# Predict the target values using the test set
y_pred = model.predict(X_test)
# Calculate residuals
residuals = y_test - y_pred
# Residual Analysis
# 1. Plotting residuals vs. fitted values (predictions)
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.scatter(y_pred, residuals, color='blue')
plt.axhline(y=0, color='red', linestyle='--')
plt.title('Residuals vs Fitted Values')
plt.xlabel('Fitted Values')
plt.ylabel('Residuals')
# 2. Plotting histogram of residuals
plt.subplot(1, 2, 2)
sns.histplot(residuals, kde=True, color='green')
plt.title('Histogram of Residuals')
plt.xlabel('Residuals')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()
# 3. Checking Normality with a Q-Q plot
import scipy.stats as stats
plt.figure(figsize=(6, 6))
stats.probplot(residuals, dist="norm", plot=plt)
plt.title('Q-Q Plot of Residuals')
plt.show()
# 4. Checking Homoscedasticity: Residuals vs Fitted values (already shown above)
# 5. Print model performance metrics
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)}")
print(f"R-squared: {r2_score(y_test, y_pred)}")
5. Import any CSV file to Pandas DataFrame and perform the following:
(a) Handle missing data by detecting and dropping/filling missing values.
(b) Transform data using the apply() and map() methods.
import pandas as pd
import numpy as np
file_path = r'C:\Users\peral\Downloads\test.csv' # Use raw string for Windows paths
# Load the CSV file
df = pd.read_csv(file_path)
print(df.head())
# (a) Handle Missing Data
print("\nMissing Data Detection:")
print(df.isnull().sum())
# Dropping rows with missing values
df_dropped = df.dropna()
print("\nDataFrame after Dropping Rows with Missing Values:")
print(df_dropped.head())
# Filling missing values
df_filled = df.copy()
for column in df.columns:
    if df[column].dtype == 'object':  # categorical columns: fill with the mode
        df_filled[column] = df[column].fillna(df[column].mode()[0])
    else:  # numerical columns: fill with the mean
        df_filled[column] = df[column].fillna(df[column].mean())
print("\nDataFrame after Filling Missing Values:")
print(df_filled.head())
# (b) Transform Data using apply() and map()
# Apply transformation for numerical columns
for column in df.columns:
    if df[column].dtype != 'object':  # apply transformation to numerical columns only
        median_value = df[column].median()
        df[column + ' Group'] = df[column].apply(lambda x: 'High' if x > median_value else 'Low')
        print(f"\nDataFrame after Applying Transformation to {column}:")
        print(df[[column, column + ' Group']].head())
# Map categorical columns to uppercase
for column in df.select_dtypes(include=['object']).columns:
    df[column] = df[column].map(lambda x: x.upper() if isinstance(x, str) else x)
    print(f"\nDataFrame after Mapping {column} to Uppercase:")
    print(df[[column]].head())
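# Optional extension with a small inline DataFrame (independent of the CSV above),
# illustrating apply() on a column and map() with a dictionary of labels.
demo = pd.DataFrame({'score': [35, 72, 58, 90], 'grade': ['c', 'a', 'b', 'a']})
demo['score_scaled'] = demo['score'].apply(lambda x: x / 100)                  # element-wise transform
demo['grade_label'] = demo['grade'].map({'a': 'Excellent', 'b': 'Good', 'c': 'Average'})
print("\nSmall apply()/map() demo:")
print(demo)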
6. Visualize data using Line Plots, Bar Plots, Histograms, Density Plots and Scatter Plots.
import seaborn as sns
import matplotlib.pyplot as plt
# Load the built-in Iris dataset
df = sns.load_dataset('iris')
# Line Plot
plt.figure(figsize=(10, 6))
sns.lineplot(x=df.index, y=df['sepal_length'])
plt.title('Line Plot of Sepal Length')
plt.show()
# Bar Plot
plt.figure(figsize=(10, 6))
sns.barplot(x='species', y='sepal_length', data=df)
plt.title('Bar Plot of Sepal Length by Species')
plt.show()
# Histogram
plt.figure(figsize=(10, 6))
sns.histplot(df['sepal_length'], kde=False, bins=20)
plt.title('Histogram of Sepal Length')
plt.show()
# Density Plot (fill=True shades the area under the curve)
plt.figure(figsize=(10, 6))
sns.kdeplot(df['sepal_length'], fill=True)
plt.title('Density Plot of Sepal Length')
plt.show()
# Scatter Plot
plt.figure(figsize=(10, 6))
sns.scatterplot(x='sepal_length', y='sepal_width', hue='species', data=df)
plt.title('Scatter Plot of Sepal Length vs Sepal Width')
plt.show()
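# Optional extension: a Seaborn pairplot combines scatter plots and distributions
# for all numeric columns of the Iris dataset in a single figure.
sns.pairplot(df, hue='species')
plt.suptitle('Pair Plot of Iris Features', y=1.02)
plt.show()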
7. Manipulation of NumPy arrays - Indexing, Slicing, Reshaping, Joining and Splitting.
import numpy as np
# Indexing
arr = np.array([1, 2, 3, 4, 5])
print(arr[2])
# Slicing
arr2 = np.array([10, 20, 30, 40, 50, 60])
print(arr2[1:4])
# Reshaping
arr3 = np.array([1, 2, 3, 4, 5, 6])
print(arr3.reshape(2, 3))
# Joining
arr4 = np.array([1, 2, 3])
arr5 = np.array([4, 5, 6])
print(np.concatenate((arr4, arr5)))
# Splitting
arr6 = np.array([10, 20, 30, 40, 50, 60])
print(np.split(arr6, 3))
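# Optional extension: joining along an axis and splitting into unequal parts.
arr7 = np.array([[1, 2, 3], [4, 5, 6]])
arr8 = np.array([[7, 8, 9], [10, 11, 12]])
print(np.vstack((arr7, arr8)))   # join row-wise -> shape (4, 3)
print(np.hstack((arr7, arr8)))   # join column-wise -> shape (2, 6)
print(np.array_split(arr6, 4))   # unlike np.split, allows unequal-sized pieces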
8. Import any CSV file to Pandas DataFrame and perform the following:
(a) Visualize the first and last 10 records.
(b) Get the shape, index and column details.
import pandas as pd
# Import CSV file
df = pd.read_csv(r"C:\Users\peral\Downloads\train.csv") # Make sure the path is correct
# (a) Visualize the first and last 10 records
print("First 10 records:")
print(df.head(10))
print("\nLast 10 records:")
print(df.tail(10))
# (b) Get the shape, index, and column details
print("\nShape:", df.shape)
print("Index:", df.index)
print("Columns:", df.columns)
9. Residual plots of linear regression.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Load data
df = pd.read_csv(r'C:\Users\peral\Downloads\train.csv')  # Make sure the path is correct
# Choose the feature and target columns
X = df[['age']].values # Use 'age' as the feature (replace with any other numerical column you prefer)
y = df['stroke'].values # Use 'stroke' as the target column
# Train linear regression model
model = LinearRegression()
model.fit(X, y)
# Predictions
y_pred = model.predict(X)
# Residuals
residuals = y - y_pred
# Plot residuals
plt.scatter(X, residuals)
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Age')
plt.ylabel('Residuals')
plt.title('Residual Plot')
plt.show()
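# mean_squared_error is imported above but unused; printing it quantifies the spread
# that is visible in the residual plot.
print(f"Mean Squared Error: {mean_squared_error(y, y_pred):.4f}")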
10. Computation on NumPy arrays using Universal Functions and Mathematical methods.
import numpy as np
# Create array
arr = np.array([1, 2, 3, 4, 5])
# Universal functions (ufuncs)
print(np.sqrt(arr))
print(np.exp(arr))
print(np.log(arr))
# Mathematical methods
print(np.sum(arr))
print(np.mean(arr))
print(np.median(arr))
print(np.std(arr))
print(np.var(arr))
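# Optional extension: the same mathematical methods accept an axis argument on 2-D arrays.
matrix = np.array([[1, 2, 3], [4, 5, 6]])
print(np.sum(matrix, axis=0))   # column sums -> [5 7 9]
print(np.mean(matrix, axis=1))  # row means -> [2. 5.]
print(np.cumsum(arr))           # running total of the 1-D array above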
11. Import any CSV file to Pandas DataFrame and perform the following:
(a) Detect and filter outliers.
(b) Perform Vectorized String operations on Pandas Series.
import pandas as pd
import numpy as np
df = pd.read_csv(r'C:\Users\peral\Downloads\train.csv')
column_name = 'age'
Q1 = df[column_name].quantile(0.25)
Q3 = df[column_name].quantile(0.75)
IQR = Q3 - Q1
filtered_df = df[(df[column_name] >= (Q1 - 1.5 * IQR)) & (df[column_name] <= (Q3 + 1.5 * IQR))]
print("Filtered DataFrame (Outliers removed):")
print(filtered_df)
string_column = 'gender'
df[string_column] = df[string_column].str.lower()
df[string_column] = df[string_column].str.replace('old', 'new')
print("\nDataFrame after string operations:")
print(df[[string_column]].head())
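# Optional extension: a z-score rule is an alternative outlier filter, and a few more
# vectorized string operations on the same column. The 3-sigma cutoff is an example choice.
z_scores = (df[column_name] - df[column_name].mean()) / df[column_name].std()
print("\nRows flagged as outliers by |z| > 3:", (z_scores.abs() > 3).sum())
print(df[string_column].str.len().head())                        # string length per row
print(df[string_column].str.contains('male', na=False).head())   # substring test (also True for 'female')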
12. Download the House Pricing dataset from Kaggle and map the values to aesthetics.
import pandas as pd
file_path = 'path_to_your_downloaded_folder/train.csv'
df = pd.read_csv(file_path)
aesthetic_mapping = {
    'ExterCond': {'Po': 'Poor', 'Fa': 'Fair', 'TA': 'Average', 'Gd': 'Good', 'Ex': 'Excellent'},
    'ExterQual': {'Po': 'Poor', 'Fa': 'Fair', 'TA': 'Average', 'Gd': 'Good', 'Ex': 'Excellent'},
    'PoolQC': {'NA': 'No Pool', 'Ex': 'Excellent', 'Gd': 'Good', 'TA': 'Average', 'Fa': 'Fair'},
}  # close the mapping dictionary before using it
for column, mapping in aesthetic_mapping.items():
    if column in df.columns:
        df[column] = df[column].map(mapping).fillna(df[column])  # keep the original value if no mapping applies
print(df[['ExterCond', 'ExterQual', 'PoolQC']].head())
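# Optional extension: value_counts() confirms that the codes were replaced by the
# descriptive labels defined in aesthetic_mapping.
for column in ['ExterCond', 'ExterQual', 'PoolQC']:
    print(f"\n{column} value counts after mapping:")
    print(df[column].value_counts(dropna=False))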