DATA SCIENCE & MACHINE LEARNING
P23CAP26 LABORATORY
EX.NO: 1 – R AS CALCULATOR APPLICATION.
PROGRAM:
# Sum of x and y (vectorized: works elementwise on numeric vectors).
add <- function(x, y) {
  x + y
}
# Difference x - y (vectorized: works elementwise on numeric vectors).
subtract <- function(x, y) {
  x - y
}
# Product of x and y (vectorized: works elementwise on numeric vectors).
multiply <- function(x, y) {
  x * y
}
# Quotient x / y (vectorized; division by zero yields Inf/NaN as in base R).
divide <- function(x, y) {
  x / y
}
# Menu-driven calculator: read an operation choice and two numbers,
# then dispatch to the matching arithmetic function defined above.
print("Select operation.")
print("1.Add")
print("2.Subtract")
print("3.Multiply")
print("4.Divide")
choice <- as.integer(readline(prompt = "Enter choice[1/2/3/4]: "))
# as.numeric (not as.integer) so decimal inputs such as 2.5 are not truncated
num1 <- as.numeric(readline(prompt = "Enter first number: "))
num2 <- as.numeric(readline(prompt = "Enter second number: "))
# Fail loudly on an invalid menu choice; otherwise both switch() calls
# below would silently return NULL.
if (is.na(choice) || choice < 1 || choice > 4) {
  stop("Invalid choice: enter 1, 2, 3 or 4.", call. = FALSE)
}
operator <- switch(choice, "+", "-", "*", "/")
result <- switch(choice,
  add(num1, num2),
  subtract(num1, num2),
  multiply(num1, num2),
  divide(num1, num2)
)
print(paste(num1, operator, num2, "=", result))
OUTPUT
EX.NO: 2 – DESCRIPTIVE STATISTICS IN R.
#MEAN#
# Daily ratings; compute the mean by hand (sum / count) and verify
# against the built-in mean().
how_was_the_day <- c(2, 3, 4, 3, 5, 4, 2, 1, 3)
tot_day <- sum(how_was_the_day)
cnt_day <- length(how_was_the_day)
avg_for_day <- tot_day / cnt_day
print(avg_for_day)
mean(how_was_the_day)
#MEDIAN#
# median() returns the middle value; for an even-length vector it is
# the mean of the two middle values.
a <- c(200, 400, 1000, 450, 234, 700)
median(a)
b <- c(7000, 4000, 2000, 1000, 5000, 5000)
median(b)
# Renamed from `c`: naming a variable `c` masks base R's c() constructor.
vals <- c(20, 30, 40, 50, 60)
median(vals)
#MODE#
# Most frequent value in v; on a tie, the value that appears first in v
# wins (which.max returns the first maximum).
getmode <- function(v) {
  distinct_vals <- unique(v)
  freq <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(freq)]
}
# Demo: every sale value is unique, so the first element is returned.
sale <- c(10, 45, 67, 30, 60, 34, 59, 90)
result <- getmode(sale)
print(result)
OUTPUT
EX . NO: 3 - READING AND WRITING DIFFERENT TYPES OF DATASETS
PROGRAM:
# Install the required packages only when missing: an unconditional
# install.packages() re-downloads from CRAN on every run of the script.
for (pkg in c("readr", "readxl", "writexl", "haven")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# Load the packages
library(readr)
library(readxl)
library(writexl)
library(haven)
# CSV Files
df_csv <- read_csv("file.csv")
write_csv(df_csv, "output.csv")
# Excel Files (XLSX)
df_excel <- read_excel("file.xlsx")
write_xlsx(df_excel, "output.xlsx")
# Text Files (Tab-Delimited)
df_tsv <- read_tsv("file.tsv")
write_tsv(df_tsv, "output.tsv")
OUTPUT:
EX.NO: 4. VISUALIZATION.
PROGRAM:
# EX.NO: 4 — lattice visualizations of mtcars fuel economy (mpg).
# lattice and datasets ship with every standard R installation.
library(lattice)
library(datasets)
# Print the dataset and its structure
mtcars
str(mtcars)
# NOTE: the stray leading numerals ("1.", "2.", ...) before each comment
# were top-level numeric literals that auto-printed "[1] 1" etc.; removed.
#HISTOGRAM#
histogram(~mpg, data = mtcars)
#BOX PLOT#
bwplot(~mpg, data = mtcars)
#DENSITY PLOT#
densityplot(~mpg, data = mtcars)
#BAR GRAPH#
barchart(~mpg, data = mtcars)
OUTPUT:
HISTOGRAM
BOXPLOT
DENSITY PLOT
BAR GRAPH
EX.NO:5 CORRELATION AND COVARIANCE
PROGRAM:
# EX.NO: 5 — covariance and correlation of two paired numeric samples.
x <- c(2, 4, 6, 8, 10)
y <- c(1, 3, 2, 5, 7)
#COVARIANCE#
# cov() uses the unbiased (n - 1) denominator by default.
covariance_xy <- cov(x, y)
cat("Covariance between x and y:", covariance_xy, "\n")
#CORRELATION#
# Pearson correlation: covariance scaled by both standard deviations.
correlation_xy <- cor(x, y)
cat("Correlation between x and y:", correlation_xy, "\n")
OUTPUT:
EX.NO: 6 – REGRESSION MODEL.
PROGRAM:
# Set seed for reproducibility
set.seed(42)
# Simulate a predictor and a linear response with Gaussian noise
# (true intercept 3, true slope 2)
x <- rnorm(100, mean = 5, sd = 2)
noise <- rnorm(100)
y <- 3 + 2 * x + noise
# Fit the simple linear regression y ~ x
model <- lm(y ~ x)
# Print the fitted-model summary (coefficients, R-squared, etc.)
model_summary <- summary(model)
print(model_summary)
# Scatter plot of the data with the fitted regression line overlaid
plot(x, y, main = "Scatter Plot with Regression Line", xlab = "x", ylab = "y")
abline(model, col = "blue")
OUTPUT:
EX.NO: 7 – MULTIPLE REGRESSION.
PROGRAM:
# EX.NO: 7 — multiple linear regression on mtcars.
# Base R only: the original installed and loaded tidyverse just for
# tibble(), which data.frame() replaces with identical predict() output.
data(mtcars)
head(mtcars)
# Model mpg on horsepower, weight and cylinder count
model <- lm(mpg ~ hp + wt + cyl, data = mtcars)
summary(model)
# Predict mpg for two hypothetical cars
new_data <- data.frame(hp = c(150, 200), wt = c(2.5, 3.0), cyl = c(4, 6))
predictions <- predict(model, newdata = new_data)
print(predictions)
# Standard diagnostic plots (residuals, Q-Q, scale-location, leverage)
# in a 2x2 grid; note par() is left modified, as in the original script.
par(mfrow = c(2, 2))
plot(model)
Output:
print(predictions)
1 2
24.36217 19.99355
Plot:
EX.NO: 8 – REGRESSION MODEL FOR PREDICTION.
PROGRAM:
# Load necessary libraries
library(ggplot2) # For visualization
# Load and inspect the dataset
data(mtcars)
head(mtcars)
summary(mtcars)
# Linear model: predict mpg from weight (wt) and horsepower (hp)
model <- lm(mpg ~ wt + hp, data = mtcars)
# Show the fitted-model summary
summary(model)
# Three new cars to predict mpg for
new_data <- data.frame(wt = c(2.5, 3.0, 3.5), hp = c(100, 120, 150))
predictions <- predict(model, new_data)
cat("Predictions for new data:\n")
print(predictions)
# Store fitted values on the original data for the comparison plot
mtcars$predicted_mpg <- predict(model, mtcars)
# Actual vs. predicted: points on the red y = x line are perfect fits
comparison_plot <- ggplot(mtcars, aes(x = mpg, y = predicted_mpg)) +
  geom_point(color = "blue") +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(title = "Actual vs. Predicted MPG",
       x = "Actual MPG",
       y = "Predicted MPG") +
  theme_minimal()
print(comparison_plot)
OUTPUT :
EX.NO : 9 – CLASSIFICATION MODEL.
PROGRAM:
# EX.NO: 9 — random-forest classifier for iris species.
# Install the packages only when missing: an unconditional
# install.packages() re-downloads from CRAN on every run.
for (pkg in c("caret", "randomForest")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(caret)
library(randomForest)
# Load the dataset
data(iris)
# Split into training (70%) and testing (30%) sets, stratified by Species
set.seed(123) # for reproducibility
trainIndex <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]
# Train a Random Forest model (caret resamples to tune mtry)
model <- train(Species ~ ., data = trainData, method = "rf")
# Make predictions on the held-out test set
predictions <- predict(model, newdata = testData)
# Evaluate model performance
confMatrix <- confusionMatrix(predictions, testData$Species)
print(confMatrix)
OUTPUT :
Confusion Matrix and Statistics
Reference
Prediction setosa versicolor virginica
setosa 15 0 0
versicolor 0 14 2
virginica 0 1 13
Overall Statistics
Accuracy : 0.9333
95% CI : (0.8173, 0.986)
No Information Rate : 0.3333
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.9
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: setosa Class: versicolor Class: virginica
Sensitivity 1.0000 0.9333 0.8667
Specificity 1.0000 0.9333 0.9667
Pos Pred Value 1.0000 0.8750 0.9286
Neg Pred Value 1.0000 0.9655 0.9355
Prevalence 0.3333 0.3333 0.3333
Detection Rate 0.3333 0.3111 0.2889
Detection Prevalence 0.3333 0.3556 0.3111
Balanced Accuracy 1.0000 0.9333 0.9167
EX.NO: 10 – CLUSTERING MODEL.
PROGRAM:
# K-means clustering of iris (kmeans() is in the auto-loaded stats package)
set.seed(123) # For reproducibility
# Load the iris dataset
data(iris)
# Drop the Species column: cluster on the four numeric measurements only
iris_features <- iris[, -5]
# Three clusters; 20 random starts guard against poor local optima
kmeans_result <- kmeans(iris_features, centers = 3, nstart = 20)
# Show the cluster assigned to each observation
print(kmeans_result$cluster)
# Attach cluster labels to the original data for later plotting
iris$Cluster <- as.factor(kmeans_result$cluster)
# Plot the clusters
library(ggplot2)
# Petal dimensions separate the three clusters most clearly
cluster_plot <- ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Cluster)) +
  geom_point(size = 3) +
  labs(title = "K-means Clustering of Iris Dataset") +
  theme_minimal()
print(cluster_plot)
OUTPUT: