DATA SCIENCE & MACHINE LEARNING
P23CAP26 LABORATORY
EX.NO: 1 – R AS CALCULATOR APPLICATION.
PROGRAM:
# Sum of x and y (vectorized: works elementwise on numeric vectors).
add <- function(x, y) {
  x + y
}
# Difference x - y (vectorized: works elementwise on numeric vectors).
subtract <- function(x, y) {
  x - y
}
# Product of x and y (vectorized: works elementwise on numeric vectors).
multiply <- function(x, y) {
  x * y
}
# Quotient x / y (vectorized; division by zero yields Inf/NaN as in base R).
divide <- function(x, y) {
  x / y
}
# Menu-driven calculator: read an operation choice and two numbers,
# then dispatch to the matching arithmetic function defined above.
print("Select operation.")
print("1.Add")
print("2.Subtract")
print("3.Multiply")
print("4.Divide")
choice <- as.integer(readline(prompt = "Enter choice[1/2/3/4]: "))
# as.numeric (not as.integer) so decimal inputs such as 2.5 are not truncated
num1 <- as.numeric(readline(prompt = "Enter first number: "))
num2 <- as.numeric(readline(prompt = "Enter second number: "))
# Fail loudly on an invalid menu choice; otherwise both switch() calls
# below would silently return NULL.
if (is.na(choice) || choice < 1 || choice > 4) {
  stop("Invalid choice: enter 1, 2, 3 or 4.", call. = FALSE)
}
operator <- switch(choice, "+", "-", "*", "/")
result <- switch(choice,
  add(num1, num2),
  subtract(num1, num2),
  multiply(num1, num2),
  divide(num1, num2)
)
print(paste(num1, operator, num2, "=", result))
OUTPUT
EX.NO: 2 – DESCRIPTIVE STATISTICS IN R.
#MEAN#
# Daily ratings; compute the mean by hand (sum / count) and verify
# against the built-in mean().
how_was_the_day <- c(2, 3, 4, 3, 5, 4, 2, 1, 3)
tot_day <- sum(how_was_the_day)
cnt_day <- length(how_was_the_day)
avg_for_day <- tot_day / cnt_day
print(avg_for_day)
mean(how_was_the_day)
#MEDIAN#
# median() returns the middle value; for an even-length vector it is
# the mean of the two middle values.
a <- c(200, 400, 1000, 450, 234, 700)
median(a)
b <- c(7000, 4000, 2000, 1000, 5000, 5000)
median(b)
# Renamed from `c`: naming a variable `c` masks base R's c() constructor.
vals <- c(20, 30, 40, 50, 60)
median(vals)
#MODE#
# Most frequent value in v; on a tie, the value that appears first in v
# wins (which.max returns the first maximum).
getmode <- function(v) {
  distinct_vals <- unique(v)
  freq <- tabulate(match(v, distinct_vals))
  distinct_vals[which.max(freq)]
}
# Demo: every sale value is unique, so the first element is returned.
sale <- c(10, 45, 67, 30, 60, 34, 59, 90)
result <- getmode(sale)
print(result)
OUTPUT
EX . NO: 3 - READING AND WRITING DIFFERENT TYPES OF DATASETS
PROGRAM:
# Install the required packages only when missing: an unconditional
# install.packages() re-downloads from CRAN on every run of the script.
for (pkg in c("readr", "readxl", "writexl", "haven")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
# Load the packages
library(readr)
library(readxl)
library(writexl)
library(haven)
# CSV Files
df_csv <- read_csv("file.csv")
write_csv(df_csv, "output.csv")
# Excel Files (XLSX)
df_excel <- read_excel("file.xlsx")
write_xlsx(df_excel, "output.xlsx")
# Text Files (Tab-Delimited)
df_tsv <- read_tsv("file.tsv")
write_tsv(df_tsv, "output.tsv")
OUTPUT:
EX.NO: 4. VISUALIZATION.
PROGRAM:
# EX.NO: 4 — lattice visualizations of mtcars fuel economy (mpg).
# lattice and datasets ship with every standard R installation.
library(lattice)
library(datasets)
# Print the dataset and its structure
mtcars
str(mtcars)
# NOTE: the stray leading numerals ("1.", "2.", ...) before each comment
# were top-level numeric literals that auto-printed "[1] 1" etc.; removed.
#HISTOGRAM#
histogram(~mpg, data = mtcars)
#BOX PLOT#
bwplot(~mpg, data = mtcars)
#DENSITY PLOT#
densityplot(~mpg, data = mtcars)
#BAR GRAPH#
barchart(~mpg, data = mtcars)
OUTPUT:
HISTOGRAM
BOXPLOT
DENSITY PLOT
BAR GRAPH
EX.NO:5 CORRELATION AND COVARIANCE
PROGRAM:
# EX.NO: 5 — covariance and correlation of two paired numeric samples.
x <- c(2, 4, 6, 8, 10)
y <- c(1, 3, 2, 5, 7)
#COVARIANCE#
# cov() uses the unbiased (n - 1) denominator by default.
covariance_xy <- cov(x, y)
cat("Covariance between x and y:", covariance_xy, "\n")
#CORRELATION#
# Pearson correlation: covariance scaled by both standard deviations.
correlation_xy <- cor(x, y)
cat("Correlation between x and y:", correlation_xy, "\n")
OUTPUT:
EX.NO: 6 – REGRESSION MODEL.
PROGRAM:
# Set seed for reproducibility
set.seed(42)
# Simulate a predictor and a linear response with Gaussian noise
# (true intercept 3, true slope 2)
x <- rnorm(100, mean = 5, sd = 2)
noise <- rnorm(100)
y <- 3 + 2 * x + noise
# Fit the simple linear regression y ~ x
model <- lm(y ~ x)
# Print the fitted-model summary (coefficients, R-squared, etc.)
model_summary <- summary(model)
print(model_summary)
# Scatter plot of the data with the fitted regression line overlaid
plot(x, y, main = "Scatter Plot with Regression Line", xlab = "x", ylab = "y")
abline(model, col = "blue")
OUTPUT:
EX.NO: 7 – MULTIPLE REGRESSION.
PROGRAM:
# EX.NO: 7 — multiple linear regression on mtcars.
# Base R only: the original installed and loaded tidyverse just for
# tibble(), which data.frame() replaces with identical predict() output.
data(mtcars)
head(mtcars)
# Model mpg on horsepower, weight and cylinder count
model <- lm(mpg ~ hp + wt + cyl, data = mtcars)
summary(model)
# Predict mpg for two hypothetical cars
new_data <- data.frame(hp = c(150, 200), wt = c(2.5, 3.0), cyl = c(4, 6))
predictions <- predict(model, newdata = new_data)
print(predictions)
# Standard diagnostic plots (residuals, Q-Q, scale-location, leverage)
# in a 2x2 grid; note par() is left modified, as in the original script.
par(mfrow = c(2, 2))
plot(model)
Output:
print(predictions)
1 2
24.36217 19.99355
Plot:
EX.NO: 8 – REGRESSION MODEL FOR PREDICTION.
PROGRAM:
# Load necessary libraries
library(ggplot2) # For visualization
# Load and inspect the dataset
data(mtcars)
head(mtcars)
summary(mtcars)
# Linear model: predict mpg from weight (wt) and horsepower (hp)
model <- lm(mpg ~ wt + hp, data = mtcars)
# Show the fitted-model summary
summary(model)
# Three new cars to predict mpg for
new_data <- data.frame(wt = c(2.5, 3.0, 3.5), hp = c(100, 120, 150))
predictions <- predict(model, new_data)
cat("Predictions for new data:\n")
print(predictions)
# Store fitted values on the original data for the comparison plot
mtcars$predicted_mpg <- predict(model, mtcars)
# Actual vs. predicted: points on the red y = x line are perfect fits
comparison_plot <- ggplot(mtcars, aes(x = mpg, y = predicted_mpg)) +
  geom_point(color = "blue") +
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(title = "Actual vs. Predicted MPG",
       x = "Actual MPG",
       y = "Predicted MPG") +
  theme_minimal()
print(comparison_plot)
OUTPUT :
EX.NO : 9 – CLASSIFICATION MODEL.
PROGRAM:
# EX.NO: 9 — random-forest classifier for iris species.
# Install the packages only when missing: an unconditional
# install.packages() re-downloads from CRAN on every run.
for (pkg in c("caret", "randomForest")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(caret)
library(randomForest)
# Load the dataset
data(iris)
# Split into training (70%) and testing (30%) sets, stratified by Species
set.seed(123) # for reproducibility
trainIndex <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]
# Train a Random Forest model (caret resamples to tune mtry)
model <- train(Species ~ ., data = trainData, method = "rf")
# Make predictions on the held-out test set
predictions <- predict(model, newdata = testData)
# Evaluate model performance
confMatrix <- confusionMatrix(predictions, testData$Species)
print(confMatrix)
OUTPUT :
Confusion Matrix and Statistics
Reference
Prediction setosa versicolor virginica
setosa 15 0 0
versicolor 0 14 2
virginica 0 1 13
Overall Statistics
Accuracy : 0.9333
95% CI : (0.8173, 0.986)
No Information Rate : 0.3333
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.9
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: setosa Class: versicolor Class: virginica
Sensitivity 1.0000 0.9333 0.8667
Specificity 1.0000 0.9333 0.9667
Pos Pred Value 1.0000 0.8750 0.9286
Neg Pred Value 1.0000 0.9655 0.9355
Prevalence 0.3333 0.3333 0.3333
Detection Rate 0.3333 0.3111 0.2889
Detection Prevalence 0.3333 0.3556 0.3111
Balanced Accuracy 1.0000 0.9333 0.9167
EX.NO: 10 – CLUSTERING MODEL.
PROGRAM:
# K-means clustering of iris (kmeans() is in the auto-loaded stats package)
set.seed(123) # For reproducibility
# Load the iris dataset
data(iris)
# Drop the Species column: cluster on the four numeric measurements only
iris_features <- iris[, -5]
# Three clusters; 20 random starts guard against poor local optima
kmeans_result <- kmeans(iris_features, centers = 3, nstart = 20)
# Show the cluster assigned to each observation
print(kmeans_result$cluster)
# Attach cluster labels to the original data for later plotting
iris$Cluster <- as.factor(kmeans_result$cluster)
# Plot the clusters
library(ggplot2)
# Petal dimensions separate the three clusters most clearly
cluster_plot <- ggplot(iris, aes(x = Petal.Length, y = Petal.Width, color = Cluster)) +
  geom_point(size = 3) +
  labs(title = "K-means Clustering of Iris Dataset") +
  theme_minimal()
print(cluster_plot)
OUTPUT: