17) Write an R program to print a built-in dataset, get information about it,
print and sort its variable values, and analyse the data.
# Explore the built-in PlantGrowth data frame (columns `weight` and `group`).
data("PlantGrowth")

# 1. Full listing of the data frame.
print("Dataset - PlantGrowth:")
print(PlantGrowth)

# 2. Structure (str) and per-column summary (summary).
print("Structure of the dataset:")
str(PlantGrowth)
print("Summary of the dataset:")
summary(PlantGrowth)

# 3. Values of each column, extracted by name.
print("Values of 'weight' variable:")
print(PlantGrowth[["weight"]])
print("Values of 'group' variable:")
print(PlantGrowth[["group"]])

# 4. Weights in ascending order, then whole rows reordered by weight.
print("Sorted weights (ascending):")
print(sort(PlantGrowth[["weight"]]))
sorted_df <- with(PlantGrowth, PlantGrowth[order(weight), ])
print("Data frame sorted by weight:")
print(sorted_df)

# 5. Mean and standard deviation of weight, a box plot of weight per group,
#    and the number of rows in each group.
mean_weight <- mean(PlantGrowth[["weight"]])
sd_weight <- sd(PlantGrowth[["weight"]])
cat("Mean of weights:", mean_weight, "\n")
cat("Standard deviation of weights:", sd_weight, "\n")
boxplot(
  weight ~ group,
  data = PlantGrowth,
  main = "Weight by Treatment Group",
  xlab = "Group",
  ylab = "Weight",
  col = c("lightblue", "lightgreen", "lightcoral")
)
group_counts <- table(PlantGrowth[["group"]])
print("Count of samples in each group:")
print(group_counts)
21) Write an R Program to find Factorial of a number.
# Returns n! for a non-negative whole number `n` by iterative multiplication.
# seq_len(0) is empty, so 0! stays 1; `1:n` would iterate over 1, 0 for
# n == 0 and wrongly yield 0.
compute_factorial <- function(n) {
  result <- 1
  for (i in seq_len(n)) {
    result <- result * i
  }
  result
}

# Read the number from the console. Non-numeric text becomes NA; the warning
# as.numeric() raises for it is silenced because NA is handled just below.
num <- suppressWarnings(as.numeric(readline("Enter a number: ")))

if (!is.finite(num) || num != floor(num)) {
  # Covers NA (non-numeric or empty input), infinite and fractional values.
  cat("Please enter a whole number.\n")
} else if (num >= 0) {
  factorial <- compute_factorial(num)
  cat("Factorial of", num, "is:", factorial, "\n")
} else {
  cat("Factorial is not defined for negative numbers.\n")
}
22) Write an R Program to print the following pattern
*     *
**   **
*** ***
*******
# Builds row `i` of the pattern: `i` stars on the left, a gap, and stars on
# the right. The pattern is 2 * rows - 1 characters wide, so in the last row
# the two triangles share the centre column (7 stars for rows = 4).
v_pattern_row <- function(i, rows) {
  width <- 2 * rows - 1
  gap <- max(width - 2 * i, 0)
  right <- width - i - gap
  paste0(strrep("*", i), strrep(" ", gap), strrep("*", right))
}

rows <- 4
for (i in seq_len(rows)) {
  # One cat() per row; the newline is emitted as a proper "\n" escape.
  cat(v_pattern_row(i, rows), "\n", sep = "")
}
23) Write an R Program to print the following pattern
   *
  **
 ***
****
rows <- 4
# Row n holds n stars, left-padded with spaces so every row ends in the
# same column.
for (n_stars in seq_len(rows)) {
  padding <- strrep(" ", rows - n_stars)
  stars <- strrep("*", n_stars)
  cat(padding, stars, "\n", sep = "")
}
24) Write an R Program to print the following pattern
*
**
***
****
# Builds row `i` of a centred pyramid: (rows - i) leading spaces followed by
# `i` copies of "* " (each star keeps its trailing space).
pyramid_row <- function(i, rows) {
  paste0(strrep(" ", rows - i), strrep("* ", i))
}

rows <- 4
for (i in seq_len(rows)) {
  # The row text and the newline are written by separate, complete
  # statements.
  cat(pyramid_row(i, rows), sep = "")
  cat("\n")
}
25) Write an R Program to print the following pattern
  *
 ***
*****
 ***
  *
# Builds row `i` of the diamond: (rows - i) leading spaces followed by
# 2 * i - 1 contiguous stars, giving rows of 1, 3, 5, ... stars.
diamond_row <- function(i, rows) {
  paste0(strrep(" ", rows - i), strrep("*", 2 * i - 1))
}

rows <- 3
# Upper half including the widest row, then the mirrored lower half.
# rev(seq_len(rows - 1)) is empty when rows == 1, whereas (rows - 1):1
# would count upward over 0, 1 and print extra rows.
row_order <- c(seq_len(rows), rev(seq_len(rows - 1)))
for (i in row_order) {
  cat(diamond_row(i, rows), "\n", sep = "")
}
26) Write an R Program to process the dataset by using its functions.
library(dplyr)
# Process the built-in iris data with dplyr (dplyr must already be attached).
data <- iris

# First rows and the total number of missing values.
head(data)
sum(is.na(data))

# Overall mean / max / min of Sepal.Length as a one-row summary.
summary_data <- data %>%
  summarise(
    mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE),
    max_Sepal.Length = max(Sepal.Length, na.rm = TRUE),
    min_Sepal.Length = min(Sepal.Length, na.rm = TRUE)
  )

# Mean Sepal.Length for each Species.
species_summary <- data %>%
  group_by(Species) %>%
  summarise(mean_Sepal.Length = mean(Sepal.Length, na.rm = TRUE))

print(summary_data)
print(species_summary)

# Scatter plot of sepal length against sepal width, coloured by species.
# The title is a single-line literal; a line break inside the quotes would
# be drawn as part of the title.
plot(
  data$Sepal.Length, data$Sepal.Width,
  main = "Sepal Length vs Sepal Width",
  xlab = "Sepal Length", ylab = "Sepal Width",
  col = data$Species, pch = 19
)
27) Write an R Program to implement Linear Regression Algorithm.
library(ggplot2)
library(readr)
library(caret)
# Simple linear regression of Glucose on BMI.
# Uses read_csv() and the ggplot functions, so readr and ggplot2 must already
# be attached. The CSV must provide the `Glucose` and `BMI` columns used in
# the model formula.
dataset <- read_csv("preprocessed_diabetes_dataset.csv")
head(dataset)

# Fit the model and show coefficients and fit statistics.
model <- lm(Glucose ~ BMI, data = dataset)
summary(model)

# Fitted values for every row; these are drawn as the regression line below.
dataset$Predicted_Glucose <- predict(model, newdata = dataset)

# Each comment sits on its own line above the layer it describes, so no
# comment text can wrap into the `+` chain.
plot <- ggplot(dataset, aes(x = BMI, y = Glucose)) +
  # Scatter plot of actual values
  geom_point(color = "blue", alpha = 0.5) +
  # Regression line
  geom_line(aes(y = Predicted_Glucose), color = "red", size = 1) +
  labs(
    title = "Linear Regression: Glucose vs BMI",
    x = "BMI",
    y = "Glucose"
  ) +
  theme_minimal()
print(plot)
28) Write an R program to implement a Support Vector
Machine Algorithm.
# Support Vector Machine classification of iris species.
# Install e1071 only when it is missing, then attach it with library() so a
# failed load stops the script instead of returning FALSE.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

data(iris)
set.seed(123)

# 70 % of the rows (rounded down) form the training set; the rest is the
# test set.
train_size <- floor(0.7 * nrow(iris))
sample_index <- sample(seq_len(nrow(iris)), train_size)
train_data <- iris[sample_index, ]
test_data <- iris[-sample_index, ]

# Linear-kernel SVM predicting Species from all other columns.
svm_model <- svm(Species ~ ., data = train_data, kernel = "linear")
predictions <- predict(svm_model, test_data)

# Confusion matrix: rows are predicted classes, columns are actual classes.
conf_matrix <- table(Predicted = predictions, Actual = test_data$Species)
print(conf_matrix)

# Accuracy = share of observations on the diagonal of the matrix.
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
29) Write an R program to implement a Decision Tree Algorithm.
# Decision tree classification of the `Outcome` column in diabetes_data.csv.

# Install rpart / rpart.plot only when missing, then attach them.
for (pkg in c("rpart", "rpart.plot")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(rpart)
library(rpart.plot)

# Load dataset
diabetes <- read.csv("diabetes_data.csv")

# Make the target a factor before splitting, so the train and test subsets
# and the predictions all share one set of levels. The confusion matrix is
# then always square, which the diag()-based accuracy below relies on.
diabetes$Outcome <- as.factor(diabetes$Outcome)

# Set seed so the random split is reproducible
set.seed(123)

# Split data into training (70 %, rounded down) and testing (remainder)
train_size <- floor(0.7 * nrow(diabetes))
sample_index <- sample(seq_len(nrow(diabetes)), train_size)
train_data <- diabetes[sample_index, ]
test_data <- diabetes[-sample_index, ]

# Build a classification tree predicting Outcome from all other columns
tree_model <- rpart(Outcome ~ ., data = train_data, method = "class")

# Plot the tree
rpart.plot(tree_model, main = "Decision Tree for Diabetes Prediction")

# Predict class labels for the held-out rows
predictions <- predict(tree_model, test_data, type = "class")

# Confusion matrix: rows are predicted classes, columns are actual classes
conf_matrix <- table(Predicted = predictions, Actual = test_data$Outcome)
print(conf_matrix)

# Accuracy = share of observations on the diagonal of the matrix
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
30) Write an R program to implement a Naive Bayes Algorithm.
# Naive Bayes classification of the `am` column of mtcars.
# Install e1071 only when it is missing, then attach it with library() so a
# failed load stops the script instead of returning FALSE.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)

data(mtcars)
# The target must be a factor for classification.
mtcars$am <- as.factor(mtcars$am)
set.seed(123)

# 0.7 * nrow(mtcars) is fractional; floor() makes the rounding-down explicit.
train_size <- floor(0.7 * nrow(mtcars))
sample_index <- sample(seq_len(nrow(mtcars)), train_size)
train_data <- mtcars[sample_index, ]
test_data <- mtcars[-sample_index, ]

# Fit on the training rows using all other columns as predictors.
nb_model <- naiveBayes(am ~ ., data = train_data)
predictions <- predict(nb_model, test_data)

# Confusion matrix: rows are predicted classes, columns are actual classes.
conf_matrix <- table(Predicted = predictions, Actual = test_data$am)
print(conf_matrix)

# Accuracy = share of observations on the diagonal of the matrix.
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
31) Write an R program to implement a K-Nearest
Neighbour Algorithm.
# K-nearest-neighbour classification of the `quality` column in
# redwinequality.csv.

# Install `class` (provides knn) only when it is missing, then attach it.
if (!requireNamespace("class", quietly = TRUE)) {
  install.packages("class")
}
library(class)

# Load the dataset
wine_data <- read.csv("redwinequality.csv")

# View the first few rows of the dataset
head(wine_data)

# Convert 'quality' to a factor (since it's the target variable)
wine_data$quality <- as.factor(wine_data$quality)

# Set seed for reproducibility
set.seed(123)

# Split the dataset into training (70 %, rounded down) and testing sets
train_size <- floor(0.7 * nrow(wine_data))
sample_index <- sample(seq_len(nrow(wine_data)), train_size)
train_data <- wine_data[sample_index, ]
test_data <- wine_data[-sample_index, ]

# Separate features and target. Features are every column except 'quality',
# selected by name so the split does not depend on 'quality' being the last
# column. drop = FALSE keeps a data frame even with a single feature.
feature_cols <- setdiff(names(wine_data), "quality")
train_x <- train_data[, feature_cols, drop = FALSE]
train_y <- train_data$quality
test_x <- test_data[, feature_cols, drop = FALSE]
test_y <- test_data$quality

# Classify each test row from its k = 3 nearest training rows
k_value <- 3
knn_predictions <- knn(train_x, test_x, train_y, k = k_value)

# Confusion matrix: rows are predicted classes, columns are actual classes
conf_matrix <- table(Predicted = knn_predictions, Actual = test_y)
print(conf_matrix)

# Accuracy = share of observations on the diagonal of the matrix
accuracy <- sum(diag(conf_matrix)) / sum(conf_matrix)
cat("Accuracy:", round(accuracy * 100, 2), "%\n")
32) Write an R program to implement a K-Means
Clustering Algorithm.
# K-means clustering of the built-in USArrests data, plotted on the first
# two principal components.

# Install ggplot2 only when it is missing, then attach it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

data("USArrests")
head(USArrests)

# Drop incomplete rows, then standardise every column with scale().
USArrests <- na.omit(USArrests)
scaled_data <- scale(USArrests)

# Three clusters; nstart = 25 runs 25 random starts. The seed makes the
# result reproducible.
set.seed(123)
kmeans_result <- kmeans(scaled_data, centers = 3, nstart = 25)
print(kmeans_result)
USArrests$Cluster <- as.factor(kmeans_result$cluster)

# Project the scaled data onto PC1 / PC2 for a two-dimensional view.
pca_result <- prcomp(scaled_data)
pca_data <- data.frame(pca_result$x[, 1:2], Cluster = USArrests$Cluster)

# Axis labels are single-line literals; a line break inside the quotes would
# be drawn as part of the label. print() draws the plot even when the script
# is run through source().
cluster_plot <- ggplot(pca_data, aes(x = PC1, y = PC2, color = Cluster)) +
  geom_point(size = 3) +
  labs(
    title = "K-means Clustering on USArrests Dataset",
    x = "Principal Component 1",
    y = "Principal Component 2"
  ) +
  theme_minimal()
print(cluster_plot)