L.R.G. GOVERNMENT ARTS COLLEGE FOR WOMEN
(AFFILIATED TO BHARATHIAR UNIVERSITY)
TIRUPUR-641604
DEPARTMENT OF COMPUTER SCIENCE
I M.SC COMPUTER SCIENCE
PRACTICAL-III: DATA MINING USING R
NAME    :
REG. NO :
CERTIFICATE
L.R.G. GOVERNMENT ARTS COLLEGE FOR WOMEN
(AFFILIATED TO BHARATHIAR UNIVERSITY)
TIRUPUR-641604
NAME        :
REGISTER NO :
CLASS       :
This is to certify that this is a bonafide record of practical work done by the above student of I M.SC COMPUTER SCIENCE in PRACTICAL-III: DATA MINING USING R during the academic year 2023-2024.
Staff in Charge                                                      Head of the Department
Submitted for the Bharathiar University Practical Examination held on . . . . . . . . . . . . . . . .
Internal Examiner                                                        External Examiner
CONTENT

INDEX

S.NO   DATE   CONTENTS                                           PAGE NO   SIGN
1             APRIORI ALGORITHM TO EXTRACT ASSOCIATION
              RULES OF DATA MINING
2             K-MEANS CLUSTERING ALGORITHM
3             HIERARCHICAL CLUSTERING
4             CLASSIFICATION ALGORITHM
5             DECISION TREE
6             LINEAR REGRESSION
7             DATA VISUALIZATION
1. APRIORI ALGORITHM TO EXTRACT ASSOCIATION RULES OF DATA MINING
# Loading libraries
library(arules)
library(arulesViz)
library(RColorBrewer)
# Import the Groceries transaction dataset
data('Groceries')
# Mine association rules with apriori() at 1% support and 20% confidence
rules<-apriori(Groceries,parameter=list(supp=0.01,conf=0.2))
# Inspect the first ten rules
inspect(rules[1:10])
# Plot the relative frequency of the 20 most common items
arules::itemFrequencyPlot(Groceries,topN=20,
                          col=brewer.pal(8,'Pastel2'),
                          main='Relative Item Frequency Plot',
                          type='relative',
                          ylab='Item Frequency(Relative)')
OUTPUT:
# Loading Libraries
>library(arules)
Loading required package: Matrix
Attaching package: ‘arules’
The following objects are masked from ‘package:base’:
    abbreviate, write
> library(arulesViz)
> library(RColorBrewer)
> # import dataset
> data('Groceries')
> # using apriori() function
> rules<-apriori(Groceries,parameter=list(supp=0.01,conf=0.2))
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen target ext
        0.2    0.1    1 none FALSE            TRUE       5    0.01      1     10  rules TRUE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE
Absolute minimum support count: 98
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[169 item(s), 9835 transaction(s)] done [0.01s].
sorting and recoding items ... [88 item(s)] done [0.00s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 done [0.00s].
writing ... [232 rule(s)] done [0.00s].
creating S4 object ... done [0.00s].
> # using inspect() function
> inspect(rules[1:10])
     lhs                rhs                support    confidence coverage   lift     count
[1]  {}              => {whole milk}       0.25551601 0.2555160  1.00000000 1.000000 2513
[2]  {hard cheese}   => {whole milk}       0.01006609 0.4107884  0.02450432 1.607682   99
[3]  {butter milk}   => {other vegetables} 0.01037112 0.3709091  0.02796136 1.916916  102
[4]  {butter milk}   => {whole milk}       0.01159126 0.4145455  0.02796136 1.622385  114
[5]  {ham}           => {whole milk}       0.01148958 0.4414062  0.02602949 1.727509  113
[6]  {sliced cheese} => {whole milk}       0.01077783 0.4398340  0.02450432 1.721356  106
[7]  {oil}           => {whole milk}       0.01128622 0.4021739  0.02806304 1.573968  111
[8]  {onions}        => {other vegetables} 0.01423488 0.4590164  0.03101169 2.372268  140
[9]  {onions}        => {whole milk}       0.01209964 0.3901639  0.03101169 1.526965  119
[10] {berries}       => {yogurt}           0.01057448 0.3180428  0.03324860 2.279848  104
> # using itemFrequencyPlot() function
> arules::itemFrequencyPlot(Groceries,topN=20,
+ col=brewer.pal(8,'Pastel2'),
+ main='Relative Item Frequency Plot',
+ type='relative',
+ ylab='Item Frequency(Relative)')
[Plot: Relative Item Frequency Plot]
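As a possible extension (not part of the recorded run), the mined rules can be ranked before inspection. A minimal sketch, assuming the rules object from above; sort(), head() and inspect() for rule sets are standard arules functions, and by="lift" is just one reasonable ranking choice:

# Sketch: rank the mined rules by lift and inspect the strongest ones
top_rules<-sort(rules,by="lift",decreasing=TRUE)  # order rules by lift
inspect(head(top_rules,5))                        # show the five highest-lift rules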
2. K-MEANS CLUSTERING
library(cluster)
df=USArrests
#Number of Rows and Columns of the Actual Dataset
dim(df)
head(df)
#remove rows with missing values
df=na.omit(df)
dim(df)
#scale each variable to have mean 0 and sd 1
df=scale(df)
head(df)
set.seed(1)
#Cluster the dataset with 5 groups
km=kmeans(df,centers=5,nstart=25)
print(km)
plot(df)
points(km$centers,col=1:5,pch=8,cex=2)
cnt=table(km$cluster)
print(cnt)
final_data=cbind(df,cluster=km$cluster)
head(final_data)
plot(final_data,cex=0.6,main="Final Data")
ag=aggregate(final_data,by=list(cluster=km$cluster),mean)
head(ag)
plot(ag,cex=0.6,main="Aggregate")
 OUTPUT:
#K-means Clustering
> #Number of Rows and Columns of the Actual Dataset
> dim(df)
[1] 50 4
> head(df)
           Murder Assault UrbanPop Rape
Alabama      13.2     236       58 21.2
Alaska       10.0     263       48 44.5
Arizona       8.1     294       80 31.0
Arkansas      8.8     190       50 19.5
California    9.0     276       91 40.6
Colorado      7.9     204       78 38.7
> #remove rows with missing values
> dim(df)
[1] 50 4
> #scale each variable to have mean 0 and sd 1
> head(df)
               Murder   Assault   UrbanPop         Rape
Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
Arizona    0.07163341 1.4788032  0.9989801  1.042878388
Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
California 0.27826823 1.2628144  1.7589234  2.067820292
Colorado   0.02571456 0.3988593  0.8608085  1.864967207
> #Cluster the dataset with 5 groups
> print(km)
K-means clustering with 5 clusters of sizes 7, 10, 10, 11, 12

Cluster means:
      Murder    Assault   UrbanPop        Rape
1 1.5803956 0.9662584 -0.7775109 0.04844071
2 -1.1727674 -1.2078573 -1.0045069 -1.10202608
3 -0.6286291 -0.4086988 0.9506200 -0.38883734
4 -0.1642225 -0.3658283 -0.2822467 -0.11697538
5 0.7298036 1.1188219 0.7571799 1.32135653
Clustering vector:
       Alabama         Alaska        Arizona       Arkansas     California 
             1              5              5              4              5 
      Colorado    Connecticut       Delaware        Florida        Georgia 
             5              3              3              5              1 
        Hawaii          Idaho       Illinois        Indiana           Iowa 
             3              2              5              4              2 
        Kansas       Kentucky      Louisiana          Maine       Maryland 
             4              4              1              2              5 
 Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
             3              5              2              1              4 
       Montana       Nebraska         Nevada  New Hampshire     New Jersey 
             4              4              5              2              3 
    New Mexico       New York North Carolina   North Dakota           Ohio 
             5              5              1              2              3 
      Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
             4              4              3              3              1 
  South Dakota      Tennessee          Texas           Utah        Vermont 
             2              1              5              3              2 
      Virginia     Washington  West Virginia      Wisconsin        Wyoming 
             4              3              2              2              4 
Within cluster sum of squares by cluster:
[1] 6.128432 7.443899 9.326266 7.788275 18.257332
(between_SS / total_SS = 75.0 %)
Available components:
[1] "cluster"     "centers"   "totss"            "withinss"    "tot.withinss"
[6] "betweenss"     "size"        "iter"         "ifault"
> plot(df)
> points(km$centers,col=1:5,pch=8,cex=2)
> print(cnt)
 1  2  3  4  5 
 7 10 10 11 12 
> head(final_data)
               Murder   Assault   UrbanPop         Rape cluster
Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473       1
Alaska     0.50786248 1.1068225 -1.2117642  2.484202941       5
Arizona    0.07163341 1.4788032  0.9989801  1.042878388       5
Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602       4
California 0.27826823 1.2628144  1.7589234  2.067820292       5
Colorado   0.02571456 0.3988593  0.8608085  1.864967207       5
> plot(final_data,cex=0.6,main="Final Data")
> head(ag)
    cluster   Murder Assault UrbanPop       Rape cluster
1       1 1.5803956 0.9662584 -0.7775109 0.04844071        1
2       2 -1.1727674 -1.2078573 -1.0045069 -1.10202608     2
3       3 -0.6286291 -0.4086988 0.9506200 -0.38883734      3
4       4 -0.1642225 -0.3658283 -0.2822467 -0.11697538     4
5       5 0.7298036 1.1188219 0.7571799 1.32135653         5
> plot(ag,cex=0.6,main="Aggregate")
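A common way to justify centers=5 is the elbow method. A minimal, self-contained sketch (illustrative, not part of the recorded output); the upper bound max_k=10 is an assumption:

# Sketch: elbow method for choosing k on the scaled USArrests data
library(cluster)
df<-scale(na.omit(USArrests))
max_k<-10
set.seed(1)
wss<-sapply(1:max_k,function(k) kmeans(df,centers=k,nstart=25)$tot.withinss)
plot(1:max_k,wss,type="b",xlab="Number of clusters k",
     ylab="Total within-cluster SS",main="Elbow Method")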
3. HIERARCHICAL CLUSTERING
#Hierarchical Clustering
library(cluster)
df=USArrests
#remove rows with missing values
df=na.omit(df)
#scale each variable to have a mean of 0 and sd of 1
df=scale(df)
head(df)
d=dist(df,method="euclidean")
#complete-linkage dendrogram
hc1=hclust(d,method="complete")
plot(hc1,cex=0.6,main="Complete Dendrogram",hang=-1)
#average-linkage dendrogram
hc2=hclust(d,method="average")
plot(hc2,cex=0.6,main="Average Dendrogram",hang=-1)
abline(h=3.0,col="green")
groups=cutree(hc2,k=4)
print(groups)
table(groups)
rect.hclust(hc2,k=4,border="red")
final_data=cbind(df,cluster=groups)
head(final_data)
plot(final_data,cex=0.6,main="Final Data")
 OUTPUT:
#Hierarchical Clustering
> #remove rows with missing values
> #scale each variable to have a mean 0 and sd of 1
> head(df)
               Murder   Assault   UrbanPop         Rape
Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
Arizona    0.07163341 1.4788032  0.9989801  1.042878388
Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
California 0.27826823 1.2628144  1.7589234  2.067820292
Colorado   0.02571456 0.3988593  0.8608085  1.864967207
> #complete-linkage dendrogram
> plot(hc1,cex=0.6,main="Complete Dendrogram",hang=-1)
> #average-linkage dendrogram
> print(groups)
       Alabama         Alaska        Arizona       Arkansas     California 
             1              2              3              4              3 
      Colorado    Connecticut       Delaware        Florida        Georgia 
             3              4              4              3              1 
        Hawaii          Idaho       Illinois        Indiana           Iowa 
             4              4              3              4              4 
        Kansas       Kentucky      Louisiana          Maine       Maryland 
             4              4              1              4              3 
 Massachusetts       Michigan      Minnesota    Mississippi       Missouri 
             4              3              4              1              3 
       Montana       Nebraska         Nevada  New Hampshire     New Jersey 
             4              4              3              4              4 
    New Mexico       New York North Carolina   North Dakota           Ohio 
             3              3              1              4              4 
      Oklahoma         Oregon   Pennsylvania   Rhode Island South Carolina 
             4              4              4              4              1 
  South Dakota      Tennessee          Texas           Utah        Vermont 
             4              1              3              4              4 
      Virginia     Washington  West Virginia      Wisconsin        Wyoming 
             4              4              4              4              4 
> table(groups)
groups
 1  2  3  4 
 7  1 12 30 
> rect.hclust(hc2,k=4,border="red")
> final_data=cbind(df,cluster=groups)
> head(final_data)
               Murder   Assault   UrbanPop         Rape cluster
Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473       1
Alaska     0.50786248 1.1068225 -1.2117642  2.484202941       2
Arizona    0.07163341 1.4788032  0.9989801  1.042878388       3
Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602       4
California 0.27826823 1.2628144  1.7589234  2.067820292       3
Colorado   0.02571456 0.3988593  0.8608085  1.864967207       3
> plot(final_data,cex=0.6,main="Final Data")
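Since the average-linkage plot draws a reference line at h=3.0, the tree can also be cut by height rather than by a fixed k. A minimal sketch, assuming the hc2 object from above; cutree() with h= is part of base R's stats package:

# Sketch: cut the average-linkage dendrogram at height 3.0 instead of k=4
groups_h<-cutree(hc2,h=3.0)  # cluster labels implied by the h=3.0 cut
table(groups_h)              # compare these sizes with table(groups)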
4. CLASSIFICATION ALGORITHM
#Classification Algorithm
library(class)
data(iris)
#Number of Rows and Columns
dim(iris)
head(iris)
rand=sample(1:nrow(iris),0.9*nrow(iris))
head(rand)
#Scale the values using the normalization method
nor<-function(x)
  return((x-min(x))/(max(x)-min(x)))
iris_norm=as.data.frame(lapply(iris[,c(1,2,3,4)],nor))
head(iris_norm)
#Train dataset
iris_train=iris_norm[rand,]
iris_train_target=iris[rand,5]
#Test dataset
iris_test=iris_norm[-rand,]
iris_test_target=iris[-rand,5]
dim(iris_train)
dim(iris_test)
#K-nearest neighbour classification
model1=knn(train=iris_train,test=iris_test,cl=iris_train_target,k=7)
#Confusion Matrix
tab=table(model1,iris_test_target)
print(tab)
accuracy=function(x)
sum(diag(x)/sum(rowSums(x)))*100
cat("Accuracy classifier=",accuracy(tab))
 OUTPUT:
#Classification Algorithm
> #Number of Rows and Columns
> dim(iris)
[1] 150 5
> head(iris)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
> head(rand)
[1] 114 107 25 128 14 24
> #Scale the values using Normalization method
> nor<-function(x)
+{
+ return((x-min(x))/(max(x)-min(x)))
+}
> head(iris_norm)
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1   0.22222222   0.6250000   0.06779661  0.04166667
2   0.16666667   0.4166667   0.06779661  0.04166667
3   0.11111111   0.5000000   0.05084746  0.04166667
4   0.08333333   0.4583333   0.08474576  0.04166667
5   0.19444444   0.6666667   0.06779661  0.04166667
6   0.30555556   0.7916667   0.11864407  0.12500000
> #Train dataset
> iris_train=iris_norm[rand,]
> iris_train_target=iris[rand,5]
> #Test dataset
> iris_test=iris_norm[-rand,]
> iris_test_target=iris[-rand,5]
> dim(iris_train)
[1] 135 4
> dim(iris_test)
[1] 15 4
> #K-nearest neighbour classification
> model1=knn(train=iris_train,test=iris_test,cl=iris_train_target,k=7)
> #Confusion Matrix
> tab=table(model1,iris_test_target)
> print(tab)
            iris_test_target
model1       setosa versicolor virginica
  setosa          6          0         0
  versicolor      0          6         1
  virginica       0          0         2
> accuracy=function(x)
+{
+ sum(diag(x)/sum(rowSums(x)))*100
+}
> cat("Accuracy classifier=",accuracy(tab))
Accuracy classifier= 100
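The program fixes k=7; a small sketch like the one below, assuming the iris_train, iris_test and target vectors from this program, would compare accuracy across several candidate k values:

# Sketch: evaluate knn accuracy for a few values of k
for(k in c(1,3,5,7,9)){
  pred<-knn(train=iris_train,test=iris_test,cl=iris_train_target,k=k)
  acc<-sum(pred==iris_test_target)/length(iris_test_target)*100
  cat("k =",k," accuracy =",acc,"%\n")
}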
5. DECISION TREE
#Decision Tree
library(rpart)
data=iris
str(data)
head(data)
#creating the decision tree using regression
dtree=rpart(Sepal.Width~Sepal.Length+Petal.Width+Petal.Length+Species,
            data=iris,method="anova")
plot(dtree,uniform=TRUE,main="Sepal Width Decision Tree Using Regression")
print(dtree)
text(dtree,use.n=TRUE,cex=.7)
#predicting the Sepal Width
adata<-data.frame(Species='versicolor',Sepal.Length=5.1,Petal.Length=4.5,Petal.Width=1.4)
cat("Predicted Value:\n")
pt=predict(dtree,adata,method="anova")
print(pt)
plot(pt)
#creating the decision tree using classification
df=as.data.frame(data)
dt=rpart(Sepal.Width~Sepal.Length+Petal.Width+Petal.Length+Species,
         data=df,method="class")
plot(dt,uniform=TRUE,main="Sepal Width Decision Tree using Classification")
print(dt)
text(dt,use.n=TRUE,cex=.7)
 OUTPUT:
> #Decision Tree
> head(data)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
> #creating the decision tree using regression
> dtree=rpart(Sepal.Width~Sepal.Length+Petal.Width+Petal.Length+Species,
+             data=iris,method="anova")
> plot(dtree,uniform=TRUE,main="Sepal Width Decision Tree Using Regression")
> print(dtree)
n= 150
node), split, n, deviance, yval
    * denotes terminal node
1) root 150 28.3069300 3.057333
 2) Species=versicolor,virginica 100 10.9616000 2.872000
   4) Petal.Length< 4.05 16 0.7975000 2.487500 *
   5) Petal.Length>=4.05 84 7.3480950 2.945238
    10) Petal.Width< 1.95 55 3.4920000 2.860000
     20) Sepal.Length< 6.35 36 2.5588890 2.805556 *
     21) Sepal.Length>=6.35 19 0.6242105 2.963158 *
    11) Petal.Width>=1.95 29 2.6986210 3.106897
     22) Petal.Length< 5.25 7 0.3285714 2.914286 *
     23) Petal.Length>=5.25 22 2.0277270 3.168182 *
 3) Species=setosa 50 7.0408000 3.428000
   6) Sepal.Length< 5.05 28 2.0496430 3.203571 *
   7) Sepal.Length>=5.05 22 1.7859090 3.713636 *
> text(dtree,use.n=TRUE,cex=.7)
> #predicting the Sepal Width
> adata<-data.frame(Species='versicolor',Sepal.Length=5.1,Petal.Length=4.5,Petal.Width=1.4)
> cat("Predicted Value:\n")
Predicted Value:
> pt=predict(dtree,adata,method="anova")
> print(pt)
       1 
2.805556 
> plot(pt)
> #creating the decision tree using classification
> plot(dt,uniform=TRUE,main="Sepal Width Decision Tree using Classification")
> print(dt)
n= 150
node), split, n, loss, yval, (yprob)
    * denotes terminal node
 1) root 150 124 3 (0.0067 0.02 0.027 0.02 0.053 0.033 0.06 0.093 0.067 0.17 0.073 0.087
0.04 0.08 0.04 0.027 0.02 0.04 0.013 0.0067 0.0067 0.0067 0.0067)
  2) Petal.Width>=0.8 100 80 3 (0.01 0.03 0.03 0.03 0.08 0.05 0.09 0.14 0.09 0.2 0.07 0.08
0.04 0.03 0 0.01 0 0.02 0 0 0 0 0)
   4) Sepal.Length< 6.45 65 55 2.8 (0.015 0.046 0.046 0.046 0.11 0.062 0.14 0.15 0.11 0.14
0.015 0.046 0.031 0.046 0 0 0 0 0 0 0 0 0)
    8) Petal.Width< 1.95 56 47 2.7 (0.018 0.054 0.054 0.054 0.11 0.071 0.16 0.11 0.12 0.16
0.018 0.036 0.018 0.018 0 0 0 0 0 0 0 0 0)
     16) Sepal.Length< 5.55 12 9 2.4 (0.083 0 0.17 0.25 0.25 0.083 0.083 0 0 0.083 0 0 0 0
0 0 0 0 0 0 0 0 0) *
     17) Sepal.Length>=5.55 44 36 2.7 (0 0.068 0.023 0 0.068 0.068 0.18 0.14 0.16 0.18
0.023 0.045 0.023 0.023 0 0 0 0 0 0 0 0 0)
       34) Petal.Width< 1.55 29 23 2.9 (0 0.1 0.034 0 0.069 0.1 0.1 0.17 0.21 0.17 0 0.034 0
0 0 0 0 0 0 0 0 0 0)
       68) Sepal.Length>=5.95 15 11 2.9 (0 0.2 0.067 0 0.067 0.067 0 0.2 0.27 0.067 0
0.067 0 0 0 0 0 0 0 0 0 0 0) *
        69) Sepal.Length< 5.95 14 10 3 (0 0 0 0 0.071 0.14 0.21 0.14 0.14 0.29 0 0 0 0 0 0 0
0 0 0 0 0 0) *
      35) Petal.Width>=1.55 15 10 2.7 (0 0 0 0 0.067 0 0.33 0.067 0.067 0.2 0.067 0.067
0.067 0.067 0 0 0 0 0 0 0 0 0) *
       9) Petal.Width>=1.95 9 5 2.8 (0 0 0 0 0.11 0 0 0.44 0 0 0 0.11 0.11 0.22 0 0 0 0 0 0 0 0
0) *
   5) Sepal.Length>=6.45 35 24 3 (0 0 0 0 0.029 0.029 0 0.11 0.057 0.31 0.17 0.14 0.057 0 0
0.029 0 0.057 0 0 0 0 0) *
  3) Petal.Width< 0.8 50 41 3.4 (0 0 0.02 0 0 0 0 0 0.02 0.12 0.08 0.1 0.04 0.18 0.12 0.06
0.06 0.08 0.04 0.02 0.02 0.02 0.02)
   6) Sepal.Length< 4.95 20 15 3 (0 0 0.05 0 0 0 0 0 0.05 0.25 0.2 0.2 0 0.15 0 0.1 0 0 0 0 0
0 0)
    12) Petal.Length< 1.45 13 8 3 (0 0 0.077 0 0 0 0 0 0.077 0.38 0 0.23 0 0.077 0 0.15 0 0 0
0 0 0 0) *
   13) Petal.Length>=1.45 7 3 3.1 (0 0 0 0 0 0 0 0 0 0 0.57 0.14 0 0.29 0 0 0 0 0 0 0 0 0) *
   7) Sepal.Length>=4.95 30 24 3.4 (0 0 0 0 0 0 0 0 0 0.033 0 0.033 0.067 0.2 0.2 0.033 0.1
0.13 0.067 0.033 0.033 0.033 0.033)
    14) Petal.Length< 1.45 11 7 3.5 (0 0 0 0 0 0 0 0 0 0 0 0.091 0.091 0.091 0.36 0.091 0 0
0.091 0.091 0 0.091 0) *
    15) Petal.Length>=1.45 19 14 3.4 (0 0 0 0 0 0 0 0 0 0.053 0 0 0.053 0.26 0.11 0 0.16
0.21 0.053 0 0.053 0 0.053) *
> text(dt,use.n=TRUE,cex=.7)
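The regression tree above is grown with rpart's defaults. As a hedged extension (not part of the recorded output), the complexity table can be inspected and the tree pruned; printcp(), plotcp() and prune() are all standard rpart functions, and picking the cp with the lowest cross-validated error is one common heuristic:

# Sketch: inspect the complexity-parameter table and prune the regression tree
printcp(dtree)    # cross-validated error for each cp value
plotcp(dtree)     # visual aid for picking cp
best_cp<-dtree$cptable[which.min(dtree$cptable[,"xerror"]),"CP"]
pruned<-prune(dtree,cp=best_cp)  # prune at the cp with lowest xerror
plot(pruned,uniform=TRUE,main="Pruned Tree")
text(pruned,use.n=TRUE,cex=.7)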
6. LINEAR REGRESSION
#Linear Regression
setwd("D:/R")
df=read.csv("h2.csv",header=TRUE)
print(df)
lr=lm(height~weight,data=df)
print(lr)
#Linear Regression
plot(df$height,df$weight,col="blue",main="Height_Weight Regression",
     cex=1.3,pch=15,xlab="height",ylab="weight")
print(summary(lr))
print(residuals(lr))
coeff=coefficients(lr)
eq=paste0("y = ",round(coeff[1],1)," + ",round(coeff[2],1),"*x")
print(eq)
#Linear Equation
new.weights=data.frame(weight=c(60,50))
print(new.weights)
df1=predict(lr,newdata=new.weights)
print(df1)
df2=data.frame(df1,new.weights)
names(df2)=c("height","weight")
print(df2)
df3=rbind(df,df2)
print(df3)
write.csv(df3,"h3.csv")
pie(table(df3$height))
 OUTPUT:
> #Linear Regression
> setwd("D:/R")
> df=read.csv("h2.csv",header=TRUE)
> print(df)
  height weight
1    174     80
2    150     70
3    160     75
4    180     85
> lr=lm(height~weight,data=df)
> print(lr)
Call:
lm(formula = height ~ weight, data = df)

Coefficients:
(Intercept)       weight  
       4.80         2.08  
> #Linear Regression
> plot(df$height,df$weight,col="blue",main="Height_Weight Regression",cex=1.3,pch=15,xlab="height",ylab="weight")
> print(summary(lr))
Call:
lm(formula = height ~ weight, data = df)

Residuals:
   1    2    3    4 
 2.8 -0.4 -0.8 -1.6 
 Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept)   4.8000    16.4463   0.292   0.7979  
weight        2.0800     0.2117   9.827   0.0102 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.366 on 2 degrees of freedom
Multiple R-squared: 0.9797,             Adjusted R-squared: 0.9696
F-statistic: 96.57 on 1 and 2 DF, p-value: 0.0102
> print(residuals(lr))
   1    2    3    4 
 2.8 -0.4 -0.8 -1.6 
> print(eq)
[1] "y = 4.8 + 2.1*x"
> #Linear Equation
> print(new.weights)
  weight
1     60
2     50
> print(df1)
      1     2
129.6 108.8
> print(df2)
height weight
1 129.6           60
2 108.8           50
> df3=rbind(df,df2)
> print(df3)
   height weight
1   174.0     80
2   150.0     70
3   160.0     75
4   180.0     85
11  129.6     60
21  108.8     50
> write.csv(df3,"h3.csv")
> pie(table(df3$height))
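predict() for lm objects also supports interval estimates. A minimal sketch, assuming the lr model and new.weights data frame from this program; interval="confidence" is a standard option of stats::predict.lm:

# Sketch: point predictions with 95% confidence intervals
ci<-predict(lr,newdata=new.weights,interval="confidence",level=0.95)
print(ci)  # columns: fit, lwr, upr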
7. DATA VISUALIZATION
#Data Visualization
X=iris
dim(X)
summary(X)
head(X)
hist(X$Sepal.Length,main='Histogram',col='green')
barplot(X$Sepal.Length[1:10],main='Barplot',col='red',xlab='Sepal.Length')
pie(table(X$Sepal.Length),main='pie-chart')
pairs(X)
plot(X$Sepal.Length,main='plot-chart',col='blue')
boxplot(X,main='Boxplot',col='yellow')
 OUTPUT:
> #Data Visualization
> dim(X)
[1] 150 5
> summary(X)
  Sepal.Length    Sepal.Width     Petal.Length    Petal.Width          Species  
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100   setosa    :50  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300   versicolor:50  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300   virginica :50  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199                  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800                  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500                  
> head(X)
  Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1          5.1         3.5          1.4         0.2  setosa
2          4.9         3.0          1.4         0.2  setosa
3          4.7         3.2          1.3         0.2  setosa
4          4.6         3.1          1.5         0.2  setosa
5          5.0         3.6          1.4         0.2  setosa
6          5.4         3.9          1.7         0.4  setosa
> hist(X$Sepal.Length,main='Histogram',col='green')
> barplot(X$Sepal.Length[1:10],main='Barplot',col='red',xlab='Sepal.Length')
> pie(table(X$Sepal.Length),main='pie-chart')
> pairs(X)
> plot(X$Sepal.Length,main='plot-chart',col='blue')
> boxplot(X,main='Boxplot',col='yellow')
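As one more illustrative plot (not in the recorded output), base graphics can colour the scatter plot by species, since X$Species is a factor. A minimal sketch using only standard graphics functions:

# Sketch: Sepal.Length coloured by Species
plot(X$Sepal.Length,col=X$Species,pch=19,
     main="Sepal Length by Species",ylab="Sepal.Length")
legend("topleft",legend=levels(X$Species),col=1:3,pch=19)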