Segmentation:Clustering: Krissie 2024-11-21
Segmentation:Clustering: Krissie 2024-11-21
Krissie
2024-11-21
#----------------Segmentation---------------------#
##--------##Clustering##---------##
#1.Loading data
# NOTE(review): the absolute path below only exists on the author's machine;
# adjust it before running the script anywhere else.
setwd("/Users/kitsanasudsaneh/Desktop/fall2024")
seg.raw <- read.csv("rintro-chapter5.csv")
str(seg.raw) # structure as read from disk, before any type conversion
#2.Cleaning and preparing the data: store the categorical columns as factors
seg.raw <- local({
  cleaned <- seg.raw
  for (col in c("gender", "ownHome", "subscribe", "Segment")) {
    cleaned[[col]] <- factor(cleaned[[col]])
  }
  cleaned
})
str(seg.raw) # confirm the four columns are now factors
#-------##Distance-based Methods##--------#
1
##
##
##
##
##
Height
4
3
2
1
0.0 0.2 0.4 0.6 0.8
0.2617250
0.2329028
0.2532815
0.0000000
1
128
137
102
101
107
library(cluster)
173
219
298
256
287 65
172
141
121
129
89
257
242
278
294
283
288
185
204
21513053
258
20
271
276
261
293199
223
194
224
19184108
0.4129493
0.0679978
0.0000000
0.2532815
206
186
3595
249
154 2
227
218
229
205
212
217
111
262
133
147
146
149
117
142
124
134
132
#---Compute a Distance Matrix
138
82
246
49
233
32
63
266
292 88
248
260
282
42
255
281484
30
24413
289
237 0.4246012
0.0000000
0.0679978
0.2329028
3
277
254
12
243
#---Perform Hierarchical Clustering
#ideal for exploring relationships.
245
69
268
272
66
297
285
251
284
270
67
275
61
187
161
197
157
171
213
2
51
72
24740
24
97
240
279
68
5
47
22
273
55
274
118
145 25
122
150
231
39
139
190
19
83
296
seg.dist
10
252
17
286
18
168
21
8781
92
73
26
241
232
300 265
91
96
52
236
41
439 0
99
37
14
167
238
65
189
167
226
184
225
153
160
176
211
214
165
216
as.matrix(seg.dist)[1:4,1:4] #Distances of first 4 observations
11
79
195
295
175 3
269
156
179
192
152
208
196
203
198
170
177
163
164
209
174
193
230 202
112
115
109
126
127
131
143
103
113
105
140
119
148
106
123
135
#3.Hierarchical Clustering Method:groups data based on a tree structure,
116
110
136
104
114 7
27
58
239
250
23
234
235
54
78
60
94 76
34
253
267
222
93
568
28
15831
62
441
45
77
70
74
29059
100
86
36
263
85
291
46
299
seg.dist <- daisy(seg.df) #calculates distances for mixed data (numeric + categorical)
128
137
102
101
107
173
219
298
256
287
65
172
141
121
129
89
257
242
278
294
283
288
#---Comparing observations in branches
# Print rows 128 and 137 side by side; the author marks them as similar.
seg.df[c(128,137),] #similar
#---Cophenetic Correlation Coefficient: how well the hierarchical clustering (dendrogram) represents
#the actual distances in the data (closer to 1 = better)
cor(cophenetic(seg.hc),seg.dist)
## [1] 0.7682436
#---Getting K groups from the tree: decide how many groups (clusters) you want to make,
# then draw the dendrogram and outline that many clusters (k = 4) in red.
plot(seg.hc)
rect.hclust(seg.hc, k=4,border = "red")
3
Cluster Dendrogram
0.8
0.6
0.4
Height
0.2
0.0
13053
84108
202
265
76
258
568
185
7
199
223
204
215
141
3595
116
88
5750
248
91
190
441
81
92
209
153
160
0
25
65
238
276
172
65
156
110
184
225
207
254
231
118
180
267
121
129
173
219
298
270
157
235
22
273
68
283
288
222
96
155
237
277
136
296
186
240
279
194
224
210
111
262
484
250
191
206
295
271
20
289
13
26
241
146
100
59
31
62
40
27
47
5
73
196
203
165
216
161
197
189
174
200
220
102
264
242
19
83
162
257
89
285
12
86
274
55
239
58
154
227
132
138
168
18
11
261
293
93
69
268
266
292
176
182
70
128
137
205
139
39
99
106
54
34
253
61
232
300
103
234
23
41
43
171
213
188
236
52
193
230
45
77
166
252
10
256
287
51
72
243
245
247
3
163
164
263
36
158
28
272
32
63
233
49
30
244
259
29
151
181
37
104
114
211
214
152
208
144
278
294
183
228
112
115
145
179
192
275
67
21
87
249
46
299
24
97
149
198
119
148
42
251
284
124
134
159
221
195
79
133
147
131
143
260
282
286
17
33
9
297
66
64
71
169
187
78
246
82
7
255
281
389
109
113
101
107
167
226
74
290
212
217
175
269
123
135
280
98
105
140
218
229
122
150
15
80
117
142
178
201
14
16
170
177
120
125
60
94
126
127
291
85
seg.dist
hclust (*, "complete")
# Cut the dendrogram into k = 4 groups; the result is then tabulated by cluster id.
seg.hc.segment <-cutree(seg.hc, k =4) #It assigns each data point to one of these 4 clusters.
table(seg.hc.segment) #counts how many observations fall into each cluster.
## seg.hc.segment
## 1 2 3 4
## 124 136 18 22
# Clusters 1 and 2 hold 260 of the 300 observations; clusters 3 and 4 are small (18 and 22).
#4.Quick summary helper: mean of every variable in `data` within each group.
#   data   - data frame of variables to summarize
#   groups - vector of group/cluster ids, one per row of `data`
# Returns a data frame with one row per group (ids in column Group.1).
# Each column is converted with as.numeric() before averaging, so factor
# columns are averaged over their integer level codes.
seg.summ <- function(data, groups) {
  numeric.mean <- function(column) {
    mean(as.numeric(column))
  }
  aggregate(data, by = list(groups), FUN = numeric.mean)
}
# Per-cluster means for the 4 hierarchical clusters. Factor columns are
# averaged over their integer level codes (seg.summ applies as.numeric()
# before mean()), so read those values as level-code averages.
seg.summ(seg.df,seg.hc.segment)
4
axis(1, at=c(1, 2), labels=c("Subscribe: No", "Subscribe: Yes"))
axis(2, at=c(1, 2), labels=levels(seg.df$gender))
Male
Female
##the choice of variables (like gender and subscribe) depends on what we want to learn.
##For example:Are males more likely to subscribe?
#Do certain clusters have more females or males who subscribe?
##6.K-means clustering
5
#---Run K-means: finds clusters by locating cluster centers that minimize the
# distance of the observations to the center of their own cluster
# k-means starts from randomly chosen centers, so the seed is fixed first.
set.seed(96743) #Set seed for reproducibility
# centers = 4 asks for four clusters.
# NOTE(review): seg.df.num is not created in the visible code; presumably it is
# an all-numeric copy of seg.df - confirm.
seg.k <- kmeans(seg.df.num, centers = 4)
#---Summarize Clusters
# Summarize every column of `data` by group: returns one row per distinct
# value of `groups` (ids in column Group.1) holding the mean of each variable.
#   data   - data frame of variables to summarize
#   groups - vector of cluster/segment ids, one per row of `data`
# Columns are passed through as.numeric() first so that factors are averaged
# over their integer level codes; a bare mean() returns NA for factor columns
# and raises "argument is not numeric or logical" warnings.
seg.summ <- function(data, groups) {
  aggregate(data, list(groups), function(x) mean(as.numeric(x)))
}
# Summarize every variable of seg.df by k-means cluster assignment.
# NOTE(review): seg.df still contains factor columns, so the summary helper has
# to convert them with as.numeric(); a bare mean() returns NA for them and
# raises "argument is not numeric or logical" warnings.
seg.summ(seg.df,seg.k$cluster)
#---Visualize the Clusters: A plot show how well-separated the clusters are.
6
Boxplot of comparing income in each Segment
4
3
Income
2
1
Segments
##--Clusplot: used to visualize the overall clusters and see where they share similarities in the data.
## The clusters are drawn against two components of the data (Component 1 vs Component 2).
library(cluster)
# labels = 4 labels only the cluster ellipses (not the individual points);
# lines = 0 draws no connecting lines between the ellipses.
clusplot(seg.df, seg.k$cluster, color = TRUE, shade = TRUE, labels = 4,
lines = 0,main = "Clusplot of the overall segment")
7
Clusplot of the overall segment
3
3
1 4
2
2
Component 2
1
0
−1
−2
−3
−4 −2 0 2 4
Component 1
These two components explain 48.49 % of the point variability.
#--------Model-based Methods-------#
#1.poLCA(): uses only categorical variables; finds hidden groups (latent classes) when they aren't directly
# observed.
#--- Create a formula that combines multiple variables into one response: cbind(...) lists the
# variables to model on the left-hand side and ~1 leaves the right-hand side as intercept only.
# NOTE(review): seg.df.cut is not created in the visible code; presumably a categorical (binned)
# copy of seg.df - confirm.
seg.f <- with(seg.df.cut,cbind(age, gender, income, kids, ownHome, subscribe)~1)
#2.Run model in different class (start with the smaller number) How well the data fits into n classes.
library(poLCA)
8
## Loading required package: MASS
# Fix the seed so the latent class fit is repeatable.
# (R reads 02807 as the number 2807; the leading zero has no effect.)
set.seed(02807)
seg.LCA3 <- poLCA(seg.f, data = seg.df.cut,nclass = 3) # there are 3 latent (hidden) classes.
9
## number of estimated parameters: 20
## residual degrees of freedom: 43
## maximum log-likelihood: -1092.345
##
## AIC(3): 2224.691
## BIC(3): 2298.767
## G^2(3): 42.77441 (Likelihood ratio/deviance statistic)
## X^2(3): 38.47647 (Chi-square goodness of fit)
##
10
## class 3: 1.0000 0.0000
## class 4: 0.7203 0.2797
##
## Estimated class population shares
## 0.4101 0.3697 0.0643 0.1559
##
## Predicted class memberships (by modal posterior prob.)
## 0.41 0.3733 0.0667 0.15
##
## =========================================================
## Fit for 4 latent classes:
## =========================================================
## number of observations: 300
## number of estimated parameters: 27
## residual degrees of freedom: 36
## maximum log-likelihood: -1088.021
##
## AIC(4): 2230.041
## BIC(4): 2330.043
## G^2(4): 34.12473 (Likelihood ratio/deviance statistic)
## X^2(4): 31.50696 (Chi-square goodness of fit)
##
#---Compare model fit using BIC (a lower BIC value indicates a better fit)
seg.LCA3$bic
## [1] 2298.767
seg.LCA4$bic
## [1] 2330.043
# The 3-class model has the lower BIC (2298.767 < 2330.043), so it is the better fit here.
11
## Group.1 age gender income kids ownHome subscribe
## 1 1 28.22385 NA 30075.32 1.1285714 NA NA
## 2 2 54.44407 NA 60082.47 0.3846154 NA NA
## 3 3 37.47652 NA 54977.08 2.0793651 NA NA
##
## 1 2 3
## 70 104 126
12
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
13
## col.clus[jInd[i]], : "lables" is not a graphical parameter
14
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
15
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
16
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
1
0
−1
−2
−3
−3 −2 −1 0 1 2 3 4
Component 1
These two components explain 48.49 % of the point variability.
17
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
##
## 1 2 3 4
## 123 112 20 45
18
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
19
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
20
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
21
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
22
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
## Warning in segments(lx1, ly1, lx2, ly2, ...): "lables" is not a graphical
## parameter
1
0
−1
−2
−2 0 2
Component 1
These two components explain 48.49 % of the point variability.
#2.Mclust:
#---Fit a Model-Based Clustering
library(mclust)
23
# No number of clusters is passed here, so Mclust() chooses the model itself;
# the summary output reports that it settled on 3 components.
seg.mc <-Mclust(seg.df.num)
summary(seg.mc) #Summarize the fitted model (3 components in this run)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VEV (ellipsoidal, equal shape) model with 3 components:
##
## log-likelihood n df BIC ICL
## -5137.106 300 73 -10690.59 -10690.59
##
## Clustering table:
## 1 2 3
## 163 71 66
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VII (spherical, varying volume) model with 4 components:
##
## log-likelihood n df BIC ICL
## -16862.69 300 31 -33902.19 -33906.18
##
## Clustering table:
## 1 2 3 4
## 104 66 59 71
## df BIC
## seg.mc 73 10690.59
## seg.mc4 31 33902.19
24
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
#---Visualize Clusters: if clusters overlap, it means some groups are less distinct.
library(cluster)
# Use the full component name `classification`; `seg.mc$class` only worked
# through `$` partial matching on the Mclust result.
# labels = 4 labels only the cluster ellipses, not the individual points.
clusplot(seg.df, seg.mc$classification, color = TRUE, shade = TRUE,
         labels = 4, main = "Mcluster Plot")
Mcluster Plot
1
3
2
3
Component 2
2
1
0
−1
−2
−3
−3 −2 −1 0 1 2 3 4
Component 1
These two components explain 48.49 % of the point variability.
25
Including Plots
400
200
0
temperature
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that
generated the plot.
26