0% found this document useful (0 votes)
48 views4 pages

Kmeans Clustering

The document outlines various clustering and statistical analysis techniques using the USArrests dataset, including K-means and hierarchical clustering methods. It also covers the computation of variance-covariance and correlation matrices, as well as principal component analysis (PCA) with visualizations. Additionally, it demonstrates the generation of multivariate normal samples and their statistical properties.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
48 views4 pages

Kmeans Clustering

The document outlines various clustering and statistical analysis techniques using the USArrests dataset, including K-means and hierarchical clustering methods. It also covers the computation of variance-covariance and correlation matrices, as well as principal component analysis (PCA) with visualizations. Additionally, it demonstrates the generation of multivariate normal samples and their statistical properties.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/4

# K-means clustering ----
# Partition the USArrests observations into three groups. The seed is fixed
# because kmeans() chooses its starting centres at random.
set.seed(100)

data <- USArrests

f <- kmeans(x = data, centers = 3, iter.max = 100, nstart = 2)

# Cluster label assigned to each row of `data`.
t <- f[["cluster"]]

# Number of observations in each cluster.
table(t)

# Hierarchical clustering ----
# Agglomerative clustering of USArrests on Euclidean distances, fitted with
# three linkage rules so the resulting trees can be compared.
data <- USArrests

d <- dist(data, method = "euclidean")

# Each linkage gets its own object; reusing one name for all three would
# discard the single- and complete-linkage trees as soon as they are built.
fit_single <- hclust(d, method = "single")
fit_complete <- hclust(d, method = "complete")

# `fit` is the average-linkage tree, the one that is cut and plotted below.
fit <- hclust(d, method = "average")

# Cut the average-linkage tree into three groups and count their members.
clusters <- cutree(fit, k = 3)

table(clusters)

# Dendrogram of the average-linkage tree.
plot(fit)

# Sample variance-covariance and correlation matrices ----
# Three variables measured on the same ten observations.
y1 <- c(35, 35, 40, 10, 6, 20, 35, 35, 35, 30)
y2 <- c(3.5, 4.9, 30.0, 2.8, 2.7, 2.8, 4.6, 10.9, 8.0, 1.6)
y3 <- c(2.80, 2.70, 4.38, 3.21, 2.73, 2.81, 2.88, 2.90, 3.28, 3.20)

# Bind the variables as columns: one row per observation.
data1 <- cbind(y1, y2, y3)
data1

# Covariance matrix of the three columns.
varcov <- cov(data1)
varcov

# Rescale the covariance matrix into the correlation matrix.
corr <- cov2cor(varcov)
corr

# Test of a mean vector with a given covariance matrix ----
# Height and weight recorded on the same 20 observations.
Height <- c(
  69, 74, 68, 70, 72, 67, 66, 70, 76, 68,
  72, 79, 74, 67, 66, 71, 74, 75, 75, 76
)
Weight <- c(
  153, 175, 155, 135, 172, 150, 115, 137, 200, 130,
  140, 265, 185, 112, 140, 150, 165, 185, 210, 220
)

sample <- data.frame(Height, Weight)

# Covariance matrix used in the test statistic. `byrow` is spelled TRUE
# because `T` is an ordinary variable: once something is assigned to it,
# `byrow = T` no longer means TRUE.
sigma <- matrix(c(20, 100, 100, 1000), nrow = 2, ncol = 2, byrow = TRUE)

# Hypothesised mean vector, in the order (Height, Weight).
mu0 <- c(70, 170)

# Sample size, taken from the data so it cannot drift from the vectors above.
N <- nrow(sample)

# H0: mu = mu0

# Test statistic: N * (X_bar - mu0)' sigma^{-1} (X_bar - mu0), a 1 x 1 matrix.
X_bar <- colMeans(sample)

(chi_cal <- N * (X_bar - mu0) %*% solve(sigma) %*% cbind(X_bar - mu0))

# 0.95 quantile of the chi-square distribution with 2 degrees of freedom.
(chi_tab <- qchisq(0.95, 2))

# Principal component analysis ----
# PCA of USArrests with every variable scaled to unit variance first.
model <- prcomp(USArrests, scale. = TRUE)

# Standard deviation and proportion of variance for each component.
summary(model)

# Scree plots of the component variances: bars first, then a line.
screeplot(model, type = "barplot", main = "Scree Plots with bars")
screeplot(model, type = "lines", main = "Scree Plots with line")

# Mean vector and covariance matrix of four variables ----
# Four variables recorded on the same 25 observations. Every numeric literal
# is kept whole on its line: a number split across a line break (for example
# `17` then `6` for 176) is a parse error in R.
y1 <- c(
  191, 195, 181, 183, 176, 208, 189, 197, 188, 192, 179, 183, 174,
  190, 188, 163, 195, 186, 181, 175, 192, 174, 176, 197, 190
)
y2 <- c(
  155, 149, 148, 153, 144, 157, 150, 159, 152, 150, 158, 147, 150,
  159, 151, 137, 155, 153, 145, 140, 154, 143, 139, 167, 163
)

x1 <- c(
  179, 201, 185, 188, 171, 192, 190, 189, 197, 187, 186, 174, 185,
  195, 187, 161, 183, 173, 182, 165, 185, 178, 176, 200, 187
)

x2 <- c(
  145, 152, 149, 149, 142, 152, 149, 152, 159, 151, 148, 147, 152,
  157, 158, 130, 158, 148, 146, 137, 152, 147, 143, 158, 150
)

# 25 x 4 data matrix filled column by column. The object shares its name with
# the matrix() function; the call still resolves to the function.
matrix <- matrix(c(y1, y2, x1, x2), ncol = 4, nrow = 25, byrow = FALSE)

colnames(matrix) <- c("y1", "y2", "x1", "x2")

matrix

# Column means and covariance matrix, shown to four decimals. `digits` is
# written out in full instead of relying on partial argument matching.
round(colMeans(matrix), digits = 4)

round(cov(matrix), digits = 4)

# Multivariate normal sample (three variables) ----
# Fix the seed so the simulated sample is reproducible.
set.seed(100)

# Mean vector and covariance matrix passed to the generator.
u <- c(3, 1, 4)
sigma1 <- matrix(c(6, 1, -2, 1, 13, 4, -2, 4, 4), nrow = 3)

library(MASS)

# Draw 50 observations, keep four decimals, and show the first two rows.
sample <- round(mvrnorm(n = 50, mu = u, Sigma = sigma1), digits = 4)
sample[1:2, ]

# Sample mean vector (as a one-column matrix) and sample covariance matrix,
# both rounded to zero decimal places.
sample_mean <- round(cbind(colMeans(sample)), digits = 0)
sample_varcov <- round(cbind(cov(sample)), digits = 0)

# Standardising a multivariate normal sample (four variables) ----
# Mean vector and covariance matrix passed to the generator.
mu2 <- c(-2, 3, -1, 5)
sigma2 <- matrix(
  c(11, -8, 3, 9, -8, 9, -3, -6, 3, -3, 2, 3, 9, -6, 3, 9),
  nrow = 4
)

# Draw 50 observations. The MASS:: prefix keeps the call working even when
# the package has not been attached with library().
y1 <- MASS::mvrnorm(n = 50, mu = mu2, Sigma = sigma2)

# Centre the sample: subtract mu2 from every row.
y_mu <- sweep(y1, 2, mu2)

# Method 1: Cholesky factor. chol() returns the upper-triangular factor U with
# t(U) %*% U equal to sigma2. It is not stored under the name `T`, which would
# mask the built-in shorthand for TRUE.
chol_upper <- chol(sigma2)
round(chol_upper, 4)

# Check: this product reproduces sigma2.
t(chol_upper) %*% chol_upper

chol_lower_inv <- solve(t(chol_upper))

# z = (t(U))^{-1} (y - mu2) for every observation, stored one per row.
z <- t(chol_lower_inv %*% t(y_mu))
dim(z)

round(head(z), 4)

# Mean vector and covariance matrix of z, rounded to zero decimal places.
round(colMeans(z), 0)

round(cov(z), 0)

# Method 2: symmetric square root of sigma2, built as C D^{1/2} C' from the
# eigendecomposition (computed once and reused).
eig <- eigen(sigma2)
c1 <- eig$vectors

D1 <- diag(sqrt(eig$values))

sigma3 <- c1 %*% D1 %*% t(c1)
sigma3

sigma3inv <- solve(sigma3)

# z2 = sigma3^{-1} (y - mu2), one column per observation. The second argument
# must be the centred sample: a 1 x 1 matrix such as t(1) is not conformable
# with the 4 x 4 inverse and makes crossprod() fail.
z2 <- round(crossprod(sigma3inv, t(y_mu)), digits = 4)

# First two standardised observations, one per row.
t(z2[, c(1, 2)])

You might also like