# K-means clustering: partition the rows of USArrests into three groups.
set.seed(100)  # fix the RNG so the random starting centres are reproducible
data <- USArrests
f <- kmeans(x = data, centers = 3, iter.max = 100, nstart = 2)
# Cluster label assigned to every row, followed by the size of each cluster.
t <- f[["cluster"]]
table(t)
# Hierarchical clustering of USArrests on Euclidean distances.
data <- USArrests
d <- dist(data, method = "euclidean")

# Keep one fit per linkage rule so they can be compared side by side.
# Previously all three were written to `fit` in turn, so the single- and
# complete-linkage results were thrown away as soon as they were computed.
fit_single <- hclust(d, method = "single")
fit_complete <- hclust(d, method = "complete")
fit_average <- hclust(d, method = "average")

# The remaining steps use average linkage, as the original script did.
fit <- fit_average

# Cut the dendrogram into three groups, tabulate the group sizes and draw it.
clusters <- cutree(fit, k = 3)
table(clusters)
plot(fit)
# Sample variance-covariance matrix and correlation matrix of three variables.
y1 <- c(35, 35, 40, 10, 6, 20, 35, 35, 35, 30)
y2 <- c(3.5, 4.9, 30.0, 2.8, 2.7, 2.8, 4.6, 10.9, 8.0, 1.6)
y3 <- c(2.80, 2.70, 4.38, 3.21, 2.73, 2.81, 2.88, 2.90, 3.28, 3.20)

# Bind the variables column-wise: one row per observation.
data1 <- cbind(y1, y2, y3)
data1

# Covariance matrix of the columns.
varcov <- cov(data1)
varcov

# Rescale the covariance matrix to unit diagonal to get the correlations.
corr <- cov2cor(varcov)
corr
# Test of H0: mu = mu0 for a bivariate mean when the covariance matrix
# `sigma` is treated as known. The statistic N * (X_bar - mu0)' sigma^{-1}
# (X_bar - mu0) is compared with the upper 5% point of a chi-square
# distribution on 2 degrees of freedom.
Height <- c(
  69, 74, 68, 70, 72, 67, 66, 70, 76, 68,
  72, 79, 74, 67, 66, 71, 74, 75, 75, 76
)
Weight <- c(
  153, 175, 155, 135, 172, 150, 115, 137, 200, 130,
  140, 265, 185, 112, 140, 150, 165, 185, 210, 220
)
# NOTE: `sample` masks the name of base::sample(); the name is kept because
# later statements in this script refer to it.
sample <- data.frame(Height, Weight)

# `byrow = TRUE` is spelled out: the alias `T` is an ordinary variable that
# this script reassigns further down, so `byrow = T` fails on a re-run.
sigma <- matrix(c(20, 100, 100, 1000), nrow = 2, ncol = 2, byrow = TRUE)
mu0 <- c(70, 170)
N <- nrow(sample)  # sample size taken from the data rather than hard-coded

# Test statistic (1 x 1 matrix). solve(sigma, b) solves the linear system
# directly instead of forming the inverse of sigma explicitly.
X_bar <- colMeans(sample)
mean_diff <- X_bar - mu0
chi_cal <- N * crossprod(mean_diff, solve(sigma, mean_diff))
chi_cal

# Critical value: 95th percentile of the chi-square distribution with 2 df.
chi_tab <- qchisq(0.95, 2)
chi_tab
# Principal component analysis of USArrests; scale. = TRUE rescales every
# variable to unit variance before the components are extracted.
model <- prcomp(x = USArrests, scale. = TRUE)
summary(model)

# Scree plots of the component variances: first as bars, then as a line.
# screeplot() is what plot() dispatches to for a prcomp object.
screeplot(model, type = "barplot", main = "Scree Plots with bars")
screeplot(model, type = "lines", main = "Scree Plots with line")
# Four variables (y1, y2, x1, x2) measured on 25 observations; print the data,
# the column means and the sample covariance matrix.
#
# Each vector previously had its 23rd value split across a line break
# ("17" / "6", "13" / "9", "17" / "6", "14" / "3"), which R rejects as a syntax
# error. The digits are rejoined here (176, 139, 176, 143), which gives
# exactly 25 values per variable.
y1 <- c(
  191, 195, 181, 183, 176, 208, 189, 197, 188, 192,
  179, 183, 174, 190, 188, 163, 195, 186, 181, 175,
  192, 174, 176, 197, 190
)
y2 <- c(
  155, 149, 148, 153, 144, 157, 150, 159, 152, 150,
  158, 147, 150, 159, 151, 137, 155, 153, 145, 140,
  154, 143, 139, 167, 163
)
x1 <- c(
  179, 201, 185, 188, 171, 192, 190, 189, 197, 187,
  186, 174, 185, 195, 187, 161, 183, 173, 182, 165,
  185, 178, 176, 200, 187
)
x2 <- c(
  145, 152, 149, 149, 142, 152, 149, 152, 159, 151,
  148, 147, 152, 157, 158, 130, 158, 148, 146, 137,
  152, 147, 143, 158, 150
)

# One column per variable; cbind() takes the column names from the arguments.
# NOTE: the object is still called `matrix`, as in the original script. This
# masks the name of base::matrix(), although calls to matrix() keep working
# because R skips non-function objects when looking up a function.
matrix <- cbind(y1, y2, x1, x2)
matrix

# `digits` is written out in full instead of relying on partial matching.
round(colMeans(matrix), digits = 4)
round(cov(matrix), digits = 4)
# Draw 50 observations from a trivariate normal distribution and compare the
# sample mean vector and covariance matrix with the generating parameters.
library(MASS)  # provides mvrnorm()
set.seed(100)

u <- c(3, 1, 4)
sigma1 <- matrix(
  c(6, 1, -2,
    1, 13, 4,
    -2, 4, 4),
  nrow = 3
)

# Simulated data rounded to 4 decimals; show the first two observations.
sample <- round(mvrnorm(n = 50, mu = u, Sigma = sigma1), digits = 4)
sample[1:2, ]

# Sample mean (as a column) and covariance matrix, rounded to whole numbers.
sample_mean <- round(cbind(colMeans(sample)), digits = 0)
sample_varcov <- round(cov(sample), digits = 0)
# Standardise draws from a 4-variate normal distribution in two ways:
#   (a) with the Cholesky factor of sigma2:       z  = (R')^{-1} (y - mu)
#   (b) with the symmetric square root of sigma2: z2 = (sigma2^{1/2})^{-1} (y - mu)
# mvrnorm() comes from MASS, which the script attaches earlier.
mu2 <- c(-2, 3, -1, 5)
sigma2 <- matrix(
  c(11, -8, 3, 9,
    -8, 9, -3, -6,
    3, -3, 2, 3,
    9, -6, 3, 9),
  nrow = 4
)
y1 <- mvrnorm(n = 50, mu = mu2, Sigma = sigma2)

# Centre every column at its population mean. sweep() avoids hard-coding the
# number of rows in a replicated mean matrix.
y_mu <- sweep(y1, 2, mu2, FUN = "-")

# (a) Cholesky factor: upper-triangular R with R'R = sigma2.
# The factor used to be stored in `T`, which overwrote the TRUE alias and made
# `byrow=T` earlier in the script fail on a re-run; it has its own name now.
chol_upper <- chol(sigma2)
round(chol_upper, 4)
crossprod(chol_upper)  # reproduces sigma2
T_t_inv <- solve(t(chol_upper))
z <- t(T_t_inv %*% t(y_mu))
dim(z)
round(head(z), 4)
round(colMeans(z), 0)
round(cov(z), 0)

# (b) Symmetric square root sigma3 = C D^{1/2} C' from the spectral
# decomposition of sigma2, which is now computed once and reused.
eig <- eigen(sigma2)
c1 <- eig$vectors
D1 <- diag(sqrt(eig$values))
sigma3 <- c1 %*% D1 %*% t(c1)
sigma3
sigma3inv <- solve(sigma3)

# NOTE(review): the original multiplied by t(1), a 1 x 1 matrix that is not
# conformable with the 4 x 4 sigma3inv, so the line could not run. The centred
# data t(y_mu) is assumed to be the intended operand - confirm.
z2 <- round(crossprod(sigma3inv, t(y_mu)), digits = 4)
t(z2[, c(1, 2)])  # first two standardised observations, one per row