8.2 某医院研究心电图指标对健康人(1),硬化病患者(2)和冠心病患者(3)的鉴别能力,先训练样本如下。试用距离判别(考虑方差相同与方差不同两种情况)、Bayes判别(考虑方差同和不同两种情况,且先验概率为11/23,7/23,5/23)对数据进行分析。
原始数据 data.txt如下, 3,4,5,6列为心电图指标,2列为类别
1 1 8.11 261.01 13.23 7.36 2 1 9.36 185.39 9.02 5.99 3 1 9.85 249.58 15.61 6.11 4 1 2.55 137.13 9.21 4.35 5 1 6.01 231.34 14.27 8.79 6 1 9.64 231.38 13.03 8.53 7 1 4.11 260.25 14.72 10.02 8 1 8.90 259.91 14.16 9.79 9 1 7.71 273.84 16.01 8.79 10 1 7.51 303.59 19.14 8.53 11 1 8.06 231.03 14.41 6.15 12 2 6.80 308.90 15.11 8.49 13 2 8.68 258.69 14.02 7.16 14 2 5.67 355.54 15.03 9.43 17 2 3.71 316.32 17.12 8.17 17 2 5.37 274.57 16.75 9.67 18 2 9.89 409.42 19.47 10.49 19 3 5.22 330.34 18.19 9.61 20 3 4.71 331.47 21.26 13.72 21 3 4.71 352.50 20.79 11.00 22 3 3.36 347.31 17.90 11.19 23 3 8.27 189.56 12.74 6.94
函数distinguish.distance.R
distinguish.distance<-function
(TrnX, TrnG, TstX = NULL, var.equal = FALSE){
if ( is.factor(TrnG) == FALSE){
mx<-nrow(TrnX); mg<-nrow(TrnG)
TrnX<-rbind(TrnX, TrnG)
TrnG<-factor(rep(1:2, c(mx, mg)))
}
if (is.null(TstX) == TRUE) TstX<-TrnX
if (is.vector(TstX) == TRUE) TstX<-t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX<-as.matrix(TstX)
if (is.matrix(TrnX) != TRUE) TrnX<-as.matrix(TrnX)
nx<-nrow(TstX)
blong<-matrix(rep(0, nx), nrow=1, dimnames=list("blong", 1:nx))
g<-length(levels(TrnG))
mu<-matrix(0, nrow=g, ncol=ncol(TrnX))
for (i in 1:g)
mu[i,]<-colMeans(TrnX[TrnG==i,])
D<-matrix(0, nrow=g, ncol=nx)
if (var.equal == TRUE || var.equal == T){
for (i in 1:g)
D[i,]<- mahalanobis(TstX, mu[i,], var(TrnX))
}
else{
for (i in 1:g)
D[i,]<- mahalanobis(TstX, mu[i,], var(TrnX[TrnG==i,]))
}
for (j in 1:nx){
dmin<-Inf
for (i in 1:g)
if (D[i,j]<dmin){
dmin<-D[i,j]; blong[j]<-i
}
}
blong
}
函数distinguish.bayes.R
distinguish.bayes<-function
(TrnX, TrnG, p=rep(1, length(levels(TrnG))),
TstX = NULL, var.equal = FALSE){
if ( is.factor(TrnG) == FALSE){
mx<-nrow(TrnX); mg<-nrow(TrnG)
TrnX<-rbind(TrnX, TrnG)
TrnG<-factor(rep(1:2, c(mx, mg)))
}
if (is.null(TstX) == TRUE) TstX<-TrnX
if (is.vector(TstX) == TRUE) TstX<-t(as.matrix(TstX))
else if (is.matrix(TstX) != TRUE)
TstX<-as.matrix(TstX)
if (is.matrix(TrnX) != TRUE) TrnX<-as.matrix(TrnX)
nx<-nrow(TstX)
blong<-matrix(rep(0, nx), nrow=1, dimnames=list("blong", 1:nx))
g<-length(levels(TrnG))
mu<-matrix(0, nrow=g, ncol=ncol(TrnX))
for (i in 1:g)
mu[i,]<-colMeans(TrnX[TrnG==i,])
D<-matrix(0, nrow=g, ncol=nx)
if (var.equal == TRUE || var.equal == T){
for (i in 1:g){
d2 <- mahalanobis(TstX, mu[i,], var(TrnX))
D[i,] <- d2 - 2*log(p[i])
}
}
else{
for (i in 1:g){
S<-var(TrnX[TrnG==i,])
d2 <- mahalanobis(TstX, mu[i,], S)
D[i,] <- d2 - 2*log(p[i])-log(det(S))
}
}
for (j in 1:nx){
dmin<-Inf
for (i in 1:g)
if (D[i,j]<dmin){
dmin<-D[i,j]; blong[j]<-i
}
}
blong
}
运行脚本
data <- read.table("data.txt");
X <- data[,3:6];
G <- factor(data[,2]);
source("distinguish.distance.R");
distinguish.distance(X,G);
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#blong 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 3 3 3 3
distinguish.distance(X,G,var.equal=TRUE);
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#blong 1 1 1 1 1 1 3 1 1 3 1 2 1 2 2 3 2 2 3 3 3 1
source("distinguish.bayes.R");
distinguish.bayes(X,G, p=c(rep(11/23,11),rep(7/23,7),rep(5/23,5)));
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#blong 2 1 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 3 3 3 3
distinguish.bayes(X,G, p=c(rep(11/23,11),rep(7/23,7),rep(5/23,5)),var.equal=TRUE);
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
#blong 1 1 1 1 1 1 3 1 1 3 1 2 1 2 2 3 2 2 3 3 3 1
博文源代码和习题均来自于教材《统计建模与R软件》(ISBN:9787302143666,作者:薛毅)。