zoukankan      html  css  js  c++  java
  • 机器学习第二次作业——鸢尾花

    机器学习第二次作业——鸢尾花

    1.加载Iris数据集

    该数据集是一个字典

    {

    "data":...,

    "target":...,

    "target_names":...,

    "DESCR":...

    }

    data字段是numpy数据特征

    target是花的类型

    target_names是指花名

    DESCR是数据集描述

    2.数据集可视化

    2.1 数据可视化展示

    image.png

    由上图可看出versicolor 和virginica 是比较难以区分的,setosa相较于它们区分特征更明显

    2.2代码实现:

    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    import numpy as np
    # plt.rcParams['font.sans-serif']=['SimHei'] # enable to render Chinese labels correctly
    plt.rcParams['savefig.dpi'] = 300 # DPI used when saving figures
    plt.rcParams['figure.dpi'] = 300 # DPI used when rendering figures
    
    def data_visualization_3D(df_Iris,tar):
        """Draw four 3-D scatter plots, one per combination of three feature columns.

        Every subplot contains one scatter series per class label (0, 1 and 2),
        drawn with its own colour and marker.

        Args:
            df_Iris: 2-D NumPy array indexed as ``df_Iris[row_mask, column]``;
                columns 0 to 3 are read.
            tar: 1-D NumPy array of class labels aligned with the rows of
                ``df_Iris``; rows are selected with ``tar == label``.

        Returns:
            None. The figure is shown with ``plt.show()``.
        """
        # Column indices placed on the x/y/z axes of each subplot, and the
        # axis captions that go with them.
        feature_triples = [[0, 1, 2], [1, 2, 3], [0, 1, 3], [0, 2, 3]]
        axis_names = [["sepal_length (cm)", "sepal_width (cm)", "petal_length (cm)"],
                      ["sepal_width (cm)", "petal_length (cm)", "petal_width (cm)"],
                      ["sepal_length", "sepal_width", "petal_width"],
                      ["sepal_length", "petal_length", "petal_width"]]
        # (label value, colour, marker, legend text) for each scatter series.
        series = [(0, "r", "o", "setosa"),
                  (1, "b", "x", "versicolor"),
                  (2, "g", "^", "virginica")]

        fig = plt.figure(figsize=(10, 10))
        for position, (columns, names) in enumerate(zip(feature_triples, axis_names)):
            ax = fig.add_subplot(221 + position, projection="3d")
            for label, colour, marker, legend_text in series:
                rows = tar == label
                ax.scatter(df_Iris[rows, columns[0]],
                           df_Iris[rows, columns[1]],
                           df_Iris[rows, columns[2]],
                           c=colour, marker=marker, label=legend_text)
            ax.set_xlabel(names[0])
            ax.set_ylabel(names[1])
            ax.set_zlabel(names[2])
            ax.legend(loc=0)
        plt.show()
    
        
      
    # Load the data set and show the 3-D scatter plots.
    # NOTE(review): readData is not defined in this listing; it presumably
    # returns (feature array, label array) — confirm against its definition.
    data,tar=readData()
    data_visualization_3D(data,tar)
    

    3.MED线性分类

    3.1 分类结果

    image.png

    3.2 量化指标

    Accuracy: 1.0
    Recall: 1.0
    specificity: 1.0
    F1_Score 1.0

    3.3 核心代码

    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    import numpy as np
    
    def get_Iris_linear(data,tar,flag):
        """Return the samples and labels whose label differs from ``flag``.

        Args:
            data: array whose first axis enumerates samples (``data.shape[0]``
                is used as the sample count).
            tar: labels, indexable by sample position.
            flag: label value to exclude.

        Returns:
            Tuple ``(samples, labels)``, both converted to float64 NumPy arrays.
        """
        kept_rows = []
        kept_labels = []
        for index in range(data.shape[0]):
            label = tar[index]
            if label == flag:
                continue
            kept_rows.append(data[index])
            kept_labels.append(label)
        samples = np.asarray(kept_rows, dtype="float64")
        labels = np.asarray(kept_labels, dtype="float64")
        return samples, labels
    
    def hold_out_partition(testRate, trainRate, data, tar): # hold-out split
        """Split ``data`` into a test and a training set with the hold-out method.

        The samples are divided into two groups, label ``1`` and every other
        label, and ``int(testRate * group_size)`` samples are drawn at random
        from each group for the test set. The remaining samples of each group
        form the training set. The test size now follows the real group size
        instead of a fixed 50 samples per group, so groups of any size work.

        Args:
            testRate: fraction of each group that goes to the test set.
            trainRate: unused; kept so existing calls keep working. The
                training set is always the complement of the test set.
            data: array whose first axis enumerates samples.
            tar: labels, indexable by sample position.

        Returns:
            Tuple ``(testSet, testTar, trainSet, trainTar)`` of float64 NumPy
            arrays. In both sets the label-1 group comes first; test samples
            are in draw order and training samples in index order.
        """
        import random
        import numpy as np

        sampleCount = data.shape[0]
        groups = (
            [i for i in range(sampleCount) if tar[i] == 1],
            [i for i in range(sampleCount) if tar[i] != 1],
        )
        testIdx = []
        trainIdx = []
        for group in groups:
            chosen = random.sample(group, int(testRate * len(group)))
            chosenLookup = set(chosen)  # O(1) membership instead of list.remove
            testIdx.extend(chosen)
            trainIdx.extend(i for i in group if i not in chosenLookup)

        testSet = [data[i] for i in testIdx]
        testTar = [tar[i] for i in testIdx]
        trainSet = [data[i] for i in trainIdx]
        trainTar = [tar[i] for i in trainIdx]
        return (np.asarray(testSet, dtype="float64"), np.asarray(testTar, dtype="float64"),
                np.asarray(trainSet, dtype="float64"), np.asarray(trainTar, dtype="float64"))
    
    # Scatter colour used for each class label (passed as c= in classifier_MED).
    cmap={
    	0:"r",
    	1:"b",
    	2:"g"
    }
    # Scatter marker used for each class label (passed as marker= in classifier_MED).
    shapeMap={
    	0:"o",
    	1:"x",
    	2:"^"
    }
    # Two-way lookup: species string -> class label, and class label -> short
    # species name (the integer keys supply the legend text in classifier_MED).
    # NOTE(review): this name shadows the builtin map(); it is left unchanged
    # because classifier_MED reads it by this name.
    map={
    	"Iris-setosa":0,
    	"Iris-versicolor":1,
    	"Iris-virginica":2,
    	0:"setosa",
    	1:"versicolor",
    	2:"virginica"
    
    }
    def _safe_ratio(numerator, denominator):
        """Return ``numerator / denominator`` as a float, or 0.0 when the denominator is 0."""
        return float(numerator / denominator) if denominator else 0.0


    def classifier_MED(data,tar,posC,negC): # MED classifier
        """Train and evaluate a two-class minimum-Euclidean-distance classifier.

        The data are split with ``partition.hold_out_partition(0.3, 0.7, ...)``.
        The mean of each class is computed on the training split, and a test
        sample ``x`` is assigned to ``posC`` when
        ``(z2 - z1) . (x - (z1 + z2) / 2) >= 0``, where ``z1`` is the ``negC``
        mean and ``z2`` the ``posC`` mean. Accuracy, recall, specificity and F1
        are printed; a ratio with a zero denominator is reported as 0.0 instead
        of raising ``ZeroDivisionError``. Finally the test samples and the
        decision plane are drawn in four 3-D subplots.

        Args:
            data: 2-D NumPy array of samples with at least four feature columns.
            tar: 1-D array of class labels aligned with ``data``.
            posC: label treated as the positive class.
            negC: label treated as the negative class.

        Returns:
            None. Metrics are printed and the figure is shown.

        Raises:
            ValueError: if the training split has no sample of ``posC`` or of
                ``negC``, because the class means would be undefined.
        """
        testSet, testTar, trainSet, trainTar = partition.hold_out_partition(0.3,0.7,data,tar)

        # Class means: z1 for the negative class, z2 for the positive class.
        negRows = trainSet[trainTar == negC]
        posRows = trainSet[trainTar == posC]
        if len(negRows) == 0 or len(posRows) == 0:
            raise ValueError("training split must contain samples of both posC and negC")
        z1 = negRows.sum(axis=0) / len(negRows)
        z2 = posRows.sum(axis=0) / len(posRows)

        # Signed decision value per test sample; >= 0 means "closer to z2".
        direction = z2 - z1
        midpoint = (z1 + z2) / 2
        testRes = np.array([np.dot(direction, x - midpoint) for x in testSet])

        testTar = testTar.astype("int16")
        predictedPos = testRes >= 0
        predictedNeg = testRes < 0
        isPos = testTar == posC
        isNeg = testTar == negC
        TP = int(np.sum(isPos & predictedPos))
        FN = int(np.sum(isPos & predictedNeg))
        TN = int(np.sum(isNeg & predictedNeg))
        FP = int(np.sum(isNeg & predictedPos))

        accuracy = _safe_ratio(TP + TN, TP + TN + FP + FN)
        recall = _safe_ratio(TP, TP + FN)
        precision = _safe_ratio(TP, TP + FP)
        specificity = _safe_ratio(TN, TN + FP)
        F1_Score = _safe_ratio(2 * recall * precision, recall + precision)
        print("Accuracy:",accuracy)
        print("Recall:",recall)
        print("specificity:",specificity)
        print("F1_Score",F1_Score)

        # Plotting
        fig = plt.figure(figsize=(10, 10))
        xx = [[0, 1, 2], [1, 2, 3], [0, 1, 3], [0, 2, 3]]
        yy = [["sepal_length (cm)", "sepal_width (cm)", "petal_length (cm)"],
              ["sepal_width (cm)", "petal_length (cm)", "petal_width (cm)"],
              ["sepal_length", "sepal_width", "petal_width"], ["sepal_length", "petal_length", "petal_width"]]
        # Per-feature range of the test set, computed once for all subplots.
        lows = testSet.min(axis=0)
        highs = testSet.max(axis=0)
        for i in range(4):
            colX, colY, colZ = xx[i]
            ax = fig.add_subplot(221 + i, projection="3d")
            X, Y = np.meshgrid(np.arange(lows[colX], highs[colX], 1),
                               np.arange(lows[colY], highs[colY], 1))
            # Class means restricted to the three plotted features.
            u1 = z1[[colX, colY, colZ]]
            u2 = z2[[colX, colY, colZ]]
            u = u2 - u1
            # Solve u . (p - (u1 + u2) / 2) = 0 for the third coordinate.
            # NOTE: if u[2] is 0 the plane is vertical and Z is not finite.
            Z = (np.dot(u, (u1 + u2) / 2) - u[0] * X - u[1] * Y) / u[2]
            ax.scatter(testSet[predictedPos, colX], testSet[predictedPos, colY], testSet[predictedPos, colZ],
                       c=cmap[posC], marker=shapeMap[posC], label=map[posC])
            ax.scatter(testSet[predictedNeg, colX], testSet[predictedNeg, colY], testSet[predictedNeg, colZ],
                       c=cmap[negC], marker=shapeMap[negC], label=map[negC])
            ax.set_xlabel(yy[i][0])
            ax.set_ylabel(yy[i][1])
            ax.set_zlabel(yy[i][2])
            ax.plot_surface(X, Y, Z, alpha=0.4)
            ax.legend(loc=0)
        plt.show()
    	
    # Drop the samples labelled 2, then run the MED classifier on the two
    # remaining classes with label 0 as positive and label 1 as negative.
    linear_data,linear_tar=get_Iris_linear(data,tar,2)
    classifier_MED(linear_data,linear_tar,0,1)
    

    4.数据集白化

    4.1 图片展示

    image.png

    白化之后,数据在某些维度上更容易区分了

    4.2 核心代码

    def witening(data):
        """Whiten ``data`` so that its feature covariance becomes the identity.

        With ``Ex = U diag(a) U^T`` the eigendecomposition of the feature
        covariance, the whitening matrix is ``W = diag(a ** -0.5) U^T``. Each
        sample ``x`` is mapped to ``W x``, so the row-wise data matrix is
        multiplied by ``W^T``. Multiplying by ``W`` itself, as this function
        used to, does not give an identity covariance. The data are not
        mean-centred.

        Directions whose eigenvalue is zero up to rounding carry no variance
        and are mapped to 0 instead of producing inf/NaN.

        Args:
            data: 2-D array of shape (samples, features).

        Returns:
            2-D float64 array of the same shape holding the whitened samples.
        """
        data = np.asarray(data, dtype="float64")
        Ex = np.atleast_2d(np.cov(data, rowvar=False))  # feature covariance matrix
        print(Ex.shape)
        # Ex is symmetric, so eigh returns real eigenvalues and orthonormal
        # eigenvectors (as columns); no manual normalisation is needed.
        eigenvalues, eigenvectors = np.linalg.eigh(Ex)
        tolerance = np.finfo(float).eps * len(eigenvalues) * max(float(eigenvalues.max()), 0.0)
        usable = eigenvalues > tolerance
        scale = np.zeros_like(eigenvalues)
        scale[usable] = eigenvalues[usable] ** -0.5
        W = np.dot(np.diag(scale), eigenvectors.transpose())
        print(W)
        return np.dot(data, W.transpose())
    	
    # NOTE(review): the return value of this first call is discarded; its only
    # visible effect is the printing done inside witening.
    witening(data)
    # Plot the whitened features with the 3-D scatter helper.
    visualization.data_visualization_3D(witening(data),tar)
    

    5.MED非线性分类

    5.1 结果展示

    image.png

    5.2 量化指标

    Accuracy: 0.9
    Recall: 0.8666666666666667
    specificity: 0.9333333333333333
    F1_Score 0.896551724137931

    5.3核心代码

    def get_Iris_noLinear(data,tar,flag):
        """Return the samples and labels whose label is not ``flag``.

        Args:
            data: array whose first axis enumerates samples.
            tar: labels, indexable by sample position.
            flag: label value to leave out.

        Returns:
            Tuple ``(samples, labels)`` as float64 NumPy arrays.
        """
        selected = [row for row in range(data.shape[0]) if tar[row] != flag]
        samples = np.asarray([data[row] for row in selected], dtype="float64")
        labels = np.asarray([tar[row] for row in selected], dtype="float64")
        return samples, labels
    
    # Drop the samples labelled 0, then run the MED classifier on the two
    # remaining classes with label 1 as positive and label 2 as negative.
    noLinear_data,noLinear_tar=get_Iris_noLinear(data,tar,0)
    classifier_MED(noLinear_data,noLinear_tar,1,2)
    

    6.多分类贝叶斯分类器

    6.1 数据可视化

    image.png

    Accuracy: 0.9933333333333334

    6.2 核心代码

    # K-fold cross-validation split
    def K_Folds_Cross_Validation(data,tar,k):
        """Split ``data`` into ``k`` stratified folds.

        The samples of every class are cut, in index order, into ``k`` equal
        chunks, and fold ``i`` is the concatenation of chunk ``i`` of each
        class. Classes are visited in order of first appearance in ``tar``.
        When a class size is not a multiple of ``k`` the surplus samples at the
        end of that class are left out so that all folds have the same size.

        For 150 samples sorted as three classes of 50 and ``k=5`` this gives
        rows ``[10i, 10i+10)``, ``[50+10i, 60+10i)`` and ``[100+10i, 110+10i)``
        for fold ``i``, which are the slices that used to be hard-coded.

        Args:
            data: array whose first axis enumerates samples.
            tar: 1-D array of class labels aligned with ``data``.
            k: number of folds; a positive integer.

        Returns:
            Tuple ``(Set, Tar)`` where ``Set`` has shape
            ``(k, fold_size, ...)`` and ``Tar`` has shape ``(k, fold_size)``.

        Raises:
            ValueError: if ``k`` is not positive, ``tar`` is empty, or a class
                has fewer than ``k`` samples.
        """
        import numpy as np

        data = np.asarray(data)
        tar = np.asarray(tar)
        if k < 1:
            raise ValueError("k must be a positive integer")

        labels, firstSeen = np.unique(tar, return_index=True)
        if labels.size == 0:
            raise ValueError("tar must not be empty")

        perClass = []
        for label in labels[np.argsort(firstSeen)]:
            members = np.flatnonzero(tar == label)
            chunk = len(members) // k
            if chunk == 0:
                raise ValueError("every class needs at least k samples")
            # One row of sample indices per fold.
            perClass.append(members[:chunk * k].reshape(k, chunk))

        foldIdx = np.concatenate(perClass, axis=1)
        return data[foldIdx], tar[foldIdx]
    
    
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    import numpy as np
    from scipy import stats
    import visualization,partition
    # plt.rcParams['font.sans-serif']=['SimHei'] # enable to render Chinese labels correctly
    plt.rcParams['savefig.dpi'] = 300 # DPI used when saving figures
    plt.rcParams['figure.dpi'] = 300 # DPI used when rendering figures
    # Species string -> integer class label.
    # NOTE(review): this name shadows the builtin map(). If this snippet shares
    # a module with the earlier `map` dict, this rebinding drops the integer
    # keys that classifier_MED looks up for its legend labels — confirm.
    map={
    	"Iris-setosa":0,
    	"Iris-versicolor":1,
    	"Iris-virginica":2
    
    }
    # Bayes classifier
    class BayesParameter: # stores the parameters the Bayes classifier keeps per class
        """Plain record of the values estimated for one class.

        Attributes are stored exactly as given: ``mean``, ``cov`` and
        ``category``.
        """

        def __init__(self,mean,cov,category):
            self.mean, self.cov, self.category = mean, cov, category
    class BayesClassifier: # Bayes classifier with Gaussian class-conditional densities
        """Classify samples by the highest multivariate-normal density.

        ``train`` estimates one mean vector and covariance matrix per class;
        ``predit`` returns the class whose Gaussian gives the sample the
        highest log-density. No class prior is applied.
        """

        def __init__(self):
            self.parameters=[]

        def train(self,X_data,Y_data):
            """Estimate the per-class mean and covariance.

            Previously stored parameters are discarded, so calling ``train``
            again refits the classifier instead of appending duplicates.

            Args:
                X_data: 2-D array of shape (samples, features).
                Y_data: 1-D array of class labels aligned with ``X_data``.
            """
            X_data = np.asarray(X_data)
            Y_data = np.asarray(Y_data)
            self.parameters = []
            # np.unique gives a deterministic class order and does not depend
            # on the builtin name `set`, which the script code rebinds.
            for category in np.unique(Y_data):
                X_newData = X_data[Y_data == category]
                mean = np.mean(X_newData, axis=0)
                # Rows are samples, hence rowvar=False.
                cov = np.cov(X_newData, rowvar=False)
                self.parameters.append(BayesParameter(mean, cov, category))

        def predit(self,data):
            """Return the category with the highest Gaussian log-density for ``data``.

            Log-densities are compared so that samples far from every class
            mean, where the plain pdf underflows to 0, are still ranked.

            Args:
                data: one sample, as a feature vector.

            Returns:
                The ``category`` of the best-scoring class, or -1 when there
                are no parameters or no class has a density above zero.
            """
            res = -1
            bestScore = -np.inf
            for parameter in self.parameters:
                score = stats.multivariate_normal.logpdf(data, mean=parameter.mean, cov=parameter.cov)
                if score > bestScore:
                    res = parameter.category
                    bestScore = score
            return res
    
        
    if __name__=="__main__":
        # 5-fold cross-validation of the Bayes classifier. For every fold the
        # model is trained on the other folds and scored on the held-out one.
        # The fold arrays get their own names so that neither the builtin `set`
        # nor the global label array `tar` is overwritten.
        foldCount = 5
        folds, foldLabels = partition.K_Folds_Cross_Validation(data,tar,foldCount)
        accuracy = 0
        print(foldLabels[0].shape)
        for i in range(foldCount): # fold i is the test set
            trainFolds = [j for j in range(foldCount) if j != i]
            X_data = np.concatenate([folds[j] for j in trainFolds], axis=0)
            Y_data = np.concatenate([foldLabels[j] for j in trainFolds], axis=0)
            bc = BayesClassifier()
            bc.train(X_data, Y_data)
            y_predict = np.array([bc.predit(sample) for sample in folds[i]])
            accuracy += np.sum(y_predict == foldLabels[i]) / foldLabels[i].shape[0]
        accuracy = accuracy / foldCount
        print(accuracy)
    
    # Background on how covariance relates to the Gaussian distribution:
    # https://blog.csdn.net/weixin_37895339/article/details/80351541
    def data_visualization_2D_Bayes(data,tar):
        """Plot Bayes decision regions for six pairs of features.

        The data are split with ``partition.hold_out_partition(0.3, 0.7, ...)``.
        A classifier trained on all features predicts the label of every test
        sample. For each feature pair a second classifier is trained on those
        two features only and evaluated on a 100 x 100 grid to fill the
        decision regions. The test samples are drawn on top, grouped by the
        label predicted from all features. Region colours follow the scatter
        colours: red for label 0, blue for label 1, green for label 2.

        Args:
            data: 2-D NumPy array of samples with at least four feature columns.
            tar: 1-D array of class labels aligned with ``data``.

        Returns:
            None. The figure is shown with ``plt.show()``.
        """
        testSet, testTar, trainSet, trainTar = partition.hold_out_partition(0.3, 0.7, data, tar)
        bc = BayesClassifier()
        bc.train(trainSet, trainTar)
        testPredict = np.array([bc.predit(x) for x in testSet],dtype="int")

        # Plotting
        fig = plt.figure(figsize=(10, 10))
        featurePairs = [[0, 1], [1, 2], [2, 3], [0,2],[0,3],[1,3]]
        axisNames = [["sepal_length (cm)", "sepal_width (cm)"],
                     ["sepal_width (cm)", "petal_length (cm)"],
                     ["petal_length (cm)", "petal_width(cm)"],  # columns 2 and 3
                     ["sepal_length (cm)","petal_length (cm)"],
                     ["sepal_length (cm)","petal_width(cm)"],
                     ["sepal_width (cm)","petal_width(cm)"]]
        # (label value, colour, marker, legend text) for each scatter series.
        series = [(0, 'r', 'o', "setosa"),
                  (1, 'b', 'x', "versicolor"),
                  (2, 'g', '^', "virginica")]
        lows = testSet.min(axis=0)
        highs = testSet.max(axis=0)
        for i, (pair, names) in enumerate(zip(featurePairs, axisNames)):
            colX, colY = pair
            ax = fig.add_subplot(321 + i)
            # Grid covering the test samples with a 0.5 margin on every side.
            xlist = np.linspace(lows[colX] - 0.5, highs[colX] + 0.5, 100)
            ylist = np.linspace(lows[colY] - 0.5, highs[colY] + 0.5, 100)
            XX, YY = np.meshgrid(xlist, ylist)
            pairClassifier = BayesClassifier()
            pairClassifier.train(trainSet[:, pair], trainTar)
            gridPoints = np.column_stack((np.ravel(XX), np.ravel(YY)))
            zz = np.array([pairClassifier.predit(point.reshape(1, -1)) for point in gridPoints])
            Z = zz.reshape(XX.shape)
            # Half-integer levels give each label its own band, coloured in
            # label order to match the scatter series.
            ax.contourf(XX, YY, Z, levels=[-0.5, 0.5, 1.5, 2.5], alpha=.1,
                        colors=('red', 'blue', 'green'))
            for label, colour, marker, legendText in series:
                rows = testPredict == label
                ax.scatter(testSet[rows, colX], testSet[rows, colY],
                           c=colour, marker=marker, label=legendText)
            ax.set_xlabel(names[0])
            ax.set_ylabel(names[1])
            ax.legend(loc=0)
        plt.show()
    # Reload the data set and draw the 2-D Bayes decision regions.
    # NOTE(review): readData is not defined in this listing; it presumably
    # returns (feature array, label array) — confirm against its definition.
    data,tar=readData()
    data_visualization_2D_Bayes(data,tar)
    
  • 相关阅读:
    Xamarin和微软发起.NET基金会
    迷你 MVC
    编制进度计划、保存基准
    JQuery UI Layout Plug-in布局
    (转载)Log4Net 在多层项目中的使用小记
    Json.Net6.0
    EasyUI搭建前端框架
    using和yield return
    ExpandoObject,DynamicObject,DynamicMetaObject
    Net 4.0 之 Dynamic 动态类型
  • 原文地址:https://www.cnblogs.com/JustNo/p/12640959.html
Copyright © 2011-2022 走看看