zoukankan      html  css  js  c++  java
  • 《机器学习》第一次作业——第一至三章学习记录和心得

    《机器学习》第一次作业——第一至三章学习记录和心得

    orz懒人直接上图了,真的好多作业好多考试啊啊啊啊啊啊啊啊啊啊

    关于讨论区作业

    机器学习的作业先做完了,所以可以参考这篇博客,看得出来实验班的大哥哥大姐姐们还是勤奋的,居然有40+访问量了……

    1.复现MICD分类器的源码

    请根据第二章的理论知识,尝试用Python、MATLAB等常见语言复现MICD分类器。

    可以在讨论区内跟大家分享一下自己的代码

    from sklearn import datasets
    import pandas as pd
    import numpy as np
    import seaborn as sns
    from matplotlib import pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from scipy.stats import multivariate_normal as gaussian_cal
    
    #使用np的数组方便向量运算
    def getIrisLinear(data,iris_type,flag):
        """Drop every sample labelled ``flag`` and return the rest as arrays.

        Returns a ``(samples, labels)`` pair of float64 NumPy arrays holding the
        entries whose label differs from ``flag``, in their original order.
        """
        kept_samples = []
        for idx in range(len(data)):
            if iris_type[idx] == flag:
                continue
            kept_samples.append(data[idx])
        kept_labels = [label for label in iris_type if label != flag]
        return (
            np.asarray(kept_samples, dtype="float64"),
            np.asarray(kept_labels, dtype="float64"),
        )
    
    
    def _hold_out_split_indices(indices, rng):
        """Randomly move 70% of ``indices`` (rounded down) into a training list.

        Returns ``(train_indices, test_indices)``. Training indices are in draw
        order; the left-over test indices keep their original relative order.
        """
        remaining = list(indices)
        train_size = int(len(remaining) * 7 / 10)
        chosen = []
        for _ in range(train_size):
            # pop() removes by position, so a sample can never be drawn twice
            # and no linear search for the value is needed.
            chosen.append(remaining.pop(rng.randint(0, len(remaining) - 1)))
        return chosen, remaining


    def hold_out_partition(data_linear,iris_type_linear):
        """Split a two-class data set 70/30 into training and test sets.

        Samples are grouped into "label == 0" and "everything else", and each
        group is split separately so both keep the same 70/30 proportion.
        Training samples are drawn at random with the ``random`` module; the
        remaining samples of each group form the test set.

        Returns ``(train_data, train_type, test_data, test_type)`` where the
        data arrays are float64 and the label arrays are int16. In both sets the
        samples of the first group come before those of the second group.
        """
        import random

        first_cur = []
        second_cur = []
        for i in range(len(data_linear)):
            (first_cur if iris_type_linear[i] == 0 else second_cur).append(i)

        train_idx = []
        test_idx = []
        # 70/30 split, applied to each group in turn.
        for group in (first_cur, second_cur):
            picked, left_over = _hold_out_split_indices(group, random)
            train_idx.extend(picked)
            test_idx.extend(left_over)

        train_data = [data_linear[i] for i in train_idx]
        train_type = [iris_type_linear[i] for i in train_idx]
        test_data = [data_linear[i] for i in test_idx]
        test_type = [iris_type_linear[i] for i in test_idx]
        return np.asarray(train_data,dtype="float64"),np.asarray(train_type,dtype="int16"),np.asarray(test_data,dtype="float64"),np.asarray(test_type,dtype="int16")
    
    
    def _ratio_or_nan(numerator, denominator):
        """Return ``numerator / denominator``, or NaN when the denominator is 0."""
        if denominator == 0:
            return float("nan")
        return numerator / denominator


    def MED_linear_classification(data,iris_type,t,f,flag):
        """Train, evaluate and plot a two-class minimum-Euclidean-distance classifier.

        Samples labelled ``flag`` are removed, the rest is split 70/30 with
        ``hold_out_partition``, and the class means of the training part define
        the decision function ``g(x) = (z_t - z_f) . (x - (z_t + z_f) / 2)``.
        A test sample with ``g(x) >= 0`` is assigned to class ``t`` (treated as
        the positive class), otherwise to class ``f``.

        Prints recall, specificity (under the label "Specify"), precision and
        F1 score for the test split, then shows four 3-D scatter plots (one per
        combination of three of the four features) of the test samples coloured
        by predicted class, together with the decision plane.

        Parameters:
            data: samples, one row per sample, four feature columns.
            iris_type: class label (0, 1 or 2) of each sample.
            t: label treated as the positive class.
            f: label treated as the negative class.
            flag: label of the class to leave out.

        Raises:
            ValueError: if the training split lacks samples of ``t`` or ``f``.
        """
        data_linear,iris_type_linear=getIrisLinear(data,iris_type,flag)
        train_data,train_type,test_data,test_type = hold_out_partition(data_linear,iris_type_linear)

        # Class means. Select by the caller's labels rather than a fixed label
        # value, so the ">= 0 means class t" rule holds for every class pair.
        samples_f = train_data[train_type == f]
        samples_t = train_data[train_type == t]
        if len(samples_f) == 0 or len(samples_t) == 0:
            raise ValueError("training split must contain samples of both class t and class f")
        z1 = samples_f.sum(axis=0)/len(samples_f)
        z2 = samples_t.sum(axis=0)/len(samples_t)

        # Sign of the decision function for every test sample at once.
        test_result = np.sign(np.dot(test_data-(z1+z2)/2,z2-z1))

        # Confusion matrix with class t as "positive".
        predicted_t = test_result >= 0
        predicted_f = test_result < 0
        actual_t = test_type == t
        actual_f = test_type == f
        TP = int(np.sum(predicted_t & actual_t))
        FP = int(np.sum(predicted_t & actual_f))   # predicted t, really f
        FN = int(np.sum(predicted_f & actual_t))   # predicted f, really t
        TN = int(np.sum(predicted_f & actual_f))
        Recall = _ratio_or_nan(TP, TP+FN)
        Precision = _ratio_or_nan(TP, TP+FP)
        print("Recall= %f"% Recall)
        print("Specify= %f"% _ratio_or_nan(TN, TN+FP))
        print("Precision= %f"% Precision)
        print("F1 Score= %f"% _ratio_or_nan(2*Recall*Precision, Recall+Precision))

        # Plotting: one subplot per triple of feature indices.
        xx = [[0, 1, 2], [1, 2, 3], [0, 2, 3], [0, 1, 3]]
        iris_name =['setosa','vesicolor','virginica']
        iris_color = ['r','g','b']
        iris_icon = ['o','x','^']
        fig = plt.figure(figsize=(20, 20))
        feature = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
        lower = test_data.min(axis=0)
        upper = test_data.max(axis=0)
        for i in range(4):
            ix, iy, iz = xx[i]
            ax = fig.add_subplot(221 + i, projection="3d")
            X = np.arange(lower[ix],upper[ix],1)
            Y = np.arange(lower[iy],upper[iy],1)
            X,Y = np.meshgrid(X,Y)
            m1 = np.array([z1[ix],z1[iy],z1[iz]])
            m2 = np.array([z2[ix],z2[iy],z2[iz]])
            m = m2-m1
            # Solve m . (p - (m1 + m2) / 2) = 0 for the third coordinate.
            Z = (np.dot(m,(m1+m2)/2)-m[0]*X-m[1]*Y)/m[2]
            ax.scatter(test_data[predicted_t, ix], test_data[predicted_t, iy], test_data[predicted_t, iz],
                       c=iris_color[t], marker=iris_icon[t], label=iris_name[t])
            ax.scatter(test_data[predicted_f, ix], test_data[predicted_f, iy],
                       test_data[predicted_f, iz],
                       c=iris_color[f], marker=iris_icon[f], label=iris_name[f])
            ax.set_zlabel(feature[iz])
            ax.set_xlabel(feature[ix])
            ax.set_ylabel(feature[iy])
            ax.plot_surface(X,Y,Z,alpha=0.4)
            ax.legend(loc=0)
        plt.show()
    
    def whiten_feature(data):
        """Whiten ``data`` so that its feature covariance becomes the identity.

        ``data`` holds one sample per row and one feature per column. With the
        eigen-decomposition ``cov = Phi * Lambda * Phi^T`` of the feature
        covariance, each sample ``x`` is mapped to ``Lambda^(-1/2) * Phi^T * x``;
        for row-wise samples this is ``data . (Phi * Lambda^(-1/2))``.

        Output columns follow the eigenvalues in ascending order. Directions
        whose eigenvalue is numerically zero (or negative through round-off)
        carry no variance and are mapped to zero instead of being divided by
        zero. The data is not centred.

        Returns an array with the same shape as ``data``.
        """
        data = np.asarray(data,dtype="float64")
        # rowvar=False is required: columns are the variables (features) here.
        # atleast_2d keeps the single-feature case (scalar covariance) working.
        Ex = np.atleast_2d(np.cov(data,rowvar=False))
        # The covariance matrix is symmetric, so eigh applies: it returns real
        # eigenvalues and orthonormal eigenvectors (as columns of w1).
        a,w1 = np.linalg.eigh(Ex)
        tolerance = np.finfo(np.float64).eps * a.size * np.abs(a).max()
        usable = a > tolerance
        scale = np.zeros_like(a)
        scale[usable] = a[usable]**(-0.5)
        # Scale column k of Phi by lambda_k^(-1/2); this is the transpose of
        # W = Lambda^(-1/2) Phi^T, which is what row-vector samples need.
        w = w1*scale
        return np.dot(data,w)
    
    def show_whiten_3D(data,iris_type):
        """Whiten ``data`` with ``whiten_feature`` and pass it to ``show_3D``.

        NOTE(review): ``show_3D`` is not defined in the visible part of this
        file; it is called with the whitened samples and ``iris_type``.
        """
        show_3D(whiten_feature(data),iris_type)
    

    2.复现MICD分类器的源码

    请根据第二章的理论知识,尝试用Python、MATLAB等常见语言复现MICD分类器。

    可以在讨论区内跟大家分享一下自己的代码

    from sklearn import datasets
    import pandas as pd
    import numpy as np
    import seaborn as sns
    from matplotlib import pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D
    from scipy.stats import multivariate_normal as gaussian_cal
    
    #使用np的数组方便向量运算
    def getIrisLinear(data,iris_type,flag):
        """Drop every sample labelled ``flag`` and return the rest as arrays.

        Returns a ``(samples, labels)`` pair of float64 NumPy arrays holding the
        entries whose label differs from ``flag``, in their original order.
        """
        kept_samples = []
        for idx in range(len(data)):
            if iris_type[idx] == flag:
                continue
            kept_samples.append(data[idx])
        kept_labels = [label for label in iris_type if label != flag]
        return (
            np.asarray(kept_samples, dtype="float64"),
            np.asarray(kept_labels, dtype="float64"),
        )
    
    
    def _hold_out_split_indices(indices, rng):
        """Randomly move 70% of ``indices`` (rounded down) into a training list.

        Returns ``(train_indices, test_indices)``. Training indices are in draw
        order; the left-over test indices keep their original relative order.
        """
        remaining = list(indices)
        train_size = int(len(remaining) * 7 / 10)
        chosen = []
        for _ in range(train_size):
            # pop() removes by position, so a sample can never be drawn twice
            # and no linear search for the value is needed.
            chosen.append(remaining.pop(rng.randint(0, len(remaining) - 1)))
        return chosen, remaining


    def hold_out_partition(data_linear,iris_type_linear):
        """Split a two-class data set 70/30 into training and test sets.

        Samples are grouped into "label == 0" and "everything else", and each
        group is split separately so both keep the same 70/30 proportion.
        Training samples are drawn at random with the ``random`` module; the
        remaining samples of each group form the test set.

        Returns ``(train_data, train_type, test_data, test_type)`` where the
        data arrays are float64 and the label arrays are int16. In both sets the
        samples of the first group come before those of the second group.
        """
        import random

        first_cur = []
        second_cur = []
        for i in range(len(data_linear)):
            (first_cur if iris_type_linear[i] == 0 else second_cur).append(i)

        train_idx = []
        test_idx = []
        # 70/30 split, applied to each group in turn.
        for group in (first_cur, second_cur):
            picked, left_over = _hold_out_split_indices(group, random)
            train_idx.extend(picked)
            test_idx.extend(left_over)

        train_data = [data_linear[i] for i in train_idx]
        train_type = [iris_type_linear[i] for i in train_idx]
        test_data = [data_linear[i] for i in test_idx]
        test_type = [iris_type_linear[i] for i in test_idx]
        return np.asarray(train_data,dtype="float64"),np.asarray(train_type,dtype="int16"),np.asarray(test_data,dtype="float64"),np.asarray(test_type,dtype="int16")
    
    
    def _ratio_or_nan(numerator, denominator):
        """Return ``numerator / denominator``, or NaN when the denominator is 0."""
        if denominator == 0:
            return float("nan")
        return numerator / denominator


    def MED_linear_classification(data,iris_type,t,f,flag):
        """Train, evaluate and plot a two-class minimum-Euclidean-distance classifier.

        Samples labelled ``flag`` are removed, the rest is split 70/30 with
        ``hold_out_partition``, and the class means of the training part define
        the decision function ``g(x) = (z_t - z_f) . (x - (z_t + z_f) / 2)``.
        A test sample with ``g(x) >= 0`` is assigned to class ``t`` (treated as
        the positive class), otherwise to class ``f``.

        Prints recall, specificity (under the label "Specify"), precision and
        F1 score for the test split, then shows four 3-D scatter plots (one per
        combination of three of the four features) of the test samples coloured
        by predicted class, together with the decision plane.

        Parameters:
            data: samples, one row per sample, four feature columns.
            iris_type: class label (0, 1 or 2) of each sample.
            t: label treated as the positive class.
            f: label treated as the negative class.
            flag: label of the class to leave out.

        Raises:
            ValueError: if the training split lacks samples of ``t`` or ``f``.
        """
        data_linear,iris_type_linear=getIrisLinear(data,iris_type,flag)
        train_data,train_type,test_data,test_type = hold_out_partition(data_linear,iris_type_linear)

        # Class means. Select by the caller's labels rather than a fixed label
        # value, so the ">= 0 means class t" rule holds for every class pair.
        samples_f = train_data[train_type == f]
        samples_t = train_data[train_type == t]
        if len(samples_f) == 0 or len(samples_t) == 0:
            raise ValueError("training split must contain samples of both class t and class f")
        z1 = samples_f.sum(axis=0)/len(samples_f)
        z2 = samples_t.sum(axis=0)/len(samples_t)

        # Sign of the decision function for every test sample at once.
        test_result = np.sign(np.dot(test_data-(z1+z2)/2,z2-z1))

        # Confusion matrix with class t as "positive".
        predicted_t = test_result >= 0
        predicted_f = test_result < 0
        actual_t = test_type == t
        actual_f = test_type == f
        TP = int(np.sum(predicted_t & actual_t))
        FP = int(np.sum(predicted_t & actual_f))   # predicted t, really f
        FN = int(np.sum(predicted_f & actual_t))   # predicted f, really t
        TN = int(np.sum(predicted_f & actual_f))
        Recall = _ratio_or_nan(TP, TP+FN)
        Precision = _ratio_or_nan(TP, TP+FP)
        print("Recall= %f"% Recall)
        print("Specify= %f"% _ratio_or_nan(TN, TN+FP))
        print("Precision= %f"% Precision)
        print("F1 Score= %f"% _ratio_or_nan(2*Recall*Precision, Recall+Precision))

        # Plotting: one subplot per triple of feature indices.
        xx = [[0, 1, 2], [1, 2, 3], [0, 2, 3], [0, 1, 3]]
        iris_name =['setosa','vesicolor','virginica']
        iris_color = ['r','g','b']
        iris_icon = ['o','x','^']
        fig = plt.figure(figsize=(20, 20))
        feature = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
        lower = test_data.min(axis=0)
        upper = test_data.max(axis=0)
        for i in range(4):
            ix, iy, iz = xx[i]
            ax = fig.add_subplot(221 + i, projection="3d")
            X = np.arange(lower[ix],upper[ix],1)
            Y = np.arange(lower[iy],upper[iy],1)
            X,Y = np.meshgrid(X,Y)
            m1 = np.array([z1[ix],z1[iy],z1[iz]])
            m2 = np.array([z2[ix],z2[iy],z2[iz]])
            m = m2-m1
            # Solve m . (p - (m1 + m2) / 2) = 0 for the third coordinate.
            Z = (np.dot(m,(m1+m2)/2)-m[0]*X-m[1]*Y)/m[2]
            ax.scatter(test_data[predicted_t, ix], test_data[predicted_t, iy], test_data[predicted_t, iz],
                       c=iris_color[t], marker=iris_icon[t], label=iris_name[t])
            ax.scatter(test_data[predicted_f, ix], test_data[predicted_f, iy],
                       test_data[predicted_f, iz],
                       c=iris_color[f], marker=iris_icon[f], label=iris_name[f])
            ax.set_zlabel(feature[iz])
            ax.set_xlabel(feature[ix])
            ax.set_ylabel(feature[iy])
            ax.plot_surface(X,Y,Z,alpha=0.4)
            ax.legend(loc=0)
        plt.show()
    
    def whiten_feature(data):
        """Whiten ``data`` so that its feature covariance becomes the identity.

        ``data`` holds one sample per row and one feature per column. With the
        eigen-decomposition ``cov = Phi * Lambda * Phi^T`` of the feature
        covariance, each sample ``x`` is mapped to ``Lambda^(-1/2) * Phi^T * x``;
        for row-wise samples this is ``data . (Phi * Lambda^(-1/2))``.

        Output columns follow the eigenvalues in ascending order. Directions
        whose eigenvalue is numerically zero (or negative through round-off)
        carry no variance and are mapped to zero instead of being divided by
        zero. The data is not centred.

        Returns an array with the same shape as ``data``.
        """
        data = np.asarray(data,dtype="float64")
        # rowvar=False is required: columns are the variables (features) here.
        # atleast_2d keeps the single-feature case (scalar covariance) working.
        Ex = np.atleast_2d(np.cov(data,rowvar=False))
        # The covariance matrix is symmetric, so eigh applies: it returns real
        # eigenvalues and orthonormal eigenvectors (as columns of w1).
        a,w1 = np.linalg.eigh(Ex)
        tolerance = np.finfo(np.float64).eps * a.size * np.abs(a).max()
        usable = a > tolerance
        scale = np.zeros_like(a)
        scale[usable] = a[usable]**(-0.5)
        # Scale column k of Phi by lambda_k^(-1/2); this is the transpose of
        # W = Lambda^(-1/2) Phi^T, which is what row-vector samples need.
        w = w1*scale
        return np.dot(data,w)
    
    def show_whiten_3D(data,iris_type):
        """Whiten ``data`` with ``whiten_feature`` and pass it to ``show_3D``.

        NOTE(review): ``show_3D`` is not defined in the visible part of this
        file; it is called with the whitened samples and ``iris_type``.
        """
        show_3D(whiten_feature(data),iris_type)
    
  • 相关阅读:
    设计模式(8)[JS版]-JavaScript设计模式之如何实现适配器模式???
    JS+CSS实现左右文字滚动
    设计模式(7)[JS版]-JavaScript设计模式之原型模式如何实现???
    如何更聪明地学习:20种让你更高效学习的科学方法
    设计模式(6)[JS版]-JavaScript如何实现抽象工厂模式?
    设计模式(5)[JS版]-JavaScript如何实现工厂方法模式?
    设计模式(4)[JS版]-JavaScript如何实现建造者模式?
    设计模式(3)[JS版]-JavaScript中的构造函数模式是什么?
    PAT A1094 The Largest Generation (25分)
    PAT A1090 Highest Price in Supply Chain (25分)(边界问题)
  • 原文地址:https://www.cnblogs.com/Thewillman/p/14720364.html
Copyright © 2011-2022 走看看