zoukankan      html  css  js  c++  java
  • 机器学习 周志华 3.3 答案

    数据集见文章末尾。 

    from numpy import *
    from numpy.linalg import *
    import matplotlib.pyplot as plt
    
    
    def loadDataSet(fileName):
        #加载数据,返回样本数组,其中每一行为一个样本,每行最后一列为标记。
        dataMat = []
        fr = open(fileName)
        for line in fr.readlines():
            lineArr = []
            curLine = line.strip().split('	') #假设数据之间以列表符分隔
            for i in range(len(curLine)):
                lineArr.append(float(curLine[i]))
            dataMat.append(lineArr)
        dataArr = array(dataMat)
        return dataArr
    
    
    def gradienet_down(dataArr, betaArrT, n, a, error):
        # 使用梯度下降法训练模型
        yArr = dataArr[:, 2]
        xArr = column_stack((dataArr[:, 0:2], ones(shape(dataArr)[0])))
        for i in range(n):
            start = logistic(xArr, yArr, betaArrT)
            df = dfunc(xArr, yArr, betaArrT)
            betaArrT = betaArrT - a * df
            end = logistic(xArr, yArr, betaArrT)
            if abs(start - end) < error:
                break
        return betaArrT, i
    
    
    def newtwon(dataArr, betaArrT, n, error):
        # 使用牛顿迭代法训练模型
        yArr = dataArr[:, 2]    # y表示最终预测结果:1表示好瓜,0表示坏瓜;yArr预测结果的集合。
        xArr = column_stack((dataArr[:, 0:2], ones(shape(dataArr)[0])))
        for i in range(n):
            df = dfunc(xArr, yArr, betaArrT)    # 一阶导数
            if dot(df, df.transpose()) < error:
                break
            d2f = d2func(xArr, yArr, betaArrT)  # 二阶导数
            betaArrT = betaArrT - solve(d2f, df)
        return betaArrT, i
    
    
    def LDA(dataArr, betaArrT):
        #使用线性判别分析训练模型
        xArr = dataArr[:, 0:2]
        m, n=shape(xArr)
        u0 = zeros((1, 2)) #反例均值
        u1 = zeros((1, 2)) #正例均值
        m0 = 0
        m1 = 0
        for i in range(m):
            if dataArr[i, -1] == 0:
                u0[0, :] += xArr[i, :]
                m0 += 1
            else:
                u1[0, :] += xArr[i, :]
                m1 += 1
        u0 = u0 / m0
        u1 = u1 / m1
        sw = zeros((2, 2))   # 类内散度矩阵
        for i in range(m):
            if dataArr[i, -1] == 0:
                sw += (xArr[i, :] - u0) * (xArr[i, :] - u0).T
            else:
                sw += (xArr[i, :] - u1) * (xArr[i, :] - u1).T
        return dot(inv(sw), (u0 - u1).T)
    
    
    def logistic(xArr, yArr, betaArrT):
        #计算对数似然l(w,b)
        m, n = shape(xArr) #m为样本数,n为属性值数
        result = 0
        for i in range(m):
            result += -yArr[i]*dot(xArr[i], betaArrT)+log(1+exp(dot(xArr[i], betaArrT)))
        return result
    
    
    def p1(x, betaArrT):
        #计算后验概率估计P1
        return exp(dot(x, betaArrT))/(1+exp(dot(x, betaArrT)))
    
    
    def dfunc(xArr, yArr, betaArrT):
        #求一阶导数
        m, n = shape(xArr)
        result = 0
        for i in range(m):
            result += xArr[i] * ((yArr[i]-p1(xArr[i], betaArrT)))     # 公式(3.30)
        return -result.transpose()
    
    
    def d2func(xArr, yArr, betaArrT):
        #求二阶导数
        m, n= shape(xArr)
        d2f = zeros((m, m))
        for i in range(m):
            d2f[i][i] = p1(xArr[i], betaArrT) * (1 - p1(xArr[i], betaArrT))
        return mat(xArr.transpose()) * mat(d2f) * mat(xArr)
    
    
    def plotDataSet(dataArr, betaArrT, fig, index):
        #可视化数据集
        m, n = shape(dataArr) #m为样本数,n-1为属性值数
        xcord1 = [] #好瓜密度
        ycord1 = [] #好瓜含糖率
        xcord2 = [] #坏瓜密度
        ycord2 = [] #坏瓜含糖率
        for i in range(m):
            if dataArr[i, 2] == 1:
                for j in range(n - 1):
                    if j == 0:
                        xcord1.append(dataArr[i, j])
                    else:
                        ycord1.append(dataArr[i, j])
            else:
                for j in range(n - 1):
                    if j == 0:
                        xcord2.append(dataArr[i, j])
                    else:
                        ycord2.append(dataArr[i, j])
        ax = fig.add_subplot(index)
        ax.scatter(xcord1, ycord1, s=30, c='red') #红色代表好瓜
        ax.scatter(xcord2, ycord2, s=30, c='green') #绿色代表坏瓜
        plotResult(betaArrT)
    
    
    def plotResult(betaArrT):
        x1 = arange(0, 0.8, 0.01)
        y1 = [-(betaArrT[2] + betaArrT[0] * x1[k]) / betaArrT[1] for k in range(len(x1))]
        plt.plot(x1, y1)
    
    
    if __name__ == '__main__':
        fileName = '/home/jq/桌面/西瓜数据集3.0'
        dataArr = loadDataSet(fileName)
    
        beta = [1, 1, 1]    # 最终的参数w,b; w的转置是矩阵:[w1,w2]。
        betaArrT = array(beta).transpose()      # 返回转置矩阵
        n = 1000    # 牛顿法迭代的上限
        a = 0.1
        error = 0.0001
    
        result, step = gradienet_down(dataArr, betaArrT, n, a, error)
        result1, step1 = newtwon(dataArr, betaArrT, n, error)
        result2 = LDA(dataArr, betaArrT)
    
        print('梯度下降法共迭代了' + str(step) + '次,结果为:beta=' + str(result))
        print('牛顿法共迭代了' + str(step1) + '次,结果为:beta=' + str(result1))
        print('LDA结果为:w=' + str(result2))
    
        fig = plt.figure()
        plotDataSet(dataArr, result, fig, 221)
        plotDataSet(dataArr, result1, fig, 222)
        result2 = [result2[0,0],result2[1,0],0]
        plotDataSet(dataArr, result2, fig, 223)
        plt.show()

     数据集:

    参考:https://www.jianshu.com/p/3a9023d141bc

       https://blog.csdn.net/bebusy/article/details/82753619

  • 相关阅读:
    命令行工具--netstat
    Man手册--nmap
    bc -l 对于 %取模计算出错
    离散对数和原根 欧拉定理证明
    bc-win32-power-echo-vim-not-work
    2010版EXCEL下方sheet工作表隐藏了
    word多出空标题,样式是列出段落
    plantuml
    久石让《龙猫》豆豆龙 钢琴视奏版
    神奇校车 = topsage
  • 原文地址:https://www.cnblogs.com/jiaoqiang/p/11210256.html
Copyright © 2011-2022 走看看