zoukankan      html  css  js  c++  java
  • 机器学习实战笔记 logistic回归

    参考链接http://blog.csdn.net/lu597203933/article/details/38468303

    Logistic代码

    # Logistic回归分类 使用梯度上升找最佳参数
    import numpy as np
    
    
    def loadDataSet(fileName='testSet.txt'):
        """Load a tab-separated, two-feature data set with integer labels.

        Every non-blank line must contain at least three tab-separated
        fields: two feature values followed by an integer class label.

        Args:
            fileName: Path of the data file. Defaults to 'testSet.txt', the
                file the original implementation always read.

        Returns:
            A tuple ``(datMat, labelMat)``. ``datMat`` is a list of
            ``[1.0, x1, x2]`` rows, where the leading 1.0 is the input paired
            with the constant term; ``labelMat`` is the list of int labels.

        Raises:
            OSError: If the file cannot be opened.
            IndexError: If a non-blank line has fewer than three fields.
            ValueError: If a field cannot be parsed as a number.
        """
        datMat = []
        labelMat = []
        # The context manager closes the file even when a line fails to parse.
        with open(fileName) as fr:
            for line in fr:
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                lineArr = stripped.split('\t')
                # Prepend 1.0 as the x value matching the constant term.
                datMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
                labelMat.append(int(lineArr[2]))
        return datMat, labelMat
    
    
    def sigmoid(inx):
        """Return the logistic function 1 / (1 + e**(-inx)).

        ``inx`` may be a scalar or a NumPy array/matrix; the computation is
        applied element-wise by ``np.exp`` and the arithmetic operators.
        """
        decay = np.exp(-inx)
        return 1.0 / (1 + decay)
    
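    # One way to picture it (an analogy only -- this is not what the code actually optimizes):
    # f(x) = a*x1 + b*x2 + c*x3
    # Minimize L(a, b, c) = (1/2) * sum_{i=1..100} (f(x_i) - y_i)^2
    # Let the step size be rate = 0.1 and the initial parameters T0 = [1, 1, 1]
    # dL/da = sum_{i=1..100} (f(x_i) - y_i) * x_i1 = ra, the gradient direction for a
    # Likewise rb and rc for b and c
    # Hence a = a - ra*rate, b = b - rb*rate, c = c - rc*rate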
    # 梯度上升法
    def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
        """Fit logistic-regression weights with batch gradient ascent.

        Every iteration uses the whole data set to compute the update.

        Args:
            dataMatIn: m x n array-like of samples (one row per sample).
            classLabels: Sequence of m class labels.
            alpha: Step size applied to each update. Defaults to 0.001.
            maxCycles: Number of full-batch iterations. Defaults to 500.

        Returns:
            An n x 1 ``np.matrix`` holding the fitted weights.
        """
        # np.asmatrix is the supported spelling; newer NumPy releases no
        # longer provide the np.mat alias.
        dataMatrix = np.asmatrix(dataMatIn)
        # Turn the label row into an m x 1 column so it lines up with h.
        labelMat = np.asmatrix(classLabels).transpose()

        n = np.shape(dataMatrix)[1]
        # The transpose does not change between iterations, so build it once.
        dataMatrixT = dataMatrix.transpose()
        # Start from all-ones weights, shape n x 1.
        weights = np.asmatrix(np.ones((n, 1)))
        for _ in range(maxCycles):
            # Predictions for the whole data set, shape m x 1.
            h = sigmoid(dataMatrix * weights)
            # Difference between the true labels and the predictions.
            error = labelMat - h
            # (n x 1) = (n x 1) + alpha * (n x m) * (m x 1)
            weights = weights + alpha * dataMatrixT * error
        return weights
    
    
    # 随机梯度上升算法
    # 可以进行增量式更新
    def stoGradAscent0(dataMatrix, classLabels):
        """Run one pass of stochastic gradient ascent over the samples.

        The weights are updated after every single sample, in row order,
        with a fixed step size of 0.01, starting from all ones.

        Args:
            dataMatrix: 2-D array of samples, one row per sample.
            classLabels: Sequence of class labels indexed like the rows.

        Returns:
            A 1-D array with one weight per column of ``dataMatrix``.
        """
        rowCount, featureCount = np.shape(dataMatrix)
        stepSize = 0.01
        coefficients = np.ones(featureCount)
        for idx in range(rowCount):
            # Only the current sample contributes to this update.
            sample = dataMatrix[idx]
            activation = sum(sample * coefficients)
            residual = classLabels[idx] - sigmoid(activation)
            coefficients = coefficients + stepSize * residual * sample
        return coefficients
    
    
    # 改进的随机梯度上升算法
    def stoGradAscent1(dataMatrix, classLabels, numIter=150):
        """Fit logistic-regression weights with improved stochastic ascent.

        Each of the ``numIter`` passes visits every sample exactly once in a
        random order, using a step size that shrinks as training proceeds.

        Args:
            dataMatrix: 2-D array of samples, one row per sample.
            classLabels: Sequence of class labels indexed like the rows.
            numIter: Number of passes over the data set. Defaults to 150.

        Returns:
            A 1-D array with one weight per column of ``dataMatrix``.
        """
        m, n = np.shape(dataMatrix)
        weights = np.ones(n)
        for j in range(numIter):
            # Row indices not yet used in this pass.
            dataIndex = list(range(m))
            for i in range(m):
                # The step size decays with j and i but never drops below 0.01.
                alpha = 4 / (1.0 + j + i) + 0.01
                # Pick a random position among the remaining indices and map
                # it back to the actual row. Using the position itself as the
                # row index would sample with replacement and favour
                # low-numbered rows as the list shrinks.
                position = int(np.random.uniform(0, len(dataIndex)))
                sampleIndex = dataIndex.pop(position)
                h = sigmoid(sum(dataMatrix[sampleIndex] * weights))
                error = classLabels[sampleIndex] - h
                weights = weights + alpha * error * dataMatrix[sampleIndex]
        return weights
    
    
    # 画出数据集和最佳拟合直线的函数
    def plotBestFit(wei):
        """Plot the data set together with the fitted decision boundary.

        Args:
            wei: The three fitted weights (w0, w1, w2). Any array-like is
                accepted -- an n x 1 ``np.matrix``, a 1-D ``ndarray`` or a
                list -- because the values are flattened before use.
        """
        import matplotlib.pyplot as plt
        # Flatten so that np.matrix and plain ndarray weights both work; the
        # former .getA() call required an np.matrix.
        weights = np.asarray(wei).ravel()
        dataMat, labelMat = loadDataSet()
        dataArr = np.array(dataMat)
        n = np.shape(dataArr)[0]
        # Split the points by class so each class gets its own marker.
        xcord1 = []
        ycord1 = []
        xcord2 = []
        ycord2 = []
        for i in range(n):
            if int(labelMat[i]) == 1:
                xcord1.append(dataArr[i][1])
                ycord1.append(dataArr[i][2])
            else:
                xcord2.append(dataArr[i][1])
                ycord2.append(dataArr[i][2])
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
        ax.scatter(xcord2, ycord2, s=30, c='green')
        x = np.arange(-3.0, 3.0, 0.1)
        # z = w0*1 + w1*x1 + w2*x2 and h = 1 / (1 + exp(-z)).
        # h = 0.5 (the boundary between class 1 and class 0) occurs at z = 0,
        # so the boundary line is w0 + w1*x1 + w2*x2 = 0; solving for x2
        # gives y = (-w0 - w1*x) / w2.
        y = (-weights[0] - weights[1] * x) / weights[2]
        ax.plot(x, y)
        plt.xlabel('x1')
        plt.ylabel('x2')
        plt.show()
    
    
    # 修改之后的随机梯度上升法
    # dataArra, labelMat = loadDataSet()
    # weights = stoGradAscent1(np.array(dataArra), labelMat, numIter=150)
    # print(weights)
    # weights = np.matrix(weights).transpose()
    # plotBestFit(weights)
    
    # 随机梯度上升法
    
    # dataArra, labelMat = loadDataSet()
    # weights=stoGradAscent0(np.array(dataArra),labelMat)
    # print(weights)
    # weights = np.matrix(weights).transpose()
    # plotBestFit(weights)
    
    
    # Batch gradient ascent: load the data, fit the weights, print them,
    # then plot the data with the fitted line.
    dataArra, labelMat = loadDataSet()
    weights=gradAscent(dataArra,labelMat)
    print(weights)
    # Make sure the weights are an np.matrix before handing them to the plot.
    weights = np.matrix(weights)
    plotBestFit(weights)
    
    
    # weights = gradAscent(dataArra, labelMat)
    # weights = stoGradAscent1(np.array(dataArra), labelMat)
    # print(weights)
    # plotBestFit(np.mat(weights.transpose()))
    
    # import matplotlib.pyplot as plt
    # # plt.plot([1, 2, 3, 4], [1, 4, 9, 16],'r')
    # plt.axis([0, 5, 0, 20])
    # t = np.arange(0, 5, 0.2)
    # plt.plot(t, t, 'r--', t, t ** 2, 'bs', t, t ** 3, 'g^')
    # plt.ylabel('some numbers')
    # plt.show()
  • 相关阅读:
    Oracle 11g学习笔记(3)
    模式识别,图像处理工程师的要求
    vc++17 进程间的通信
    vc++学习笔记16 线程同步,异步套接字
    类型转换 float与int ,(int&)a,(int)&a
    vc++学习笔记16 线程同步,异步套接字
    vc++学习之15 多线程与聊天室程序的创建
    testl指令的问题
    C语言的几种位运算
    C语言的几种位运算
  • 原文地址:https://www.cnblogs.com/09120912zhang/p/8045930.html
Copyright © 2011-2022 走看看