zoukankan      html  css  js  c++  java
  • 机器学习实战-逻辑回归

    逻辑回归:简单来说,就是在线性回归的输出上再套一层Sigmoid函数,把结果映射到(0,1)区间,作为样本属于正类的概率。

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.font_manager import FontProperties
    #加载数据集
    def loadData(filename):
        """Load a whitespace-delimited, two-feature data set from a text file.

        Every non-blank line must contain three fields: ``x1 x2 label``.
        Blank or whitespace-only lines are skipped.

        Args:
            filename: Path of the text file to read.

        Returns:
            A ``(dataMat, labelMat)`` tuple. ``dataMat`` is a list of
            ``[1, x1, x2]`` rows (the leading constant 1 is the bias input)
            and ``labelMat`` is the list of integer labels, in file order.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a field cannot be parsed as a number.
            IndexError: If a non-blank line has fewer than three fields.
        """
        dataMat = []
        labelMat = []
        with open(filename) as f:
            # Iterate the file object directly; no need to hold every line
            # in memory the way readlines() does.
            for line in f:
                fields = line.split()
                if not fields:
                    # A blank line (e.g. a trailing newline) is not a sample.
                    continue
                dataMat.append([1, float(fields[0]), float(fields[1])])
                labelMat.append(int(fields[2]))
        return dataMat, labelMat
    
    #绘制数据集
    def plot(dataMat,labelMat):
        """Show a scatter plot of the samples, coloured by class label.

        Samples whose label equals 1 are drawn as red squares; every other
        sample is drawn as a green dot. Columns 1 and 2 of each ``dataMat``
        row are used as the x and y coordinates.
        """
        pos_x, pos_y = [], []
        neg_x, neg_y = [], []
        for idx, label in enumerate(labelMat):
            row = dataMat[idx]
            # Pick the pair of coordinate lists that matches this label.
            if label == 1:
                xs, ys = pos_x, pos_y
            else:
                xs, ys = neg_x, neg_y
            xs.append(row[1])
            ys.append(row[2])

        plt.scatter(pos_x, pos_y, c='red', s=20, alpha=0.5, marker='s')
        plt.scatter(neg_x, neg_y, c='green', s=20, alpha=0.5)
        plt.title('DataSet')
        plt.xlabel('x1')
        plt.ylabel('x2')
        plt.show()
    
    #随机梯度上升进行LR训练
    def stogradAscent(dataMat,labelMat,num_iter=150):
        """Train logistic-regression weights with stochastic gradient ascent.

        Each of the ``num_iter`` passes visits every sample exactly once, in
        random order, and updates the weights after each sample. The step
        size ``alpha`` decays as ``1 / (i + j + 1) + 0.001`` so that it
        shrinks over time but never reaches zero.

        Args:
            dataMat: Sequence of m feature rows with n columns each.
            labelMat: Sequence of m numeric labels, one per row.
            num_iter: Number of passes over the data set.

        Returns:
            A ``(weights, weights_array)`` tuple. ``weights`` is the final
            1-D array of n weights; ``weights_array`` has shape
            ``(num_iter * m, n)`` and holds the weights after every update.
        """
        dataMat = np.array(dataMat)
        m, n = np.shape(dataMat)          # m samples, n columns
        weights = np.ones(n)              # parameters to optimise, start at 1
        # Pre-allocate the history; appending inside the loop would copy the
        # whole array on every update.
        weights_array = np.empty((num_iter * m, n))
        step = 0
        for j in range(num_iter):
            dataIndex = list(range(m))    # rows not yet used in this pass
            for i in range(m):
                alpha = 1 / (i + j + 1.0) + 0.001
                pick = int(np.random.uniform(0, len(dataIndex)))
                # `pick` is a position inside dataIndex; translate it to the
                # actual row number so rows are sampled without replacement.
                sample = dataIndex[pick]
                error = labelMat[sample] - sigmoid(np.dot(dataMat[sample], weights))
                weights = weights + alpha * dataMat[sample] * error
                weights_array[step] = weights
                step += 1
                del dataIndex[pick]
        return weights, weights_array
    
    #批量梯度上升进行LR训练
    def gradAscent(dataMat,labelMat,alpha=0.001,maxiter=500):
        """Train logistic-regression weights with batch gradient ascent.

        Every iteration uses the whole data set:
        ``weights += alpha * X^T (y - sigmoid(X weights))``.

        Plain ndarrays are used instead of ``np.mat``, which is no longer
        available in the NumPy 2.x main namespace.

        Args:
            dataMat: Sequence of m feature rows with n columns each.
            labelMat: Sequence of m numeric labels, one per row.
            alpha: Fixed step size.
            maxiter: Number of full-batch iterations.

        Returns:
            A ``(weights, weights_array)`` tuple. ``weights`` is an
            ``(n, 1)`` ndarray with the final weights; ``weights_array`` has
            shape ``(maxiter, n)`` and holds the weights after each iteration.
        """
        features = np.asarray(dataMat, dtype=float)                 # m x n
        labels = np.asarray(labelMat, dtype=float).reshape(-1, 1)   # m x 1
        n = features.shape[1]             # number of columns
        weights = np.ones((n, 1))         # parameters to optimise, start at 1
        # Pre-allocate the history instead of growing it with np.append.
        weights_array = np.empty((maxiter, n))
        for i in range(maxiter):
            error = labels - sigmoid(features.dot(weights))         # m x 1
            weights = weights + alpha * features.T.dot(error)
            weights_array[i] = weights.ravel()
        return weights, weights_array
    
    def sigmoid(x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        Accepts a scalar, an ndarray or a matrix. The value is computed as
        ``exp(-log(1 + exp(-x)))``; ``np.logaddexp(0, -x)`` evaluates the
        inner logarithm without overflowing, so large negative inputs give
        0.0 instead of triggering an overflow RuntimeWarning.
        """
        return np.exp(-np.logaddexp(0, -x))
    
    def plotWeights(weights_array1,weights_array2):
        """Plot how each of the three weights evolves during training.

        A 3x2 grid is drawn: one row per weight (W0, W1, W2) and one column
        per history. The left column shows ``weights_array1`` under the
        gradient-ascent title, the right column shows ``weights_array2``
        under the stochastic-gradient-ascent title.

        Args:
            weights_array1: 2-D array, one row per update, at least 3 columns.
            weights_array2: 2-D array, one row per update, at least 3 columns.
        """
        import os

        # Font used for the Chinese titles and labels. The path is specific
        # to Windows; fall back to matplotlib's default font elsewhere so the
        # figure still renders instead of failing on a missing file.
        font_path = r"C:\Windows\Fonts\simsun.ttc"
        if os.path.exists(font_path):
            font = FontProperties(fname=font_path, size=14)
        else:
            font = FontProperties(size=14)

        # Split the figure into 3 rows x 2 columns with independent axes;
        # axs[r][c] is the subplot in row r, column c.
        fig, axs = plt.subplots(nrows=3, ncols=2, sharex=False, sharey=False, figsize=(20,10))

        columns = (
            (weights_array1, u'梯度上升算法:回归系数与迭代次数关系'),
            (weights_array2, u'改进的随机梯度上升算法:回归系数与迭代次数关系'),
        )
        for col, (history, title) in enumerate(columns):
            iterations = np.arange(0, len(history), 1)
            for row in range(3):
                # Row r shows weight W<r> against the update count.
                axs[row][col].plot(iterations, history[:, row])
                # The keyword must be lowercase: recent matplotlib releases
                # no longer accept case-insensitive property names.
                ylabel_text = axs[row][col].set_ylabel(u'W%d' % row, fontproperties=font)
                plt.setp(ylabel_text, size=20, weight='bold', color='black')
            title_text = axs[0][col].set_title(title, fontproperties=font)
            plt.setp(title_text, size=20, weight='bold', color='black')
            # Only the bottom subplot of each column carries the x label.
            xlabel_text = axs[2][col].set_xlabel(u'迭代次数', fontproperties=font)
            plt.setp(xlabel_text, size=20, weight='bold', color='black')

        plt.show()
    def plotBestFit(weights,dataMat,labelMat):
        """Show the samples together with the fitted decision boundary.

        Samples labelled 1 are drawn as red squares, all others as green
        dots. The boundary is the line ``w0 + w1*x1 + w2*x2 = 0``, drawn for
        x1 from -3 up to (but excluding) 3 in steps of 0.1.
        """
        pos_x, pos_y = [], []
        neg_x, neg_y = [], []
        for idx, label in enumerate(labelMat):
            row = dataMat[idx]
            if label == 1:
                xs, ys = pos_x, pos_y
            else:
                xs, ys = neg_x, neg_y
            xs.append(row[1])
            ys.append(row[2])

        figure = plt.figure()
        axes = figure.add_subplot(111)
        axes.scatter(pos_x, pos_y, c='red', s=20, alpha=0.5, marker='s')
        axes.scatter(neg_x, neg_y, c='green', s=20, alpha=0.5)

        # Solve w0 + w1*x1 + w2*x2 = 0 for x2 to get slope and intercept.
        slope = - weights[1] / weights[2]
        intercept = -weights[0] / weights[2]
        line_x = np.arange(-3, 3, 0.1)
        line_y = slope * line_x + intercept
        axes.plot(line_x, line_y)
        plt.show()
    
    
    if __name__=='__main__':
        # Read the samples and labels from the data file.
        samples, targets = loadData('testSet.txt')

        # Stochastic gradient ascent: draw its decision boundary, then
        # print the final weights.
        sga_weights, sga_history = stogradAscent(samples, targets)
        plotBestFit(sga_weights, samples, targets)
        print(sga_weights)

        # Batch gradient ascent: only its weight history is needed here.
        _, batch_history = gradAscent(samples, targets)

        # Left column: batch history; right column: stochastic history.
        plotWeights(batch_history, sga_history)
  • 相关阅读:
    ORA-28001 has expired错误密码过期问题
    Oracle数据库无用户名密码登录
    使用Navicat连接Oracle数据库出现12541或者28547错误代码
    Solr的helloWord程序
    JS工具方法
    python发送邮件
    jmeter的性能监控框架搭建记录(Influxdb+Grafana+Jmeter)
    spotlight on mysql 监控
    linux下安装python环境
    yum的方式搭建mysql
  • 原文地址:https://www.cnblogs.com/logo-88/p/10162813.html
Copyright © 2011-2022 走看看