zoukankan      html  css  js  c++  java
  • logistic回归

    #!/usr/bin/env python
    # encoding:utf-8
    
    import math
    import numpy
    import time
    import matplotlib.pyplot as plt
    
    
    def sigmoid(x):
        """Return the logistic function 1 / (1 + e^(-x)), applied element-wise.

        Works for plain scalars as well as numpy arrays/matrices because the
        exponential is delegated to numpy.exp.
        """
        decay = numpy.exp(-x)
        return 1.0 / (1 + decay)
    
    
    def loadData(filename="test.txt"):
        """Read whitespace-separated samples and return (features, labels).

        Each non-blank line must hold at least three fields: two feature
        values followed by the label. A constant 1.0 is prepended to every
        feature row so the first weight acts as the intercept.

        Args:
            filename: path of the data file; defaults to "test.txt", the path
                the function originally had hard-coded.

        Returns:
            A tuple (dataMat, laberMat): a numpy matrix with one
            [1.0, x1, x2] row per sample, and a column matrix of the labels
            as floats.

        Raises:
            IOError/OSError: if the file cannot be opened.
            IndexError: if a non-blank line has fewer than three fields.
            ValueError: if a field is not a valid float.
        """
        dataMat = []
        laberMat = []
        with open(filename, 'r') as f:
            # Iterate the file lazily instead of materializing readlines().
            for line in f:
                fields = line.strip().split()
                if not fields:
                    # Blank (e.g. trailing) lines carry no sample; skip them
                    # rather than failing on fields[0].
                    continue
                dataMat.append([1.0, float(fields[0]), float(fields[1])])
                laberMat.append(float(fields[2]))
        # numpy.asmatrix is what numpy.mat aliased; the alias itself is no
        # longer available in NumPy 2.x.
        return numpy.asmatrix(dataMat), numpy.asmatrix(laberMat).transpose()
    
    
    def gradAscent(dataMat, laberMat, alpha=0.001, maxCycle=500):
        """Fit logistic-regression weights with batch gradient ascent.

        Every cycle uses all samples at once: the prediction error
        ``laberMat - sigmoid(dataMat * weights)`` is projected back through
        ``dataMat.transpose()`` and added to the weights, scaled by ``alpha``.

        Args:
            dataMat: numpy matrix with one sample per row (``*`` is used as a
                matrix product, so a plain ndarray will not work here).
            laberMat: column matrix of labels, one row per sample.
            alpha: fixed step size.
            maxCycle: number of full passes over the data.

        Returns:
            ndarray of shape (n, 1) holding the fitted weights, where n is
            the number of columns of ``dataMat``. Weights start at 1.0.

        Side effects:
            Prints the elapsed wall-clock time to stdout.
        """
        start_time = time.time()
        n = numpy.shape(dataMat)[1]
        weights = numpy.ones((n, 1))
        for _ in range(maxCycle):
            h = sigmoid(dataMat * weights)
            error = laberMat - h
            weights += alpha * dataMat.transpose() * error
        duration = time.time() - start_time
        # Single pre-formatted argument: prints the same text under Python 2
        # and is valid syntax under Python 3.
        print("duration of time: %s" % duration)
        return weights
    
    
    def stocGradAscent(dataMat, laberMat, alpha=0.01):
        """Fit logistic-regression weights with one stochastic pass.

        The samples are visited once, in row order, and the weights are
        updated after each individual sample.

        Args:
            dataMat: numpy matrix with one sample per row (``*`` is used as a
                matrix product).
            laberMat: column matrix of labels, one row per sample.
            alpha: fixed step size.

        Returns:
            ndarray of shape (n, 1) holding the fitted weights, where n is
            the number of columns of ``dataMat``. Weights start at 1.0.

        Side effects:
            Prints the elapsed wall-clock time to stdout.
        """
        start_time = time.time()
        m, n = numpy.shape(dataMat)
        weights = numpy.ones((n, 1))
        for i in range(m):
            sample = dataMat[i]
            h = sigmoid(sample * weights)
            error = laberMat[i] - h
            weights += alpha * sample.transpose() * error
        duration = time.time() - start_time
        # Single pre-formatted argument: prints the same text under Python 2
        # and is valid syntax under Python 3.
        print("duration of time: %s" % duration)
        return weights
    
    
    def betterStocGradAscent(dataMat, laberMat, alpha=0.01, numIter=150):
        """Fit logistic-regression weights by stochastic ascent with a decaying step.

        The data is swept ``numIter`` times in row order. For sweep ``j`` and
        row ``i`` the step size is ``4.0 / (1 + j + i) + alpha``: it shrinks
        as training proceeds but never drops below ``alpha``.

        Args:
            dataMat: numpy matrix with one sample per row (``*`` is used as a
                matrix product).
            laberMat: column matrix of labels, one row per sample.
            alpha: lower bound of the step size. The default 0.01 matches the
                constant that was previously hard-coded inside the loop.
            numIter: number of sweeps over the data.

        Returns:
            ndarray of shape (n, 1) holding the fitted weights, where n is
            the number of columns of ``dataMat``. Weights start at 1.0.

        Side effects:
            Prints the elapsed wall-clock time to stdout.
        """
        start_time = time.time()
        m, n = numpy.shape(dataMat)
        weights = numpy.ones((n, 1))
        for j in range(numIter):
            for i in range(m):
                # Float literals force true division; with integer operands
                # Python 2 floors the quotient to 0 once j + i >= 4.
                step = 4.0 / (1.0 + j + i) + alpha
                sample = dataMat[i]
                h = sigmoid(sample * weights)
                error = laberMat[i] - h
                weights += step * sample.transpose() * error
        duration = time.time() - start_time
        # Single pre-formatted argument: prints the same text under Python 2
        # and is valid syntax under Python 3.
        print("duration of time: %s" % duration)
        return weights
    
    
    def show(dataMat, laberMat, weights):
        """Plot both classes and the line w0 + w1*x1 + w2*x2 = 0.

        Rows whose label truncates to 0 are drawn as red squares and rows
        whose label truncates to 1 as green markers; rows with any other
        label are left out. The line is evaluated over the observed range of
        column 1 of ``dataMat`` in steps of 0.1, and the figure is displayed
        through ``plt.show()``.
        """
        row_count, _ = numpy.shape(dataMat)
        first_feature = dataMat[:, 1]
        lo = min(first_feature)[0, 0]
        hi = max(first_feature)[0, 0]

        zeros_x, zeros_y = [], []
        ones_x, ones_y = [], []
        for row in range(row_count):
            label = int(laberMat[row, 0])
            if label == 0:
                zeros_x.append(dataMat[row, 1])
                zeros_y.append(dataMat[row, 2])
            elif label == 1:
                ones_x.append(dataMat[row, 1])
                ones_y.append(dataMat[row, 2])

        figure = plt.figure()
        axes = figure.add_subplot(111)
        axes.scatter(zeros_x, zeros_y, s=30, c="red", marker="s")
        axes.scatter(ones_x, ones_y, s=30, c="green")

        # Solve w0 + w1*x1 + w2*x2 = 0 for x2 along the sampled x1 values.
        line_x = numpy.arange(lo, hi, 0.1)
        line_y = (-weights[0] - weights[1]*line_x) / weights[2]
        axes.plot(line_x, line_y)

        plt.xlabel("x1")
        plt.ylabel("x2")
        plt.show()
    
        
    if __name__ == "__main__":
        # Script entry point: load the samples, fit the weights, plot the result.
        # gradAscent(samples, labels, maxCycle=500) or
        # stocGradAscent(samples, labels) can be swapped in as the trainer.
        samples, labels = loadData()
        fitted = betterStocGradAscent(samples, labels, numIter=80)
        show(samples, labels, fitted)

    参考:http://www.cnblogs.com/coder2012/p/4598913.html

     
  • 相关阅读:
    我的大厂面试经历(附100+面试题干货)
    大厂面试题:集群部署时的分布式 session 如何实现?
    【转载】Android数据库(SqlLite)操作和db文件查看
    【转载】android ListView详解
    C#根据经纬度获取物理地址
    C#计算两个经纬度的距离
    EXT编写日志文件
    动态数组
    System.Windows.Forms.Timer和System.Timers.Timer的区别 [转]
    SQL Prompt 3 优秀的SQL查询工具 收藏
  • 原文地址:https://www.cnblogs.com/lovephysics/p/7248025.html
Copyright © 2011-2022 走看看