zoukankan      html  css  js  c++  java
  • 《机器学习实战》笔记——逻辑回归

    书上没有给出逻辑回归的具体推导过程,就直接上了代码,这很不好!

    可以参考 Andrew Ng(吴恩达)的机器学习课程,或者看这篇博文:http://blog.csdn.net/wlmnzf/article/details/72855610?utm_source=itdadao

    代码的过程还是比较浅显易懂的,所以就没怎么加注释了。

      1 # _*_ coding:utf-8 _*_
      2 
      3 from numpy import *
      4 def loadDataSet():
      5     dataMat = []
      6     labelMat = []
      7     fr = open('testSet.txt')
      8     for line in fr.readlines():
      9         lineArr = line.strip().split()
     10         dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
     11         labelMat.append(int(lineArr[2]))
     12     return dataMat, labelMat
     13 
     14 def sigmoid(inX):
     15     return 1.0/(1 + exp(-inX))
     16 
     17 def gradAscent(dataMatIn, classLabels):
     18     dataMatrix =  mat(dataMatIn)
     19     labelMat = mat(classLabels).transpose()
     20     m,n = shape(dataMatrix)
     21     alpha = 0.001
     22     maxCycles = 500
     23     weights = ones((n,1))
     24     for k in range(maxCycles):
     25         h = sigmoid(dataMatrix*weights)
     26         error = (labelMat - h)  # 是数  这里没给出推导过程,推导过程上文有链接
     27         weights = weights + alpha * dataMatrix.transpose() * error
     28     return weights
     29 
     30 # 5-3 随机梯度上升算法
     31 def stocGradAscent0(dataMatrix, classLabels):
     32     m,n = shape(dataMatrix)
     33     alpha = 0.01
     34     weights = ones(n)
     35     for i in range(m):
     36         h = sigmoid(sum(dataMatrix[i]*weights))
     37         error = classLabels[i] - h  # 是向量
     38         weights = weights + alpha * error * dataMatrix[i]
     39     return weights
     40 
     41 # 5-4 改进的随机梯度上升算法
     42 def stocGradAscent1(dataMatrix, classLabels, numIter=150):
     43     m,n = shape(dataMatrix)
     44 
     45     weights = ones(n)
     46     for j in range(numIter):
     47         dataIndex = range(m)
     48         for i in range(m):
     49             alpha = 4/(1.0+j+i) + 0.01
     50             randIndex = int(random.uniform(0, len(dataIndex)))
     51             h = sigmoid(sum(dataMatrix[randIndex]*weights))
     52             error = classLabels[randIndex] - h  # 是向量
     53             weights = weights + alpha * error * dataMatrix[randIndex]
     54             del(dataIndex[randIndex])
     55     return weights
     56 
     57 
     58 def plotBestFit(weights):
     59     import matplotlib.pyplot as plt
     60     # weights = wei.getA()    # 把matrix变为array
     61     dataMat, labelMat = loadDataSet()
     62     dataArr = array(dataMat)
     63     n = shape(dataArr)[0]
     64     xcord1 = []
     65     ycord1 = []
     66     xcord2 = []
     67     ycord2 = []
     68     for i in range(n):
     69         if int(labelMat[i])==1:
     70             xcord1.append(dataArr[i,1])
     71             ycord1.append(dataArr[i,2])
     72         else:
     73             xcord2.append(dataArr[i,1])
     74             ycord2.append(dataArr[i,2])
     75     fig = plt.figure()
     76     ax = fig.add_subplot(111)
     77     ax.scatter(xcord1, ycord1, c='red', s=30, marker='s')   # marker中s代表square
     78     ax.scatter(xcord2, ycord2, c='green', s=30)
     79     x = arange(-3, 3, 0.1)
     80     y = (-weights[0] - weights[1] * x) / weights[2]
     81     ax.plot(x, y)
     82     plt.xlabel('X1')
     83     plt.ylabel('X2')
     84     plt.show()
     85 
     86 def classifyVector(inX, weights):
     87     prob = sigmoid(sum(inX * weights))
     88     if prob > 0.5: return 1.0
     89     else: return 0.0
     90 
     91 def colicTest():
     92     frTrain = open('horseColicTraining.txt')
     93     frTest = open('horseColicTest.txt')
     94     trainingSet = []
     95     trainingLabels = []
     96     for line in frTrain.readlines():
     97         currLine = line.strip().split('	')
     98         lineArr = []
     99         for i in range(21):
    100             lineArr.append(float(currLine[i]))
    101         trainingSet.append(lineArr)
    102         trainingLabels.append(float(currLine[21]))
    103     trainWeights = stocGradAscent1(array(trainingSet), trainingLabels, 500)
    104     errorCount = 0
    105     numTestVec = 0.0
    106     for line in frTest.readlines():
    107         numTestVec += 1.0
    108         currLine = line.strip().split('	')
    109         lineArr = []
    110         for i in range(21):
    111             lineArr.append(float(currLine[i]))
    112         if int(classifyVector(array(lineArr), trainWeights)) != int(currLine[21]):
    113             int(currLine[21])
    114             errorCount += 1
    115         errorRate = (float(errorCount)/numTestVec)
    116         print "the error rate of this test is: %f" % errorRate
    117         return errorRate
    118 
    119 def multiTest():
    120     numTests = 10
    121     errorSum = 0.0
    122     for k in range(numTests):
    123         errorSum += colicTest()
    124     print "after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests))
    125 
# Script entry point: runs the repeated train/test evaluation. This is a plain
# module-level call, so it also executes whenever this file is imported.
multiTest()
  • 相关阅读:
    火狐浏览器看哪些地方加nofollow
    2017.6.14-网站分析
    2017.6.11-目标关键词优化 三个方面内容
    2017-6-9长尾关键词优化
    2017.6.7seowhy学习笔记---seo知识总纲
    使用 WordPress 自定义字段功能为文章添加下载按钮
    xftp和xshell有什么区别
    决定网站排名的6个干货
    转载:如何在wordpress主题中添加设置页面
    win10+ubuntu双系统安装方案
  • 原文地址:https://www.cnblogs.com/DianeSoHungry/p/7083007.html
Copyright © 2011-2022 走看看