zoukankan      html  css  js  c++  java
  • 机器学习5—logistic回归学习笔记

    机器学习实战之logistic回归

    test5.py

    #-*- coding:utf-8
    
    import sys
    sys.path.append("logRegres.py")
    
    from numpy import *
    import logRegres
    
    dataArr, labelMat = logRegres.loadDataSet()
    logRegres.gradAscent(dataArr, labelMat)
    
    # weights = logRegres.gradAscent(dataArr, labelMat)
    # logRegres.plotBestFit(weights.getA())
    
    # weights = logRegres.stocGradAscent0(array(dataArr), labelMat)
    # logRegres.plotBestFit(weights)
    
    weights = logRegres.stocGradAscent1(array(dataArr), labelMat)
    logRegres.plotBestFit(weights)
    
    logRegres.multiTest()
    
    print("over")

    logRegres.py

    '''
    Created on Oct 27, 2010
    Logistic Regression Working Module
    @author: Peter
    '''
    from numpy import *
    
    def loadDataSet():
        dataMat = []; labelMat = []
        fr = open('testSet.txt')
        for line in fr.readlines():
            lineArr = line.strip().split()
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
        return dataMat,labelMat
    
    def sigmoid(inX):
        return 1.0/(1+exp(-inX))
    
    def gradAscent(dataMatIn, classLabels):
        dataMatrix = mat(dataMatIn)             #convert to NumPy matrix
        labelMat = mat(classLabels).transpose() #convert to NumPy matrix
        m,n = shape(dataMatrix)
        alpha = 0.001
        maxCycles = 500
        weights = ones((n,1))
        for k in range(maxCycles):              #heavy on matrix operations
            h = sigmoid(dataMatrix*weights)     #matrix mult
            error = (labelMat - h)              #vector subtraction
            weights = weights + alpha * dataMatrix.transpose()* error #matrix mult
        return weights
    
    def plotBestFit(weights):
        import matplotlib.pyplot as plt
        dataMat,labelMat=loadDataSet()
        dataArr = array(dataMat)
        n = shape(dataArr)[0] 
        xcord1 = []; ycord1 = []
        xcord2 = []; ycord2 = []
        for i in range(n):
            if int(labelMat[i])== 1:
                xcord1.append(dataArr[i,1]); ycord1.append(dataArr[i,2])
            else:
                xcord2.append(dataArr[i,1]); ycord2.append(dataArr[i,2])
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
        ax.scatter(xcord2, ycord2, s=30, c='green')
        x = arange(-3.0, 3.0, 0.1)
        y = (-weights[0]-weights[1]*x)/weights[2]
        ax.plot(x, y)
        plt.xlabel('X1'); plt.ylabel('X2');
        plt.show()
    
    def stocGradAscent0(dataMatrix, classLabels):
        m,n = shape(dataMatrix)
        alpha = 0.01
        weights = ones(n)   #initialize to all ones
        for i in range(m):
            inx = sum(dataMatrix[i]*weights)
            h = sigmoid(inx)
            # h = sigmoid(sum(dataMatrix[i]*weights))
            error = classLabels[i] - h
            weights = weights + alpha * error * dataMatrix[i]
        return weights
    
    def stocGradAscent1(dataMatrix, classLabels, numIter=150):
        m,n = shape(dataMatrix)
        weights = ones(n)   #initialize to all ones
        for j in list(range(numIter)):
            dataIndex = list(range(m))
            for i in list(range(m)):
                alpha = 4/(1.0+j+i)+0.0001    #apha decreases with iteration, does not 
                randIndex = int(random.uniform(0,len(dataIndex)))#go to 0 because of the constant
                h = sigmoid(sum(dataMatrix[randIndex]*weights))
                error = classLabels[randIndex] - h
                weights = weights + alpha * error * dataMatrix[randIndex]
                del(dataIndex[randIndex])
        return weights
    
    def classifyVector(inX, weights):
        prob = sigmoid(sum(inX*weights))
        if prob > 0.5: return 1.0
        else: return 0.0
    
    def colicTest():
        frTrain = open('horseColicTraining.txt'); frTest = open('horseColicTest.txt')
        trainingSet = []; trainingLabels = []
        for line in frTrain.readlines():
            currLine = line.strip().split('	')
            lineArr =[]
            for i in range(21):
                lineArr.append(float(currLine[i]))
            trainingSet.append(lineArr)
            trainingLabels.append(float(currLine[21]))
        trainWeights = stocGradAscent1(array(trainingSet), trainingLabels, 1000)
        errorCount = 0; numTestVec = 0.0
        for line in frTest.readlines():
            numTestVec += 1.0
            currLine = line.strip().split('	')
            lineArr =[]
            for i in range(21):
                lineArr.append(float(currLine[i]))
            if int(classifyVector(array(lineArr), trainWeights))!= int(currLine[21]):
                errorCount += 1
        errorRate = (float(errorCount)/numTestVec)
        print("the error rate of this test is: %f" % errorRate)
        return errorRate
    
    def multiTest():
        numTests = 10; errorSum=0.0
        for k in range(numTests):
            errorSum += colicTest()
        print("after %d iterations the average error rate is: %f" % (numTests, errorSum/float(numTests)))
  • 相关阅读:
    Web功能测试常用方法
    linux常用指令集说明
    Linux常用指令集
    Web测试
    APP常用测试方法总结
    软件测试笔试题初级篇
    Java 接口自动化系列--用例类之BaseCase 基础父类
    Java + Selenium 系列之失败操作截图及调用
    Java + Selenium 系列之Allure报告集成
    Java 接口自动化系列--工具类之数据库连接与操作
  • 原文地址:https://www.cnblogs.com/Vae1990Silence/p/8370910.html
Copyright © 2011-2022 走看看