zoukankan      html  css  js  c++  java
  • 逻辑回归

    from numpy import *
    import matplotlib.pyplot as plt
    
    
    def loadDataSet(filename='testSet.txt'):
        """Read a whitespace-separated data file into feature rows and labels.

        Each non-blank line must hold at least three fields: two float
        features followed by an integer class label.  A constant 1.0 is
        prepended to every feature row so the first weight acts as the
        intercept term.

        Args:
            filename: path of the file to read; defaults to 'testSet.txt'.

        Returns:
            A tuple ``(data_mat, label_mat)`` where ``data_mat`` is a list of
            ``[1.0, x1, x2]`` lists and ``label_mat`` is a list of ints.

        Raises:
            OSError: if the file cannot be opened.
            IndexError / ValueError: if a non-blank line is malformed.
        """
        data_mat = []
        label_mat = []
        # The context manager guarantees the handle is closed even on a parse error.
        with open(filename) as fr:
            for line in fr:
                line_arr = line.strip().split()
                if not line_arr:
                    # Tolerate blank lines (e.g. a trailing newline at end of file).
                    continue
                data_mat.append([1.0, float(line_arr[0]), float(line_arr[1])])
                label_mat.append(int(line_arr[2]))
        return data_mat, label_mat
    
    
    def sigmoid(in_x):
        """Return the logistic function 1 / (1 + e**(-in_x)) of ``in_x``.

        ``exp`` is applied to ``-in_x`` as given, so whatever input types
        ``exp`` accepts are accepted here.
        """
        denominator = 1 + exp(-in_x)
        return 1.0 / denominator
    
    
    def gradAscent(data_mat_in, class_labels, alpha=0.001, max_cycles=500):
        """Fit logistic-regression weights with batch gradient ascent.

        Every iteration uses the whole data set: the prediction error
        ``labels - sigmoid(X * w)`` is projected back through ``X^T`` and the
        weights take a step of size ``alpha`` in that direction.

        Args:
            data_mat_in: 2-D sequence of feature rows (one sample per row).
            class_labels: sequence of class labels, one per row.
            alpha: step size of each update (default 0.001).
            max_cycles: number of full passes to run (default 500).

        Returns:
            The weights as an (n, 1) column, n being the number of feature
            columns.  After at least one update this is a ``numpy.matrix``.
        """
        data_matrix = mat(data_mat_in)  # list -> matrix, so `*` is a matrix product
        label_mat = mat(class_labels).transpose()  # row of labels -> column vector
        n = shape(data_matrix)[1]  # number of feature columns
        weights = ones((n, 1))  # start from an all-ones column vector
        for _ in range(max_cycles):
            h = sigmoid(data_matrix * weights)  # predicted value for every sample
            error = label_mat - h  # per-sample difference between label and prediction
            # w = w + alpha * X^T * error
            weights = weights + alpha * data_matrix.transpose() * error
        return weights
    
    
    def plotBestFit(weights):
        """Scatter-plot the data from ``loadDataSet()`` with the decision line.

        Samples labelled 1 are drawn as red squares, all others as green
        dots.  The line drawn is where ``w0 + w1*x1 + w2*x2 == 0``.

        Args:
            weights: the three weights ``(w0, w1, w2)``.  May be a flat
                sequence, an (3, 1) array, or the ``numpy.matrix`` column
                returned by ``gradAscent``; it is flattened before use.
        """
        # Flatten first: indexing a numpy.matrix column yields 1x1 matrices, which
        # would make `y` two-dimensional and no longer match `x` in ax.plot.
        weights = asarray(weights, dtype=float).ravel()

        data_mat, label_mat = loadDataSet()
        xcord1, ycord1 = [], []  # samples with label 1
        xcord2, ycord2 = [], []  # every other sample
        for row, label in zip(data_mat, label_mat):
            # Column 0 of each row is the constant 1.0; columns 1 and 2 are plotted.
            if int(label) == 1:
                xcord1.append(row[1])
                ycord1.append(row[2])
            else:
                xcord2.append(row[1])
                ycord2.append(row[2])

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
        ax.scatter(xcord2, ycord2, s=30, c='green')
        x = arange(-3.0, 3.0, 0.1)
        # Solve w0 + w1*x1 + w2*x2 = 0 for x2 to get the boundary line.
        y = (-weights[0] - weights[1] * x) / weights[2]
        ax.plot(x, y)
        plt.xlabel('x1')
        plt.ylabel('x2')
        plt.show()
    
    
    def stocGradAscent0(data_matrix, class_labels, alpha=0.01):
        """Fit logistic-regression weights with one stochastic pass over the data.

        The weights are updated after every single sample, visiting the rows
        once each in their stored order.

        Args:
            data_matrix: 2-D data, one sample per row.  Lists of lists,
                ndarrays and ``numpy.matrix`` are all accepted; the input is
                converted to a plain float ndarray first.
            class_labels: sequence of class labels, one per row.
            alpha: step size of each update (default 0.01).

        Returns:
            A 1-D ndarray of weights, one per feature column.
        """
        # Plain Python lists cannot be scaled by a numpy float, and numpy.matrix
        # rows stay two-dimensional, so normalise to a 2-D ndarray up front.
        data_matrix = asarray(data_matrix, dtype=float)
        m, n = shape(data_matrix)
        weights = ones(n)
        for i in range(m):
            h = sigmoid(sum(data_matrix[i] * weights))  # scalar prediction for sample i
            error = class_labels[i] - h  # scalar error for sample i
            weights = weights + alpha * error * data_matrix[i]
        return weights
    
    
    def stocGradAscent1(data_matrix, class_labels, num_iter=150):
        """Fit logistic-regression weights with randomised stochastic gradient ascent.

        Each of the ``num_iter`` passes visits every sample exactly once in a
        random order, updating the weights after each sample with a step size
        that shrinks as the pass and step counters grow.

        Args:
            data_matrix: 2-D data, one sample per row (converted to a float
                ndarray, so lists of lists are accepted too).
            class_labels: sequence of class labels, one per row.
            num_iter: number of passes over the data (default 150).

        Returns:
            A 1-D ndarray of weights, one per feature column.
        """
        data_matrix = asarray(data_matrix, dtype=float)
        m, n = shape(data_matrix)
        weights = ones(n)
        for j in range(num_iter):
            data_index = list(range(m))  # rows not yet used in this pass
            for i in range(m):
                # Decays with j + i but never drops below 0.01, so late samples
                # still influence the weights.
                alpha = 4 / (1.0 + j + i) + 0.01
                pick = int(random.uniform(0, len(data_index)))
                # `pick` is a position in data_index, not a row number: translate
                # it to the actual row and remove that row from the pool so each
                # sample is used exactly once per pass.
                sample = data_index.pop(pick)
                h = sigmoid(sum(data_matrix[sample] * weights))
                error = class_labels[sample] - h
                weights = weights + alpha * error * data_matrix[sample]
        return weights
    
    
    def classifyVector(in_x, weights):
        """Classify one sample: 1.0 if sigmoid(sum(in_x * weights)) > 0.5, else 0.0."""
        activation = sum(in_x * weights)
        return 1.0 if sigmoid(activation) > 0.5 else 0.0
    
    
    def _parse_colic_line(line, num_features=21):
        """Split one tab-separated record into (list of float features, raw label text)."""
        fields = line.strip().split('\t')
        features = [float(value) for value in fields[:num_features]]
        # Indexing (not slicing) the label keeps the IndexError on short records.
        return features, fields[num_features]


    def colicTest(train_path='horseColicTraining.txt',
                  test_path='horseColicTest.txt',
                  num_iter=200):
        """Train on one tab-separated file, evaluate on another, report the error rate.

        Every record holds 21 feature columns followed by the class label in
        column 22.  Weights are fitted with ``stocGradAscent1`` and each test
        record is classified with ``classifyVector``.

        Args:
            train_path: training file (default 'horseColicTraining.txt').
            test_path: test file (default 'horseColicTest.txt').
            num_iter: number of passes handed to ``stocGradAscent1`` (default 200).

        Returns:
            The fraction of test records that were misclassified.

        Raises:
            OSError: if either file cannot be opened.
            ZeroDivisionError: if the test file contains no records.
        """
        training_set = []
        training_labels = []
        test_records = []
        # Open both files up front so a missing test file fails before the
        # (slow) training step, and close both as soon as they are parsed.
        with open(train_path) as fr_train, open(test_path) as fr_test:
            for line in fr_train:
                if not line.strip():
                    continue  # ignore blank lines
                features, label = _parse_colic_line(line)
                training_set.append(features)
                training_labels.append(float(label))
            for line in fr_test:
                if not line.strip():
                    continue  # ignore blank lines
                test_records.append(_parse_colic_line(line))

        train_weights = stocGradAscent1(array(training_set), training_labels, num_iter)

        error_count = 0
        for features, label in test_records:
            predicted = int(classifyVector(array(features), train_weights))
            # Go through float() so labels written as '1.000000' parse as well as '1'.
            if predicted != int(float(label)):
                error_count += 1
        error_rate = float(error_count) / len(test_records)
        print('the error rate of this test is : %s' % error_rate)
        return error_rate
    
    
    def multiTest(num_tests=10):
        """Run ``colicTest`` repeatedly and print the mean error rate.

        Args:
            num_tests: how many times to run ``colicTest`` (default 10).
                Must be positive; zero raises ZeroDivisionError when the
                average is computed.
        """
        error_sum = 0.0
        for _ in range(num_tests):
            error_sum += colicTest()
        average_error = error_sum / float(num_tests)
        print('after %s iterations the average error rate is: %s' % (num_tests, average_error))
    

      

  • 相关阅读:
    BZOJ 1651: [Usaco2006 Feb]Stall Reservations 专用牛棚
    AC日记——绿豆蛙的归宿 codevs 2488
    AC日记——codeforces Ancient Berland Circus 1c
    AC日记——平衡树练习 codevs 4244
    AC日记——[NOIP2015]运输计划 cogs 2109
    AC日记——pigs poj 1149
    AC日记——Card Game codeforces 808f
    AC日记——斐波那契数列(升级版) 洛谷 P2626
    AC日记——Collectors Problem uva 10779
    AC日记——中山市选[2009]小明的游戏 bzoj 2464
  • 原文地址:https://www.cnblogs.com/luck-L/p/9168549.html
Copyright © 2011-2022 走看看