  • Multivariate Linear Regression Prediction in Python

    1. Linear regression with two input features

    The test data is ex1data2.txt:

    2104,3,399900
    1600,3,329900
    2400,3,369000
    1416,2,232000
    3000,4,539900
    1985,4,299900
    1534,3,314900
    1427,3,198999
    1380,3,212000
    1494,3,242500
    1940,4,239999
    2000,3,347000
    1890,3,329999
    4478,5,699900
    1268,3,259900
    2300,4,449900
    1320,2,299900
    1236,3,199900
    2609,4,499998
    3031,4,599000
    1767,3,252900
    1888,2,255000
    1604,3,242900
    1962,4,259900
    3890,3,573900
    1100,3,249900
    1458,3,464500
    2526,3,469000
    2200,3,475000
    2637,3,299900
    1839,2,349900
    1000,1,169900
    2040,4,314900
    3137,3,579900
    1811,4,285900
    1437,3,249900
    1239,3,229900
    2132,4,345000
    4215,4,549000
    2162,4,287000
    1664,2,368500
    2238,3,329900
    2567,4,314000
    1200,3,299000
    852,2,179900
    1852,4,299900
    1203,3,239500

    The Python code is given below.
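
    Before reading the code: the computeCost and gradientDescent functions implement the standard squared-error cost and its batch gradient-descent update. In matrix form, with a column of ones appended to X for the intercept term:

    J(\theta) = \frac{1}{2m} (X\theta - y)^T (X\theta - y)

    \theta := \theta - \frac{\alpha}{m} X^T (X\theta - y)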

    #-*- coding: UTF-8 -*-
    
    import random
    import numpy as np
    import matplotlib.pyplot as plt
    
    # Load the data
    def load_exdata(filename):
        data = []
        with open(filename, 'r') as f:
            for line in f.readlines():
                line = line.split(',')
                # Cast each field to int; use another type if the data requires it
                current = [int(item) for item in line]
                data.append(current)
        return data
    
    data = load_exdata('ex1data2.txt')
    data = np.array(data, np.int64)  # int64 here; choose the dtype to match the data
    
    
    # Feature scaling: zero mean and unit variance for each column
    def featureNormalize(X):
        mu = np.zeros((1, X.shape[1]))
        sigma = np.zeros((1, X.shape[1]))
        for i in range(X.shape[1]):
            mu[0, i] = np.mean(X[:, i])       # mean of column i
            sigma[0, i] = np.std(X[:, i])     # standard deviation of column i
        X_norm = (X - mu) / sigma
        return X_norm, mu, sigma
     
    # Compute the cost J(theta)
    def computeCost(X, y, theta):
        m = y.shape[0]
        C = X.dot(theta) - y            # residuals
        J = (C.T.dot(C)) / (2 * m)      # equivalent to sum of squared residuals / (2m)
        return J
     
    # Batch gradient descent
    def gradientDescent(X, y, theta, alpha, num_iters):
        m = y.shape[0]
        # Store the cost at every iteration
        J_history = np.zeros((num_iters, 1))
        for iter in range(num_iters):
            # Gradient of J: (1/m) * X^T (X theta - y); shapes (3,m)*(m,1) = (3,1)
            theta = theta - (alpha / m) * (X.T.dot(X.dot(theta) - y))
            J_history[iter] = computeCost(X, y, theta)
        return J_history, theta
         
     
    iterations = 10000  # number of iterations
    alpha = 0.01        # learning rate
    x = data[:, (0, 1)].reshape((-1, 2))   # the first two columns are the input features
    y = data[:, 2].reshape((-1, 1))        # the third column is the target (price)
    m = y.shape[0]
    x, mu, sigma = featureNormalize(x)
    X = np.hstack([x, np.ones((x.shape[0], 1))])  # append a column of ones for the intercept
    
    theta = np.zeros((3, 1))  # two feature weights plus the intercept
     
    j = computeCost(X,y,theta)
    J_history,theta = gradientDescent(X, y, theta, alpha, iterations)
     
     
    print('Theta found by gradient descent',theta)
    
    def predict(data):
        testx = np.array(data).reshape((1, -1))    # one sample as a row vector
        testx = (testx - mu) / sigma               # apply the same scaling as the training data
        testx = np.hstack([testx, np.ones((testx.shape[0], 1))])
        price = testx.dot(theta)
        print('price is %d' % price[0, 0])
    
    predict([1650, 3])
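
    As a sanity check, the theta found by gradient descent can be compared with the closed-form least-squares solution. A minimal sketch, assuming the X, y and theta variables defined in the script above:

    # Sketch: cross-check gradient descent against the normal equation (not part of the original script).
    # Assumes X (m x 3 design matrix with a ones column) and y (m x 1) from the script above.
    theta_ne = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)
    print('Theta from the normal equation', theta_ne)
    # With 10000 iterations and alpha = 0.01 the two estimates should agree closely.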

    2. Multivariate linear regression, using three input features as an example

    The input data is testdata.txt. The first column is just a row index and should not be used as a feature:

    1,230.1,37.8,69.2,22.1
    2,44.5,39.3,45.1,10.4
    3,17.2,45.9,69.3,9.3
    4,151.5,41.3,58.5,18.5
    5,180.8,10.8,58.4,12.9
    6,8.7,48.9,75,7.2
    7,57.5,32.8,23.5,11.8
    8,120.2,19.6,11.6,13.2
    9,8.6,2.1,1,4.8
    10,199.8,2.6,21.2,10.6
    11,66.1,5.8,24.2,8.6
    12,214.7,24,4,17.4
    13,23.8,35.1,65.9,9.2
    14,97.5,7.6,7.2,9.7
    15,204.1,32.9,46,19
    16,195.4,47.7,52.9,22.4
    17,67.8,36.6,114,12.5
    18,281.4,39.6,55.8,24.4
    19,69.2,20.5,18.3,11.3
    20,147.3,23.9,19.1,14.6
    21,218.4,27.7,53.4,18
    22,237.4,5.1,23.5,12.5
    23,13.2,15.9,49.6,5.6
    24,228.3,16.9,26.2,15.5
    25,62.3,12.6,18.3,9.7
    26,262.9,3.5,19.5,12
    27,142.9,29.3,12.6,15
    28,240.1,16.7,22.9,15.9
    29,248.8,27.1,22.9,18.9
    30,70.6,16,40.8,10.5
    31,292.9,28.3,43.2,21.4
    32,112.9,17.4,38.6,11.9
    33,97.2,1.5,30,9.6
    34,265.6,20,0.3,17.4
    35,95.7,1.4,7.4,9.5
    36,290.7,4.1,8.5,12.8
    37,266.9,43.8,5,25.4
    38,74.7,49.4,45.7,14.7
    39,43.1,26.7,35.1,10.1
    40,228,37.7,32,21.5
    41,202.5,22.3,31.6,16.6
    42,177,33.4,38.7,17.1
    43,293.6,27.7,1.8,20.7
    44,206.9,8.4,26.4,12.9
    45,25.1,25.7,43.3,8.5
    46,175.1,22.5,31.5,14.9
    47,89.7,9.9,35.7,10.6
    48,239.9,41.5,18.5,23.2
    49,227.2,15.8,49.9,14.8
    50,66.9,11.7,36.8,9.7
    51,199.8,3.1,34.6,11.4
    52,100.4,9.6,3.6,10.7
    53,216.4,41.7,39.6,22.6
    54,182.6,46.2,58.7,21.2
    55,262.7,28.8,15.9,20.2
    56,198.9,49.4,60,23.7
    57,7.3,28.1,41.4,5.5
    58,136.2,19.2,16.6,13.2
    59,210.8,49.6,37.7,23.8
    60,210.7,29.5,9.3,18.4
    61,53.5,2,21.4,8.1
    62,261.3,42.7,54.7,24.2
    63,239.3,15.5,27.3,15.7
    64,102.7,29.6,8.4,14
    65,131.1,42.8,28.9,18
    66,69,9.3,0.9,9.3
    67,31.5,24.6,2.2,9.5
    68,139.3,14.5,10.2,13.4
    69,237.4,27.5,11,18.9
    70,216.8,43.9,27.2,22.3
    71,199.1,30.6,38.7,18.3
    72,109.8,14.3,31.7,12.4
    73,26.8,33,19.3,8.8
    74,129.4,5.7,31.3,11
    75,213.4,24.6,13.1,17
    76,16.9,43.7,89.4,8.7
    77,27.5,1.6,20.7,6.9
    78,120.5,28.5,14.2,14.2
    79,5.4,29.9,9.4,5.3
    80,116,7.7,23.1,11
    81,76.4,26.7,22.3,11.8
    82,239.8,4.1,36.9,12.3
    83,75.3,20.3,32.5,11.3
    84,68.4,44.5,35.6,13.6
    85,213.5,43,33.8,21.7
    86,193.2,18.4,65.7,15.2
    87,76.3,27.5,16,12
    88,110.7,40.6,63.2,16
    89,88.3,25.5,73.4,12.9
    90,109.8,47.8,51.4,16.7
    91,134.3,4.9,9.3,11.2
    92,28.6,1.5,33,7.3
    93,217.7,33.5,59,19.4
    94,250.9,36.5,72.3,22.2
    95,107.4,14,10.9,11.5
    96,163.3,31.6,52.9,16.9
    97,197.6,3.5,5.9,11.7
    98,184.9,21,22,15.5
    99,289.7,42.3,51.2,25.4
    100,135.2,41.7,45.9,17.2
    101,222.4,4.3,49.8,11.7
    102,296.4,36.3,100.9,23.8
    103,280.2,10.1,21.4,14.8
    104,187.9,17.2,17.9,14.7
    105,238.2,34.3,5.3,20.7
    106,137.9,46.4,59,19.2
    107,25,11,29.7,7.2
    108,90.4,0.3,23.2,8.7
    109,13.1,0.4,25.6,5.3
    110,255.4,26.9,5.5,19.8
    111,225.8,8.2,56.5,13.4
    112,241.7,38,23.2,21.8
    113,175.7,15.4,2.4,14.1
    114,209.6,20.6,10.7,15.9
    115,78.2,46.8,34.5,14.6
    116,75.1,35,52.7,12.6
    117,139.2,14.3,25.6,12.2
    118,76.4,0.8,14.8,9.4
    119,125.7,36.9,79.2,15.9
    120,19.4,16,22.3,6.6
    121,141.3,26.8,46.2,15.5
    122,18.8,21.7,50.4,7
    123,224,2.4,15.6,11.6
    124,123.1,34.6,12.4,15.2
    125,229.5,32.3,74.2,19.7
    126,87.2,11.8,25.9,10.6
    127,7.8,38.9,50.6,6.6
    128,80.2,0,9.2,8.8
    129,220.3,49,3.2,24.7
    130,59.6,12,43.1,9.7
    131,0.7,39.6,8.7,1.6
    132,265.2,2.9,43,12.7
    133,8.4,27.2,2.1,5.7
    134,219.8,33.5,45.1,19.6
    135,36.9,38.6,65.6,10.8
    136,48.3,47,8.5,11.6
    137,25.6,39,9.3,9.5
    138,273.7,28.9,59.7,20.8
    139,43,25.9,20.5,9.6
    140,184.9,43.9,1.7,20.7
    141,73.4,17,12.9,10.9
    142,193.7,35.4,75.6,19.2
    143,220.5,33.2,37.9,20.1
    144,104.6,5.7,34.4,10.4
    145,96.2,14.8,38.9,11.4
    146,140.3,1.9,9,10.3
    147,240.1,7.3,8.7,13.2
    148,243.2,49,44.3,25.4
    149,38,40.3,11.9,10.9
    150,44.7,25.8,20.6,10.1
    151,280.7,13.9,37,16.1
    152,121,8.4,48.7,11.6
    153,197.6,23.3,14.2,16.6
    154,171.3,39.7,37.7,19
    155,187.8,21.1,9.5,15.6
    156,4.1,11.6,5.7,3.2
    157,93.9,43.5,50.5,15.3
    158,149.8,1.3,24.3,10.1
    159,11.7,36.9,45.2,7.3
    160,131.7,18.4,34.6,12.9
    161,172.5,18.1,30.7,14.4
    162,85.7,35.8,49.3,13.3
    163,188.4,18.1,25.6,14.9
    164,163.5,36.8,7.4,18
    165,117.2,14.7,5.4,11.9
    166,234.5,3.4,84.8,11.9
    167,17.9,37.6,21.6,8
    168,206.8,5.2,19.4,12.2
    169,215.4,23.6,57.6,17.1
    170,284.3,10.6,6.4,15
    171,50,11.6,18.4,8.4
    172,164.5,20.9,47.4,14.5
    173,19.6,20.1,17,7.6
    174,168.4,7.1,12.8,11.7
    175,222.4,3.4,13.1,11.5
    176,276.9,48.9,41.8,27
    177,248.4,30.2,20.3,20.2
    178,170.2,7.8,35.2,11.7
    179,276.7,2.3,23.7,11.8
    180,165.6,10,17.6,12.6
    181,156.6,2.6,8.3,10.5
    182,218.5,5.4,27.4,12.2
    183,56.2,5.7,29.7,8.7
    184,287.6,43,71.8,26.2
    185,253.8,21.3,30,17.6
    186,205,45.1,19.6,22.6
    187,139.5,2.1,26.6,10.3
    188,191.1,28.7,18.2,17.3
    189,286,13.9,3.7,15.9
    190,18.7,12.1,23.4,6.7
    191,39.5,41.1,5.8,10.8
    192,75.5,10.8,6,9.9
    193,17.2,4.1,31.6,5.9
    194,166.8,42,3.6,19.6
    195,149.7,35.6,6,17.3
    196,38.2,3.7,13.8,7.6
    197,94.2,4.9,8.1,9.7
    198,177,9.3,6.4,12.8
    199,283.6,42,66.2,25.5
    200,232.1,8.6,8.7,13.4

    The Python code:

    #-*- coding: UTF-8 -*-
    
    
    
    import random
    import numpy as np
    import matplotlib.pyplot as plt
    
    
    # Load the data
    def load_exdata(filename):
        data = []
        with open(filename, 'r') as f:
            for line in f.readlines():
                line = line.split(',')
                current = [float(item) for item in line]
                data.append(current)
        return data
    
    data = load_exdata('testdata.txt')
    data = np.array(data, np.float64)  # the data is floating point
    
    
    # Feature scaling: zero mean and unit variance for each column
    def featureNormalize(X):
        mu = np.zeros((1, X.shape[1]))
        sigma = np.zeros((1, X.shape[1]))
        for i in range(X.shape[1]):
            mu[0, i] = np.mean(X[:, i])       # mean of column i
            sigma[0, i] = np.std(X[:, i])     # standard deviation of column i
        X_norm = (X - mu) / sigma
        return X_norm, mu, sigma
    
    
    # Compute the cost J(theta)
    def computeCost(X, y, theta):
        m = y.shape[0]
        C = X.dot(theta) - y            # residuals
        J = (C.T.dot(C)) / (2 * m)      # sum of squared residuals / (2m)
        return J
    
    
    # Batch gradient descent
    def gradientDescent(X, y, theta, alpha, num_iters):
        m = y.shape[0]
        # Store the cost at every iteration
        J_history = np.zeros((num_iters, 1))
        for iter in range(num_iters):
            # Gradient of J: (1/m) * X^T (X theta - y); shapes (4,m)*(m,1) = (4,1)
            theta = theta - (alpha / m) * (X.T.dot(X.dot(theta) - y))
            J_history[iter] = computeCost(X, y, theta)
        return J_history, theta
    
    
    iterations = 10000  # number of iterations
    alpha = 0.01        # learning rate
    # Columns 1, 2 and 3 of each row are the input features (column 0 is just the row index)
    x = data[:, (1, 2, 3)].reshape((-1, 3))
    # Column 4 is the target value
    y = data[:, 4].reshape((-1, 1))
    m = y.shape[0]
    x, mu, sigma = featureNormalize(x)
    X = np.hstack([x, np.ones((x.shape[0], 1))])  # append a column of ones for the intercept
    
    theta = np.zeros((4, 1))  # three feature weights plus the intercept, so theta has 4 entries
    
    j = computeCost(X, y, theta)
    J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
    
    print('Theta found by gradient descent', theta)
    
    
    def predict(data):
        testx = np.array(data).reshape((1, -1))    # one sample as a row vector
        testx = (testx - mu) / sigma               # apply the same scaling as the training data
        testx = np.hstack([testx, np.ones((testx.shape[0], 1))])
        value = testx.dot(theta)
        print('predicted value is %f' % value[0, 0])
    
    predict([151.5, 41.3, 58.5])  # a 3-dimensional input
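
    matplotlib.pyplot is imported above but never used. A minimal sketch for checking that gradient descent has converged, assuming the J_history and iterations variables from the script above:

    # Sketch: plot the recorded cost history to confirm convergence (not part of the original script).
    plt.plot(np.arange(iterations), J_history.ravel())
    plt.xlabel('iteration')
    plt.ylabel('cost J(theta)')
    plt.title('Gradient descent convergence')
    plt.show()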


  • Original article: https://www.cnblogs.com/Anita9002/p/7641959.html