zoukankan      html  css  js  c++  java
  • 数据挖掘实践(4):基础理论(四)数学基础(四)向量与矩阵(四)最小二乘法(二)代码

    import numpy as np
    import matplotlib.pyplot as plt
    # Generate random points scattered around the line y = 5x + 3.
    # (The three assignments were fused onto one line in the original,
    # which is a syntax error; they are separate statements here.)
    X = np.arange(0, 5, 0.1)
    # Noise-free values on the line for every sample position.
    Z = 5 * X + 3
    # One Gaussian draw per point, centred on the line, standard deviation 0.5.
    Y = np.random.normal(Z, 0.5)
    plt.plot(X, Y, 'ro')
    plt.show()

    from scipy.optimize import leastsq
    # Model to fit: func fixes the shape of the curve (a straight line).
    def func(p, x):
        """Evaluate the line ``k*x + b`` where ``p`` unpacks to ``(k, b)``."""
        slope, intercept = p
        return slope * x + intercept
    # Error function: residual between the model prediction and the data.
    def error(p, x, y):
        """Return ``func(p, x) - y``, the residual handed to ``leastsq``."""
        predicted = func(p, x)
        return predicted - y
    # Starting guess for (k, b) passed to leastsq together with the residual function.
    p0 = [1, 20]
    Para = leastsq(error, p0, args=(X, Y))
    # The first element of the result holds the fitted parameters.
    k, b = Para[0]
    # Two end points are enough to draw the fitted straight line.
    _X = [0, 5]
    _Y = [k * x_end + b for x_end in _X]
    plt.plot(X, Y, 'ro', _X, _Y, 'b', linewidth=2)
    plt.title("y = {}x + {}".format(k, b))
    plt.show()

    # Linear regression by solving the normal equations.
    def linear_regression(x, y):
        """Fit ``y = a0 + a1 * x`` by least squares via the normal equations.

        Solves the 2x2 system::

            [ N       sum(x)   ] [a0]   [ sum(y)   ]
            [ sum(x)  sum(x^2) ] [a1] = [ sum(x*y) ]

        Args:
            x: 1-D sequence or array of sample positions.
            y: 1-D sequence or array of observed values, same length as ``x``.

        Returns:
            A length-2 ndarray ``[a0, a1]`` (intercept, slope).

        Raises:
            ValueError: if ``x`` and ``y`` are not 1-D with the same length.
            numpy.linalg.LinAlgError: if the system is singular (for example
                when all ``x`` values are identical).
        """
        # Accept plain lists as well as arrays; floats avoid integer overflow
        # in the squared sums.
        x = np.asarray(x, dtype=float)
        y = np.asarray(y, dtype=float)
        if x.ndim != 1 or x.shape != y.shape:
            raise ValueError("x and y must be 1-D sequences of equal length")

        sumx = x.sum()
        # Plain ndarrays instead of np.mat, which was removed in NumPy 2.0.
        A = np.array([[x.size, sumx], [sumx, np.dot(x, x)]])
        b = np.array([y.sum(), np.dot(x, y)])

        return np.linalg.solve(A, b)
     
    a0, a1 = linear_regression(X, Y)
    # Points used to draw the fitted line: its two end points suffice.
    _X = [0, 5]
    _Y = [a0 + a1 * x_end for x_end in _X]

    plt.plot(X, Y, 'ro', _X, _Y, 'b', linewidth=2)
    plt.title("y = {} + {}x".format(a0, a1))
    plt.show()

    import numpy as np 
    import matplotlib.pyplot as plt
     
    # Noisy samples around the parabola y = 2 + 3x + 4x^2
    X = np.arange(0, 5, 0.1)
    # Noise-free value of the parabola at every sample position.
    Z = [2 + 3 * xi + 4 * xi ** 2 for xi in X]
    # One Gaussian draw per point, centred on the curve, standard deviation 3.
    Y = np.array([np.random.normal(mean, 3) for mean in Z])

    plt.plot(X, Y, 'ro')
    plt.show()

    from scipy.optimize import leastsq
    # Model to fit: func fixes the shape of the curve (a quadratic).
    def func(p, x):
        """Evaluate ``m + n*x + o*x**2`` where ``p`` unpacks to ``(m, n, o)``."""
        const, linear, quad = p
        return const + linear * x + quad * x ** 2
    # Deviation function: x and y are sequences whose entries correspond
    # one-to-one to the sample points (Xi, Yi) generated above.
    def error(p, x, y):
        """Return ``func(p, x) - y``, the residual handed to ``leastsq``."""
        predicted = func(p, x)
        return predicted - y
    # Starting guess for (m, n, o) passed to leastsq together with the residual function.
    p0 = [0, 5, 10]
    Para = leastsq(error, p0, args=(X, Y))
    # The first element of the result holds the fitted parameters.
    m, n, o = Para[0]
    # Sample the fitted parabola on the same grid as the data for plotting.
    _X = np.arange(0, 5, 0.1)
    _Y = np.array([m + n * xi + o * xi ** 2 for xi in _X])
    plt.plot(X, Y, 'ro', _X, _Y, 'b', linewidth=2)
    plt.title("y = {} + {}x + {}$x^2$ ".format(m, n, o))
    plt.show()

    # Build the coefficient matrix A of the normal equations.
    def gen_coefficient_matrix(X, Y, m=3):
        """Return the ``m x m`` normal-equation matrix for a polynomial fit.

        Entry ``(i, j)`` is ``sum(X ** (i + j))``, i.e. the matrix for fitting
        ``a0 + a1*x + ... + a_{m-1}*x**(m-1)`` by least squares.

        Args:
            X: 1-D sequence or array of sample positions.
            Y: unused; kept so the signature mirrors ``gen_right_vector``.
            m: number of polynomial coefficients (default 3, a quadratic).

        Returns:
            A list of ``m`` rows, each a list of ``m`` floats.
        """
        xs = np.asarray(X, dtype=float)
        # Entry (i, j) depends only on i + j, so compute each power sum
        # S_k = sum(x**k) once for k = 0 .. 2m-2 and reuse it.
        power_sums = [np.sum(xs ** k) for k in range(2 * m - 1)]
        return [[power_sums[i + j] for j in range(m)] for i in range(m)]
     
    # Build the right-hand-side vector b of the normal equations.
    def gen_right_vector(X, Y, m=3):
        """Return the length-``m`` right-hand side for a polynomial fit.

        Entry ``i`` is ``sum(X**i * Y)``.

        Args:
            X: 1-D sequence or array of sample positions.
            Y: 1-D sequence or array of observed values, same length as ``X``.
            m: number of polynomial coefficients (default 3, a quadratic).

        Returns:
            A list of ``m`` floats.
        """
        xs = np.asarray(X, dtype=float)
        ys = np.asarray(Y, dtype=float)
        return [np.sum(xs ** i * ys) for i in range(m)]


    # This statement was fused onto the function's return line in the
    # original; it belongs at module level.
    A = gen_coefficient_matrix(X, Y)
    b = gen_right_vector(X, Y)

    # Solve the normal equations A @ [a0, a1, a2] = b for the coefficients.
    a0, a1, a2 = np.linalg.solve(A, b)
    print(a0, a1, a2)
    # Sample output from one run (values vary because the data is random):
    # 2.760380334517119 2.771490852340913 3.99403510389479

    # Points used to draw the fitted curve.
    _X2 = np.arange(0, 5, 0.1)
    _Y2 = a0 + a1 * _X2 + a2 * _X2 ** 2

    plt.plot(X, Y, 'ro', _X2, _Y2, 'b', linewidth=2)
    plt.title("y = {} + {}x + {}$x^2$ ".format(a0, a1, a2))
    plt.show()

    六、总结
    向量和矩阵的概念和计算
    向量和矩阵的区别与联系
    损失函数最小化的意义
  • 相关阅读:
    互联网协议入门(二)
    互联网协议入门(一)
    careercup-扩展性和存储限制10.6
    careercup-扩展性和存储限制10.4
    careercup-扩展性和存储限制10.3
    写一个函数找到给定字符串的位置
    手工删除crfclust.bdb文件
    VirtualBox 报错VERR_VD_IMAGE_READ_ONLY
    Oracle DG测试failover和后续恢复报告
    ASM的备份集在文件系统上恢复测试
  • 原文地址:https://www.cnblogs.com/qiu-hua/p/14321450.html
Copyright © 2011-2022 走看看