zoukankan      html  css  js  c++  java
  • 最小二乘法 python实现

    #-*-coding:UTF-8-*-
    # Created on 2015年10月20日
    # @author: hanahimi
    import numpy as np
    import random
    import matplotlib.pyplot as plt
    
    def randData():
        """Generate noisy sample points scattered around the line y = 2*x + 3.

        Abscissas are taken from ``np.arange(-1, 1, 0.02)``.  Each point
        (x, y) on the line is multiplied by its own random factor drawn from
        {0.90, 0.91, ..., 1.20}; the same factor scales both coordinates of a
        point.

        Returns:
            A tuple ``(xa, ya)`` of two equal-length lists holding the
            perturbed x and y coordinates.
        """
        # Points lying exactly on the target curve.
        x = np.arange(-1, 1, 0.02)
        y = [2 * a + 3 for a in x]  # straight line
        # Alternative target (a curve) kept for experimentation:
        #     y = [((a*a-1)*(a*a-1)*(a*a-1)+0.5)*np.sin(a*2) for a in x]
        xa = []
        ya = []
        # Apply a random offset to every point on the curve.
        for xi, yi in zip(x, y):
            # Built-in float() replaces the np.float alias, which was removed
            # from NumPy and raises AttributeError on current releases.
            d = float(random.randint(90, 120)) / 100
            ya.append(yi * d)
            xa.append(xi * d)
        return xa, ya
    
    def hypfunc(x, A):
        """Evaluate the polynomial hypothesis with coefficients A at x.

        Args:
            x: Abscissa value.
            A: Polynomial coefficients [a0, a1, ..., an-1], lowest degree first.

        Returns:
            a0 + a1*x + ... + an-1 * x**(n-1); 0 when A is empty.
        """
        # Use the built-in sum(): handing a generator to np.sum() is deprecated
        # in NumPy and only works through a fallback to this very built-in.
        return sum(a * (x ** i) for i, a in enumerate(A))
    
    def LS_line(X, Y, lam=0.01):
        """Fit a straight line by regularised least squares in closed form.

        Solves theta = (X^T X + lam*I)^-1 X^T y, where a bias column X0 = 1 is
        prepended to the data before solving.

        Args:
            X: 1-D sequence of sample abscissas.
            Y: 1-D sequence of sample ordinates, one per entry of X.
            lam: Weight of the identity matrix added to X^T X.  It is applied
                to the bias term as well as to the slope.

        Returns:
            A 1-D array [theta0, theta1] (intercept, slope).
        """
        x = np.asarray(X, dtype=float).ravel()
        y = np.asarray(Y, dtype=float).ravel()
        # Design matrix of shape (m, 2): a column of ones followed by x.
        design = np.column_stack((np.ones(len(x)), x))
        n_params = design.shape[1]
        gram = np.dot(design.T, design) + lam * np.eye(n_params)
        # Plain ndarrays replace np.mat (removed in NumPy 2.0); solving the
        # linear system avoids forming an explicit matrix inverse.
        return np.linalg.solve(gram, np.dot(design.T, y))
    
    
    def LS_sgd(X, Y, alpha=0.1, epslion=0.003, max_epochs=10000):
        """Fit a straight line by per-sample (stochastic) gradient descent.

        A bias term x0 = 1 is prepended to every sample.  Samples are visited
        in their given order; each one moves theta by
        ``alpha * (theta . x_i - y_i) * x_i``.

        Args:
            X: 1-D sequence of sample abscissas (must not be empty).
            Y: Sequence of sample ordinates; ``Y[i]`` belongs to ``X[i]``.
            alpha: Fixed step size.
            epslion: Convergence threshold on the squared change of theta over
                one full pass through the data.
            max_epochs: Upper bound on the number of passes through the data.

        Returns:
            A 1-D array [theta0, theta1] (intercept, slope).

        Raises:
            ValueError: If X is empty.
        """
        x = np.asarray(X, dtype=float).ravel()
        if len(x) == 0:
            raise ValueError("LS_sgd needs at least one sample")
        samples = np.column_stack((np.ones(len(x)), x))  # add the bias x0
        n_samples, n_params = samples.shape
        theta = np.zeros(n_params)  # initial parameters

        for _ in range(max_epochs):
            # Snapshot once per epoch so the stopping rule measures the change
            # over a whole pass, not just the step from the last sample.
            previous = theta.copy()
            for i in range(n_samples):
                row = samples[i]
                # Compute the residual once so that every coordinate is
                # updated from the same theta (simultaneous update).
                residual = np.dot(theta, row) - Y[i]
                theta -= alpha * residual * row
            if np.sum((theta - previous) ** 2) <= epslion:
                break
        return theta
            
    
    def vandermonde_matrix(X, Y, order=1):
        """Fit a polynomial of the given order via the normal equations.

        Builds the Vandermonde matrix V of the abscissas (columns x**0 ..
        x**order) and solves (V^T V) theta = V^T y.

        Args:
            X: 1-D sequence of sample abscissas.  Only the first ``len(Y)``
                entries are used.
            Y: 1-D sequence of sample ordinates.
            order: Degree of the fitted polynomial.

        Returns:
            A 1-D array [a0, a1, ..., a_order] of polynomial coefficients,
            lowest degree first.
        """
        m = len(Y)
        x = np.asarray(X, dtype=float)[:m]
        y = np.asarray(Y, dtype=float)
        # np.vander yields every power of x in one vectorised call, so no
        # power sum has to be recomputed entry by entry.
        V = np.vander(x, order + 1, increasing=True)
        matX = np.dot(V.T, V)   # entry (k1, k2) is sum_i x_i**(k1 + k2)
        matY = np.dot(V.T, y)   # entry k is sum_i x_i**k * y_i
        return np.linalg.solve(matX, matY)
    
    
    if __name__=="__main__":
        # Demo: fit a line to the noisy samples with two of the solvers above
        # and draw both fits over the data.
        X, Y = randData()
        # Keep the two results under separate names; a single shared variable
        # would let the second fit overwrite the first before it is shown.
        theta_exact = vandermonde_matrix(X, Y, order=1)
        theta_sgd = LS_sgd(X, Y)

        # Plot the data points and the fitted curves.
        plt.figure()
        plt.plot(X, Y, linestyle='', marker='.')
        # The noisy abscissas are not monotonic; sort them so each fitted
        # curve is drawn left to right.
        xs = sorted(X)
        for theta, label in ((theta_exact, 'normal equations'),
                             (theta_sgd, 'SGD')):
            yhyp = [hypfunc(xv, theta) for xv in xs]
            plt.plot(xs, yhyp, linestyle='-', label=label)
        plt.legend()
        plt.show()
        
  • 相关阅读:
    Vue.js
    Spark Streaming自定义Receiver
    Hive UDF函数
    HBase表预分区与压缩
    Hive映射HBase表的几种方式
    Spark源码阅读之存储体系--存储体系概述与shuffle服务
    Spark Streaming实时写入数据到HBase
    基于Spark的用户行为路径分析
    Spark Streaming消费Kafka Direct方式数据零丢失实现
    CountDownLatch如何使用
  • 原文地址:https://www.cnblogs.com/hanahimi/p/4693282.html
Copyright © 2011-2022 走看看