zoukankan      html  css  js  c++  java
  • 时间序列预测入门(二)

    ARIMA预测

    # -*- coding: utf-8 -*-
    """
    Created on Fri Mar 22 21:03:34 2019
    
    @author: Administrator
    """
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from datetime import datetime
    from datetime import timedelta
    
    # Number of daily record files to load (days 01..num of 2019-01).
    num = 14
    # Backslashes are doubled on purpose: a single backslash right before the
    # closing quote escapes it and leaves the string literal unterminated.
    basepath = 'D:\\pworkspace\\data\\Metro_train\\'
    # One CSV per day; the day number is zero-padded to two digits.
    filenames = [basepath + 'record_2019-01-{:02d}.csv'.format(i)
                 for i in range(1, num + 1)]
    
    # Build `user_in_all`: for station 0, the number of status == 1 records in
    # each 10-minute bucket, across every daily file. The result is a frame
    # indexed by 'time' with a single 'count' column.
    # NOTE(review): status == 1 is presumably "entry" (the original named this
    # subset user_in) -- confirm against the dataset description.
    # The status == 0 counts are no longer computed: their accumulator was
    # commented out, so nothing consumed them.
    in_counts_per_day = []
    for filename in filenames:
        df = pd.read_csv(filename)
        # Floor each timestamp to its 10-minute bucket by replacing the last
        # four characters with '0:00'.
        # NOTE(review): assumes 'time' strings end in 'M:SS' -- confirm.
        df['time'] = pd.to_datetime(df['time'].str[:-4] + '0:00')
        user_in = df[(df['stationID'] == 0) & (df['status'] == 1)]
        # Non-null userID values per bucket, kept as the only column.
        in_counts_per_day.append(
            user_in.groupby('time')['userID'].count().to_frame('count'))
    # Concatenate once at the end instead of growing a frame inside the loop,
    # which would re-copy all previously loaded days on every iteration.
    user_in_all = pd.concat(in_counts_per_day, axis=0)
            
    
    #start = datetime(2019,1,1,0,0,0)
    #timelist = [ str(start + timedelta(seconds=600*i)) for i in range(24 * 6 * 2)]
    
    # Align the observed counts to a complete 10-minute grid covering all
    # `num` days, so buckets with no records appear as explicit zeros.
    startdate = datetime(2019,1,1,0,0,0)
    # Last bucket of the final day: 23:50 on day `num`.
    enddate = startdate + timedelta(days=num-1, hours=23, minutes=50)
    # '10min' replaces the '10T' alias, which newer pandas deprecates.
    full_index = pd.date_range(start=startdate, end=enddate, freq='10min',
                               name='time')
    # Summing by timestamp first makes the index unique (reindex requires it)
    # even if two files contribute to the same bucket. reindex then adds the
    # missing buckets with a count of 0 and drops anything outside the grid.
    # This replaces an outer merge followed by a boolean-mask assignment that
    # wrote 0 into every column of the rows lacking observations.
    user_in_all = (user_in_all.groupby(level=0).sum()
                   .reindex(full_index, fill_value=0))
    user_in_all.plot(figsize=(15,8))
    plt.show()
    
    # Raw count series, then its exponentially weighted moving average
    # (span=60), kept as a one-column frame and plotted.
    ts = user_in_all['count']
    ewm_window = ts.to_frame().ewm(span=60)
    ts_ewma = ewm_window.mean()
    ts_ewma.plot(figsize=(15,8))
    plt.show()
    
    from statsmodels.tsa.stattools import acf, pacf, adfuller
    from statsmodels.stats.diagnostic import acorr_ljungbox
    from statsmodels.tsa.arima_model import ARIMA
    #import statsmodels.api as sm
    
    # First-order difference of the smoothed series; rows made NaN by diff()
    # are dropped, then the 'count' column is taken as a Series.
    ts_diff_1 = ts_ewma.diff(1).dropna(axis=0, how='any')
    ts_diff_1 = ts_diff_1['count']
    # ADF stationarity test. The result is printed because a bare expression
    # statement shows nothing when the file is run as a script.
    adf_result = adfuller(ts_diff_1, autolag='AIC')
    print('ADF test:', adf_result)
    # Ljung-Box white-noise test at lag 1.
    ljungbox_result = acorr_ljungbox(ts_diff_1, lags=1)
    print('Ljung-Box test:', ljungbox_result)
    # ACF and PACF up to lag 50.
    lag_acf = acf(ts_diff_1, nlags=50)
    lag_pacf = pacf(ts_diff_1, nlags=50)
    plt.figure(facecolor='white', figsize=(15, 8))
    plt.plot(lag_acf)
    plt.show()
    plt.figure(facecolor='white', figsize=(15, 8))
    plt.plot(lag_pacf)
    # Approximate 95% significance band: +/- 1.96 / sqrt(N).
    conf_bound = 1.96 / np.sqrt(len(ts_diff_1))
    plt.axhline(y=-conf_bound, linestyle='--', color='gray')
    plt.axhline(y=conf_bound, linestyle='--', color='gray')
    plt.show()
    
    # Fit an ARIMA model with order (6, 0, 0) to the differenced series and
    # take the predictions returned by predict() with default arguments.
    # NOTE(review): statsmodels.tsa.arima_model.ARIMA was removed in
    # statsmodels 0.13; on newer versions the class lives in
    # statsmodels.tsa.arima.model -- confirm the installed version.
    model = ARIMA(ts_diff_1, order=(6, 0, 0))
    fitted = model.fit()
    ts_predict = fitted.predict()
    # Root-mean-square error between prediction and the differenced series.
    squared_error = (ts_predict - ts_diff_1)**2
    rmse = np.sqrt(sum(squared_error) / ts_diff_1.size)
    # Overlay prediction (blue) and original (red) on one figure.
    plt.figure(figsize=(15, 8), facecolor='white')
    for series, colour, tag in ((ts_predict, 'blue', 'Predict'),
                                (ts_diff_1, 'red', 'Original')):
        plt.plot(series, lw=0.5, color=colour, label=tag)
    plt.legend(loc='lower right')
    plt.show()

    运行结果

  • 相关阅读:
    Django DTL模板语法中的过滤器
    Django DTL模板语法中的url反转
    Django DTL模板语法中定义变量
    Django DTL模板语法中的循环的笔记
    UOJ #310 黎明前的巧克力 FWT dp
    6.15 省选模拟赛 老魔杖 博弈论 SG函数
    luogu P4887 模板 莫队二次离线 莫队 离线
    一本通 高手训练 1788 爬山 dp 斜率 凸包
    luogu P5289 [十二省联考2019]皮配 背包
    6.10 省选模拟赛 小C的利是 高斯消元 矩阵行列式
  • 原文地址:https://www.cnblogs.com/coshaho/p/10590940.html
Copyright © 2011-2022 走看看