zoukankan      html  css  js  c++  java
  • 统计模型应用--基本预测手法

    分类器:

    import datetime

    import numpy as np
    import pandas as pd
    import pandas_datareader.data as web
    import sklearn
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import confusion_matrix
    from sklearn.svm import LinearSVC, SVC

    def create_lagged_series(symbol, start_date, end_date, lags=5):
        """Build a DataFrame of daily percentage returns and their lags.

        The price history for ``symbol`` is requested from the "quandl"
        source of ``web.DataReader``, beginning 365 days before
        ``start_date``; the extra year gives the shifted (lagged) columns
        history to draw on before the rows prior to ``start_date`` are
        dropped again at the end.

        Args:
            symbol: Ticker string handed to ``web.DataReader``.
            start_date: ``datetime.datetime``; earliest index value kept in
                the returned frame.
            end_date: Last date requested from the data source.
            lags: Number of lagged-return columns to create.

        Returns:
            A ``pandas.DataFrame`` with the columns ``Volume``, ``Today``
            (percentage return of the adjusted close), ``Lag1`` ..
            ``Lag<lags>`` (percentage returns of the shifted adjusted close)
            and ``Direction`` (sign of ``Today``), restricted to index values
            ``>= start_date``.
        """
        ts = web.DataReader(
            symbol, "quandl",
            start_date - datetime.timedelta(days=365),
            end_date,
        ).sort_index()  # sort ascending so shift()/pct_change() look backwards

        # Adjusted close for the current day plus the previous `lags` days.
        tslag = pd.DataFrame(index=ts.index)
        tslag["Today"] = ts["AdjClose"]
        tslag["Volume"] = ts["Volume"]
        for i in range(1, lags + 1):
            tslag["Lag%d" % i] = ts["AdjClose"].shift(i)

        # Convert prices to percentage returns.
        tsret = pd.DataFrame(index=tslag.index)
        tsret["Volume"] = tslag["Volume"]
        today = tslag["Today"].pct_change() * 100.0
        # Returns with magnitude below 0.0001 are replaced by +0.0001 so that
        # np.sign() below never yields 0. NaN compares False and is kept.
        tsret["Today"] = today.mask(today.abs() < 0.0001, 0.0001)
        for i in range(1, lags + 1):
            column = "Lag%d" % i
            tsret[column] = tslag[column].pct_change() * 100.0

        # +1.0 for an up day, -1.0 for a down day.
        tsret["Direction"] = np.sign(tsret["Today"])
        return tsret[tsret.index >= start_date]
    if __name__ == '__main__':
        # Daily percentage returns for AAPL with five lagged-return columns.
        snpret = create_lagged_series(
            "AAPL.US", datetime.datetime(2001, 1, 10),
            datetime.datetime(2005, 12, 31), lags=5,
        )

        # Features: the two most recent lagged returns. Target: up/down sign.
        X = snpret[['Lag1', 'Lag2']]
        Y = snpret["Direction"]

        # Chronological split: fit on everything before 2005, test on 2005.
        start_test = datetime.datetime(2005, 1, 1)
        X_train = X[X.index < start_test]
        X_test = X[X.index >= start_test]
        Y_train = Y[Y.index < start_test]
        Y_test = Y[Y.index >= start_test]

        print("Hit Rates/Confusion Matrices: ")
        models = [
            ('LR', LogisticRegression()),
            ('LDA', LDA()),
            ('QDA', QDA()),
            ("LSVC", LinearSVC()),
            ("RSVM", SVC(
                C=1000000.0, cache_size=200, class_weight=None,
                coef0=0.0, degree=3, gamma=0.0001, kernel='rbf',
                max_iter=-1, probability=False, random_state=None,
                shrinking=True, tol=0.001, verbose=False,
            )),
            ('RF', RandomForestClassifier(
                # 'sqrt' is what the removed 'auto' setting meant for
                # classifiers and is accepted by old and new scikit-learn.
                n_estimators=1000, criterion='gini',
                max_depth=None, min_samples_split=2,
                min_samples_leaf=1, max_features='sqrt',
                bootstrap=True, oob_score=False, n_jobs=1,
                random_state=None, verbose=0,
            )),
        ]

        # Fit each classifier, then report its hit rate (mean accuracy on the
        # test set) and its confusion matrix.
        for name, model in models:
            model.fit(X_train, Y_train)
            pred = model.predict(X_test)
            print("%s: %0.3f" % (name, model.score(X_test, Y_test)))
            # Predictions are passed first, so rows index the predicted label
            # and columns the true label.
            print("%s " % confusion_matrix(pred, Y_test))

       

  • 相关阅读:
    2.5星|《无条件增长》:管理学常识+一些自己的管理案例
    3.5星|《壹棉壹世界》:棉花引发罪恶的黑奴贸易,影响美国南北战争
    只运行一个exe应用程序的使用案例
    WPF中使用WPFMediaKit视频截图案例
    Meta http-equiv属性详解
    层级数据模板 案例(HierarchicalDataTemplateWindow)
    ApplicationCommands 应用程序常见命令
    mvvm command的使用案例
    MatserDetail自动展开
    键盘焦点和逻辑焦点(Logic Focus与Keyboard Focus )
  • 原文地址:https://www.cnblogs.com/kuku0223/p/11064558.html
Copyright © 2011-2022 走看看