zoukankan      html  css  js  c++  java
  • xgboost: load a model from a dump text file

    python package : https://github.com/mwburke/xgboost-python-deploy

    import xgboost as xgb
    import numpy as np
    import pandas as pd
    from xgb_deploy.fmap import generate_fmap_from_pandas
    from xgb_deploy.model import ProdEstimator
    from sklearn.model_selection import train_test_split
    import json
    import random
    
    # Size of the synthetic dataset: 80 float columns, 20 integer columns, 50000 rows.
    dim_float = 80
    dim_int = 20
    n = 50000

    # Float features drawn from np.random.rand, named float_0 .. float_79.
    df_float = pd.DataFrame(
        np.random.rand(n, dim_float),
        columns=['float_{}'.format(i) for i in range(dim_float)],
    )
    # Integer features drawn from np.random.randint(0, 10), named int_0 .. int_19.
    df_int = pd.DataFrame(
        np.random.randint(0, 10, size=(n, dim_int)),
        columns=['int_{}'.format(i) for i in range(dim_int)],
    )

    # Feature columns keep the float-then-int order used for the combined frame.
    feature_cols = [*df_float.columns, *df_int.columns]
    df_data = pd.concat((df_float, df_int), axis=1)

    # Random binary target (values 0 or 1), one per row.
    df_data['label'] = np.random.randint(0, 2, size=n)

    # Quick look at the generated data: class balance, shape, first rows.
    for summary in (df_data['label'].value_counts(), df_data.shape, df_data.head(5)):
        print(summary)
    
    # Produce 'demo_fmap.txt' from the full frame; the same file name is passed
    # as `fmap` when the model is dumped further down.
    # NOTE(review): df_data still contains the 'label' column here -- presumably
    # harmless for the feature map, confirm against xgb_deploy.
    generate_fmap_from_pandas(df_data, 'demo_fmap.txt')

    # Hold out 33% of the rows for evaluation (no fixed seed, so the split
    # differs between runs).
    features = df_data[feature_cols]
    labels = df_data['label']
    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=0.33,
    )

    # DMatrix views of the two partitions.
    dtrain = xgb.DMatrix(data=X_train, label=y_train)
    dtest = xgb.DMatrix(data=X_test, label=y_test)
    
    # Hyper-parameters for the scikit-learn style classifier.
    # - 'base_score' is read again later when the dumped model is re-scored,
    #   so the key must stay in this dict.
    # - 'n_jobs' previously had a trailing space in its key ('n_jobs '), which
    #   meant the intended setting was never applied.
    # - 'verbosity' replaces the deprecated 'silent' flag.
    # - 'eval_metric' is given once, here. The original passed 'auc' here and
    #   'logloss' to fit(); 'logloss' is kept as the single metric.
    classification_params = {
        'base_score': 0.5,  # np.mean(y_train),
        'max_depth': 3,
        'eta': 0.1,
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'verbosity': 0,
        'n_jobs': -1,
    }

    clf = xgb.XGBClassifier(**classification_params)
    # Track the metric on both partitions while training.
    clf.fit(
        X_train,
        y_train,
        eval_set=[(X_train, y_train), (X_test, y_test)],
        verbose=True,
    )

    # X_test comes out of train_test_split as a slice of the source frame;
    # take an explicit copy before adding columns to avoid pandas'
    # SettingWithCopyWarning.
    X_test = X_test.copy()
    # Probability of the positive class according to the native model.
    X_test['pred1'] = clf.predict_proba(X_test)[:, 1]

    # Use the public accessor rather than the private `_Booster` attribute.
    model = clf.get_booster()

    # Dump the trees as JSON, with feature names resolved through the fmap file.
    model.dump_model(fout='demo_xgb.json', fmap='demo_fmap.txt', dump_format='json')
    
    
    # Read back the JSON tree dump written by dump_model.
    with open('demo_xgb.json', 'r') as dump_file:
        model_data = json.load(dump_file)

    # Build the xgb_deploy estimator from the dump, using the same base_score
    # the classifier was trained with.
    estimator = ProdEstimator(
        model_data,
        pred_type='classification',
        base_score=classification_params['base_score'],
    )

    # Score every hold-out row (one dict per row) and store the result next to
    # the native prediction.
    records = X_test.to_dict(orient='records')
    X_test['pred2'] = estimator.predict(records)

    # Per-row difference between the native and the re-implemented prediction.
    X_test['diff'] = X_test['pred1'] - X_test['pred2']
    comparison = X_test[['pred1', 'pred2', 'diff']]
    print(comparison.head(30))
    # Signed sum of the differences over the whole hold-out set.
    print(X_test['diff'].sum())
    
    
          pred1     pred2          diff
    

    33243 0.515672 0.515672 1.635301e-08
    15742 0.478694 0.478694 3.468678e-08
    24815 0.596091 0.596091 -5.536898e-09
    33120 0.489696 0.489696 4.128085e-08
    29388 0.472804 0.472804 -6.701184e-09
    33662 0.478668 0.478668 1.495377e-08
    15019 0.495415 0.495415 -1.104315e-09
    7787 0.555280 0.555280 -1.022957e-08
    39378 0.494439 0.494439 5.891659e-08
    15317 0.481563 0.481563 1.630472e-08
    31946 0.533403 0.533403 -2.231835e-08
    16784 0.484454 0.484454 2.196223e-08
    13511 0.529494 0.529494 -2.274838e-09
    11304 0.492583 0.492583 -1.724794e-09
    9583 0.501279 0.501279 -1.815183e-09
    31448 0.517019 0.517019 -2.593171e-08
    38030 0.482880 0.482880 -1.191063e-08
    49734 0.479614 0.479614 -1.770112e-08
    15682 0.479675 0.479675 4.876058e-09
    30756 0.539753 0.539753 9.885628e-09
    4829 0.507685 0.507685 2.341456e-08
    49888 0.502952 0.502952 2.951946e-08
    41311 0.500395 0.500395 1.270836e-08
    22434 0.486226 0.486226 1.047917e-08
    45807 0.531456 0.531457 -3.217818e-08
    25009 0.490071 0.490071 2.752955e-08
    3419 0.516763 0.516763 -2.142890e-09
    18176 0.486686 0.486686 -5.403653e-09
    18296 0.490275 0.490275 -3.624349e-08
    314 0.496112 0.496112 -1.507733e-08
    -0.05263647978160496

  • 相关阅读:
    C# Linq Enumerable 技巧
    Winform 踩坑
    BootStrap Table
    java8+junit5实现并发测试(多线程)
    Junit5+REST-assured 做接口测试
    log4j的使用
    ASP.NET项目启用SSL
    hyper-v虚拟机内存占用过高
    C#使用qq邮箱的smtp服务发邮件
    CALayer设置圆角
  • 原文地址:https://www.cnblogs.com/sandy-t/p/11672563.html
Copyright © 2011-2022 走看看