zoukankan      html  css  js  c++  java
  • python, 在信用评级中,计算KS statistic值

    # -*- coding: utf-8 -*-
    
    import pandas as pd
    from sklearn.grid_search import GridSearchCV
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.utils import shuffle
    import numpy as np
    from sklearn import metrics
    from sklearn.metrics import log_loss, recall_score, precision_score, accuracy_score,f1_score
    from sklearn.metrics import roc_curve, precision_recall_curve, roc_auc_score
    # from sklearn.model_selection import cross_val_score
    import lightgbm
    
    
    def ks_statistic(Y, Y_hat):
        """Return the Kolmogorov-Smirnov (KS) statistic of a binary scorer.

        The KS statistic is the largest absolute gap between the empirical
        cumulative score distributions of the two classes: 0.0 means the
        scores do not separate the classes at all, 1.0 means they separate
        them perfectly.

        Args:
            Y: 1-D array-like of true labels holding exactly two distinct
                values (for example 0 and 1).
            Y_hat: 1-D array-like of numeric scores, one per label.  Any
                real-valued score works; predicted probabilities are typical.

        Returns:
            The KS statistic as a float in [0, 1].

        Raises:
            ValueError: if ``Y`` and ``Y_hat`` differ in length, or ``Y`` does
                not contain exactly two distinct classes.
        """
        labels = np.asarray(Y).ravel()
        scores = np.asarray(Y_hat, dtype=float).ravel()
        if labels.shape[0] != scores.shape[0]:
            raise ValueError("Y and Y_hat must have the same length")

        classes = np.unique(labels)
        if classes.shape[0] != 2:
            raise ValueError("Y must contain exactly two distinct classes")

        # Walk the samples in ascending score order (stable sort keeps the
        # result deterministic when scores tie).
        order = np.argsort(scores, kind="mergesort")
        sorted_scores = scores[order]
        is_high = labels[order] == classes[1]

        # Each class is normalised by its OWN size so both curves are proper
        # CDFs ending at 1.0; float() keeps the division true on Python 2.
        cdf_high = np.cumsum(is_high) / float(is_high.sum())
        cdf_low = np.cumsum(~is_high) / float((~is_high).sum())

        # Compare the CDFs only after the last sample of each distinct score;
        # inside a run of tied scores the partial sums depend on sort order.
        at_step_end = np.append(sorted_scores[1:] != sorted_scores[:-1], True)
        gap = np.abs(cdf_high[at_step_end] - cdf_low[at_step_end])
        return float(gap.max())
    
    
    
    
    # Row counts are hard-coded for this particular CSV export.
    TRAIN_ROWS = 55596  # leading rows used as the training pool
    TOTAL_ROWS = 69495  # presumably the file's total row count -- TODO confirm

    df = pd.read_csv('DC_ALL_20170217.csv', header=0)
    # Every column except the id and the target is a feature; missing values
    # are replaced by the sentinel -999.
    X = df[df.columns.drop(['user_id','overdue'])].fillna(-999)
    # Alternative hand-picked feature subset, kept for reference:
    # X = df[['count','time_stamp','credit_limit','credit_card_use_rate','credit_count_x','bank_count','sex','occupation','education','marriage','hukou']]
    y = df['overdue']
    train = X.head(TRAIN_ROWS)
    test = X.tail(TOTAL_ROWS - TRAIN_ROWS)

    # Series.convert_objects(convert_numeric=True) was deprecated and later
    # removed from pandas; pd.to_numeric(errors='coerce') is its replacement
    # and likewise turns unparseable values into NaN.
    train_label = pd.to_numeric(y.head(TRAIN_ROWS), errors='coerce')
    # Hold out 20% of the training pool for evaluation (fixed seed).
    X_train, X_test, y_train, y_test = train_test_split(
        train.values, train_label, test_size=0.2, random_state=42)
    
    # --- LightGBM hyper-parameters -------------------------------------
    objective = 'binary'
    is_unbalance = True
    max_depth = 5
    n_estimators = 400
    learning_rate = 0.01
    subsample = 0.8
    nthread = 4
    # NOTE(review): random_state is assigned here but never handed to the
    # classifier below -- confirm whether seeding was intended.
    random_state = 3

    # --- Fit on the training split -------------------------------------
    LGBM = lightgbm.LGBMClassifier(
        objective=objective,
        is_unbalance=is_unbalance,
        max_depth=max_depth,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        subsample=subsample,
        nthread=nthread,
    )
    LGBM.fit(X_train, y_train)

    # --- Score the held-out split ---------------------------------------
    # Column 1 of predict_proba is kept as the score of the second class.
    y_test_p = LGBM.predict_proba(X_test)[:, 1]
    # Hard class predictions, used by the threshold-based metrics.
    y_test_v = LGBM.predict(X_test)
    
    
    def _report(label, value):
        """Print ``label`` and ``value`` on one line, separated by a space."""
        # A single pre-formatted argument makes print(...) emit the same text
        # under the Python 2 print statement and the Python 3 print function.
        print('%s %s' % (label, value))


    # Ranking / probability metrics are fed the predicted probabilities.
    _report('auc: ', roc_auc_score(y_test, y_test_p))
    _report('log_loss: ', log_loss(y_test, y_test_p))
    # Threshold-based metrics are fed the hard class predictions.
    _report('precision: ', precision_score(y_test, y_test_v))
    _report('recall: ', recall_score(y_test, y_test_v))
    _report('accuracy: ', accuracy_score(y_test, y_test_v))
    _report('f1_score: ', f1_score(y_test, y_test_v))
    # KS compares the two classes' score distributions, so it needs the
    # probabilities; hard 0/1 predictions collapse it to a single threshold.
    _report('ks_statistic: ', ks_statistic(y_test.values, y_test_p))
    
  • 相关阅读:
    Vue demo
    netcore使用IOptions
    CS0656 缺少编译器要求的成员“Microsoft.CSharp.RuntimeBinder.CSharpArgumentInfo.Create”
    vs删除空白行 注释
    RabbitMQ入门
    EF 大数据量批量处理
    常用链接字符串
    IDEA使用git提交代码时,点了commit之后卡死在performing code analysis部分,或者performing code analysis结束后没有进入下一步操作。
    Ubuntu16.04安装MySQL5.7
    ubuntu16.04源码编译安装nginx1.14.2
  • 原文地址:https://www.cnblogs.com/huadongw/p/6415447.html
Copyright © 2011-2022 走看看