zoukankan      html  css  js  c++  java
  • 关于决策树的示例

    # -*- coding: utf-8 -*-
    """
    Created on Tue Aug 09 16:15:03 2016
    
    @author: Administrator
    """
    
    import numpy as np
    import pandas as pd
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.cross_validation import train_test_split
    from sklearn.metrics import classification_report
    from sklearn.pipeline import Pipeline
    from sklearn.grid_search import GridSearchCV
    
    if __name__ == '__main__':
        # Decision-tree example: grid-search a DecisionTreeClassifier on a toy
        # dataset and print the best parameters plus a classification report.
        #
        # Alternative data source, kept for reference but disabled: load the
        # 'ad.data' CSV, use its last column as the target and every other
        # column as a feature.
        #
        # df = pd.read_csv('ad.data', header=None)
        # explanatory_variable_columns = set(df.columns.values)
        # response_variable_column = df[len(df.columns.values)-1]
        # # The last column describes the targets
        # explanatory_variable_columns.remove(len(df.columns.values)-1)
        # y = [1 if e == 'ad.' else 0 for e in response_variable_column]
        # X = df[list(explanatory_variable_columns)]
        # X.replace(to_replace=' *?', value=-1, regex=True, inplace=True)

        # Toy dataset: all 16 combinations of four binary features.
        X = np.array([[0, 0, 0, 0],
                      [0, 0, 0, 1],
                      [0, 0, 1, 0],
                      [0, 0, 1, 1],
                      [0, 1, 0, 0],
                      [0, 1, 0, 1],
                      [0, 1, 1, 0],
                      [0, 1, 1, 1],
                      [1, 0, 0, 0],
                      [1, 0, 0, 1],
                      [1, 0, 1, 0],
                      [1, 0, 1, 1],
                      [1, 1, 0, 0],
                      [1, 1, 0, 1],
                      [1, 1, 1, 0],
                      [1, 1, 1, 1]])
        # Targets must be a single 1-D vector with one label per row of X
        # (a multi-row / 2-D array raises an error). Three classes: 0, 1, 2.
        y = np.array([0, 1, 1, 0, 2, 1, 0, 0, 0, 2, 1, 0, 2, 1, 0, 0])

        # Hold out part of the data for the final report; the split is random,
        # so results vary from run to run.
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        pipeline = Pipeline([
            ('clf', DecisionTreeClassifier(criterion='entropy')),
        ])
        parameters = {
            'clf__max_depth': (150, 155, 160),
            # A node needs at least two samples to be split; a value of 1 is
            # meaningless and is rejected by newer scikit-learn releases.
            'clf__min_samples_split': (2, 3),
            'clf__min_samples_leaf': (1, 2, 3),
        }

        # The target has three classes, so the plain 'f1' scorer (binary
        # averaging) does not apply; 'f1_weighted' averages the per-class F1
        # scores weighted by class support.
        grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1,
                                   scoring='f1_weighted')
        grid_search.fit(X_train, y_train)

        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print('Best score: %0.3f' % grid_search.best_score_)
        print('Best parameters set:')
        best_parameters = grid_search.best_estimator_.get_params()
        for param_name in sorted(parameters):
            print('\t%s: %r' % (param_name, best_parameters[param_name]))

        # Evaluate the refitted best estimator on the held-out samples.
        predictions = grid_search.predict(X_test)
        print(classification_report(y_test, predictions))
  • 相关阅读:
    在单向链表中删除指定的key
    双向链表反转
    单向链表反转
    认识异或运算
    二分查找
    插入排序
    冒泡排序
    选择排序
    go 语言环境安装
    欧几里得算法
  • 原文地址:https://www.cnblogs.com/qqhfeng/p/5754174.html
Copyright © 2011-2022 走看看