  • 吴裕雄 Python machine learning: AdaBoost ensemble classification model

    import numpy as np
    import matplotlib.pyplot as plt
    
    from sklearn import datasets,ensemble
    from sklearn.model_selection import train_test_split
    
    def load_data_classification():
        '''
        Load the dataset used for the classification task
        '''
        # use scikit-learn's built-in digits dataset
        digits=datasets.load_digits()
        # stratified split into training and test sets; the test set is 1/4 of the original data
        return train_test_split(digits.data,digits.target,test_size=0.25,random_state=0,stratify=digits.target)
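    # Quick sanity check (an addition, not in the original post): the digits dataset
    # holds 1797 8x8 images flattened to 64 features across 10 classes, so the 1/4
    # stratified split leaves roughly 1350 training and 450 test samples.
    _X_tr,_X_te,_y_tr,_y_te=load_data_classification()
    print(_X_tr.shape,_X_te.shape,np.unique(_y_tr).size)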
    
    # AdaBoost ensemble classification model
    def test_AdaBoostClassifier(*data):
        '''
        Test the basic usage of AdaBoostClassifier and plot how its predictive performance changes with the number of base classifiers
        '''
        X_train,X_test,y_train,y_test=data
        clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
        clf.fit(X_train,y_train)
        ## plot
        fig=plt.figure()
        ax=fig.add_subplot(1,1,1)
        estimators_num=len(clf.estimators_)
        X=range(1,estimators_num+1)
        ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
        ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="best")
        ax.set_title("AdaBoostClassifier")
        plt.show()
        
    # load the classification data
    X_train,X_test,y_train,y_test=load_data_classification()
    # call test_AdaBoostClassifier
    test_AdaBoostClassifier(X_train,X_test,y_train,y_test) 
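    # Follow-up sketch (an addition, not in the original post): staged_score yields the
    # test accuracy after each boosting round, so it can also be used to pick the round
    # where generalization peaks instead of always keeping every fitted estimator.
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
    clf.fit(X_train,y_train)
    test_scores=list(clf.staged_score(X_test,y_test))
    best_round=int(np.argmax(test_scores))+1
    print("best round: %d, test score: %.4f"%(best_round,test_scores[best_round-1]))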

    def test_AdaBoostClassifier_base_classifier(*data):
        '''
        Test how AdaBoostClassifier's predictive performance changes with the number and the type of base classifiers
        '''
        from sklearn.naive_bayes import GaussianNB
        
        X_train,X_test,y_train,y_test=data
        fig=plt.figure()
        ax=fig.add_subplot(2,1,1)
        ########### default base classifier #############
        clf=ensemble.AdaBoostClassifier(learning_rate=0.1)
        clf.fit(X_train,y_train)
        ## plot
        estimators_num=len(clf.estimators_)
        X=range(1,estimators_num+1)
        ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
        ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(0,1)
        ax.set_title("AdaBoostClassifier with Decision Tree")
        ####### Gaussian Naive Bayes base classifier ########
        ax=fig.add_subplot(2,1,2)
        clf=ensemble.AdaBoostClassifier(learning_rate=0.1,base_estimator=GaussianNB())
        clf.fit(X_train,y_train)
        ## plot
        estimators_num=len(clf.estimators_)
        X=range(1,estimators_num+1)
        ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="Training score")
        ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="Testing score")
        ax.set_xlabel("estimator num")
        ax.set_ylabel("score")
        ax.legend(loc="lower right")
        ax.set_ylim(0,1)
        ax.set_title("AdaBoostClassifier with Gaussian Naive Bayes")
        plt.show()
        
    # call test_AdaBoostClassifier_base_classifier
    test_AdaBoostClassifier_base_classifier(X_train,X_test,y_train,y_test) 
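    # Sketch (an addition, not in the original post): the default base learner is a
    # depth-1 decision stump; passing a slightly deeper tree shows how base learner
    # complexity interacts with boosting. Note that base_estimator was renamed to
    # estimator in scikit-learn 1.2, so adjust the keyword for newer versions.
    from sklearn.tree import DecisionTreeClassifier
    clf=ensemble.AdaBoostClassifier(learning_rate=0.1,base_estimator=DecisionTreeClassifier(max_depth=2))
    clf.fit(X_train,y_train)
    print("max_depth=2 base tree, test score: %.4f"%clf.score(X_test,y_test))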

    def test_AdaBoostClassifier_learning_rate(*data):
        '''
        Test how AdaBoostClassifier's predictive performance changes with the learning rate
        '''
        X_train,X_test,y_train,y_test=data
        learning_rates=np.linspace(0.01,1)
        fig=plt.figure()
        ax=fig.add_subplot(1,1,1)
        training_scores=[]
        testing_scores=[]
        for learning_rate in learning_rates:
            clf=ensemble.AdaBoostClassifier(learning_rate=learning_rate,n_estimators=500)
            clf.fit(X_train,y_train)
            training_scores.append(clf.score(X_train,y_train))
            testing_scores.append(clf.score(X_test,y_test))
        ax.plot(learning_rates,training_scores,label="Training score")
        ax.plot(learning_rates,testing_scores,label="Testing score")
        ax.set_xlabel("learning rate")
        ax.set_ylabel("score")
        ax.legend(loc="best")
        ax.set_title("AdaBoostClassifier")
        plt.show()
            
    # call test_AdaBoostClassifier_learning_rate
    test_AdaBoostClassifier_learning_rate(X_train,X_test,y_train,y_test)
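    # Sketch (an addition, not in the original post): learning_rate and n_estimators
    # trade off against each other (a smaller step usually needs more rounds), so it is
    # common to tune them jointly, for example with a small cross-validated grid search.
    from sklearn.model_selection import GridSearchCV
    param_grid={"learning_rate":[0.05,0.1,0.5,1.0],"n_estimators":[50,100,200]}
    search=GridSearchCV(ensemble.AdaBoostClassifier(),param_grid,cv=3)
    search.fit(X_train,y_train)
    print(search.best_params_,"%.4f"%search.best_score_)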

    def test_AdaBoostClassifier_algorithm(*data):
        '''
        Test how AdaBoostClassifier's predictive performance changes with the learning rate and the algorithm parameter
        '''
        X_train,X_test,y_train,y_test=data
        algorithms=['SAMME.R','SAMME']
        fig=plt.figure()
        learning_rates=[0.05,0.1,0.5,0.9]
        for i,learning_rate in enumerate(learning_rates):
            ax=fig.add_subplot(2,2,i+1)
            for algorithm in algorithms:
                clf=ensemble.AdaBoostClassifier(learning_rate=learning_rate,algorithm=algorithm)
                clf.fit(X_train,y_train)
                ## plot
                estimators_num=len(clf.estimators_)
                X=range(1,estimators_num+1)
                ax.plot(list(X),list(clf.staged_score(X_train,y_train)),label="%s:Training score"%algorithm)
                ax.plot(list(X),list(clf.staged_score(X_test,y_test)),label="%s:Testing score"%algorithm)
            ax.set_xlabel("estimator num")
            ax.set_ylabel("score")
            ax.legend(loc="lower right")
            ax.set_title("learing rate:%f"%learning_rate)
        fig.suptitle("AdaBoostClassifier")
        plt.show()
        
    # call test_AdaBoostClassifier_algorithm
    test_AdaBoostClassifier_algorithm(X_train,X_test,y_train,y_test)
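    # Sketch (an addition, not in the original post): with algorithm="SAMME" every
    # boosting round receives an explicit weight derived from its weighted error;
    # both quantities can be inspected through the fitted estimator_weights_ and
    # estimator_errors_ attributes.
    clf=ensemble.AdaBoostClassifier(algorithm="SAMME",learning_rate=0.1)
    clf.fit(X_train,y_train)
    print(clf.estimator_weights_[:5])
    print(clf.estimator_errors_[:5])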

  • Original article: https://www.cnblogs.com/tszr/p/10801543.html