zoukankan      html  css  js  c++  java
  • 随机森林学习-2-sklearn

    # -*- coding: utf-8 -*-
    """
    RandomForestClassifier
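    Using 9 sklearn models on 3 datasets.
    1. Topics covered: generating classification sample sets with sklearn, standardization, drawing contour plots, splitting into training and test sets
    2. Result: on all three 2-D classification problems (linear and non-linear), random forest is shown to achieve good results.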
    """
    
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    try:
        # scikit-learn >= 0.18; ``sklearn.cross_validation`` was removed in 0.20.
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Fallback for legacy scikit-learn releases that predate model_selection.
        from sklearn.cross_validation import train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.datasets import make_moons, make_circles, make_classification
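    # make_classification: randomly generates continuous features and a categorical target
    # make_moons: generates 2-D features and a categorical target, shaped as two interleaving half-moons
    # make_circles: generates 2-D features and a categorical target, shaped as a large circle containing a smaller one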
    
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA    # 线性判别分析(LDA)
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA # 二次判别分析(QDA)
    
    # Spacing between neighbouring points of the mesh on which the decision
    # surfaces are evaluated.
    h = 0.02

    # Each display label sits next to the estimator it describes, so the two
    # parallel lists derived below cannot drift out of step.
    _labelled_classifiers = [
        ("Nearest Neighbors", KNeighborsClassifier(3)),
        ("Linear SVM", SVC(kernel="linear", C=0.025)),
        ("RBF SVM", SVC(gamma=2, C=1)),
        ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
        ("Random Forest",
         RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
        ("AdaBoost", AdaBoostClassifier()),
        ("Naive Bayes", GaussianNB()),
        ("LDA", LDA()),
        ("QDA", QDA()),
    ]
    names = [label for label, _ in _labelled_classifiers]
    classifiers = [estimator for _, estimator in _labelled_classifiers]
    
    # Two-feature, two-class sample set: both features are informative, none
    # are redundant, and each class forms a single cluster.
    X, y = make_classification(
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_classes=2,
        n_clusters_per_class=1,
        random_state=1,
    )
    # Quick look at the generated samples, coloured by class label
    # (X.T yields the two feature columns in order).
    plt.scatter(*X.T, marker='o', c=y)

    # Seeded generator, so the perturbation below is the same on every run.
    rng = np.random.RandomState(2)
    # Perturb every coordinate in place with uniform noise scaled by 2.
    X += rng.uniform(size=X.shape) * 2
    linearly_separable = (X, y)

    # The three (features, labels) pairs compared by the rest of the script.
    moons = make_moons(noise=0.3, random_state=0)
    circles = make_circles(noise=0.2, factor=0.5, random_state=1)
    datasets = [moons, circles, linearly_separable]
    
    def _scatter_samples(ax, X_train, X_test, y_train, y_test, cmap):
        """Draw training points opaque and test points semi-transparent on ``ax``."""
        ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cmap)
        ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cmap, alpha=0.6)


    def _frame_axes(ax, xx, yy):
        """Limit ``ax`` to the extent of the mesh and remove the tick marks."""
        ax.set_xlim(xx.min(), xx.max())
        ax.set_ylim(yy.min(), yy.max())
        ax.set_xticks(())
        ax.set_yticks(())


    figure = plt.figure(figsize=(27, 9))

    # Loop-invariant plot settings, built once instead of once per dataset.
    cm = plt.cm.RdBu  # colormap for the decision surface
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])  # one solid colour per class
    n_rows = len(datasets)
    n_cols = len(classifiers) + 1  # extra leading column shows the data alone

    i = 1  # 1-based index of the next subplot; the grid is filled row by row
    for ds in datasets:
        # Standardize the features, then hold out 40% of the samples for testing.
        X, y = ds
        X = StandardScaler().fit_transform(X)
        # A fixed seed keeps the train/test partition, and therefore the
        # plotted points, identical from run to run.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.4, random_state=42)

        # Coordinate matrices of a mesh that covers the data with a 0.5 margin
        # on every side, sampled every ``h``.
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # First column of the row: the dataset on its own.
        ax = plt.subplot(n_rows, n_cols, i)
        _scatter_samples(ax, X_train, X_test, y_train, y_test, cm_bright)
        _frame_axes(ax, xx, yy)
        i += 1

        # Remaining columns: one fitted classifier each.
        for name, clf in zip(names, classifiers):
            ax = plt.subplot(n_rows, n_cols, i)
            clf.fit(X_train, y_train)
            score = clf.score(X_test, y_test)

            # Plot the decision boundary by assigning a colour to each point
            # in the mesh [x_min, x_max] x [y_min, y_max]. Use the decision
            # value when the estimator exposes one, otherwise column 1 of
            # ``predict_proba``.
            grid = np.c_[xx.ravel(), yy.ravel()]  # one (x, y) row per mesh point
            if hasattr(clf, "decision_function"):
                Z = clf.decision_function(grid)
            else:
                Z = clf.predict_proba(grid)[:, 1]

            # Back to the mesh shape so the values can be drawn as filled contours.
            Z = Z.reshape(xx.shape)
            ax.contourf(xx, yy, Z, cmap=cm, alpha=.8)

            # Overlay the samples on top of the decision surface.
            _scatter_samples(ax, X_train, X_test, y_train, y_test, cm_bright)
            _frame_axes(ax, xx, yy)
            ax.set_title(name)
            # Test-set score near the lower-right corner, leading zero stripped.
            ax.text(xx.max() - .3, yy.min() + .3, ('%.2f' % score).lstrip('0'),
                    size=15, horizontalalignment='right')
            i += 1

    # Trim the blank margins on the left and right of the subplot grid.
    figure.subplots_adjust(left=.02, right=.98)
    plt.show()

    原始来源网址

  • 相关阅读:
    struts2校验器规范错误解决
    java extend 和 implements 的区别
    Java 中 synchronized的用法详解
    事件处理程序
    Web字体(链接)嵌入
    常见浏览器bug(针对IE6及更低版本)及其修复方法
    canvas实现的时钟效果
    SSM(springMVCspringmybatis)环境搭建01建立Mavenweb项目
    Android中关于软键盘的一些设置
    Android调用系统相机和相册
  • 原文地址:https://www.cnblogs.com/andylhc/p/10315641.html
Copyright © 2011-2022 走看看