zoukankan      html  css  js  c++  java
  • 机器学习——多方法简单分类(含决策树、朴素贝叶斯、随机森林、支持向量机 SVM),含源码 自学指南

    直接给代码:

      1 # -- coding: gbk --
      2 from sklearn.datasets import load_breast_cancer
      3 from sklearn.tree import DecisionTreeClassifier
      4 from sklearn.model_selection import  train_test_split
      5 from sklearn.tree import export_graphviz
      6 import pandas as pd
      7 import graphviz
      8 import mglearn
      9 from sklearn.ensemble import RandomForestClassifier
     10 from sklearn.datasets import make_moons
     11 from sklearn.ensemble import GradientBoostingClassifier
     12 from sklearn.svm import SVC
     13 from pylab import *
     def 决策树():
         """Train a decision tree on the breast-cancer dataset and report accuracy.

         Loads scikit-learn's breast-cancer data, makes a stratified train/test
         split (random_state=42), fits a DecisionTreeClassifier (random_state=0),
         and prints, in order: the training feature matrix, the shape of the
         training labels, the accuracy on the training and test sets, and the
         fitted tree's feature importances.

         Returns:
             None. All results are written to standard output.
         """
         cancer = load_breast_cancer()
         X_train, X_test, y_train, y_test = train_test_split(
             cancer.data, cancer.target, stratify=cancer.target, random_state=42)
         tree = DecisionTreeClassifier(random_state=0)
         print(X_train)
         print(y_train.shape)
         tree.fit(X_train, y_train)
         print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
         print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))
         # Optional visualisation of the fitted tree, left disabled as in the
         # original script:
         #
         # export_graphviz(tree, out_file="tree.dot",
         #                 class_names=["malignant", "benign"],
         #                 feature_names=cancer.feature_names,
         #                 impurity=False, filled=True)
         # with open("tree.dot") as f:
         #     dot_graph = f.read()
         # graphviz.Source(dot_graph)

         # The "\n" escape keeps the literal on one source line; a raw line break
         # inside a single-quoted string is a syntax error.
         print("特征的重要:\n{}".format(tree.feature_importances_))
     34 
     def 随机森林():
         """Fit a five-tree random forest on the two-moons data and plot it.

         Generates 100 noisy two-moons samples, makes a stratified train/test
         split, fits a RandomForestClassifier with five estimators, and prints
         the test predictions, the true test labels, and the fraction of matching
         predictions. It then draws a 2x3 grid of subplots: one partition plot per
         individual tree, and the forest's combined decision boundary in the last
         cell.

         NOTE: this function builds the figure but does not call ``plt.show()``;
         the caller has to display or save it.

         Returns:
             None.
         """
         X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
         X_train, X_test, y_train, y_test = train_test_split(
             X, y, stratify=y, random_state=42)

         # A random forest made of five trees.
         forest = RandomForestClassifier(n_estimators=5, random_state=2)
         forest.fit(X_train, y_train)

         y_pred = forest.predict(X_test)
         print(y_pred)
         print(y_test)
         print(np.mean(y_test == y_pred))

         fig, axes = plt.subplots(2, 3, figsize=(20, 10))
         # Six axes, five trees: zip stops after the fifth tree, leaving the last
         # axes free for the whole-forest plot below.
         for i, (ax, tree) in enumerate(zip(axes.ravel(), forest.estimators_)):
             ax.set_title("Tree {}".format(i))
             # Must be inside the loop so every tree is drawn on its own axes,
             # not just the last one.
             mglearn.plots.plot_tree_partition(X_train, y_train, tree, ax=ax)

         mglearn.plots.plot_2d_separator(
             forest, X_train, fill=True, ax=axes[-1, -1], alpha=.4)
         axes[-1, -1].set_title("Random Forest")
         mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
     51 
     def 梯度提升树():
         """Fit gradient-boosted trees on the breast-cancer data and print results.

         Splits the dataset with random_state=0, trains a
         GradientBoostingClassifier (random_state=0, learning_rate=0.01), then
         prints the predicted test labels followed by the fraction of predictions
         that match the true test labels.

         Returns:
             None.
         """
         dataset = load_breast_cancer()
         features_train, features_test, labels_train, labels_test = train_test_split(
             dataset.data, dataset.target, random_state=0)

         # Alternative configuration kept for reference:
         #   GradientBoostingClassifier(random_state=0, max_depth=1)
         booster = GradientBoostingClassifier(learning_rate=0.01, random_state=0)
         booster.fit(features_train, labels_train)

         predicted = booster.predict(features_test)
         print(predicted)

         accuracy = np.mean(predicted == labels_test)
         print(accuracy)
     61 
     def SVM向量机简易():
         """Fit an RBF-kernel SVM on mglearn's handcrafted dataset and plot it.

         Draws the decision boundary, the data points, and the support vectors
         (emphasised with a thicker marker edge), labels both axes, and shows the
         figure.

         Returns:
             None.
         """
         points, labels = mglearn.tools.make_handcrafted_dataset()

         # gamma is the parameter of the formula given in the previous section; it
         # controls the width of the Gaussian kernel and so decides how large a
         # distance still counts as two points being "close".
         # C is the regularisation parameter, similar to the one used in linear
         # models; it limits the importance of each point (more precisely, each
         # point's dual_coef_).
         classifier = SVC(kernel='rbf', C=10, gamma=0.1)
         classifier.fit(points, labels)

         mglearn.plots.plot_2d_separator(classifier, points, eps=.5)
         mglearn.discrete_scatter(points[:, 0], points[:, 1], labels)

         # Draw the support vectors; the sign of the dual coefficient is used as
         # the class label for each of them.
         support = classifier.support_vectors_
         support_classes = classifier.dual_coef_.ravel() > 0
         mglearn.discrete_scatter(
             support[:, 0], support[:, 1], support_classes,
             s=15, markeredgewidth=3)

         plt.xlabel("Feature 0")
         plt.ylabel("Feature 1")
         plt.show()
     79 
     def 预处理向量机数据():
         """Compare an SVC on raw versus min-max-scaled breast-cancer features.

         Splits the dataset with random_state=0, fits a default SVC on the raw
         features and prints the fraction of correct test predictions. It then
         rescales every feature using the minimum and range computed on the
         training set only, prints the per-feature minimum and maximum of the
         scaled training data, refits a default SVC on the scaled features, and
         prints the fraction of correct test predictions again.

         Returns:
             None. All results are written to standard output.
         """
         cancer = load_breast_cancer()
         X_train, X_test, y_train, y_test = train_test_split(
             cancer.data, cancer.target, random_state=0)

         # Baseline: SVC on the unscaled features.
         svc = SVC()
         svc.fit(X_train, y_train)
         y_pred = svc.predict(X_test)
         print(np.mean(y_pred == y_test))

         # Preprocessing: scaling. The statistics come from the training set only
         # and the same shift and scale are then applied to the test set.
         min_on_training = X_train.min(axis=0)
         range_on_training = (X_train - min_on_training).max(axis=0)

         X_train_scaled = (X_train - min_on_training) / range_on_training
         X_test_scaled = (X_test - min_on_training) / range_on_training
         # "\n" escapes keep each literal on one source line; a raw line break
         # inside a single-quoted string is a syntax error.
         print("Minimum for each feature\n{}".format(X_train_scaled.min(axis=0)))
         print("Maximum for each feature\n {}".format(X_train_scaled.max(axis=0)))

         # Refit on the scaled data and evaluate on the scaled test set.
         svc = SVC()
         svc.fit(X_train_scaled, y_train)
         y_pred = svc.predict(X_test_scaled)
         print(np.mean(y_pred == y_test))
    if __name__ == "__main__":
        # Script entry point: only the SVM preprocessing demo is run.
        预处理向量机数据()
  • 相关阅读:
    Android核心分析之二十五Android GDI之共享缓冲区机制
    Android核心分析之二十四Android GDI之显示缓冲管理
    Android核心分析之二十三Andoird GDI之基本原理及其总体框架
    Android核心分析之二十二Android应用框架之Activity
    Android核心分析之二十一Android应用框架之AndroidApplication
    Android核心分析之二十Android应用程序框架之无边界设计意图
    Android核心分析之十九电话系统之GSMCallTacker
    Android核心分析之十八Android电话系统之RIL-Java
    NFS 服务器的配置
    tftp 服务器的配置
  • 原文地址:https://www.cnblogs.com/smartisn/p/12578560.html
Copyright © 2011-2022 走看看