  • Several outlier detection algorithms

    The code comes from the scikit-learn demo: http://scikit-learn.org/stable/auto_examples/ensemble/plot_isolation_forest.html#sphx-glr-auto-examples-ensemble-plot-isolation-forest-py

    import numpy as np
    from scipy import stats
    import matplotlib.pyplot as plt
    import matplotlib.font_manager
    
    from sklearn import svm
    from sklearn.covariance import EllipticEnvelope
    from sklearn.ensemble import IsolationForest
    from sklearn.neighbors import LocalOutlierFactor
    
    
    rng = np.random.RandomState(42)
    
    # Example settings
    n_samples = 200
    outliers_fraction = 0.25
    clusters_separation = [0, 1, 2]
    
    # define four outlier detection tools to be compared
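    # - One-Class SVM: nu upper-bounds the fraction of margin errors, so it is
    #   set close to the expected outlier fraction (with a small 0.05 floor)
    # - Robust covariance (EllipticEnvelope): assumes Gaussian-shaped inliers and
    #   fits a robust covariance estimate; points far from it are outliers
    # - Isolation Forest: isolates points with random axis-aligned splits;
    #   outliers are isolated in fewer splits
    # - Local Outlier Factor: flags points whose local density is substantially
    #   lower than that of their neighbors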
    classifiers = {
        "One-Class SVM": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,
                                         kernel="rbf", gamma=0.1),
        "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
        "Isolation Forest": IsolationForest(max_samples=n_samples,
                                            contamination=outliers_fraction,
                                            random_state=rng),
        "Local Outlier Factor": LocalOutlierFactor(
            n_neighbors=35,
            contamination=outliers_fraction)}
    
    # Compare given classifiers under given settings
    xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
    n_inliers = int((1. - outliers_fraction) * n_samples)
    n_outliers = int(outliers_fraction * n_samples)
    ground_truth = np.ones(n_samples, dtype=int)
    ground_truth[-n_outliers:] = -1
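    # sklearn outlier detectors label inliers +1 and outliers -1, so the
    # ground truth uses the same convention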
    
    # Fit the problem with varying cluster separation
    for i, offset in enumerate(clusters_separation):
        np.random.seed(42)
        # Data generation
        X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
        X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
        X = np.r_[X1, X2]
        # Add outliers
        X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]
    
        # Fit the model
        plt.figure(figsize=(9, 7))
        for i, (clf_name, clf) in enumerate(classifiers.items()):
            # fit the data and tag outliers
            if clf_name == "Local Outlier Factor":
                y_pred = clf.fit_predict(X)
                scores_pred = clf.negative_outlier_factor_
            else:
                clf.fit(X)
                scores_pred = clf.decision_function(X)
                y_pred = clf.predict(X)
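            # In both branches, lower scores mean "more abnormal"; the threshold
            # computed below marks off the lowest-scoring outliers_fraction of points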
                
            # Take the score at the preset 25th percentile (outliers_fraction)
            # as the threshold
            threshold = stats.scoreatpercentile(scores_pred, 100 * outliers_fraction)
            
            # Count prediction errors against the ground truth
            n_errors = (y_pred != ground_truth).sum()
            
            
            # Plot the decision-function contours
            if clf_name == "Local Outlier Factor":
                # decision_function is private for LOF
                Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])
            else:
                Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            Z = Z.reshape(xx.shape)
            subplot = plt.subplot(2, 2, i + 1)
            subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                             cmap=plt.cm.Blues_r)
            
            # Draw the threshold boundary as a red line
            a = subplot.contour(xx, yy, Z, levels=[threshold],
                                linewidths=2, colors='red')
            # Fill the region above the threshold (predicted inliers) in orange
            subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                             colors='orange')
            b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',
                                s=20, edgecolor='k')
            c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',
                                s=20, edgecolor='k')
            subplot.axis('tight')
            subplot.legend(
                [a.collections[0], b, c],
                ['learned decision function', 'true inliers', 'true outliers'],
                prop=matplotlib.font_manager.FontProperties(size=10),
                loc='lower right')
            subplot.set_xlabel("%d. %s (errors: %d)" % (i + 1, clf_name, n_errors))
            subplot.set_xlim((-7, 7))
            subplot.set_ylim((-7, 7))
        plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
        plt.suptitle("Outlier detection")
    
    plt.show()
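
    Note: clf._decision_function, used above for Local Outlier Factor, is a
    private API of older scikit-learn releases. A minimal sketch of the same
    idea on a recent scikit-learn (assuming version 0.22 or later, where
    LocalOutlierFactor(novelty=True) exposes a public decision_function for
    new data) might look like this:

    import numpy as np
    from sklearn.ensemble import IsolationForest
    from sklearn.neighbors import LocalOutlierFactor

    rng = np.random.RandomState(42)
    # Synthetic data: one inlier blob plus uniform outliers (same scheme as above)
    X_inliers = 0.3 * rng.randn(150, 2)
    X_outliers = rng.uniform(low=-6, high=6, size=(50, 2))
    X = np.r_[X_inliers, X_outliers]

    # Isolation Forest: fit, then predict (+1 = inlier, -1 = outlier)
    iso = IsolationForest(contamination=0.25, random_state=rng).fit(X)
    print("IsolationForest flagged outliers:", (iso.predict(X) == -1).sum())

    # With novelty=True, LOF exposes decision_function/predict, but only for
    # points that were NOT used to fit the model
    lof = LocalOutlierFactor(n_neighbors=35, novelty=True,
                             contamination=0.25).fit(X)
    X_new = rng.uniform(low=-6, high=6, size=(10, 2))
    print("LOF scores on new points:", lof.decision_function(X_new))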
  • Original article: https://www.cnblogs.com/JohnRain/p/9542445.html