zoukankan      html  css  js  c++  java
  • 吴裕雄 python 机器学习——数据预处理嵌入式特征选择

    import numpy as np
    import  matplotlib.pyplot as plt
    
    from sklearn.svm import LinearSVC
    from sklearn.linear_model import Lasso
    from sklearn.model_selection import train_test_split
    from sklearn.feature_selection import SelectFromModel
    from sklearn.datasets import load_digits,load_diabetes
    
    # Embedded feature selection demo built on SelectFromModel.
    def test_SelectFromModel():
        """Fit a SelectFromModel selector on the digits data and print a summary.

        The wrapped estimator is a LinearSVC with an L1 penalty (primal form),
        and the string 'mean' is passed as the selection threshold.  After
        fitting, the resolved threshold and the indices of the retained
        features are printed.  Nothing is returned.
        """
        data_bunch = load_digits()
        features, labels = data_bunch.data, data_bunch.target
        l1_svc = LinearSVC(penalty='l1', dual=False)
        feature_selector = SelectFromModel(estimator=l1_svc, threshold='mean')
        feature_selector.fit(features, labels)
        # The reduced matrix is computed but not kept; only the summary
        # printed below is reported.
        feature_selector.transform(features)
        print("Threshold %s"%feature_selector.threshold_)
        print("Support is %s"%feature_selector.get_support(indices=True))
        
    # Run the SelectFromModel demo.
    test_SelectFromModel()

    def load_diabetes():
        """Return the feature matrix and target vector of the digits dataset.

        NOTE: despite its name, this function loads the *digits* data via
        ``load_digits()``.  Defining it here also rebinds the module-level name
        ``load_diabetes`` that was imported from ``sklearn.datasets``.

        Returns
        -------
        tuple
            ``(X, y)`` where ``X`` is ``digits.data`` and ``y`` is
            ``digits.target``.
        """
        bunch = load_digits()
        return bunch.data, bunch.target
    
    def test_Lasso(*data):
        """Plot how the number of zero Lasso coefficients varies with alpha.

        Parameters
        ----------
        *data
            Exactly two positional arguments ``(X, y)``: the feature matrix and
            the target vector handed to ``Lasso.fit``.  ``X`` must expose
            ``shape[1]`` (it is used to set the y-axis limit).

        For every alpha in ``np.logspace(-2, 2)`` a Lasso model is fitted and
        the coefficients whose magnitude is below 1e-5 are counted as zero.
        The counts are plotted against alpha on a logarithmic x axis and the
        figure is displayed with ``plt.show()``.  Nothing is returned.
        """
        X, y = data
        alphas = np.logspace(-2, 2)
        zeros = []
        for alpha in alphas:
            regr = Lasso(alpha=alpha)
            regr.fit(X, y)
            # Count the coefficients that are numerically zero.
            zeros.append(int(np.sum(np.abs(regr.coef_) < 1e-5)))
        # Plot the sparsity curve.
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(alphas, zeros)
        # The backslash is required for mathtext to render the Greek letter;
        # "$alpha$" would be typeset as the italic letters a-l-p-h-a.
        ax.set_xlabel(r"$\alpha$")
        ax.set_xscale("log")
        ax.set_ylim(0, X.shape[1] + 1)
        ax.set_ylabel("zeros in coef")
        ax.set_title("Sparsity In Lasso")
        plt.show()
        
    # Run the Lasso sparsity demo on the arrays returned by load_diabetes().
    X,y = load_diabetes()
    test_Lasso(X,y)

    def test_LinearSVC(*data):
        """Plot how the number of zero LinearSVC coefficients varies with C.

        Parameters
        ----------
        *data
            Exactly two positional arguments ``(X, y)``: the feature matrix and
            the label vector handed to ``LinearSVC.fit``.

        For every C in ``np.logspace(-2, 2)`` an L1-penalised LinearSVC
        (``dual=False``) is fitted and all entries of ``coef_`` whose magnitude
        is below 1e-5 are counted as zero.  The counts are plotted against C on
        a logarithmic x axis and the figure is displayed with ``plt.show()``.
        Nothing is returned.
        """
        features, labels = data
        c_values = np.logspace(-2, 2)
        zero_counts = []
        for c_value in c_values:
            svc = LinearSVC(C=c_value, penalty='l1', dual=False)
            svc.fit(features, labels)
            # Count the numerically-zero entries across the whole
            # coefficient matrix.
            near_zero = np.abs(svc.coef_) < 1e-5
            zero_counts.append(int(np.sum(near_zero)))
        # Plot the sparsity curve.
        figure = plt.figure()
        axes = figure.add_subplot(1, 1, 1)
        axes.plot(c_values, zero_counts)
        axes.set_xlabel("C")
        axes.set_xscale("log")
        axes.set_ylabel("zeros in coef")
        axes.set_title("Sparsity In SVM")
        plt.show()
        
    # Run the LinearSVC sparsity demo on the arrays returned by load_diabetes().
    X,y = load_diabetes()
    test_LinearSVC(X,y)

  • 相关阅读:
    R语言对苏格兰独立民意调查的Meta分析
    R语言中固定与随机效应Meta分析
    Comet OJ
    luoguP6070 [MdOI2020] Decrease 贪心+二维差分
    luoguP6071 [MdOI2020] Treequery DFS序+主席树
    AT2064 [AGC005F] Many Easy Problems 容斥+NTT
    BZOJ 4650: [Noi2016]优秀的拆分 后缀自动机+启发式合并+线段树合并
    BZOJ 1498: [NOI2006]神奇的口袋 性质分析+高精度
    BZOJ 1819: [JSOI]Word Query电子字典 搜索+trie
    robotframework 随机选中下拉框中的值
  • 原文地址:https://www.cnblogs.com/tszr/p/10802130.html
Copyright © 2011-2022 走看看