  • Data Mining Feature Extraction Methods: A Collection

    1. Feature selection with a tree-based model

      # Feature selection with a tree-based model
      import numpy as np
      from sklearn import feature_selection
      from sklearn.ensemble import GradientBoostingClassifier

      def select_by_tree_model(X, target):
          matrix = np.array(X)
          target = np.array(target)
          temp = feature_selection.SelectFromModel(GradientBoostingClassifier()).fit(matrix, target)
          indx = temp.get_support().tolist()                      # boolean mask of selected features
          scores = temp.estimator_.feature_importances_.tolist()  # importance of each feature
          result = temp.transform(matrix).tolist()                # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # target: array-like
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
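
      A minimal usage sketch, assuming the snippet above has been run so that select_by_tree_model is defined, and using scikit-learn's built-in iris dataset:

      from sklearn.datasets import load_iris

      iris = load_iris()
      scores, indx, result = select_by_tree_model(iris.data, iris.target)
      print(scores)          # one importance value per original feature
      print(indx)            # e.g. [False, False, True, True] -- petal features dominate on iris
      print(len(result[0]))  # number of features that were kept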

    2. Feature selection via L1/L2 regularization penalties

      # Feature selection via L1/L2 regularization penalties
      import numpy as np
      from sklearn import feature_selection
      from sklearn.linear_model import LogisticRegression

      def select_by_l1_penalty(X, target):
          matrix = np.array(X)
          target = np.array(target)
          # the liblinear solver is required for the L1 penalty in recent scikit-learn versions
          model = LogisticRegression(penalty="l1", C=0.1, solver="liblinear")
          temp = feature_selection.SelectFromModel(model).fit(matrix, target)
          indx = temp.get_support().tolist()        # boolean mask of selected features
          scores = temp.estimator_.coef_.tolist()   # fitted coefficients used for selection
          result = temp.transform(matrix).tolist()  # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # target: array-like
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectFromModel.html
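
      Since C is the inverse of the regularization strength, smaller values push more coefficients to exactly zero, so fewer features survive. A rough comparison sketch, reusing the imports from the snippet above and scikit-learn's breast-cancer dataset (the C values are arbitrary, for illustration only):

      from sklearn.datasets import load_breast_cancer

      data = load_breast_cancer()
      for c in (0.01, 0.1, 1.0):
          model = LogisticRegression(penalty="l1", C=c, solver="liblinear")
          selector = feature_selection.SelectFromModel(model).fit(data.data, data.target)
          # smaller C -> stronger penalty -> fewer non-zero coefficients -> fewer features kept
          print(c, int(selector.get_support().sum()))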

    3. Recursive feature elimination (RFE)

      # Recursive feature elimination (RFE)
      import numpy as np
      from sklearn import feature_selection
      from sklearn.linear_model import LogisticRegression

      def select_by_rfe(X, target, n_features):
          matrix = np.array(X)
          target = np.array(target)
          temp = feature_selection.RFE(estimator=LogisticRegression(),
                                       n_features_to_select=n_features).fit(matrix, target)
          scores = temp.ranking_.tolist()           # rank of each feature (1 = selected)
          indx = temp.support_.tolist()             # boolean mask of selected features
          result = temp.transform(matrix).tolist()  # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # target: array-like
      # n_features: int
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html
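
      In ranking_, the selected features are ranked 1 and eliminated features get increasing ranks in the order they were dropped. A brief illustration, assuming the select_by_rfe function above and the iris dataset:

      from sklearn.datasets import load_iris

      iris = load_iris()
      scores, indx, result = select_by_rfe(iris.data, iris.target, n_features=2)
      print(scores)   # e.g. [3, 2, 1, 1] -- the two features ranked 1 were kept
      print(indx)     # boolean mask equivalent to (ranking == 1)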

    4. Mutual-information (MIC) based selection

      # Mutual-information (MIC) based selection
      from minepy import MINE
      import numpy as np
      from sklearn import feature_selection

      def mic(x, y):
          # maximal information coefficient between one feature column and the target
          m = MINE()
          m.compute_score(x, y)
          return (m.mic(), 0.5)  # dummy p-value; SelectKBest only ranks by the score

      def select_by_mic(X, target, k):
          matrix = np.array(X)
          target = np.array(target)
          # compute the MIC score column by column and hand the score array to SelectKBest
          score_func = lambda X, Y: np.array(list(map(lambda x: mic(x, Y), X.T))).T[0]
          temp = feature_selection.SelectKBest(score_func, k=k).fit(matrix, target)
          scores = temp.scores_.tolist()            # MIC score of each feature
          indx = temp.get_support().tolist()        # boolean mask of the k selected features
          result = temp.transform(matrix).tolist()  # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # target: array-like
      # k: int
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
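
      minepy is a third-party dependency; if it is not available, scikit-learn's own mutual_info_classif can serve as a drop-in score function for SelectKBest. A sketch of that alternative (it estimates mutual information differently from MIC):

      from sklearn.datasets import load_iris
      from sklearn.feature_selection import SelectKBest, mutual_info_classif

      iris = load_iris()
      selector = SelectKBest(mutual_info_classif, k=2).fit(iris.data, iris.target)
      print(selector.scores_.tolist())        # estimated mutual information per feature
      print(selector.get_support().tolist())  # mask of the two highest-scoring features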

    5. Selecting features by correlation coefficient

      # Feature selection by Pearson correlation coefficient
      import numpy as np
      from scipy.stats import pearsonr
      from sklearn import feature_selection

      def select_by_pearson(X, target, k):
          matrix = np.array(X)
          target = np.array(target)
          # score each feature by the absolute Pearson correlation with the target
          score_func = lambda X, Y: np.array(list(map(lambda x: abs(pearsonr(x, Y)[0]), X.T)))
          temp = feature_selection.SelectKBest(score_func, k=k).fit(matrix, target)
          scores = temp.scores_.tolist()            # |correlation| of each feature
          indx = temp.get_support().tolist()        # boolean mask of the k selected features
          result = temp.transform(matrix).tolist()  # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # target: array-like
      # k: int
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
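
      Pearson correlation only captures linear relationships and is most meaningful for continuous targets. A quick sketch, assuming the select_by_pearson function above and scikit-learn's diabetes regression dataset:

      from sklearn.datasets import load_diabetes

      data = load_diabetes()
      scores, indx, result = select_by_pearson(data.data, data.target, k=3)
      print(scores)   # |Pearson r| between each feature and the target
      print(indx)     # mask of the three most correlated features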

    6. Feature selection by the chi-squared test

      # Feature selection by the chi-squared test
      import numpy as np
      from sklearn import feature_selection
      from sklearn.feature_selection import chi2

      def select_by_chi2(X, target, k):
          matrix = np.array(X)
          target = np.array(target)
          # chi2 requires non-negative feature values (e.g. counts or frequencies)
          temp = feature_selection.SelectKBest(chi2, k=k).fit(matrix, target)
          scores = temp.scores_.tolist()            # chi-squared statistic of each feature
          indx = temp.get_support().tolist()        # boolean mask of the k selected features
          result = temp.transform(matrix).tolist()  # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # target: array-like
      # k: int
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.SelectKBest.html
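
      Because chi2 expects non-negative values, it fits naturally with count-style data. A small synthetic sketch, assuming the select_by_chi2 function above (the data and label construction are made up for illustration):

      import numpy as np

      rng = np.random.RandomState(0)
      counts = rng.poisson(lam=3, size=(100, 5))              # non-negative count features
      labels = (counts[:, 0] + counts[:, 1] > 6).astype(int)  # label built from the first two columns
      scores, indx, result = select_by_chi2(counts, labels, k=2)
      print(indx)   # columns 0 and 1 should score highest, since the label was derived from them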

    7. Selecting features by variance

      # Feature selection by variance threshold
      import numpy as np
      from sklearn import feature_selection

      def select_by_variance(X, t):
          matrix = np.array(X)
          temp = feature_selection.VarianceThreshold(threshold=t).fit(matrix)
          scores = [np.var(el) for el in matrix.T]  # variance of each feature column
          indx = temp.get_support().tolist()        # boolean mask of features with variance above t
          result = temp.transform(matrix).tolist()  # data reduced to the selected features
          return scores, indx, result

      # X: array-like
      # t: float
      # http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
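
      A quick sketch of the effect, assuming the select_by_variance function above: a constant column has zero variance and is dropped by any positive threshold.

      data = [[1, 0, 3],
              [2, 0, 1],
              [3, 0, 2]]
      scores, indx, result = select_by_variance(data, t=0.1)
      print(scores)   # variance per column, e.g. [0.666..., 0.0, 0.666...]
      print(indx)     # [True, False, True] -- the constant middle column is removed
      print(result)   # [[1, 3], [2, 1], [3, 2]]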

    Reference: https://www.kesci.com/

  • Original post: https://www.cnblogs.com/jean925/p/9314864.html