zoukankan      html  css  js  c++  java
  • 机器学习连载001

    字典预处理

    from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from scipy.stats import pearsonr
    import jieba
    import pandas as pd
    
    
    
    
    def dict_vec():
        """Demonstrate one-hot feature extraction from dicts with DictVectorizer.

        Fits a dense (``sparse=False``) DictVectorizer on three sample records
        holding a ``city`` string and a ``temperature`` number, then prints the
        generated feature names followed by the transformed data.

        Returns:
            None. The results are only printed.
        """
        # Named `vectorizer` rather than `dict` so the builtin is not shadowed.
        vectorizer = DictVectorizer(sparse=False)

        # Learn the feature mapping and transform the records in one call.
        data = vectorizer.fit_transform([{'city': '北京','temperature':100},{'city': '上海','temperature':60},{'city': '深圳','temperature':30}])

        # get_feature_names() was removed in scikit-learn 1.2; prefer its
        # replacement and fall back only on releases that predate it.
        # .tolist() keeps the printed form a plain list of str, as before.
        if hasattr(vectorizer, "get_feature_names_out"):
            feature_names = vectorizer.get_feature_names_out().tolist()
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the name of every output column, then the data itself.
        print(feature_names)
        print(data)

        return None
    
    # Script entry point: run the DictVectorizer demo when executed directly.
    if __name__ == "__main__":
        dict_vec()
    View Code

     文本的预处理

    from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from scipy.stats import pearsonr
    import jieba
    import pandas as pd
    
    
    def dict_vec():
        """Demonstrate one-hot feature extraction from dicts with DictVectorizer.

        Fits a dense (``sparse=False``) DictVectorizer on three sample records
        holding a ``city`` string and a ``temperature`` number, then prints the
        generated feature names followed by the transformed data.

        Returns:
            None. The results are only printed.
        """
        # Named `vectorizer` rather than `dict` so the builtin is not shadowed.
        vectorizer = DictVectorizer(sparse=False)

        # Learn the feature mapping and transform the records in one call.
        data = vectorizer.fit_transform([{'city': '北京','temperature':100},{'city': '上海','temperature':60},{'city': '深圳','temperature':30}])

        # get_feature_names() was removed in scikit-learn 1.2; prefer its
        # replacement and fall back only on releases that predate it.
        # .tolist() keeps the printed form a plain list of str, as before.
        if hasattr(vectorizer, "get_feature_names_out"):
            feature_names = vectorizer.get_feature_names_out().tolist()
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the name of every output column, then the data itself.
        print(feature_names)
        print(data)

        return None
    
    
    def countvec():
        """Demonstrate bag-of-words feature extraction with CountVectorizer.

        Fits a CountVectorizer on two short sample sentences, then prints the
        learned feature names and the dense document-term count matrix.

        Returns:
            None. The results are only printed.
        """
        # Instantiate the count vectorizer with its default settings.
        vectorizer = CountVectorizer()

        # Extract features from the two sample documents.
        data = vectorizer.fit_transform(["人生 人生 苦短,我 喜 欢Python", "生 活太 长 久,我不 喜欢P ython"])

        # get_feature_names() was removed in scikit-learn 1.2; prefer its
        # replacement and fall back only on releases that predate it.
        # .tolist() keeps the printed form a plain list of str, as before.
        if hasattr(vectorizer, "get_feature_names_out"):
            feature_names = vectorizer.get_feature_names_out().tolist()
        else:
            feature_names = vectorizer.get_feature_names()

        # Show the vocabulary, then the counts converted to a dense array
        # so every cell is visible.
        print(feature_names)
        print(data.toarray())

        return None
    
    # Script entry point: run the CountVectorizer demo when executed directly.
    if __name__ == "__main__":
        countvec()
    View Code
  • 相关阅读:
    基础总结篇之三:Activity的task相关
    基础总结篇之一:Activity生命周期
    基础总结篇之二:Activity的四种launchMode
    SAP_清除默认Action
    FICO_Delete error message
    FICO_无法生成凭证(System status CLSD is active (WBS K/A6020372-205-KCCL))
    FICO_导出8月KOB3报表
    FICO_更改BP
    FICO_月末关帐
    SAP_清除默认导出格式
  • 原文地址:https://www.cnblogs.com/cerofang/p/10161069.html
Copyright © 2011-2022 走看看