zoukankan      html  css  js  c++  java
  • 机器学习连载001

    字典预处理

    from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from scipy.stats import pearsonr
    import jieba
    import pandas as pd
    
    
    
    
    def dict_vec():
        """Demonstrate feature extraction from dict records with DictVectorizer.

        Fits a DictVectorizer on three city/temperature records, then prints
        the generated feature (column) names followed by the transformed
        feature matrix.

        Returns:
            None. The results are only printed.
        """
        # sparse=False makes fit_transform return a dense array, so the
        # printed matrix shows every cell instead of sparse coordinates.
        vectorizer = DictVectorizer(sparse=False)
        records = [
            {'city': '北京', 'temperature': 100},
            {'city': '上海', 'temperature': 60},
            {'city': '深圳', 'temperature': 30},
        ]
        data = vectorizer.fit_transform(records)

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back for releases older than 1.0. list() keeps
        # the printed form the same as the old list-returning method.
        if hasattr(vectorizer, 'get_feature_names_out'):
            feature_names = list(vectorizer.get_feature_names_out())
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the name of each column, then the matrix itself.
        print(feature_names)
        print(data)
    
    # Run the dict-vectorizer demo only when executed as a script, not on import.
    if __name__ == '__main__':
        dict_vec()
    View Code

     文本的预处理

    from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from scipy.stats import pearsonr
    import jieba
    import pandas as pd
    
    
    def dict_vec():
        """Demonstrate feature extraction from dict records with DictVectorizer.

        Fits a DictVectorizer on three city/temperature records, then prints
        the generated feature (column) names followed by the transformed
        feature matrix.

        Returns:
            None. The results are only printed.
        """
        # sparse=False makes fit_transform return a dense array, so the
        # printed matrix shows every cell instead of sparse coordinates.
        vectorizer = DictVectorizer(sparse=False)
        records = [
            {'city': '北京', 'temperature': 100},
            {'city': '上海', 'temperature': 60},
            {'city': '深圳', 'temperature': 30},
        ]
        data = vectorizer.fit_transform(records)

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back for releases older than 1.0. list() keeps
        # the printed form the same as the old list-returning method.
        if hasattr(vectorizer, 'get_feature_names_out'):
            feature_names = list(vectorizer.get_feature_names_out())
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the name of each column, then the matrix itself.
        print(feature_names)
        print(data)
    
    
    def countvec():
        """Demonstrate bag-of-words feature extraction with CountVectorizer.

        Fits a CountVectorizer on two short, space-separated documents, then
        prints the learned vocabulary followed by the per-document term-count
        matrix as a dense array.

        Returns:
            None. The results are only printed.
        """
        vectorizer = CountVectorizer()
        # Extract features from two documents. Tokens come from the
        # vectorizer's default tokenization, so the spacing inside each
        # string decides what counts as a word.
        documents = [
            "人生 人生 苦短,我 喜 欢Python",
            "生 活太 长 久,我不 喜欢P ython",
        ]
        data = vectorizer.fit_transform(documents)

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back for releases older than 1.0. list() keeps
        # the printed form the same as the old list-returning method.
        if hasattr(vectorizer, 'get_feature_names_out'):
            feature_names = list(vectorizer.get_feature_names_out())
        else:
            feature_names = vectorizer.get_feature_names()

        # Vocabulary first, then the sparse result converted to a dense array
        # so the full count matrix is shown.
        print(feature_names)
        print(data.toarray())
    
    # Run the count-vectorizer demo only when executed as a script, not on import.
    if __name__ == '__main__':
        countvec()
    View Code
  • 相关阅读:
    自然语言处理NLP快速入门
    2019年机器学习:追踪人工智能发展之路
    中科院院士谭铁牛:人工智能发展需要理性务实
    SAP MM ME21N 创建PO时报错
    最全的机器学习资料
    SAP MM盘点流程里如何处理事务代码MI11 Recount过的盘点凭证?
    如何“快”、“准”、“狠”成为优秀算法工程师
    解码以色列人工智能产业:正在崛起的竞争者
    周志华:关于机器学习的一点思考
    projects
  • 原文地址:https://www.cnblogs.com/cerofang/p/10161069.html
Copyright © 2011-2022 走看看