zoukankan      html  css  js  c++  java
  • 机器学习连载001

    字典预处理

    from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from scipy.stats import pearsonr
    import jieba
    import pandas as pd
    
    
    
    
    def dict_vec():
        """Demonstrate one-hot feature extraction from a list of dicts.

        Fits a ``DictVectorizer`` on three city/temperature records, then
        prints the generated column names followed by the encoded matrix.

        Returns:
            None. The results are only printed.
        """
        # sparse=False makes fit_transform return a dense array, which is
        # easier to read when printed than a scipy sparse matrix.
        vectorizer = DictVectorizer(sparse=False)

        # Learn the feature mapping and encode the records in one step.
        data = vectorizer.fit_transform([{'city': '北京','temperature':100},{'city': '上海','temperature':60},{'city': '深圳','temperature':30}])

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back only on releases that predate it.
        if hasattr(vectorizer, 'get_feature_names_out'):
            # tolist() keeps the printed form a plain Python list of str.
            feature_names = vectorizer.get_feature_names_out().tolist()
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the name of every output column, then the encoded data.
        print(feature_names)
        print(data)

        return None
    
    # Run the dict-vectorizer demo only when this file is executed as a script.
    if __name__ == '__main__':
        dict_vec()
    View Code

     文本的预处理

    from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.preprocessing import MinMaxScaler, StandardScaler
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from scipy.stats import pearsonr
    import jieba
    import pandas as pd
    
    
    def dict_vec():
        """Demonstrate one-hot feature extraction from a list of dicts.

        Fits a ``DictVectorizer`` on three city/temperature records, then
        prints the generated column names followed by the encoded matrix.

        Returns:
            None. The results are only printed.
        """
        # sparse=False makes fit_transform return a dense array, which is
        # easier to read when printed than a scipy sparse matrix.
        vectorizer = DictVectorizer(sparse=False)

        # Learn the feature mapping and encode the records in one step.
        data = vectorizer.fit_transform([{'city': '北京','temperature':100},{'city': '上海','temperature':60},{'city': '深圳','temperature':30}])

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back only on releases that predate it.
        if hasattr(vectorizer, 'get_feature_names_out'):
            # tolist() keeps the printed form a plain Python list of str.
            feature_names = vectorizer.get_feature_names_out().tolist()
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the name of every output column, then the encoded data.
        print(feature_names)
        print(data)

        return None
    
    
    def countvec():
        """Demonstrate bag-of-words feature extraction from two short texts.

        Fits a ``CountVectorizer`` on two whitespace-separated sentences, then
        prints the learned vocabulary followed by the per-document count
        matrix.

        Returns:
            None. The results are only printed.
        """
        # Instantiate the vectorizer with its default tokenizer settings.
        vectorizer = CountVectorizer()

        # Extract token-count features from the two documents.
        data = vectorizer.fit_transform(["人生 人生 苦短,我 喜 欢Python", "生 活太 长 久,我不 喜欢P ython"])

        # get_feature_names() was removed in scikit-learn 1.2; prefer the
        # replacement and fall back only on releases that predate it.
        if hasattr(vectorizer, 'get_feature_names_out'):
            # tolist() keeps the printed form a plain Python list of str.
            feature_names = vectorizer.get_feature_names_out().tolist()
        else:
            feature_names = vectorizer.get_feature_names()

        # Print the vocabulary, then the counts; toarray() densifies the
        # sparse result so the full matrix is shown.
        print(feature_names)
        print(data.toarray())

        return None
    
    # Run the count-vectorizer demo only when this file is executed as a script.
    if __name__ == '__main__':
        countvec()
    View Code
  • 相关阅读:
    韩国新地图格式
    这次是真的准备出售用于传奇开发的HGE图形引擎了
    减小Delphi2010程序的尺寸(关闭RTTI反射机制)
    hook api 保护进程
    使用静态库
    iOS中的Block动画
    IOS中忽略字符串两边空格比较
    判断是否输入有汉字
    EasyDSS产生cache缓存如何设定定时清理缓存?
    EasyDSS定制项目中ETCD服务挂掉如何设定定时重启?
  • 原文地址:https://www.cnblogs.com/cerofang/p/10161069.html
Copyright © 2011-2022 走看看