zoukankan      html  css  js  c++  java
  • 统计词频

    
    
    import re
    from collections import Counter
    
    # Sample text whose word frequencies are counted below.
    string = """   Lorem ipsum dolor sit amet, consectetur
        adipiscing elit. Nunc ut elit id mi ultricies
        adipiscing. Nulla facilisi. Praesent pulvinar,
        sapien vel feugiat vestibulum, nulla dui pretium orci,
        non ultricies elit lacus quis ante. Lorem ipsum dolor
        sit amet, consectetur adipiscing elit. Aliquam
        pretium ullamcorper urna quis iaculis. Etiam ac massa
        sed turpis tempor luctus. Curabitur sed nibh eu elit
        mollis congue. Praesent ipsum diam, consectetur vitae
        ornare a, aliquam a nunc. In id magna pellentesque
        tellus posuere adipiscing. Sed non mi metus, at lacinia
        augue. Sed magna nisi, ornare in mollis in, mollis
        sed nunc. Etiam at justo in leo congue mollis.
        Nullam in neque eget metus hendrerit scelerisque
        eu non enim. Ut malesuada lacus eu nulla bibendum
        id euismod urna sodales.  """

    # Extract every run of word characters. The pattern must be r'\w+':
    # without the backslash it matches only runs of the literal letter "w",
    # which yields no words at all for this text.
    words = re.findall(r'\w+', string)

    # Normalize case so that e.g. "Sed" and "sed" are counted as one word.
    lower_words = [word.lower() for word in words]

    # Map each distinct lowercase word to its number of occurrences.
    word_counts = Counter(lower_words)

    # Function-call form works on both Python 2 and Python 3.
    print(word_counts)
    
    # Counter({'elit': 5, 'sed': 5, 'in': 5, 'adipiscing': 4, 'mollis': 4, 'eu': 3, 
    # 'id': 3, 'nunc': 3, 'consectetur': 3, 'non': 3, 'ipsum': 3, 'nulla': 3, 'pretium':
    # 2, 'lacus': 2, 'ornare': 2, 'at': 2, 'praesent': 2, 'quis': 2, 'sit': 2, 'congue': 2, 'amet': 2, 
    # 'etiam': 2, 'urna': 2, 'a': 2, 'magna': 2, 'lorem': 2, 'aliquam': 2, 'ut': 2, 'ultricies': 2, 'mi': 2, 
    # 'dolor': 2, 'metus': 2, 'ac': 1, 'bibendum': 1, 'posuere': 1, 'enim': 1, 'ante': 1, 'sodales': 1, 'tellus': 1,
    # 'vitae': 1, 'dui': 1, 'diam': 1, 'pellentesque': 1, 'massa': 1, 'vel': 1, 'nullam': 1, 'feugiat': 1, 'luctus': 1, 
    # 'pulvinar': 1, 'iaculis': 1, 'hendrerit': 1, 'orci': 1, 'turpis': 1, 'nibh': 1, 'scelerisque': 1, 'ullamcorper': 1,
    # 'eget': 1, 'neque': 1, 'euismod': 1, 'curabitur': 1, 'leo': 1, 'sapien': 1, 'facilisi': 1, 'vestibulum': 1, 'nisi': 1, 
    # 'justo': 1, 'augue': 1, 'tempor': 1, 'lacinia': 1, 'malesuada': 1})
    
    
    
    
    
  • 相关阅读:
    RibbonToggleButton
    civil 3d 体积曲面提取等高线
    Civil 3D中各种ObjectID集合中的对象具体是什么类型?
    Civil 3D曲面高程分析
    C++不允许使用指向不完整类型的指针
    LNK2019 无法解析的外部符号 "int __cdecl acedGetReal(wchar_t const *,double *)"
    Civil 3D中获取路线上任意一点处切线方位角
    AutoCAD 实体添加超级链接
    论坛中看到的代码,留存备用 批量创建dwg文件
    ionic3 新增主题色
  • 原文地址:https://www.cnblogs.com/tingshuo123/p/6917817.html
Copyright © 2011-2022 走看看