zoukankan      html  css  js  c++  java
  • 06.齐普夫定律验证

    import re
    from operator import itemgetter
    import matplotlib.pyplot as plt
    from collections import Counter
    
    
    # English text: count how often each word occurs in the book.
    # An explicit encoding keeps the read independent of the platform locale.
    # Undecodable bytes are replaced instead of raising; for an
    # ASCII-compatible file this does not change the result, because only
    # ASCII letters are matched below.
    with open("Alice's adventures in wonderland.txt",
              encoding="utf-8", errors="replace") as f:
        file_to_string = f.read()

    # A word is 3-10 letters, lowercase except for an optional leading capital.
    # The lookarounds require the match to be a whole run of letters, so longer
    # words are skipped instead of being cut into fragments such as
    # "conversati" that would then be counted as words of their own.
    words = re.findall(
        r"(?<![A-Za-z])[A-Za-z][a-z]{2,9}(?![A-Za-z])", file_to_string
    )

    # Counter is a dict subclass, so `frequency` still maps word -> count.
    # NOTE(review): capitalised and lowercase spellings ("The" / "the") are
    # still counted as different words - confirm whether that is intended.
    frequency = Counter(words)

    # Print the 100 most frequent words; ties keep first-seen order.
    for key, value in frequency.most_common(100):
        print(key, value)

    # All counts in descending order, i.e. frequency as a function of rank.
    sorted_freq = sorted(frequency.values(), reverse=True)
    
    # Verify Zipf's law with matplotlib: plot frequency against rank on
    # log-log axes for (at most) the 100 most frequent words.
    top_freq = sorted_freq[:100]
    # Ranks start at 1: a rank of 0 cannot be drawn on a logarithmic axis, so
    # the most frequent word would be missing from the plot. Deriving the
    # length from the data keeps x and y the same size when the text has
    # fewer than 100 distinct words.
    x = list(range(1, len(top_freq) + 1))
    plt.title("Zipf-Law")
    plt.xlabel("rank")
    plt.ylabel("freq")
    plt.loglog(x, top_freq)
    plt.show()

    # Bar chart of the same frequencies by rank.
    plt.bar(x, top_freq)
    plt.show()

  • 相关阅读:
    python3之datetime模块
    python3之time模块
    前端面试题01
    前端面试题02
    angularjs
    nodejs
    android 报错记录
    android知识点回顾二
    android知识点回顾
    Broadcast广播代码例子
  • 原文地址:https://www.cnblogs.com/waterr/p/13947992.html
Copyright © 2011-2022 走看看