import string #punctuation = [',','.','!','?','’',':','$','%'] prep = ['a','in','of','the','to','at','it','on','and','so','his','that', 'not','was','my','were','we','he','an','as','is','for','mr','us','me'] punctuation = list(string.punctuation) #String 模块提供的标点符号字符串
with open('article.txt','r') as f:
article = f.read()
new_art = article for i in range(len(punctuation)): new_art = new_art.replace(punctuation[i],'') #删除标点符号 new_art = new_art.lower() #替换成小写 art_list = new_art.split() #以空格将字符串分成一个个单词的列表 dic = dict() for i in art_list: dic[i] = new_art.count(i) #将单词和其出现次数用字典记录 for i in prep: if(dic.get(i)!=None): #如果为介词之类的无意义的词,将其删去 del(dic[i]) new_dic = sorted(dic.items(),key=lambda x:x[1],reverse = True) ''' dic.items()返回可遍历的元祖 key后面跟一个函数 lambda是定义一个匿名函数 lambda x:x[1]相当于 def f(x): return x[1] x[0]表示键key,x[1]表示值value 这里是使用值(单词出现次数)进行降序排序 reverse = True表示降序 ''' for i in range(10): print(new_dic[i]) #取前10个单词
截图如下: