"""Print the ten most frequent ASCII-letter words found in ``./test.py``."""
import re
from collections import Counter
from typing import Iterable, List, Tuple

# File whose words are counted when this module is run as a script.
SOURCE_PATH = "./test.py"

# How many of the most frequent words are reported by default.
TOP_N = 10

# A word is a maximal run of ASCII letters; digits, underscores and
# punctuation all act as separators. Matching is case-sensitive, so
# "Foo" and "foo" are counted as different words.
WORD_PATTERN = re.compile(r"[a-zA-Z]+")


def count_words(lines: Iterable[str]) -> Counter:
    """Return a Counter mapping each word in *lines* to its occurrence count.

    Args:
        lines: Any iterable of strings (an open text file works and is
            consumed lazily, one line at a time).

    Returns:
        A ``collections.Counter`` keyed by word; empty when no word is found.
    """
    return Counter(
        word for line in lines for word in WORD_PATTERN.findall(line)
    )


def top_words(lines: Iterable[str], limit: int = TOP_N) -> List[Tuple[str, int]]:
    """Return the *limit* most frequent words in *lines* as ``(word, count)``.

    Pairs are ordered by descending count; words with equal counts keep the
    order in which they were first encountered.

    Args:
        lines: Iterable of strings to scan.
        limit: Maximum number of pairs to return.

    Returns:
        A list of at most *limit* ``(word, count)`` tuples.
    """
    return count_words(lines).most_common(limit)


def main() -> None:
    """Read ``SOURCE_PATH`` and print its most frequent words.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Python source files are UTF-8 by default, so decode explicitly instead
    # of relying on the platform's locale encoding.
    with open(SOURCE_PATH, encoding="utf-8") as source:
        print(top_words(source))


if __name__ == "__main__":
    main()