# -*- coding: UTF-8 -*-
"""Count word frequencies in a song lyric and print the ten most common words.

The script removes punctuation, lower-cases the text, splits it into words,
counts how often each word occurs, drops a fixed list of grammatical words
and finally prints the ten most frequent remaining words.
"""
from collections import Counter

LYRICS = '''An empty street
An empty house
A hole inside my heart
I'm all alone
The rooms are getting smaller
I wonder how
I wonder why
I wonder where they are
The days we had
The songs we sang together
Oh yeah
And oh my love
I'm holding on forever
Reaching for a love that seems so far
So i say a little prayer
And hope my dreams will take me there
Where the skies are blue to see you once again, my love
Over seas and coast to coast
To find a place i love the most
Where the fields are green to see you once again, my love
I try to read
I go to work
I'm laughing with my friends
But i can't stop to keep myself from thinking
Oh no I wonder how
I wonder why
I wonder where they are
The days we had
The songs we sang together
Oh yeah And oh my love
I'm holding on forever
Reaching for a love that seems so far Mark:
To hold you in my arms
To promise you my love
To tell you from the heart
You're all i'm thinking of
I'm reaching for a love that seems so far
So i say a little prayer
And hope my dreams will take me there
Where the skies are blue to see you once again, my love
Over seas and coast to coast
To find a place i love the most
Where the fields are green to see you once again,my love
say a little prayer
dreams will take me there
Where the skies are blue to see you once again'''

# Every separator character is replaced by a single space before splitting.
SEPARATORS = (",", ".", "?", "!", "'", ":")

# Grammatical words (pronouns, articles, conjunctions, ...) excluded from
# the final ranking.
STOP_WORDS = (
    'a', 'an', 'all', 'the', 'are', 'how', 'my', 'why', 'they', 'where',
    'had', 'on', 'that', 'so', 'i', 'for', 'and', 'over', 'there', 'will',
)

# Built once: maps each separator to a space for a single-pass str.translate.
_SEPARATOR_TABLE = str.maketrans({char: " " for char in SEPARATORS})


def strip_separators(text):
    """Return *text* with every character in SEPARATORS replaced by a space."""
    return text.translate(_SEPARATOR_TABLE)


def tokenize(text):
    """Return the lower-cased words of *text*, split on whitespace.

    Separator characters are turned into spaces first, so "I'm" yields the
    two tokens "i" and "m".
    """
    return strip_separators(text).lower().split()


def count_words(words):
    """Return a dict mapping each word in *words* to its number of occurrences.

    Keys appear in order of first occurrence, which makes later tie-breaking
    deterministic.
    """
    return dict(Counter(words))


def remove_stop_words(frequencies, stop_words=STOP_WORDS):
    """Return a copy of *frequencies* without the keys listed in *stop_words*.

    Stop words that are not present are ignored instead of raising KeyError.
    """
    excluded = set(stop_words)
    return {
        word: count
        for word, count in frequencies.items()
        if word not in excluded
    }


def top_words(frequencies, limit=10):
    """Return up to *limit* ``(word, count)`` pairs, most frequent first.

    The sort is stable, so words with equal counts keep the order they have
    in *frequencies*.  Fewer than *limit* pairs are returned when there are
    not enough words.
    """
    ranked = sorted(frequencies.items(), key=lambda item: item[1], reverse=True)
    return ranked[:limit]


def main():
    """Run the analysis on LYRICS, printing every intermediate stage."""
    cleaned = strip_separators(LYRICS)
    print(cleaned)

    lowered = cleaned.lower()  # convert all upper case to lower case
    print(lowered)

    words = lowered.split()  # word list, duplicates included
    print(words)

    print(set(words))  # the distinct words

    frequencies = count_words(words)
    print(list(frequencies))  # duplicate-free word list
    print(list(frequencies.values()))  # count for each distinct word
    print(frequencies)  # word -> count
    print(sorted(frequencies))  # distinct words in alphabetical order

    # Exclude grammatical words, then print the ten most frequent words.
    content_words = remove_stop_words(frequencies, STOP_WORDS)
    for entry in top_words(content_words, 10):
        print(entry)


if __name__ == "__main__":
    main()