1.列表的增删查改
>>> ls=list('112233123')
>>> ls
['1', '1', '2', '2', '3', '3', '1', '2', '3']
>>> ls.sort()
>>> ls
['1', '1', '1', '2', '2', '2', '3', '3', '3']
>>> ls.pop()
'3'
>>> ls
['1', '1', '1', '2', '2', '2', '3', '3']
>>> ls.append(5)
>>> ls
['1', '1', '1', '2', '2', '2', '3', '3', 5]
>>> ls.insert(1,4)
>>> ls
['1', 4, '1', '1', '2', '2', '2', '3', '3', 5]
>>> ls.index(4)
1
>>> ls[1]=6
>>> ls
['1', 6, '1', '1', '2', '2', '2', '3', '3', 5]
>>>
>>> s=list('turtle')
>>> s
['t', 'u', 'r', 't', 'l', 'e']
>>>
2.字典
>>> d={'广东':'广州','广西':'南宁','福建':'福州','江西':'南昌'}
>>> d
{'广东': '广州', '广西': '南宁', '福建': '福州', '江西': '南昌'}
>>> d[广东]
Traceback (most recent call last):
  File "<pyshell#17>", line 1, in <module>
    d[广东]
NameError: name '广东' is not defined
>>> d['广东']
'广州'
>>> d.pop('广州')
Traceback (most recent call last):
  File "<pyshell#19>", line 1, in <module>
    d.pop('广州')
KeyError: '广州'
>>> d
{'广东': '广州', '广西': '南宁', '福建': '福州', '江西': '南昌'}
>>> d.pop('广东')
'广州'
>>> d
{'广西': '南宁', '福建': '福州', '江西': '南昌'}
>>> d.keys()
dict_keys(['广西', '福建', '江西'])
>>> d.values()
dict_values(['南宁', '福州', '南昌'])
>>> d.items()
dict_items([('广西', '南宁'), ('福建', '福州'), ('江西', '南昌')])
>>> d.get('江西')
'南昌'
>>> d['湖南']='长沙'
>>> d
{'广西': '南宁', '福建': '福州', '江西': '南昌', '湖南': '长沙'}
>>>
3.列表、元组、集合、字典。
>>> d
{'广西': '南宁', '福建': '福州', '江西': '南昌', '湖南': '长沙'}
>>> s=set(ls)
>>> s
{'1', 5, 6, '2', '3'}
>>> s=set('112233123')
>>>
>>> s
{'1', '3', '2'}
>>> a=set(d)
>>> a
{'江西', '广西', '福建', '湖南'}
>>> tu=tuple('112233123456')
>>> tu
('1', '1', '2', '2', '3', '3', '1', '2', '3', '4', '5', '6')
>>> for i in ls:
    print(i)

1
6
1
1
2
2
2
3
3
5
>>> for i in tu:
    print(i,end=' ')

1 1 2 2 3 3 1 2 3 4 5 6
>>> for i in s:
    print(i)

1
3
2
>>> for i in d:
    print(i)

广西
福建
江西
湖南
>>> for i in d:
    print(d[i])

南宁
福州
南昌
长沙
>>> for i in d:
    printi,(d[i])

Traceback (most recent call last):
  File "<pyshell#53>", line 2, in <module>
    printi,(d[i])
NameError: name 'printi' is not defined
>>> for i in d:
    print(i,d[i])

广西 南宁
福建 福州
江西 南昌
湖南 长沙
>>>
4.词频统计
from collections import Counter

# Word-frequency demo: count how often each word occurs in the sample text
# and print the ten most frequent (word, occurrences) pairs.
news = '''My father was a self-taught mandolin player. He was one of the best string instrument players in our town. He could not read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band. They would play at local dances and on a few occasions would play for the local radio station. He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer. He told the family that after he was hired he never went back. Dad was a very religious man. He stated that there was a lot of drinking and cursing the day of his audition and he did not want to be around that type of environment. '''

# Normalise case so that "He" and "he" are counted as the same word.
news = news.lower()

# Treat commas and periods as word separators.
for mark in ',.':
    news = news.replace(mark, ' ')

# split() with no argument collapses runs of whitespace, so the empty
# strings that split(' ') produced after every punctuation mark are never
# counted as words.
words = news.split()

# most_common() yields (word, occurrences) pairs sorted by descending count;
# words with equal counts keep the order in which they first appear.
count = Counter(words).most_common()

# Slice instead of indexing range(10) so that a text with fewer than ten
# distinct words does not raise IndexError.
for entry in count[:10]:
    print(entry)
from collections import Counter

# Word-frequency demo (file version): write the sample text to a.txt, read
# it back, and print the ten most frequent words that are not in the
# exclusion set.

# Context managers close the file even if the write or read fails.
with open('a.txt', 'w', encoding='utf-8') as fo:
    fo.write('''My father was a self-taught mandolin player. He was one of the best string instrument players in our town. He could not read music, but if he heard a tune a few times, he could play it. When he was younger, he was a member of a small country music band. They would play at local dances and on a few occasions would play for the local radio station. He often told us how he had auditioned and earned a position in a band that featured Patsy Cline as their lead singer. He told the family that after he was hired he never went back. Dad was -a very religious man. He stated that there was a lot of drinking and cursing the day of his audition and he did not want to be around that type of environment.''')

with open('a.txt', 'r', encoding='utf-8') as fo:
    news = fo.read()

# Normalise case and treat commas and periods as word separators.
news = news.lower()
for mark in ',.':
    news = news.replace(mark, ' ')

# List of all words; split() without an argument never yields empty strings.
words = news.split()

# Grammatical (function) words to leave out of the statistics.
exp = {'', 'the', 'a', 'was', 'of', 'and', 'that', 'he', 'in'}

# Count the remaining words in text order and sort by occurrences, highest
# first. Counting in text order (rather than iterating a set) keeps the
# ranking of tied words identical from run to run.
count = Counter(word for word in words if word not in exp).most_common()

# Print the top 10; slicing tolerates fewer than ten distinct words.
for entry in count[:10]:
    print(entry)