zoukankan html css js c++ java

Python 练习：正则表达式

import re
# Exercise 1: extract the text inside the <div>. The class attribute value is
# not always "name", so it is matched generically rather than literally.
html = '<div class ="name">中国</div>'
# [^"]* keeps the attribute match inside its own quotes; a greedy .* would run
# across several tags and swallow every <div> except the last one.
div_pattern = re.compile(r'<div class ="[^"]*">(.*?)</div>')
resu = div_pattern.findall(html)
print(resu)

# Exercise 2: keep only the Chinese words, printing each word as a whole.
str1 = "not 404 6.775 found   魏无羡  888  蓝湛"
print("\n练习：过滤出“", str1, "”中的中文，汉字整体打印")
# \d+\.?\d* matches integers and decimals; [a-zA-Z]+ matches ASCII words.
pattern = r'\d+\.?\d*|[a-zA-Z]+'
# split() with no argument collapses runs of spaces, so no empty tokens need
# to be stripped afterwards:
# ['not', '404', '6.775', 'found', '魏无羡', '888', '蓝湛']
tokens = str1.split()
# Drop every token that is entirely a number or an ASCII word. Filtering with
# fullmatch avoids the ValueError that removing regex hits from the token list
# raises when a hit is only part of a token (e.g. "abc123").
chinese_words = [token for token in tokens if not re.fullmatch(pattern, token)]
str1_resu = ' '.join(chinese_words)
print(str1_resu)


# Exercise 3: extract the Chinese text one character at a time.
str1 = "not 404 6.775 found  魏无羡  888  蓝湛"
print("\n练习2：过滤出", str1, "中的中文，单个汉字打印")
# Splitting on a single space keeps the empty strings produced by consecutive
# spaces; the list is printed only to show that intermediate result.
tokens = str1.split(" ")
print(tokens)
# [\u4e00-\u9fa5] is the CJK Unified Ideographs range commonly used to match
# Chinese characters; without the backslashes the class would match ASCII
# characters from '0' to 'u' instead.
pattern = r'[\u4e00-\u9fa5]'
resu1 = re.findall(pattern, str1)
print(resu1)

结果：

['中国']

练习：过滤出“ not 404 6.775 found   魏无羡  888  蓝湛 ”中的中文，汉字整体打印
魏无羡 蓝湛

练习2：过滤出 not 404 6.775 found  魏无羡  888  蓝湛 中的中文，单个汉字打印
['not', '404', '6.775', 'found', '', '魏无羡', '', '888', '', '蓝湛']
['魏', '无', '羡', '蓝', '湛']

Process finished with exit code 0

查看全文

相关阅读:
6.一个python-selenium的实战
 9.python-ini文件使用(读和写)
4.表数据的操作-insert、delete
5.数据库的查询-select
2.自动化测试之python+selenium基础
 6.python中目录的操作
 sql如何先排序再去重
 hbase(0.94) get、scan源码分析
 Antlr 在 idea 中正确使用的方式
 某日看代码对代码可读性的思考

原文地址：https://www.cnblogs.com/jxba/p/11842386.html