# Author: Sure Feng

"""
The re module: a short tour of the most commonly used matching functions.
"""

import re


# The most commonly used matching functions.
# 1. re.match only tries to match at the very beginning of the string;
#    if the pattern does not match there, the result is None.
print(re.match(r"s+", "asddf123fssa"))  # None
print(re.match(r".", "asddf123fssa"))  # <re.Match object; span=(0, 1), match='a'>

# 2. re.search scans the string from start to end and returns the first match.
print(re.search(r"s+", "asddf123fssa"))  # <re.Match object; span=(1, 2), match='s'>

# 3. re.findall collects every non-overlapping match into a list and returns it.
print(re.findall(r"s+", "asddf123fssa"))  # ['s', 'ss']

# 4. re.sub replaces the matched text and returns a new string.
#    Signature: re.sub(pattern, repl, string, count=0, flags=0); count is the
#    maximum number of replacements, and the default 0 replaces every match.
#    count is passed by keyword because passing it positionally is deprecated
#    since Python 3.13.
print(re.sub(r"s+", "&", "asddf123fssa", count=1))  # a&ddf123fssa (stops after one replacement)
24
25
# Common regular-expression syntax.
# '.' matches any single character except "\n" by default; with the DOTALL
# flag it matches every character, including the newline.
print(re.search(r"v.", "asddf123fsvsa"))  # <re.Match object; span=(10, 12), match='vs'>

# '^' and '\A' anchor the match at the start of the string (pointless with
# re.match, which is already anchored there).
# With flags=re.MULTILINE, '^' additionally matches right after each newline,
# e.g. re.search(r"^a", "\nabc\neee", flags=re.MULTILINE) finds 'a';
# '\A' still matches only at the very start of the string.
print(re.search(r"^v.", "asddf123fsvsa"))  # None
print(re.search(r"\Av.", "asddf123fsvsa"))  # None

# '$' and '\Z' anchor the match at the end of the string.
# With flags=re.MULTILINE, '$' additionally matches right before each newline,
# e.g. re.search("foo$", "bfoo\nsdfsf", flags=re.MULTILINE).group() gives 'foo'.
print(re.search(r".a$", "asddf123fsvsa"))  # <re.Match object; span=(11, 13), match='sa'>
print(re.search(r".a\Z", "asddf123fsvsa"))  # <re.Match object; span=(11, 13), match='sa'>

# '*' matches the preceding character zero or more times.
print(re.findall(r"ab*", "cabb3abcbbac"))  # ['abb', 'ab', 'a']; can match a / ab / abb / abbb ...

# '+' matches the preceding character one or more times,
# e.g. re.findall("ab+", "ab+cd+abb+bba") gives ['ab', 'abb'].
print(re.findall(r"ab+", "cabb3abcbbac"))  # ['abb', 'ab']; can match ab / abb / abbb ...

# '?' matches the preceding character zero or one time.
print(re.findall(r"ab?", "cabb3abcbbac"))  # ['ab', 'ab', 'a']; can match a / ab

# '{m}' matches the preceding character exactly m times.
print(re.findall(r"ab{2}", "abb abc abbcbbb"))  # ['abb', 'abb']; only abb matches

# '{n,m}' matches the preceding character between n and m times.
print(re.findall(r"ab{1,3}", "abbb abc abbcbbb"))  # ['abbb', 'ab', 'abb']; can match ab / abb / abbb

# '|' matches either the expression on its left or the one on its right.
print(re.search(r"abc|ABC", "ABCBabcCD").group())  # ABC; group() returns the matched text

# '(...)' groups a sub-pattern so quantifiers and alternation apply to it.
print(re.search(r"(abc){2}a(123|456)c", "abcabca456c").group())  # abcabca456c

# '(?P<name>...)' is a named group; groupdict() maps each name to its text.
print(
    re.search(
        r"(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",
        "371481199306143242",
    ).groupdict()
)
# Result: {'province': '3714', 'city': '81', 'birthday': '1993'}
63
64
# '\d' matches a decimal digit (0-9 in ASCII text).
# '\D' matches any character that is not a digit.
# '\w' matches a word character: [A-Za-z0-9_] (for str patterns also other
#      Unicode word characters, unless re.ASCII is given).
# '\W' matches any character that is not a word character.
# '\s' matches whitespace such as "\t", "\n" and "\r".
# NOTE(review): the escapes in the subject string were lost when this script
# was extracted; "\t" and "\n" are reconstructed from the comment above and
# the recorded output - confirm against the original file.
print(re.search(r"\s+", "ab\tc1\n3").group())  # result: '\t'

# re.I (re.IGNORECASE): ignore case (the long name is in parentheses, same below).
print(re.search(r"[a-z]+", "AFJa", flags=re.I))  # <re.Match object; span=(0, 4), match='AFJa'>

# re.M (re.MULTILINE): multi-line mode; '^' and '$' also match at the start
# and end of every line, not only of the whole string.
print(re.search(r"^e.", "\nabc\neee", flags=re.M))  # <re.Match object; span=(5, 7), match='ee'>

# re.S (re.DOTALL): makes '.' match every character, including the newline.
print(re.search(r"v.+", "asddf123fsvsa\n", flags=re.S))  # <re.Match object; span=(10, 14), match='vsa\n'>
1 None
2 <re.Match object; span=(0, 1), match='a'>
3 <re.Match object; span=(1, 2), match='s'>
4 ['s', 'ss']
5 a&ddf123fssa
6 <re.Match object; span=(10, 12), match='vs'>
7 None
8 None
9 <re.Match object; span=(11, 13), match='sa'>
10 <re.Match object; span=(11, 13), match='sa'>
11 ['abb', 'ab', 'a']
12 ['abb', 'ab']
13 ['ab', 'ab', 'a']
14 ['abb', 'abb']
15 ['abbb', 'ab', 'abb']
16 ABC
17 abcabca456c
18 {'province': '3714', 'city': '81', 'birthday': '1993'}
19
20 <re.Match object; span=(0, 4), match='AFJa'>
21 <re.Match object; span=(5, 7), match='ee'>
22 <re.Match object; span=(10, 14), match='vsa\n'>
answer