re模块 之分组
>>> import re
>>> re.findall("ab|c","sdfab|csdf")
['ab', 'c']
>>> re.findall("ab|cd","sdfab|cdsdf")
['ab', 'cd']
>>> re.findall ("abc+","abccccc")
['abccccc']
>>> re.findall ("abc*","abccccc")
['abccccc']
>>> re.findall("(abc)+","abcabcabcabc") # 分组
['abc']
>>> re.findall ("(abc)*","abcabcabcabc")
['abc', '']
>>> re.findall ("abc+","abcabcabcabcabc")
['abc', 'abc', 'abc', 'abc', 'abc']
search分组用法 (只返回一个对象)
>>> re.search(r"(?P<name>\w+)","abcccc")
<re.Match object; span=(0, 6), match='abcccc'>
>>> re.search(r"(?P<name>\w+)","abcccc").group()
'abcccc'
根据组名称 提取内容
>>> re.search(r"(?P<name>[a-z]+)\d+","john18musicxiaoming20movie").group("name")
'john'
>>> re.search(r"(?P<name>[a-z]+)(?P<age>\d+)","john18musicxiaoming20movie").group("age")
'18'
>>>
re模块的其他常用方法
>>> re.match(r"\d+","44safs45asdf321saf789").group() # 只取一个对象,只能取开头的
'44'
>>> re.split(" ","hello world") # 按空格为分界线取
['hello', 'world']
>>> re.split("[ |]","hello world|aaa")
['hello', 'world', 'aaa']
>>> re.split("[ab]","safdbds") # 按 a b 为分界线取
['s', 'fd', 'ds']
--替换
>>> re.sub(r"\d+","A","a1sd4fg45h")
'aAsdAfgAh'
>>> re.sub(r"\d","A","a1sd4fg45h")
'aAsdAfgAAh'
>>> re.sub(r"\d+","A","a1sd4fg45h",2)
'aAsdAfg45h'
>>> a = re.compile (r"\d+") # 定义规则
>>> a.findall("safsd12a45") # 直接调用 (可以调用多次)
['12', '45']
>>> re.finditer (r"\d+","asfa45saf56") # 将数字放到迭代器内存里
<callable_iterator object at 0x00000220B1E50E80>
获取里面的内容
>>> b = re.finditer (r"\d+","asfa45saf56")
>>> next(b)
<re.Match object; span=(4, 6), match='45'>
>>> next(b)
<re.Match object; span=(9, 11), match='56'>
优先级
>>> re.findall("www.(baidu|123).com","www.baidu.com")
['baidu']
>>> re.findall("www.(?:baidu|123).com","www.baidu.com") # ‘ ?:’去掉优先级
['www.baidu.com']