zoukankan      html  css  js  c++  java
  • python基础语法21 re模块补充 正则表达式

    正则表达式

      1 import re
      2 
      3 print(re.findall(r'\w', 'hello 123_ */-='))
      4 print(len(re.findall(r'\w', 'hello 123_ */-=')))
      5 
      6 print(re.findall(r'\W', 'hello 123_ */-='))
      7 
      8 print(re.findall(r'\s', 'hell\no 12\t3_ */-='))
      9 
     10 print(re.findall(r'\S', 'hell\no 12\t3_ */-='))
     11 
     12 print(re.findall(r'\d', 'hell\no 12\t3_ */-='))
     13 print(re.findall(r'\D', 'hell\no 12\t3_ */-='))
     14 
     15 print(re.findall(r'\n', 'hell\no 12\t3_ */-='))
     16 print(re.findall(r'\t', 'hell\no 12\t3_ */-='))
     17 print(re.findall('l', 'hell\no 12\t3_ */-='))
     18 
     19 print(re.findall('tank', 'my name is tank, tank is handsome'))
     20 print(re.findall('^tank', 'tank my name is tank, tank is handsome'))
     21 print(re.findall('tank$', 'tank my name is tank,tank is handsome tank'))
     22 
     23 重复匹配
     24 .:匹配换行符以外的任意一个字符
     25 ['abc','a1c','aac','aac','a*c','a+c']
     26 print(re.findall('a.c', 'abc a1c aac asd aaaaac a*c a+c abasd'))
     27 a.c
     28 print(re.findall('a.c', 'abc a1c aac a\nc asd aaaaac a*c a+c abasd', re.DOTALL))
     29 
     30 []: 匹配一个字符,该字符属于中括号内指定的字符
     31 print(re.findall('a..c', 'abc a1 c aac asd aaaaac a *c a+c abasd ='))
     32 print(re.findall('a.c', 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'))
     33 print(re.findall('a[a-z]c', 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'))
     34 print(re.findall('a[A-Z]c', 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'))
     35 
     36 []内的+ * 不是量词
     37 print(re.findall('a[-+*/]c', 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'))
     38 print(re.findall('a[a-z][a-z]c', 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'))
     39 ^在[]内代表非
     40 print(re.findall('a[^a-z]c', 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'))
     41 
     42 *: 必须与其他字符连用,代表左侧的字符出现0次或者无穷次
     43 ab* ---》 匹配0个或多个b字符, 并且是a开头的
     44 print(re.findall('ab*', 'a ab abbb abbbb a1bbbb a-123'))
     45 # ['a','ab','abbb','abbbb','a','a']
     46 * == {0,}
     47 print(re.findall('ab{0,}', 'a ab abbb abbbb a1bbbb a-123'))
     48 
     49 ?: 必须与其他字符连用,代表左侧的字符出现0次或者1次
     50 print(re.findall('ab?', 'a ab abbb abbbb a1bbbb a-123'))
     51 # ab?
     52 # ['a','ab','ab','ab','a','a']
     53 {0,1} == ?
     54 print(re.findall('ab{0,1}', 'a ab abbb abbbb a1bbbb a-123'))
     55 
     56 +: 必须与其他字符连用,代表左侧的字符出现1次或者无穷次
     57 ab+
     58 print(re.findall('ab+', 'a ab abbb abbbb a1bbbb a-123'))
     59 # ['ab','abbb','abbbb']
     60 {1,} == +
     61 print(re.findall('ab{1,}', 'a ab abbb abbbb a1bbbb a-123'))
     62 
     63 # {n,m}: 必须与其他字符连用
     64 ab{1,3}  b字符出现1次——3次
     65 print(re.findall('ab{1,3}', 'a ab abbb abb abbbb a1bbbb a-123'))
     66 # ['ab', 'abbb', 'abb', 'abbb']
     67 
     68 
     69 .*:贪婪匹配
     70 a.*d ---> 匹配字符以最后的d作为结束标识
     71 print(re.findall('a.*d', 'ab123adfc1134124123aasfc123123'))
     72 
     73 # .*?:非贪婪匹配
     74 a.*?c
     75 print(re.findall('a.*?c', 'ab123adfc1134124123adasfc123123'))
     76 
     77 ():分组
     78 expression=".*?"
     79 print(re.findall('expression="(.*?)"', 'expression="1+2+3/4*5" tank="handsome"'))
     80 print(re.findall('href="(.*?)"',
     81                  '<p>段落</p><a href="https://www.sb.com">点我啊</a><h1>标题</h1><a href="https://www.sb.com">点我啊</a>'))
     82 
     83 a|b
     84 print(re.findall('a|b', 'ab123abasdfaf'))
     85 
     86  companies  company
     87 (?:)表示非捕获分组,和捕获分组唯一的区别在于,非捕获分组匹配的值不会保存起来
     88 (?:)---> 将ies或者y保留与compan拼接
     89 print(re.findall('compan(?:ies|y)',
     90                  'Too many companies have gone bankrupt, and the next one is my company'))
     91 
     92 标识性字符(提取的内容)
     93 print(re.findall('ale(x)', 'alex is SB,alex is bigSB'))
     94 print(re.search('alex', 'alex is SB,alex is bigSB').group())
     95 print(re.search('abcdefg', 'alex is SB,alex is bigSB'))
     96 
     97 print(re.search('^alex', '123alex is SB,alex is bigSB'))
     98 print(re.match('alex', '123alex is SB,alex is bigSB'))
     99 
    100 l = 'tank:17:male'.split(':')
    101 print(l)
    102  了解: 根据“ ” or “:” or “/” or “-” 来进行切分
    103 l1 = re.split('[ :/-]', 'a-b/c tank:17:male xxx')
    104 print(l1)
    105 
    106 [a-z]+xx
    107 了解: sub: 替换  ---》 将第三个参数中与正则(第一个参数)匹配的部分替换为第二个参数
    108 print(re.sub('[a-z]+xx', 'yxp', 'lxx is good,sb is lllxx wxx is good cxx is good'))
    109 
    110 了解: compile可以将正则表达式字符串预编译成可重复使用的正则对象
    111 pattern = re.compile('alex')  # 返回一个对象
    112 print(pattern)
    113 print(pattern.findall('alex is SB,alex is bigSB'))
    114 print(re.findall('alex', 'alex is SB,alex is bigSB'))
    115 
    116 print(pattern.search('alex is SB,alex is bigSB').group())  # alex
    117 
    118 import re
    119 
    120 str1 = '1abc a1 c aac aAc\n\taBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'
    121 print(re.findall(r'\w', str1))  # \w---匹配字母数字及下划线
    122 print(re.findall(r'\W', str1))  # \W---匹配非字母数字及下划线(例如空格、\n、\t)
    123 print(re.findall(r'\s', str1))  # 匹配任意空白字符,等价于[ \t\n\r\f\v]
    124 print(re.findall(r'\S', str1))  # 匹配非空白字符
    125 print(re.findall(r'\d', str1))  # 匹配数字,等价于[0-9]
    126 print(re.findall(r'\D', str1))  # 匹配任意非数字,等价于[^0-9]
    127 print(re.findall(r'\Aac', str1))  # \A匹配字符串开始
    128 print(re.findall('\n', str1))  # 匹配字母结束,只匹配到换行前的结束字符串
    129 print(re.findall(r'\n', str1))  # 匹配换行符
    130 print(re.findall(r'\t', str1))  # 匹配制表符
    131 print(re.findall('^1abc', str1))  # 匹配以什么开头
    132 print(re.findall('c$', str1))  # 匹配以什么结尾
    133 
    134 str1 = '1abbb a1 a\nbc aac aAc\n\taBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'
    135 print(re.findall('a.b', str1))  # 匹配中间是任意字符除了换行符
    136 print(re.findall('a.b', str1, re.S))  # 匹配中间是任意字符包含换行符
    137 print(re.findall('a.b', str1, re.DOTALL))  # 匹配中间是任意字符包含换行符
    138 print(re.findall('ab*', str1))  # 匹配0个或多个表达式
    139 print(re.findall('ab+', str1))  # 匹配1个或多个表达式
    140 print(re.findall('ab?', str1))  # 匹配0个或1个表达式
    141 print(re.findall('ab?a', str1))  # 匹配0个或1个表达式指代找b
    142 print(re.findall('ab{2}', 'abbb aabxbaa'))  # 表示1个a2个b
    143 print(re.findall('a[1*-]b', 'a1b  a\nb a*b a-b'))  # ['a1b', 'a*b', 'a-b']
    144 print(re.findall('a[^1*-]b', 'a1b a*b a-b a=b'))  # []内的^表示取反
    145 print(re.findall('a[0-9]b', 'a1b a*b a-b a=b'))  # ['a1b']
    146 print(re.findall('a[a-z]b', 'a1b a*b a-b a=b aeb'))  # ['aeb']
    147 print(re.findall('a[a-zA-Z]b', 'a1b a*b a-b a=b aeb aEb'))  # ['aeb', 'aEb']
    148 print(re.findall(r'a\\c', r'a\c'))  # ['a\\c']
    149 print(re.findall('(ab)+123', 'ababab123'))
    150 print(re.findall('(?:ab)+123', 'xxxaab123'))  # ['ab123']
    151 print(re.findall('(?:ab)+123', '12abab123'))  # ['abab123']如果有相同的ab连接在一起就一起显示
    152 print(re.findall('compan(?:ies|y)', 'Too many companies have gone bankrupt, and the next one is my company'))
    153 print(re.findall('href="(.*?)"',
    154                  '<p>段落</p><a href="https://www.sb.com">点我啊</a><h1>标题</h1><a href="https://www.sb.com">点我啊</a>'))
    155 print(re.findall('a|b', 'ab123abasdfaf'))
    156 print(re.split('ab', 'abcd'))  # ['', 'cd']
    157 print(re.split('[ab]', 'abcd'))  # ['', '', 'cd'] #如果是列表按照索引取
    158 print('===>', re.sub('a', 'A', 'alex make love'))  # ===> Alex mAke love,不指定n,默认替换所有
    159 print('===>', re.sub('a', 'A', 'alex make love', 1))  # ===> Alex make love
    160 
    161 obj = re.compile(r'\d{3}')  # 查找3个数字还要连续的
    162 print(obj.search('abc123eee1e').group())  # 123
    163 print(obj.findall('abc123eeee'))  # ['123'],重用了obj
    164 
    165 print(re.findall('a,b|c', 'ac,a,b,accc'))
    166 print(re.findall('ab?', 'a'))
    167 
    168 import re
    169 
    170 print(re.findall(r"<(?P<tag_name>\w+)>\w+</(?P=tag_name)>", "<h1>hello</h1>"))  # ['h1']
    171 
    172 
    173 import re
    174 
    175 str1 = '<h1>www.oldboyedu.*+com<h1>'
    176 
    177 # www.oldboyedu.
    178 re.findall(
    179     # \. == '.'
    180     # . == 任意字符
    181     r'www.*\.\*\+',
    182     str1
    183 )
  • 相关阅读:
    [原]poj-2680-Choose the best route-dijkstra(基础最短路)
    [转]c/c++输入函数
    [原]poj-2524(裸并查集)
    [原]poj-1611-The Suspects(水并查集)
    ccnu-线段树-简单的区间更新(三题)
    团队博客(3)
    个人NABCD
    团队博客(2)
    团队博客(1)
    课堂练习:返回一个二维数组中最大子数组的和
  • 原文地址:https://www.cnblogs.com/ludingchao/p/12118141.html
Copyright © 2011-2022 走看看