zoukankan      html  css  js  c++  java
  • re模式

    re模式

    一:什么是正则

    正则就是用一些具有特殊含义的符号组合到一起(称之为正则表达式)来描述字符或者

    字符串的方法。或者说:正则就是用来描述一类事物的规则。(在python中)它内嵌在

    python中,并通过re模块实现。正则表达式模式被编译成一系列字节码,然后由用C编写的匹配引擎执行。

    生活中处处都是正则:

    比如我们描述:4条腿

      你可能会想到的是四条腿的动物或者桌子,这些就是条件,你去选取东西的条件。re模式就是条件,方便你去

    找到你要的东西。

     

    import re
    # print(re.findall('\w','ab 12+- *&_'))
    #                                  \w
    # print(re.findall('\W','ab 12+- *&_'))
    # print(re.findall('\s','ab \n1\n2\t+- *&_'))
    # print(re.findall('\S','ab \n1\n2\t+- *&_'))
    # print(re.findall('\d','ab \n1\n2\t+- *&_'))
                        #                    \d
    # print(re.findall('\D','ab \n1\n2\t+- *&_'))
    
    # print(re.findall('\w_sb','egon alex_sb123123wxx_sb,lxx_sb'))
    #                                                       \w_sb
    
    # print(re.findall('\Aalex','abcalex is salexb'))
    # print(re.findall('\Aalex','alex is salexb'))
    # print(re.findall('^alex','alex is salexb'))
    # print(re.findall('sb','alexsb is sbalexbsb'))
    # print(re.findall('sb$','alexsb is sbalexbsb'))
    #                                           sb
    
    # print(re.findall('^ebn$','ebn1'))
    #                           ebn
    
    
    # print(re.findall('a\nc','a\nc a\tc a1c'))
    
    
    # 重复匹配:
    #.   ?   *   +  {m,n}  .*  .*?
    #1、.:代表除了换行符外的任意一个字符
    # print(re.findall('a.c','abc a1c aAc aaaaaca\nc'))
    #                                           a.c
    # print(re.findall('a.c','abc a1c aAc aaaaaca\nc',re.DOTALL))
    
    #2、?:代表左边那一个字符重复0次或1次
    # print(re.findall('ab?','a ab abb abbb abbbb abbbb'))
    #                                      ab?
    
    #3、*:代表左边那一个字符出现0次或无穷次
    # print(re.findall('ab*','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    #                                                   ab*
    
    #4、+ :代表左边那一个字符出现1次或无穷次
    # print(re.findall('ab+','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    #                                                    ab+
    
    #5、{m,n}:代表左边那一个字符出现m次到n次
    # print(re.findall('ab?','a ab abb abbb abbbb abbbb'))
    # print(re.findall('ab{0,1}','a ab abb abbb abbbb abbbb'))
    
    # print(re.findall('ab*','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    # print(re.findall('ab{0,}','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    
    # print(re.findall('ab+','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    # print(re.findall('ab{1,}','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    
    
    # print(re.findall('ab{1,3}','a ab abb abbb abbbb abbbb a1bbbbbbb'))
    
    
    #6、.*:匹配任意长度,任意的字符=====》贪婪匹配
    # print(re.findall('a.*c','ac a123c aaaac a *123)()c asdfasfdsadf'))
    #                        a.*c
    
    #7、.*?:非贪婪匹配
    # print(re.findall('a.*?c','a123c456c'))
    
    
    
    
    # ():分组
    # print(re.findall('(alex)_sb','alex_sb asdfsafdafdaalex_sb'))
    
    #                            (alex)_sb
    
    # print(re.findall(
    #     'href="(.*?)"',
    #     '<li><a id="blog_nav_sitehome" class="menu" href="http://www.cnblogs.com/">博客园</a></li>')
    # )
    # <li><a id="blog_nav_sitehome" class="menu" href="http://www.cnblogs.com/">博客园</a></li>
    #                                           href=".*?"
    
    
    # []:匹配一个指定范围内的字符(这一个字符来自于括号内定义的)
    # print(re.findall('a[0-9][0-9]c','a1c a+c a2c a9c a11c a-c acc aAc'))
    
    #当-需要被当作普通符号匹配时,只能放到[]的最左边或最右边
    # print(re.findall('a[-+*]c','a1c a+c a2c a9c a*c a11c a-c acc aAc'))
    
    # print(re.findall('a[a-zA-Z]c','a1c a+c a2c a9c a*c a11c a-c acc aAc'))
    
    
    # []内的^代表取反的意思
    # print(re.findall('a[^a-zA-Z]c','a c a1c a+c a2c a9c a*c a11c a-c acc aAc'))
    # print(re.findall('a[^0-9]c','a c a1c a+c a2c a9c a*c a11c a-c acc aAc'))
    
    
    # print(re.findall('([a-z]+)_sb','egon alex_sb123123wxxxxxxxxxxxxx_sb,lxx_sb'))
    #                                                [a-z]+_sb
    
    
    
    # | :或者
    # print(re.findall('compan(ies|y)','Too many companies have gone bankrupt, and the next one is my company'))
    
    # (?:):代表取匹配成功的所有内容,而不仅仅只是括号内的内容
    # print(re.findall('compan(?:ies|y)','Too many companies have gone bankrupt, and the next one is my company'))
    
    # print(re.findall('alex|sb','alex sb sadfsadfasdfegon alex sb egon'))
    
    
    
    
    # re模块的其他方法:
    # print(re.findall('alex|sb','123123 alex sb sadfsadfasdfegon alex sb egon'))
    # print(re.search('alex|sb','123213 alex sb sadfsadfasdfegon alex sb egon').group())
    # print(re.search('^alex','123213 alex sb sadfsadfasdfegon alex sb egon'))
    
    # print(re.search('^alex','alex sb sadfsadfasdfegon alex sb egon').group())
    # print(re.match('alex','alex sb sadfsadfasdfegon alex sb egon').group())
    # print(re.match('alex','123213 alex sb sadfsadfasdfegon alex sb egon'))
    
    
    # info='a:b:c:d'
    # print(info.split(':'))
    # print(re.split(':',info))
    
    # info=r'get :a.txt3333/rwx'
    # print(re.split('[ :\/]',info))
    
    
    # print('egon is beutifull egon'.replace('egon','EGON',1))
    
    # print(re.sub('(.*?)(egon)(.*?)(egon)(.*?)',r'\1\2\3EGON\5','123 egon is beutifull egon 123'))
    
    #              (123 )(egon)( is beutifull )(egon)( 123)
    
    #\1\2\3EGON\5
    
    # print(re.sub('(lqz)(.*?)(SB)',r'\3\2\1',r'lqz is SB'))
    # print(re.sub('([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)',r'\5\2\3\4\1',r'lqzzzz123+ is SB'))
    
    #(lqzzzz)(123+ )(is)( )(SB)
    
    
    # Compile the literal pattern once, then reuse the compiled object to
    # list every non-overlapping match in each sample string.
    pattern = re.compile('alex')
    for sample in (
        'alex is alex alex',
        'alexasdfsadfsadfasdfasdfasfd is alex alex',
    ):
        print(pattern.findall(sample))

        

  • 相关阅读:
    C字符串和C++字符串
    Linux的用户态和内核态
    知乎问答:如何理解“In UNIX, everything is a file”?
    科普:并行计算、分布式计算、集群计算和云计算
    个人使用VIM的一些总结
    C语言回调函数学习
    Python的函数参数传递:传值?引用?
    ECMAScript 函数函数概述
    ECMAScript 函数arguments 对象
    ECMAScript 语句with 语句
  • 原文地址:https://www.cnblogs.com/wuchenyu/p/8761688.html
Copyright © 2011-2022 走看看