zoukankan      html  css  js  c++  java
  • 正则表达式使用

    import re
    
    # re.match(pattern, text) only tries the pattern at the very start of the
    # text; if the beginning does not match, the whole call fails.
    # Writing patterns as raw strings (r'...') is strongly recommended.
    # The call returns a match object; use .group() to read the matched text.
    res = re.match(r'ello', 'ello meizi hello')

    # A failed match returns None, so test for that before calling .group().
    if res is None:
        print("没有匹配结果")
    else:
        print(res.group())

    # re.search() keeps scanning forward: when the start of the text does not
    # match, it tries later positions to see whether a match can be found.
    res = re.search(r'hello', 'ello meizi hello')

    # Same None check as for the match() call.
    if res is None:
        print("没有匹配结果")
    else:
        print(res.group())
        """match函数和search函数的功能 区别和联系"""
    In [1]: import re
    In [2]: re.match(r"hello","hello").group()
    # ----------1. .匹配一个任意字符(除去\n)-----------------------------------------
    In [3]: re.match(r".ello","hello").group()
    In [4]: re.match(r".ello","Hello").group()
    In [5]: re.match(r".ello","1ello").group()
    In [6]: re.match(r".ello","&ello").group()
    In [7]: re.match(r".ello","\nello").group()  # 出错
    In [14]: re.match(r".ello",".ello").group()
    In [15]: re.match(r"\.ello",".ello").group()
    
    ------------2. []匹配集合中任意一个字符----------------------------------------
    In [8]: re.match(r"[Hh]ello","Hello").group()
    In [9]: re.match(r"[Hh]ello","hello").group()
    In [10]: re.match(r"[Hh]ello","1ello").group() # 出错
    
    In [11]: re.match(r"[0123456789]ello","1ello").group()
    In [12]: re.match(r"[0123456789]ello","9ello").group()
    In [13]: re.match(r"[0123456789]ello","Hello").group()  # 出错
    
    In [16]: re.match(r"[0123456789]ello","Hello").group()  # 出错
    In [17]: re.match(r"[0123456789]ello","1ello").group()
    
    ------------3. [-]匹配范围内部的任意一个字符-----------------
    In [18]: re.match(r"[0-9]ello","1ello").group()
    In [19]: re.match(r"[0-35-9]ello","1ello").group()
    In [20]: re.match(r"[0-35-9]ello","9ello").group()
    In [21]: re.match(r"[0-35-9]ello","4ello").group()  #  出错
    In [22]: re.match(r"[0-9a-zA-Z]ello","4ello").group()
    In [23]: re.match(r"[0-9a-zA-Z]ello","aello").group()
    In [24]: re.match(r"[0-9a-zA-Z]ello","Hello").group()
    In [25]: re.match(r"[0-35-9]ello","9ello").group()
    In [26]: re.match(r"[0-35-9]ello","4ello").group()  # 出错
    
    ------------4. [^] 禁止匹配 范围内部的任意一个字符-----------------
    
    In [27]: re.match(r"[^4]ello","4ello").group()
    In [28]: re.match(r"[^4]ello","0ello").group()
    In [29]: re.match(r"[^4]ello","9ello").group()
    
    ------------5. '\d'匹配一个任意数字字符  '\D'匹配一个任意非数字字符 -----------------
    
    In [30]: re.match(r"\dello","9ello").group()
    In [31]: re.match(r"\dello","0ello").group()
    In [32]: re.match(r"\dello","@ello").group()
    
    In [33]: re.match(r"\Dello","0ello").group()
    In [34]: re.match(r"\Dello","@ello").group()
    In [35]: re.match(r"\Dello","?ello").group()
    
    ------------6. '\s'匹配一个任意空白字符  '\S'匹配一个任意非空白字符-------------------------------
    In [36]: re.match(r"\Dello"," ello").group()
    
    In [37]: re.match(r"[\t\n\r\v\f ]ello"," ello").group()
    In [38]: re.match(r"\sello"," ello").group()
    In [39]: re.match(r"\Sello"," ello").group()
    In [40]: re.match(r"\Sello","1ello").group()
    
    -----------7. '\w'匹配一个任意单词字符 '\W'匹配一个任意非单词字符-----------------------------
    In [41]: re.match(r"\wello","1ello").group()
    In [42]: re.match(r"\wello","hello").group()
    In [43]: re.match(r"\wello","Hello").group()
    In [44]: re.match(r"\wello","_ello").group()
    In [45]: re.match(r"\Wello","_ello").group()
    In [46]: re.match(r"\Wello","?ello").group()
    
    ------------'\w'语义拓展
    In [2]: re.match(r"\wBC","ABC")
    In [3]: re.match(r"\wBC","ABC").group()
    In [4]: re.match(r"\wBC","呵BC").group()
    In [5]: re.match(r"\wBC","呵BC",re.ASCII).group()
    In [6]: re.match(r"\wBC","呵BC",re.UNICODE).group()
    In [7]: re.match(r"\wBC","呵BC").group()
    
    
    -----------二 量词  匹配多个字符---------------------------
    In [47]: re.match(r"嫦娥号\d升空了","嫦娥1号升空了").group()
    In [48]: re.match(r"嫦娥\d号升空了","嫦娥1号升空了").group()
    In [49]: re.match(r"嫦娥\d号升空了","嫦娥9号升空了").group()
    In [50]: re.match(r"嫦娥\d号升空了","嫦娥10号升空了").group()
    In [51]: re.match(r"嫦娥\d\d号升空了","嫦娥10号升空了").group()
    In [52]: re.match(r"嫦娥\d\d号升空了","嫦娥99号升空了").group()
    In [53]: re.match(r"嫦娥\d\d号升空了","嫦娥100号升空了").group()
    In [54]: re.match(r"嫦娥\d\d\d号升空了","嫦娥100号升空了").group()
    In [55]: re.match(r"嫦娥\d\d\d\d\d号升空了","嫦娥10000号升空了").group()
    
    In [56]: re.match(r"嫦娥\d{5}号升空了","嫦娥10000号升空了").group()
    In [57]: re.match(r"嫦娥\d{3}号升空了","嫦娥10000号升空了").group()
    In [58]: re.match(r"嫦娥\d{3}号升空了","嫦娥100号升空了").group()
    
    In [59]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥100号升空了").group()
    In [60]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥1号升空了").group()
    In [61]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥10号升空了").group()
    In [62]: re.match(r"嫦娥\d{1,1}号升空了","嫦娥10号升空了").group()
    In [64]: re.match(r"嫦娥\d{0,3}号升空了","嫦娥号升空了").group()
    
    In [65]: re.match(r"嫦娥\d{0,}号升空了","嫦娥号升空了").group()
    In [66]: re.match(r"嫦娥\d*号升空了","嫦娥号升空了").group()
    
    In [67]: re.match(r"嫦娥\d{1,}号升空了","嫦娥1号升空了").group()
    In [68]: re.match(r"嫦娥\d{1,}号升空了","嫦娥号升空了").group()
    In [69]: re.match(r"嫦娥\d{1,}号升空了","嫦娥1000号升空了").group()
    In [70]: re.match(r"嫦娥\d+号升空了","嫦娥1000号升空了").group()
    
    In [8]: re.match(r"\w?BC","ABC").group()
    In [9]: re.match(r"\w?BC","BC").group()
    
    
    ---------------三 匹配开始^ 和结束位置$--------------------------
    
    In [71]: re.match(r"\w{4,20}@163.com","hello@163.com").group()
    In [72]: re.match(r"\w{4,20}@163.com","hello@163Acom").group()
    In [73]: re.match(r"\w{4,20}@163\.com","hello@163Acom").group()
    In [74]: re.match(r"\w{4,20}@163\.com","hello@163.com").group()
    In [75]: re.match(r"\w{4,20}@163\.com","hello@163.com.cn").group()
    In [76]: re.match(r"\w{4,20}@163\.com","cc.hello@163.com").group()
    In [77]: re.search(r"\w{4,20}@163\.com","cc.hello@163.com").group()
    In [78]: re.search(r"\w{4,20}@163\.com","cc.hello@163.com.cn").group()
    
    In [79]: re.search(r"^\w{4,20}@163\.com","cc.hello@163.com.cn").group()
    In [80]: re.search(r"^\w{4,20}@163\.com","hello@163.com.cn").group()
    
    In [81]: re.search(r"^\w{4,20}@163\.com$","hello@163.com.cn").group()
    In [82]: re.search(r"^\w{4,20}@163\.com$","hello@163.com").group()
    In [83]: re.match(r"^\w{4,20}@163\.com","hello@163.com.cn").group()
    In [84]: re.match(r"^\w{4,20}@163\.com$","hello@163.com.cn").group()
    In [85]: re.match(r"^\w{4,20}@163\.com$","hello@163.com").group()
    
    
    -------------四 匹配分组 ()将感兴趣的数据进行提取------
    In [86]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group()
    In [87]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group(0)
    In [88]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group(1)
    In [89]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group()
    In [90]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group(1)
    In [91]: re.match(r"^(\w{4,20})@(163)\.com$","hello@163.com").group(2)
    
    -------(|)匹配其中任何一个表达式并且放入分组中----
    
    In [92]: re.match(r"^(\w{4,20})@(163|qq)\.com$","hello@263.com").group(2)
    In [93]: re.match(r"^(\w{4,20})@(163|qq)\.com$","hello@qq.com").group(2)
    In [94]: re.match(r"^(\w*)hello(\w*)$","hellohello@qq.com").group(1)
    
    
    ---------------------'分组编号' 使用某个分组的数据在后面某个位置继续匹配 ----
    
    In [11]: re.match(r"^\w{4,20}@163\.com$|^\w{4,20}@qq\.com$","hello@163.com").group()
    In [12]: re.match(r"^\w{4,20}@163\.com$|^\w{4,20}@qq\.com$","hello@qq.com").group()
    In [13]: re.match(r"^\w{4,20}@(163|qq)\.com$","hello@qq.com").group()
    In [14]: re.match(r"^\w{4,20}@(163|qq)\.com$","hello@163.com").group()
    
    In [15]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group()
    In [16]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(1)
    In [17]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(2)
    
    In [18]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group(2)
    In [19]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-12345678").group()
    In [20]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group()
    In [21]: re.match(r"<(\w+)>","<html>aaa</html>").group(1)
    In [22]: re.match(r"<(\w+)>(.*)</\1>","<html>aaa</html>").group(1)
    In [23]: re.match(r"<(\w+)>(.*)</\1>","<html>aaa</html2>").group(1)
    
    匹配多个标签中的数据 
    <html><body>hello</body></html>
    """思考问题  如何创建有名分组  如何引用有名分组"""
    
    In [24]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group()
    In [25]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group(1)
    In [26]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group(2)
    In [27]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html1>").group(2)
    In [28]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html1>").group()
    
    In [29]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group()
    In [30]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(1)
    In [31]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(2)
    In [32]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(3)
    
    In [34]: re.match(r"((\d{3,4})-(\d{6,8})) \2-\3","0755-12345678 0755-12345678").group()
    
    In [35]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group()
    In [36]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(1)
    In [37]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(2)
    In [38]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('quhao')
    In [39]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('zuoji')
    In [40]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8}) (?P=quhao)-(?P=zuoji)","0755-12345678 07
        ...: 55-12345678").group()
    
    In [41]: re.match(r"((?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})) (?P=quhao)-(?P=zuoji)","0755-12345678 
        ...: 0755-12345678").group()
    
    
    ---------------------------re模块高级函数--------------------------
    In [42]: ret = re.search(r"\d+", "阅读次数为 9999").group()
    
    In [43]: re.search(r"\d+", "阅读次数为 9999").group()
    Out[43]: '9999'
    
    In [46]: re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345")
    Out[46]: ['9999', '7890', '12345']
    
    In [47]: re.sub(r"\d+","998","python=997")
    Out[47]: 'python=998'
    
    In [48]: re.sub(r"\d+","998","python=997 c=988")
    Out[48]: 'python=998 c=998'
    
    In [49]: re.sub(r"\d+","998","python=997 c=988",1)
    Out[49]: 'python=998 c=988'
    
    In [52]: def func(matchobj):
        ...:     data = matchobj.group()
        ...:     str_data = str( int(data) + 1)
        ...:     return str_data
        ...: 
    
    In [53]: re.sub(r"\d+",func, "age=17")
    Out[53]: 'age=18'
    
    In [54]: data = """
        ...: <div>
        ...:         <p>岗位职责:</p>
        ...: <p>完成推荐算法、数据统计、接口、后台等服务器端相关工作</p>
        ...: <p><br></p>
        ...: <p>必备要求:</p>
        ...: <p>良好的自我驱动力和职业素养,工作积极主动、结果导向</p>
        ...: <p>&nbsp;<br></p>
        ...: <p>技术要求:</p>
        ...: <p>1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式</p>
        ...: <p>2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架</p>
        ...: <p>3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种<br></p>
        ...: <p>4、掌握NoSQL、MQ,熟练使用对应技术解决方案</p>
        ...: <p>5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js</p>
        ...: <p>&nbsp;<br></p>
        ...: <p>加分项:</p>
        ...: <p>大数据,数理统计,机器学习,sklearn,高性能,大并发。</p>
        ...: 
        ...:         </div>"""
    
    In [55]: re.sub(r"<.*>","",data)
    Out[55]: '
    
            
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
            '
    
    In [56]: re.sub(r"<\w+>","",data)
    Out[56]: '
    
            岗位职责:</p>
    完成推荐算法、数据统计、接口、后台等服务器端相关工作</p>
    </p>
    必备要求:</p>
    良好的自我驱动力和职业素养,工作积极主动、结果导向</p>
    &nbsp;</p>
    技术要求:</p>
    1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式</p>
    2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架</p>
    3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种</p>
    4、掌握NoSQL、MQ,熟练使用对应技术解决方案</p>
    5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js</p>
    &nbsp;</p>
    加分项:</p>
    大数据,数理统计,机器学习,sklearn,高性能,大并发。</p>
    
            </div>'
    
    In [57]: re.sub(r"</?\w+>","",data)
    Out[57]: '
    
            岗位职责:
    完成推荐算法、数据统计、接口、后台等服务器端相关工作
    
    必备要求:
    良好的自我驱动力和职业素养,工作积极主动、结果导向
    &nbsp;
    技术要求:
    1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式
    2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架
    3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种
    4、掌握NoSQL、MQ,熟练使用对应技术解决方案
    5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js
    &nbsp;
    加分项:
    大数据,数理统计,机器学习,sklearn,高性能,大并发。
    
            '
    
    In [58]: re.sub(r"</?\w+>|\n","",data)
    Out[58]: '        岗位职责:完成推荐算法、数据统计、接口、后台等服务器端相关工作必备要求:良好的自我驱动力和职业素养,工作积极主动、结果导向&nbsp;技术要求:1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种4、掌握NoSQL、MQ,熟练使用对应技术解决方案5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js&nbsp;加分项:大数据,数理统计,机器学习,sklearn,高性能,大并发。        '
    
    In [59]: re.sub(r"</?\w+>|\n|&nbsp","",data)
    Out[59]: '        岗位职责:完成推荐算法、数据统计、接口、后台等服务器端相关工作必备要求:良好的自我驱动力和职业素养,工作积极主动、结果导向;技术要求:1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种4、掌握NoSQL、MQ,熟练使用对应技术解决方案5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js;加分项:大数据,数理统计,机器学习,sklearn,高性能,大并发。        '
    
    In [60]: re.split(r" ","age=18 name=tom")
    Out[60]: ['age=18', 'name=tom']
    
    In [61]: re.split(r" |=","age=18 name=tom")
    Out[61]: ['age', '18', 'name', 'tom']
    
    ----------------------?号将正则转化为非贪婪模式(懒惰模式)-------------------------------
    
    In [62]: re.match(r"(\d+)(\d?)","12345678").group()
    Out[62]: '12345678'
    
    In [63]: re.match(r"(\d+)(\d?)","12345678").group(1)
    Out[63]: '12345678'
    
    In [64]: re.match(r"(\d+)(\d?)","12345678").group(2)
    Out[64]: ''
    
    In [65]: re.match(r"(\d+?)(\d?)","12345678").group(2)
    Out[65]: '2'
    
    In [66]: re.match(r"(\d+?)(\d+)","12345678").group(2)
    Out[66]: '2345678'
    
    In [67]: re.match(r"(\d+?)(\d+)","12345678").group(1)
    Out[67]: '1'
    
    In [68]: re.match(r"(\d+)(\d+)","12345678").group(1)
    Out[68]: '1234567'
    
    In [69]: re.match(r"(\d+)(\d+)","12345678").group(2)
    Out[69]: '8'
    In [70]: url = """<img alt="丁叮c的直播" data-original="https://rpic.douyucdn.cn/live-cover/appCov
        ...: ers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/ap
        ...: pCovers/2017/12/27/462253_20171227014914_big.jpg" width="283" height="163" style="display
        ...: : block;">"""
    
    In [72]: re.search(r"http.*jpg",url).group()
    Out[72]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg'
    
    In [73]: re.search(r"http.*?jpg",url).group()
    Out[73]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg'
    
    
    
    ----------------r原生字符串 可以自动将其中的反斜线 进行转义----------------------
    In [74]: path = "c:\\a\\b"
    
    In [75]: print(path)
    c:\a\b
    
    In [76]: path = "c:\a\n"
    
    In [77]: print(path)
    c:
    
    
    In [78]: re.match("c:\\a","c:\\a\\b\\c").group()
    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    <ipython-input-78-c8bbf045c2be> in <module>()
    ----> 1 re.match("c:\\a","c:\\a\\b\\c").group()
    
    AttributeError: 'NoneType' object has no attribute 'group'
    
    In [79]: re.match("c:\\\\a","c:\\a\\b\\c").group()
    Out[79]: 'c:\\a'
    
    In [80]: re.match("c:\\\\a\\\\b\\\\c","c:\\a\\b\\c").group()
    Out[80]: 'c:\\a\\b\\c'
    
    In [81]: re.match(r"c:\\a\\b\\c","c:\\a\\b\\c").group()
    Out[81]: 'c:\\a\\b\\c'
    
    In [82]: r"c:\\a\\b\\c"
    Out[82]: 'c:\\\\a\\\\b\\\\c'
  • 相关阅读:
    PAIRING WORKFLOW MANAGER 1.0 WITH SHAREPOINT 2013
    Education resources from Microsoft
    upgrade to sql server 2012
    ULSViewer sharepoint 2013 log viewer
    Top 10 Most Valuable Microsoft SharePoint 2010 Books
    讨论 Setsockopt选项
    使用 Alchemy 技术编译 C 语言程序为 Flex 可调用的 SWC
    Nagle's algorithm
    Nagle算法 TCP_NODELAY和TCP_CORK
    Design issues Sending small data segments over TCP with Winsock
  • 原文地址:https://www.cnblogs.com/snailon/p/11371785.html
Copyright © 2011-2022 走看看