zoukankan      html  css  js  c++  java
  • 正则表达式使用

    import re
    
    # match: anchors at the start of the string (first argument is the regular expression, second is the data); if the beginning does not match, the match fails
    # Strongly recommended: always write regular expressions as r'' raw strings
    # The return value is a match object; call .group() on it to get the matched text
    res = re.match(r'ello','ello meizi hello')
    
    # On success a match object is returned; when nothing matches the return value is None
    if res:
        print(res.group())
    else:
        print("没有匹配结果")
    
    # search: if the beginning does not match, keep scanning forward through the string for a match
    res = re.search(r'hello','ello meizi hello')
    
    # On success a match object is returned; when nothing matches the return value is None
    if res:
        print(res.group())
    else:
        print("没有匹配结果")
        """match函数和search函数的功能 区别和联系"""
    In [1]: import re
    In [2]: re.match(r"hello","hello").group()
    # ----------1. .匹配一个任意字符(除去\n)-----------------------------------------
    In [3]: re.match(r".ello","hello").group()
    In [4]: re.match(r".ello","Hello").group()
    In [5]: re.match(r".ello","1ello").group()
    In [6]: re.match(r".ello","&ello").group()
    In [7]: re.match(r".ello","\nello").group()  # 出错
    In [14]: re.match(r".ello",".ello").group()
    In [15]: re.match(r".ello",".ello").group()
    
    ------------2. []匹配集合中任意一个字符----------------------------------------
    In [8]: re.match(r"[Hh]ello","Hello").group()
    In [9]: re.match(r"[Hh]ello","hello").group()
    In [10]: re.match(r"[Hh]ello","1ello").group() # 出错
    
    In [11]: re.match(r"[0123456789]ello","1ello").group()
    In [12]: re.match(r"[0123456789]ello","9ello").group()
    In [13]: re.match(r"[0123456789]ello","Hello").group()  # 出错
    
    In [16]: re.match(r"[0123456789]ello","Hello").group()  # 出错
    In [17]: re.match(r"[0123456789]ello","1ello").group()
    
    ------------3. [-]匹配范围内部的任意一个字符-----------------
    In [18]: re.match(r"[0-9]ello","1ello").group()
    In [19]: re.match(r"[0-35-9]ello","1ello").group()
    In [20]: re.match(r"[0-35-9]ello","9ello").group()
    In [21]: re.match(r"[0-35-9]ello","4ello").group()  #  出错
    In [22]: re.match(r"[0-9a-zA-Z]ello","4ello").group()
    In [23]: re.match(r"[0-9a-zA-Z]ello","aello").group()
    In [24]: re.match(r"[0-9a-zA-Z]ello","Hello").group()
    In [25]: re.match(r"[0-35-9]ello","9ello").group()
    In [26]: re.match(r"[0-35-9]ello","4ello").group()  # 出错
    
    ------------4. [^] 禁止匹配 范围内部的任意一个字符-----------------
    
    In [27]: re.match(r"[^4]ello","4ello").group()
    In [28]: re.match(r"[^4]ello","0ello").group()
    In [29]: re.match(r"[^4]ello","9ello").group()
    
    ------------5. '\d'匹配一个任意数字字符  '\D'匹配一个任意非数字字符 -----------------
    
    In [30]: re.match(r"\dello","9ello").group()
    In [31]: re.match(r"\dello","0ello").group()
    In [32]: re.match(r"\dello","@ello").group()
    
    In [33]: re.match(r"\Dello","0ello").group()
    In [34]: re.match(r"\Dello","@ello").group()
    In [35]: re.match(r"\Dello","?ello").group()
    
    ------------6. '\s'匹配一个任意空白字符  '\S'匹配一个任意 非空白字符-------------------------------
    In [36]: re.match(r"\Dello"," ello").group()
    
    In [37]: re.match(r"[\t\n\r\v\f ]ello"," ello").group()
    In [38]: re.match(r"\sello"," ello").group()
    In [39]: re.match(r"\Sello"," ello").group()
    In [40]: re.match(r"\Sello","1ello").group()
    
    -----------7. '\w'匹配一个任意单词字符 '\W'匹配一个任意 非单词字符-----------------------------
    In [41]: re.match(r"\wello","1ello").group()
    In [42]: re.match(r"\wello","hello").group()
    In [43]: re.match(r"\wello","Hello").group()
    In [44]: re.match(r"\wello","_ello").group()
    In [45]: re.match(r"\Wello","_ello").group()
    In [46]: re.match(r"\Wello","?ello").group()
    
    ------------'\w'语义拓展
    In [2]: re.match(r"\wBC","ABC")
    In [3]: re.match(r"\wBC","ABC").group()
    In [4]: re.match(r"\wBC","呵BC").group()
    In [5]: re.match(r"\wBC","呵BC",re.ASCII).group()
    In [6]: re.match(r"\wBC","呵BC",re.UNICODE).group()
    In [7]: re.match(r"\wBC","呵BC").group()
    
    
    -----------二 量词  匹配多个字符---------------------------
    In [47]: re.match(r"嫦娥号\d升空了","嫦娥1号升空了").group()
    In [48]: re.match(r"嫦娥\d号升空了","嫦娥1号升空了").group()
    In [49]: re.match(r"嫦娥\d号升空了","嫦娥9号升空了").group()
    In [50]: re.match(r"嫦娥\d号升空了","嫦娥10号升空了").group()
    In [51]: re.match(r"嫦娥\d\d号升空了","嫦娥10号升空了").group()
    In [52]: re.match(r"嫦娥\d\d号升空了","嫦娥99号升空了").group()
    In [53]: re.match(r"嫦娥\d\d号升空了","嫦娥100号升空了").group()
    In [54]: re.match(r"嫦娥\d\d\d号升空了","嫦娥100号升空了").group()
    In [55]: re.match(r"嫦娥\d\d\d\d\d号升空了","嫦娥10000号升空了").group()
    
    In [56]: re.match(r"嫦娥\d{5}号升空了","嫦娥10000号升空了").group()
    In [57]: re.match(r"嫦娥\d{3}号升空了","嫦娥10000号升空了").group()
    In [58]: re.match(r"嫦娥\d{3}号升空了","嫦娥100号升空了").group()
    
    In [59]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥100号升空了").group()
    In [60]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥1号升空了").group()
    In [61]: re.match(r"嫦娥\d{1,3}号升空了","嫦娥10号升空了").group()
    In [62]: re.match(r"嫦娥\d{1,1}号升空了","嫦娥10号升空了").group()
    In [64]: re.match(r"嫦娥\d{0,3}号升空了","嫦娥号升空了").group()
    
    In [65]: re.match(r"嫦娥\d{0,}号升空了","嫦娥号升空了").group()
    In [66]: re.match(r"嫦娥\d*号升空了","嫦娥号升空了").group()
    
    In [67]: re.match(r"嫦娥\d{1,}号升空了","嫦娥1号升空了").group()
    In [68]: re.match(r"嫦娥\d{1,}号升空了","嫦娥号升空了").group()
    In [69]: re.match(r"嫦娥\d{1,}号升空了","嫦娥1000号升空了").group()
    In [70]: re.match(r"嫦娥\d+号升空了","嫦娥1000号升空了").group()
    
    In [8]: re.match(r"\w?BC","ABC").group()
    In [9]: re.match(r"\w?BC","BC").group()
    
    
    ---------------三 匹配开始^ 和结束位置$--------------------------
    
    In [71]: re.match(r"\w{4,20}@163.com","hello@163.com").group()
    In [72]: re.match(r"\w{4,20}@163.com","hello@163Acom").group()
    In [73]: re.match(r"\w{4,20}@163.com","hello@163Acom").group()
    In [74]: re.match(r"\w{4,20}@163.com","hello@163.com").group()
    In [75]: re.match(r"\w{4,20}@163.com","hello@163.com.cn").group()
    In [76]: re.match(r"\w{4,20}@163.com","cc.hello@163.com").group()
    In [77]: re.search(r"\w{4,20}@163.com","cc.hello@163.com").group()
    In [78]: re.search(r"\w{4,20}@163.com","cc.hello@163.com.cn").group()
    
    In [79]: re.search(r"^\w{4,20}@163.com","cc.hello@163.com.cn").group()
    In [80]: re.search(r"^\w{4,20}@163.com","hello@163.com.cn").group()
    
    In [81]: re.search(r"^\w{4,20}@163.com$","hello@163.com.cn").group()
    In [82]: re.search(r"^\w{4,20}@163.com$","hello@163.com").group()
    In [83]: re.match(r"^\w{4,20}@163.com","hello@163.com.cn").group()
    In [84]: re.match(r"^\w{4,20}@163.com$","hello@163.com.cn").group()
    In [85]: re.match(r"^\w{4,20}@163.com$","hello@163.com").group()
    
    
    -------------四 匹配分组 ()将感兴趣的数据进行提取------
    In [86]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group()
    In [87]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group(0)
    In [88]: re.match(r"嫦娥(\d+)号升空了","嫦娥1000号升空了").group(1)
    In [89]: re.match(r"^(\w{4,20})@(163).com$","hello@163.com").group()
    In [90]: re.match(r"^(\w{4,20})@(163).com$","hello@163.com").group(1)
    In [91]: re.match(r"^(\w{4,20})@(163).com$","hello@163.com").group(2)
    
    -------(|)匹配其中任何一个表达式并且放入分组中----
    
    In [92]: re.match(r"^(\w{4,20})@(163|qq).com$","hello@263.com").group(2)
    In [93]: re.match(r"^(\w{4,20})@(163|qq).com$","hello@qq.com").group(2)
    In [94]: re.match(r"^(\w*)hello(\w*)$","hellohello@qq.com").group(1)
    
    
    ---------------------'分组编号' 使用某个分组的数据在后面某个位置继续匹配 ----
    
    In [11]: re.match(r"^\w{4,20}@163.com$|^\w{4,20}@qq.com$","hello@163.com").group()
    In [12]: re.match(r"^\w{4,20}@163.com$|^\w{4,20}@qq.com$","hello@qq.com").group()
    In [13]: re.match(r"^\w{4,20}@(163|qq).com$","hello@qq.com").group()
    In [14]: re.match(r"^\w{4,20}@(163|qq).com$","hello@163.com").group()
    
    In [15]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group()
    In [16]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(1)
    In [17]: re.match(r"(\d{3,4})-(\d{6,8})","0755-12345678").group(2)
    
    In [18]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group(2)
    In [19]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-12345678").group()
    In [20]: re.match(r"(\d{3,4})-(\d{6,8}) \1-\2","0755-12345678 0755-1234567").group()
    In [21]: re.match(r"<(\w+)>","<html>aaa</html>").group(1)
    In [22]: re.match(r"<(\w+)>(.*)</\1>","<html>aaa</html>").group(1)
    In [23]: re.match(r"<(\w+)>(.*)</\1>","<html>aaa</html2>").group(1)
    
    匹配多个标签中的数据 
    <html><body>hello</body></html>
    """思考问题  如何创建有名分组  如何引用有名分组"""
    
    In [24]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group()
    In [25]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group(1)
    In [26]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html>").group(2)
    In [27]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html1>").group(2)
    In [28]: re.match(r"<(\w+)><(\w+)>(.+)</\2></\1>","<html><body>hello</body></html1>").group()
    
    In [29]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group()
    In [30]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(1)
    In [31]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(2)
    In [32]: re.match(r"((\d{3,4})-(\d{6,8}))","0755-12345678").group(3)
    
    In [34]: re.match(r"((\d{3,4})-(\d{6,8})) \2-\3","0755-12345678 0755-12345678").group()
    
    In [35]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group()
    In [36]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(1)
    In [37]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group(2)
    In [38]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('quhao')
    In [39]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})","0755-12345678").group('zuoji')
    In [40]: re.match(r"(?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8}) (?P=quhao)-(?P=zuoji)","0755-12345678 07
        ...: 55-12345678").group()
    
    In [41]: re.match(r"((?P<quhao>\d{3,4})-(?P<zuoji>\d{6,8})) (?P=quhao)-(?P=zuoji)","0755-12345678 
        ...: 0755-12345678").group()
    
    
    ---------------------------re模块高级函数--------------------------
    In [42]: ret = re.search(r"\d+", "阅读次数为 9999").group()
    
    In [43]: re.search(r"\d+", "阅读次数为 9999").group()
    Out[43]: '9999'
    
    In [46]: re.findall(r"\d+", "python = 9999, c = 7890, c++ = 12345")
    Out[46]: ['9999', '7890', '12345']
    
    In [47]: re.sub(r"\d+","998","python=997")
    Out[47]: 'python=998'
    
    In [48]: re.sub(r"\d+","998","python=997 c=988")
    Out[48]: 'python=998 c=998'
    
    In [49]: re.sub(r"\d+","998","python=997 c=988",1)
    Out[49]: 'python=998 c=988'
    
    In [52]: def func(matchobj):
        ...:     data = matchobj.group()
        ...:     str_data = str( int(data) + 1)
        ...:     return str_data
        ...: 
    
    In [53]: re.sub(r"\d+",func, "age=17")
    Out[53]: 'age=18'
    
    In [54]: data = """
        ...: <div>
        ...:         <p>岗位职责:</p>
        ...: <p>完成推荐算法、数据统计、接口、后台等服务器端相关工作</p>
        ...: <p><br></p>
        ...: <p>必备要求:</p>
        ...: <p>良好的自我驱动力和职业素养,工作积极主动、结果导向</p>
        ...: <p>&nbsp;<br></p>
        ...: <p>技术要求:</p>
        ...: <p>1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式</p>
        ...: <p>2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架</p>
        ...: <p>3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种<br></p>
        ...: <p>4、掌握NoSQL、MQ,熟练使用对应技术解决方案</p>
        ...: <p>5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js</p>
        ...: <p>&nbsp;<br></p>
        ...: <p>加分项:</p>
        ...: <p>大数据,数理统计,机器学习,sklearn,高性能,大并发。</p>
        ...: 
        ...:         </div>"""
    
    In [55]: re.sub(r"<.*>","",data)
    Out[55]: '
    
            
    
    
    
    
    
    
    
    
    
    
    
    
    
    
    
            '
    
    In [56]: re.sub(r"<\w+>","",data)
    Out[56]: '
    
            岗位职责:</p>
    完成推荐算法、数据统计、接口、后台等服务器端相关工作</p>
    </p>
    必备要求:</p>
    良好的自我驱动力和职业素养,工作积极主动、结果导向</p>
    &nbsp;</p>
    技术要求:</p>
    1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式</p>
    2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架</p>
    3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种</p>
    4、掌握NoSQL、MQ,熟练使用对应技术解决方案</p>
    5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js</p>
    &nbsp;</p>
    加分项:</p>
    大数据,数理统计,机器学习,sklearn,高性能,大并发。</p>
    
            </div>'
    
    In [57]: re.sub(r"</?\w+>","",data)
    Out[57]: '
    
            岗位职责:
    完成推荐算法、数据统计、接口、后台等服务器端相关工作
    
    必备要求:
    良好的自我驱动力和职业素养,工作积极主动、结果导向
    &nbsp;
    技术要求:
    1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式
    2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架
    3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种
    4、掌握NoSQL、MQ,熟练使用对应技术解决方案
    5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js
    &nbsp;
    加分项:
    大数据,数理统计,机器学习,sklearn,高性能,大并发。
    
            '
    
    In [58]: re.sub(r"</?\w+>|\n","",data)
    Out[58]: '        岗位职责:完成推荐算法、数据统计、接口、后台等服务器端相关工作必备要求:良好的自我驱动力和职业素养,工作积极主动、结果导向&nbsp;技术要求:1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种4、掌握NoSQL、MQ,熟练使用对应技术解决方案5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js&nbsp;加分项:大数据,数理统计,机器学习,sklearn,高性能,大并发。        '
    
    In [59]: re.sub(r"</?\w+>|\n|&nbsp","",data)
    Out[59]: '        岗位职责:完成推荐算法、数据统计、接口、后台等服务器端相关工作必备要求:良好的自我驱动力和职业素养,工作积极主动、结果导向;技术要求:1、一年以上 Python 开发经验,掌握面向对象分析和设计,了解设计模式2、掌握HTTP协议,熟悉MVC、MVVM等概念以及相关WEB开发框架3、掌握关系数据库开发设计,掌握 SQL,熟练使用 MySQL/PostgreSQL 中的一种4、掌握NoSQL、MQ,熟练使用对应技术解决方案5、熟悉 Javascript/CSS/HTML5,JQuery、React、Vue.js;加分项:大数据,数理统计,机器学习,sklearn,高性能,大并发。        '
    
    In [60]: re.split(r" ","age=18 name=tom")
    Out[60]: ['age=18', 'name=tom']
    
    In [61]: re.split(r" |=","age=18 name=tom")
    Out[61]: ['age', '18', 'name', 'tom']
    
    ----------------------?号将正则转化为非贪婪模式(懒惰模式)-------------------------------
    
    In [62]: re.match(r"(\d+)(\d?)","12345678").group()
    Out[62]: '12345678'
    
    In [63]: re.match(r"(\d+)(\d?)","12345678").group(1)
    Out[63]: '12345678'
    
    In [64]: re.match(r"(\d+)(\d?)","12345678").group(2)
    Out[64]: ''
    
    In [65]: re.match(r"(\d+?)(\d?)","12345678").group(2)
    Out[65]: '2'
    
    In [66]: re.match(r"(\d+?)(\d+)","12345678").group(2)
    Out[66]: '2345678'
    
    In [67]: re.match(r"(\d+?)(\d+)","12345678").group(1)
    Out[67]: '1'
    
    In [68]: re.match(r"(\d+)(\d+)","12345678").group(1)
    Out[68]: '1234567'
    
    In [69]: re.match(r"(\d+)(\d+)","12345678").group(2)
    Out[69]: '8'
    In [70]: url = """<img alt="丁叮c的直播" data-original="https://rpic.douyucdn.cn/live-cover/appCov
        ...: ers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/ap
        ...: pCovers/2017/12/27/462253_20171227014914_big.jpg" width="283" height="163" style="display
        ...: : block;">"""
    
    In [72]: re.search(r"http.*jpg",url).group()
    Out[72]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg" src="https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg'
    
    In [73]: re.search(r"http.*?jpg",url).group()
    Out[73]: 'https://rpic.douyucdn.cn/live-cover/appCovers/2017/12/27/462253_20171227014914_big.jpg'
    
    
    
    ----------------r原生字符串 可以自动将其中的反斜线 进行转义----------------------
    In [74]: path = "c:\\a\\b"
    
    In [75]: print(path)
    c:\a\b
    
    In [76]: path = "c:\a\n"
    
    In [77]: print(path)
    c:
    
    
    In [78]: re.match("c:\\a","c:\\a\\b\\c").group()
    ---------------------------------------------------------------------------
    AttributeError                            Traceback (most recent call last)
    <ipython-input-78-c8bbf045c2be> in <module>()
    ----> 1 re.match("c:\\a","c:\\a\\b\\c").group()
    
    AttributeError: 'NoneType' object has no attribute 'group'
    
    In [79]: re.match("c:\\\\a","c:\\a\\b\\c").group()
    Out[79]: 'c:\\a'
    
    In [80]: re.match("c:\\\\a\\\\b\\\\c","c:\\a\\b\\c").group()
    Out[80]: 'c:\\a\\b\\c'
    
    In [81]: re.match(r"c:\\a\\b\\c","c:\\a\\b\\c").group()
    Out[81]: 'c:\\a\\b\\c'
    
    In [82]: r"c:\\a\\b\\c"
    Out[82]: 'c:\\\\a\\\\b\\\\c'
  • 相关阅读:
    Http协议的断点续传下载器,使用观察者模式监视下载进度,使用xml保存下载进度。
    C++ 复制到粘贴板
    编译防火墙——C++的Pimpl惯用法解析
    字符串输出
    windows路径操作API函数
    Boost解析xml——xml写入
    智能指针shared_ptr
    Boost 解析xml——插入Item
    ListCtrl添加右键菜单(在对话框类中)
    抓包工具Charles的使用说明
  • 原文地址:https://www.cnblogs.com/snailon/p/11371785.html
Copyright © 2011-2022 走看看