zoukankan      html  css  js  c++  java
  • re(正则)模块

    re(正则)模块

    re.findall

    import re
    
    s = "meet_宝元_meet"
    print(re.findall("meet",s))
    # ['meet', 'meet']
    # 从字符串中全部查找内容,返回一个列表
    
    s = "meet_宝元_meet123"
    print(re.findall(r"\w",s))
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', 'm', 'e', 'e', 't', '1', '2', '3']
    # 查找数字,字母(中文),下划线
    
    s = "meet_宝元_meet123!@#"
    print(re.findall(r"\W",s))
    # ['!', '@', '#']
    # 查找非数字,字母(中文),下划线
    
    s = "meet_  宝元_  me  et\t \n"
    print(re.findall(r"\s",s))
    # [' ', ' ', ' ', ' ', ' ', ' ', '\t', ' ', '\n']
    # 查找任意空格,换行符,制表符
    
    s = "meet_  宝元_  me et\t \n"
    print(re.findall(r"\S",s))
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', 'm', 'e', 'e', 't']
    # 查找非任意空格,换行符,制表符
    
    s = "meet_ 宝元_123me  et\t \n"
    print(re.findall(r"\d",s))
    # ['1', '2', '3']
    # 查找数字
    
    s = "meet_ 宝元_123me  et\t \n"
    print(re.findall(r"\D",s))
    # ['m', 'e', 'e', 't', '_', ' ', '宝', '元', '_', 'm', 'e', ' ', ' ', 'e', 't', '\t', ' ', '\n']
    # 查找非数字
    
    s = "meet宝元_123meet\t \n"
    print(re.findall(r"\Ameet",s))
    # ['meet']
    # 查找是否以什么开头
    
    s = "meet宝元_123meet"
    print(re.findall(r"meet\Z",s))
    # ['meet']
    # 查找是否以什么结尾
    
    s = "meet宝元_123meet \n \t \n"
    print(re.findall("\n",s))
    # ['\n', '\n']
    # 查找换行符
    
    s = "meet宝元_123meet \n \t \n"
    print(re.findall("\t",s))
    # ['\t']
    # 查找制表符
    
    s = "mtet宝元_123maet \n \t"
    print(re.findall("m.e",s))
    # ['mte', 'mae']
    # .只能匹配任意一个内容(非换行符)
    
    s = "m\net宝元_123maet \n \t "
    print(re.findall("m.e",s,re.DOTALL))
    # ['m\ne', 'mae']
    # 加上 re.DOTALL 后, . 可以匹配任意一个内容(包括换行符)
    
    s = "meet宝元_1A-23maet"
    print(re.findall("[a-z]",s))
    # ['m', 'e', 'e', 't', 'm', 'a', 'e', 't']
    # 小写的a到z
    
    print(re.findall("[A-Z]",s))
    # ['A']
    # 大写的A到Z
    
    print(re.findall("[A-Za-z]",s))
    # ['m', 'e', 'e', 't', 'A', 'm', 'a', 'e', 't']
    # 大写和小写的A到Z,a到z
    
    print(re.findall("[a-z0-9]",s))
    # ['m', 'e', 'e', 't', '1', '2', '3', 'm', 'a', 'e', 't']
    # 小写的a到z和数字0到9
    
    s = "meet宝元_1A-23maet"
    print(re.findall("[^0-9]",s))
    # ['m', 'e', 'e', 't', '宝', '元', '_', 'A', '-', 'm', 'a', 'e', 't']
    # [^0-9] 查找非0-9的内容
    
    s = "mmmmmm"
    print(re.findall("m*",s))
    # ['mmmmmm', '']
    # 匹配 * 前元素0个或多个 [贪婪匹配] 
    
    s = "meet_asdf_meees_mmns_aaam_meeeee"
    print(re.findall("me+",s))
    # ['mee', 'meee', 'meeeee']
    # 匹配 + 前元素一个或多个 
    
    s = "meet_asdf_m"
    print(re.findall("m?",s))
    # ['m', '', '', '', '', '', '', '', '', '', 'm', '']
    # 匹配 ? 前元素0个或一个 [贪婪匹配; 在量词后面再加 ? 才是非贪婪匹配, 如 *? +? ??]
    
    s = "meet_asdf_msss_mmns_aaam"
    print(re.findall("s{3}",s))
    # ['sss']
    # 匹配 s{3} s重复3次 == sss 
    
    s = "meet_assdf_msss_mmns_aaam"
    print(re.findall("s{1,3}",s))
    # ['ss', 'sss', 's']
    # 指定元素最少重复多少次,最多重复多少次
    
    s = "meet_assdf_msss_mmns_aaam"
    print(re.findall("m|s",s))
    # ['m', 's', 's', 'm', 's', 's', 's', 'm', 'm', 's', 'm']
    # m|s,m或者s 
    
    s = "meet_meet_assdf_mssst_(.)mmns_aaamt"
    print(re.findall("m(.+)t",s))
    # ['eet_meet_assdf_mssst_(.)mmns_aaam']
    # m()t,取m与t中间的所有元素 [贪婪匹配]
    
    s = "meet_assdf_mssst_(.)mmns_aaamaaat"
    print(re.findall("m(?:..?)t",s))
    # ["meet"]
    # ?: 会匹配到括号两边的元素
    

    扩展练习:

    扩展练习:
    import re
    s = "alex_sb ale123_sb wu12sir_sb wusir_sb ritian_sb 的 alex wusir "
    print(re.findall(r"\w+_sb",s))
    print(re.findall("[a-z]+_sb",s))
    # ['alex_sb', 'ale123_sb', 'wu12sir_sb', 'wusir_sb', 'ritian_sb']
    # ['alex_sb', 'sir_sb', 'wusir_sb', 'ritian_sb']
    
    print(re.findall("常(.*)娃","常鑫垃圾_井盖_烧饼吃娃娃_自行车_葫芦爷爷救娃娃"))
    print(re.findall("常(.*?)娃","常鑫垃圾_井盖_烧饼吃娃娃_自行车_葫芦爷爷救娃娃"))
    # ['鑫垃圾_井盖_烧饼吃娃娃_自行车_葫芦爷爷救娃']
    # ['鑫垃圾_井盖_烧饼吃']
    

    re.search

    import re
    s = "_sb alex 123_sb wu12sir_sb wusir_sb ritian_sb 的 x wusir "
    print(re.search("ale",s).group())
    # "ale"
    # 找到一个元素后就停止查找,从字符串中进行查找,找到后返回的是一个元素,查看元素加.group()
    # 如果字符串中没有该元素,search 返回 None,此时调用 .group() 会报 AttributeError
    

    re.match

    import re
    s = 'alex_sb alex 123_sb wu12sir_sb wusir_sb ritian_sb 的 x wusir '
    print(re.match("ale",s).group())
    # "ale"
    # match 找到1个后就停止查找了,只从字符串的开头查找.找到后返回的是一个对象,查看元素.group()
    # 如果字符串开头没有,match 返回 None,此时调用 .group() 会报 AttributeError
    

    re.split

    import re
    s = "_sb alex,123:sb;wu12sir#sb*wusir!sb ritian_sb 的 x wusir "
    print(re.split("[#,:!*]",s))
    # ['_sb alex', '123', 'sb;wu12sir', 'sb', 'wusir', 'sb ritian_sb 的 x wusir ']
    # 分割
    

    re.sub

    import re
    print(re.sub("barry","太亮","barry是最好的讲师,barry就是一个普通老师,请不要将barry当男神对待"))
    # 太亮是最好的讲师,太亮就是一个普通老师,请不要将太亮当男神对待
    # 替换
    

    compile

    obj = re.compile(r"\w")
    print(obj.findall("meet_宝元_常鑫垃圾"))
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', '常', '鑫', '垃', '圾']
    # 自定义匹配规则
    

    finditer

    g = re.finditer(r"\w","常鑫垃圾")
    for i in g:
        print(i.group())
    # 常 鑫 垃 圾
    # 返回的是一个迭代器的地址,查看元素加 .group()
    

    给分组起名字

    import re
    ret = re.search(r"<(?P<tag_name>\w+)>\w+</\w+>","<h1>hello</h1>")
    print(ret.group("tag_name"))
    print(ret.group())
    # h1
    # <h1>hello</h1>
    
    import re
    ret = re.search(r"<(\w+)>\w+</\1>","<h1>hello</h1>")
    print(ret.group(1))
    print(ret.group())
    # h1
    # <h1>hello</h1>
    
    import re
    ret = re.search(r"(?P<aaa>\w+)dfa","asbsadfasdfa")
    print(ret.group())
    print(ret.group("aaa"))
    # asbsadfasdfa
    # asbsadfas
    
  • 相关阅读:
    Haskell Interactive Development in Emacs
    Access Java API in Groovy Script
    手工设置Eclipse文本编辑器的配色
    Color Theme of Emacs
    Gnucash的投资记录
    Special Forms and Syntax Sugars in Clojure
    Use w3m as Web Browser
    SSE指令集加速之 I420转BGR24
    【图像处理】 增加程序速度的方法
    TBB 入门笔记
  • 原文地址:https://www.cnblogs.com/beichen123/p/11265033.html
Copyright © 2011-2022 走看看