zoukankan      html  css  js  c++  java
  • day18——re正则表达式

    day18

    re模块——正则表达式

    匹配方法
    • findall():从字符串中全部查找内容,返回一个列表
    s = "meet_宝元_meet"
    print(re.findall("meet",s))
    # ['meet', 'meet']
    
    • \w:查找数字、字母(中文)、下划线
    • \W:查找非数字、字母(中文)、下划线
    s = "meet_宝元_meet123"
    print(re.findall(r"\w",s))
    print(re.findall(r"\W",s))
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', 'm', 'e', 'e', 't', '1', '2', '3']
    # []
    
    • \s:查找任意空格、换行符、制表符
    • \S:查找非任意空格、换行符、制表符
    s = "meet_ 宝元_ meet123    "
    print(re.findall(r"\s",s))
    print(re.findall(r"\S",s))
    # [' ', ' ', ' ', ' ', ' ', ' ']
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', 'm', 'e', 'e', 't', '1', '2', '3']
    
    • \d:查找数字
    • \D:查找非数字
    s = "meet_ 宝元_ 123    "
    print(re.findall(r"\d",s))
    print(re.findall(r"\D",s))
    # ['1', '2', '3']
    # ['m', 'e', 'e', 't', '_', ' ', '宝', '元', '_', ' ', ' ', ' ', ' ', ' ']
    
    • \A或^:查找是否以什么开头
    s = "meet_ 宝元_ 123    "
    print(re.findall(r"\Ameet",s))
    print(re.findall("^meet",s))
    # ['meet']
    # ['meet']
    
    • \Z或$:查找是否以什么结尾
    s = "meet_ 宝元_ 123    tt"
    print(re.findall(r"t\Z",s))
    print(re.findall("tt$",s))
    # ['t']
    # ['tt']
    
    • \n:查找换行符
    s = "meet宝元_123meet \n \t \n"
    print(re.findall("\n",s))
    # ['\n', '\n']
    
    • \t:查找制表符
    s = "meet宝元_123meet \n \t \n"
    print(re.findall("\t", s))
    # ['\t']
    
    • . :只能匹配任意一个内容(非换行符)
    s = "m\net宝元_123maet \n \t "
    print(re.findall("m.e", s))
    print(re.findall("m.e", s,re.DOTALL))
    # ['mae']
    # ['m\ne', 'mae']
    
    • []:匹配字符组中的字符
    s = "meet宝元_1A-23maet"
    print(re.findall("[a-z]", s))  # 小写的a-z
    print(re.findall("[A-Z]", s))  # 大写的A-Z
    print(re.findall("[a-zA-Z]", s))  # 大小写的26字母
    print(re.findall("[a-zA-Z0-9]", s))  # 大小写的26字母,0-9
    print(re.findall("[^0-9]", s))  # 查找非0-9的内容
    # ['m', 'e', 'e', 't', 'm', 'a', 'e', 't']
    # ['A']
    # ['m', 'e', 'e', 't', 'A', 'm', 'a', 'e', 't']
    # ['m', 'e', 'e', 't', '1', 'A', '2', '3', 'm', 'a', 'e', 't']
    # ['m', 'e', 'e', 't', '宝', '元', '_', 'A', '-', 'm', 'a', 'e', 't']
    
    • *:匹配0个或者多个 [贪婪匹配]
    s = "m mm mmmmmm"
    print(re.findall("m*", s))
    print(re.findall("m*m", s))
    # ['m', '', 'mm', '', 'mmmmmm', '']
    # ['m', 'mm', 'mmmmmm']
    
    • +:匹配1个或多个 [贪婪匹配]
    s = "m mm mmmmmm"
    print(re.findall("m+", s))
    print(re.findall("m+m", s))
    # ['m', 'mm', 'mmmmmm']
    # ['mm', 'mmmmmm']
    
    • ?:匹配0个或1个 [贪婪匹配;紧跟在其他量词之后(如 *?、+?)时才表示非贪婪]
    s = "m mm mmmmmm"
    print(re.findall("m?",s))
    # ['m', '', 'm', 'm', '', 'm', 'm', 'm', 'm', 'm', 'm', '']
    
    • 内容{n}:查找内容重复n次的
    s = "m mm mmm mmmm mmmmm mmmmmm"
    print(re.findall("m{2}", s))
    print(re.findall("m{4}", s))
    print(re.findall("m{6}", s))
    # ['mm', 'mm', 'mm', 'mm', 'mm', 'mm', 'mm', 'mm', 'mm']
    # ['mmmm', 'mmmm', 'mmmm']
    # ['mmmmmm']
    
    • 内容{n,m}:查找内容重复n-m次
    s = "m mm mmm mmmm mmmmm mmmmmm"
    print(re.findall("m{2,5}", s))
    # ['mm', 'mmm', 'mmmm', 'mmmmm', 'mmmmm']
    
    • a|b :或
    s = "m mm mmss ss s"
    print(re.findall("m|s", s))
    # ['m', 'm', 'm', 'm', 'm', 's', 's', 's', 's', 's']
    
    • ():匹配括号内的表达式,也表示一个组
    s = "meet_assdf_mssst_(.)mmns_aaamaaatmsssssssssssstt"
    print(re.findall("m(..)t", s))
    print(re.findall("m(.?)t", s))
    print(re.findall("m(..?)t", s))
    print(re.findall("m(?:..?)t",s))  # ?:m和t都连上
    print(re.findall("m(.*)t", s))
    print(re.findall("m(.*?)t", s))
    print(re.findall("m(.+)t", s))
    print(re.findall("m(.+?)t", s))
    # ['ee']
    # []
    # ['ee']
    # ['meet']
    # ['eet_assdf_mssst_(.)mmns_aaamaaatmsssssssssssst']
    # ['ee', 'sss', 'mns_aaamaaa', 'ssssssssssss']
    # ['eet_assdf_mssst_(.)mmns_aaamaaatmsssssssssssst']
    # ['ee', 'sss', 'mns_aaamaaa', 'ssssssssssss']
    
    print(re.search(r"(?P<tag_name>\w+)\w+\w+","h1hellh1").group())
    print(re.search(r"(?P<aaa>\w+)dfa","asbsadfasdfa").group("aaa"))
    print(re.search(r"(?P<cx>\w+)dfa","asbsadfasdfa").group())
    # h1hellh1
    # asbsadfas   # ?P:命名
    # asbsadfasdfa
    
    s = 'alex_sb ale123_sb wu12sir_sb wusir_sb ritian_sb 的 alex wusir '
    print(re.findall(r"\w+_sb",s))
    print(re.findall("[a-z]+_sb",s))
    # ['alex_sb', 'ale123_sb', 'wu12sir_sb', 'wusir_sb', 'ritian_sb']
    # ['alex_sb', 'sir_sb', 'wusir_sb', 'ritian_sb']
    
    
    方法
    • search():找到一个后就停止查找,从字符串中进行查找,找到后返回的是一个对象,查看元素(group())
    s = '_sb alex 123_sb wu12sir_sb wusir_sb ritian_sb 的 x wusir '
    print(re.search("ale",s)) # 如果不存在返回None
    print(re.search("ale",s).group()) # 如果不存在报错
    # <re.Match object; span=(4, 7), match='ale'>
    # ale
    
    
    • match():找到一个后就停止查找,只从字符串的开头查找,找到后返回一个对象,查看元素(group())
    s = 'ale_sb alex 123_sb wu12sir_sb wusir_sb ritian_sb 的 x wusir '
    print(re.match("ale",s))  # 开头找不到就返回None
    print(re.match("ale",s).group()) # 开头找不到就报错
    # <re.Match object; span=(0, 3), match='ale'>
    # ale
    
    
    • split():分割
    s = '_sb alex,123:sb;wu12sir#sb*wusir!sb ritian_sb 的 x wusir '
    print(re.split("[#,:!*]",s))
    # ['_sb alex', '123', 'sb;wu12sir', 'sb', 'wusir', 'sb ritian_sb 的 x wusir ']
    
    
    • sub():替换
    print(re.sub("barry","宝元",'barry是最好的讲师,barry就是一个普通老师,请不要将barry当男神对待。'))
    # 宝元是最好的讲师,宝元就是一个普通老师,请不要将宝元当男神对待。
    
    
    • compile():定义匹配规则
    obj = re.compile(r"\w")
    print(obj.findall("meet_宝元_阿萨大大"))
    print(re.findall(r"\w","meet_宝元_阿萨大大"))
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', '阿', '萨', '大', '大']
    # ['m', 'e', 'e', 't', '_', '宝', '元', '_', '阿', '萨', '大', '大']
    
    
    • finditer():返回一个迭代器的地址
    g = re.finditer(r"\w","大大撒旦")
    # print(next(g).group())
    for i in g:
        print(i.group())
    # 大
    # 大
    # 撒
    # 旦
    
    
    练习
    1、取整数
    s = "1-2*(60+(-40.35/5)-(-4*3))"
    print(re.findall(r"\d+",s))
    # ['1', '2', '60', '40', '35', '5', '4', '3']
    
    1.2 匹配所有的数字(包含小数)
    s = "1-2*(60+(-40.35/5)-(-4*3))"
    print(re.findall(r"\d+\.\d+|\d+",s))
    # ['1', '2', '60', '40.35', '5', '4', '3']
    
    匹配所有的数字(包含小数包含负号)
    s = "1-2*(60+(-40.35/5)-(-4*3))"
    print(re.findall(r"-?\d+\.\d+|-?\d+",s))
    # ['1', '-2', '60', '-40.35', '5', '-4', '3']
    
    2,匹配一段文本中的每行的邮箱
    http://blog.csdn.net/make164492212/article/details/51656638 匹配所有邮箱
    s = "http://blog.csdn.net/make164492212/article/details/51656638 匹配所有邮箱"
    print(re.findall("h(?:.+)ls/", s))
    # ['http://blog.csdn.net/make164492212/article/details/']
    
    3,匹配一段文本中的每行的时间字符串 这样的形式:'1995-04-27'
    s1 = '''
    时间就是1995-04-27,2005-04-27
    1999-04-27 老男孩教育创始人
    老男孩老师 alex 1980-04-27:1980-04-27
    2018-12-08
    '''
    print(re.findall(r"\d+-\d+-\d+",s1))
    # ['1995-04-27', '2005-04-27', '1999-04-27', '1980-04-27', '1980-04-27', '2018-12-08']
    
    4、匹配qq号:腾讯从10000开始:
    num = input("请输入你的数字:")
    print(re.findall("[1-9][0-9]{4,9}",num))
    # 请输入你的数字:1719932187171993218
    # ['1719932187', '171993218']
    
    s1 = '''
    <div id="cnblogs_post_body" class="blogpost-body"><h3><span style="font-family: 楷体;">python基础篇</span></h3>
    <p><span style="font-family: 楷体;">&nbsp; &nbsp;<strong><a href="http://www.cnblogs.com/guobaoyuan/p/6847032.html" target="_blank">python 基础知识</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/p/6627631.html" target="_blank">python 初始python</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<strong><a href="http://www.cnblogs.com/guobaoyuan/articles/7087609.html" target="_blank">python 字符编码</a></strong></strong></span></p>
    <p><span style="font-family: 楷体;"><strong><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/articles/6752157.html" target="_blank">python 类型及变量</a></strong></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/p/6847663.html" target="_blank">python 字符串详解</a></strong></span></p>
    <p><span style="font-family: 楷体;">&nbsp; &nbsp;<strong><a href="http://www.cnblogs.com/guobaoyuan/p/6850347.html" target="_blank">python 列表详解</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/p/6850496.html" target="_blank">python 数字元祖</a></strong></span></p>
    <p><span style="font-family: 楷体;">&nbsp; &nbsp;<strong><a href="http://www.cnblogs.com/guobaoyuan/p/6851820.html" target="_blank">python 字典详解</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<strong><a href="http://www.cnblogs.com/guobaoyuan/p/6852131.html" target="_blank">python 集合详解</a></strong></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/articles/7087614.html" target="_blank">python 数据类型</a>&nbsp;</strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/p/6752169.html" target="_blank">python文件操作</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/p/8149209.html" target="_blank">python 闭包</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/articles/6705714.html" target="_blank">python 函数详解</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/articles/7087616.html" target="_blank">python 函数、装饰器、内置函数</a></strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/articles/7087629.html" target="_blank">python 迭代器 生成器</a>&nbsp;&nbsp;</strong></span></p>
    <p><span style="font-family: 楷体;"><strong>&nbsp; &nbsp;<a href="http://www.cnblogs.com/guobaoyuan/articles/6757215.html" target="_blank">python匿名函数、内置函数</a></strong></span></p>
    </div>
    '''
    1,找到所有的span标签的内容
    ret = re.findall('<span(.*?)>', s1)
    print(ret)
    
    print(re.findall('<a href="(.*?)"',s1))
    
    
  • 相关阅读:
    js概念理解
    web性能瓶颈
    http协议
    jquery插件开发
    Razor(cshtml)
    从局域网内的其他Linux主机下载文件
    Java多线程学习笔记
    java中String s="abc"及String s=new String("abc")详解
    Object中toString方法
    DAO层,Service层,Controller层、View层、entity层
  • 原文地址:https://www.cnblogs.com/NiceSnake/p/11284928.html
Copyright © 2011-2022 走看看