zoukankan      html  css  js  c++  java
  • Python中re模块详细介绍

    正则的介绍及应用实例详解

    """
    1、什么是正则
        正则就是用一系列具有特殊含义的字符组成一套规则,该规则用来描述具有某一特征的字符串,
        正则就是用来去一个大的字符串中匹配出符合规则的子字符串
    
    2、为什么要用正则
        1、用户注册
        2、爬虫程序
    
    3、如何用正则
    
    
    """
    import re
    # \w matches one word character (letter, digit or underscore);
    # \W matches one character that is NOT a word character.
    print(re.findall(r'\w', 'hello 123_ */-='))
    print(re.findall(r'\W', 'hello 123_ */-='))

    # \s matches one whitespace character (newline, tab and space all count);
    # \S matches one non-whitespace character.
    print(re.findall(r'\s', 'hell\no 12\t3_ */-='))
    print(re.findall(r'\S', 'hell\no 12\t3_ */-='))

    # \d matches one digit; \D matches one non-digit character.
    print(re.findall(r'\d', 'hell\no 12\t3_ */-='))
    print(re.findall(r'\D', 'hell\no 12\t3_ */-='))

    # \n and \t match a newline and a tab; an ordinary character such as
    # 'l' simply matches itself.
    print(re.findall(r'\n', 'hell\no 12\t3_ */-='))
    print(re.findall(r'\t', 'hell\no 12\t3_ */-='))
    print(re.findall('l', 'hell\no 12\t3_ */-='))
    
    # A pattern made only of ordinary characters matches that exact text
    # wherever it occurs in the subject.
    greeting = 'my name is egon,egon is beautiful'
    print(re.findall('egon', greeting))

    # ^ pins the match to the start of the string; $ pins it to the end.
    for anchored, sample in (
        ('^egon', 'egon my name is egon,egon is beautiful'),
        ('egon$', 'egon my name is egon,egon is beautifulegon1'),
    ):
        print(re.findall(anchored, sample))
    # The last call prints [] because its subject ends in "egon1", not "egon".
    
    
    # Wildcards and repetition
    # . matches any single character except a newline; several dots in a row
    # match that many arbitrary characters.
    print(re.findall('a.c', 'abc a1c aac asd aaaaac a*c a+c abasd'))  # ['abc', 'a1c', 'aac', 'aac', 'a*c', 'a+c']
    # With re.DOTALL the dot matches every character, so 'a\nc' is found too.
    print(re.findall('a.c', 'abc a1c aac a\nc asd aaaaac a*c a+c abasd', re.DOTALL))
    print(re.findall('a..c', 'abc a1c aac a\nc asd aaaaac a*c a+c abasd', re.DOTALL))
    
    # [...] matches exactly one character taken from the set inside the
    # brackets, which makes it a narrower alternative to the dot.
    print(re.findall('a..c', 'abc a1 c aac asd aaaaac a *c a+c abasd ='))

    # Every pattern below is run against the same subject so the results can
    # be compared side by side.
    candidates = 'abc a1 c aac aAc aBc asd aaaaac a-c a/c a *c a+c abasd = a1c a2c'
    for set_pattern in (
        'a.c',            # baseline: any character between 'a' and 'c'
        'a[a-z]c',        # one lowercase letter
        'a[A-Z]c',        # one uppercase letter
        'a[-+*/]c',       # one arithmetic operator ('-' listed first, so it is literal)
        'a[a-z][a-z]c',   # two lowercase letters
        'a[^a-z]c',       # one character that is NOT a lowercase letter
    ):
        print(re.findall(set_pattern, candidates))
    
    
    # Quantifiers must follow another token and state how many times that
    # token (here the 'b') may repeat. Each shorthand is printed right before
    # its equivalent {n,m} spelling.
    repeated = 'a ab abbb abbbb a1bbbb a-123'
    for shorthand, braces in (
        ('ab*', 'ab{0,}'),   # zero or more  -> ['a', 'ab', 'abbb', 'abbbb', 'a', 'a']
        ('ab?', 'ab{0,1}'),  # zero or one   -> ['a', 'ab', 'ab', 'ab', 'a', 'a']
        ('ab+', 'ab{1,}'),   # one or more   -> ['ab', 'abbb', 'abbbb']
    ):
        print(re.findall(shorthand, repeated))
        print(re.findall(braces, repeated))

    # {n,m} bounds the repeat count explicitly, so it can stand in for *, ? and +.
    print(re.findall('ab{1,3}', repeated))  # ['ab', 'abbb', 'abbb']
    
    
    # .* is greedy: it stretches to the LAST 'c' it can reach, so the result
    # is one long, imprecise match.
    mixed = 'ab123adfc1134124123adasfc123123'
    print(re.findall('a.*c', mixed))

    # .*? is non-greedy: it stops at the FIRST 'c'. This is the recommended
    # form and is used a lot in web scraping.
    print(re.findall('a.*?c', mixed))

    # (...) is a group. The parentheses do not change what the pattern
    # matches, but findall returns only the text captured inside the group.
    # Combined with a non-greedy match this is handy for pulling out URLs.
    assignment = 'expression="1+2+3/4*5" egon="beautiful"'
    print(re.findall('expression="(.*?)"', assignment))

    markup = '<p>段落</p><a href="https://www.sb.com">点我啊</a><h1>标题</h1><a href="https://www.sb.com">点我啊</a>'
    print(re.findall('href="(.*?)"', markup))
    
    
    
    # | joins two alternative patterns; a match succeeds as soon as either
    # side matches.
    either_letter = re.findall('a|b', 'ab123abasdfaf')
    print(either_letter)

    # (?:...) groups the alternatives without capturing them, so findall
    # returns the whole match ("companies" / "company") instead of just the
    # text inside the parentheses.
    headline = 'Too many companies have gone bankrupt, and the next one is my company'
    print(re.findall('compan(?:ies|y)', headline))
    
    
    
    # Matching a literal backslash: the regex engine needs the two characters
    # \\ to mean one real backslash. In a raw string that is written as-is;
    # in a normal string every backslash must be doubled again, giving four.
    # Both calls search for the three-character text a\c.
    print(re.findall(r'a\\c', r'a\c a1c aAc aac'))
    print(re.findall('a\\\\c', 'a\\c a1c aAc aac'))
    
    # findall returns every match (only the group text when the pattern has a
    # group); search returns a match object for the first hit, or None.
    remark = 'alex is SB,alex is bigSB'
    print(re.findall('ale(x)', remark))
    print(re.search('alex', remark))
    first_hit = re.search('ale(x)', remark)
    print(first_hit.group())  # group() with no argument is the whole match
    print(re.search('abcdefg', remark))

    # re.match only tries at the very start of the string, exactly like
    # re.search with a leading ^; both fail here because of the "123" prefix.
    prefixed = '123alex is SB,alex is bigSB'
    print(re.search('^alex', prefixed))
    print(re.match('alex', prefixed))
    
    # str.split cuts on one fixed separator ...
    l = str.split('egon:18:male', ':')
    print(l)
    # ... while re.split cuts on any character in the set: space, ':', '/' or '-'.
    l1 = re.compile('[ :/-]').split('a-b/c egon:18:male xxx')
    print(l1)

    # re.sub replaces every match of the pattern (one or more lowercase
    # letters followed by "xx") with 'yxp'.
    print(re.compile('[a-z]+xx').sub('yxp', 'lxx is good,sb is lllxx wxx is good cxx is good'))

    # A pattern can be compiled once and reused for several searches.
    pattern = re.compile('alex')
    print(re.findall(pattern, 'alex is SB,alex is bigSB'))
    print(re.search(pattern, 'alex is SB,alex is bigSB'))
  • 相关阅读:
    Scrapy+Scrapy-redis+Scrapyd+Gerapy 分布式爬虫框架整合
    centos7 安装软件指南
    Kafka--消费者
    Kafka--生产者
    Kafka--初识Kafka
    Kafka--Kafka简述
    NetWork--记一次Http和TLS抓包
    JVM--a == (a = b)基于栈的解释器执行过程
    Java容器--Queue
    Idea--使用Idea调试设置
  • 原文地址:https://www.cnblogs.com/sui776265233/p/9210545.html
Copyright © 2011-2022 走看看