zoukankan      html  css  js  c++  java
  • 网页提取所有邮箱

    import re
    from urllib import request
    
    # 挖掘邮箱
    def getEmailsByLine(url):
        """Print the e-mail addresses found on each line of the resource at *url*.

        The response is streamed line by line. For every line that contains at
        least one address, the list of addresses on that line is printed.
        Nothing is printed for lines without a match.

        Args:
            url: A URL string (or Request object) accepted by
                ``urllib.request.urlopen``.

        Raises:
            urllib.error.URLError: If the resource cannot be opened.
        """
        # The domain is written as dot-separated labels with an escaped dot.
        # A bare "." matches any character, which made text such as
        # "user@host word" count as an address; allowing "." inside the last
        # character class also swallowed a sentence-ending period.
        emailregex = re.compile(
            r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+",
            re.IGNORECASE,  # match case-insensitively
        )
        # The context manager closes the connection even if decoding or
        # printing raises part-way through the stream.
        with request.urlopen(url) as response:
            for line in response:
                # Address characters are all ASCII, so replacing undecodable
                # bytes cannot hide a match but does keep one bad byte from
                # aborting the whole scan.
                text = line.decode("utf-8", errors="replace")
                emaillist = emailregex.findall(text)
                if emaillist:  # skip lines with no addresses
                    print(emaillist)
    
    def getEmailsByAll(url):
        """Print every e-mail address found in the resource at *url*.

        The whole response body is read and decoded once, then searched in a
        single pass. If at least one address is found, the list of all
        addresses (in order of appearance, duplicates kept) is printed once.

        Args:
            url: A URL string (or Request object) accepted by
                ``urllib.request.urlopen``.

        Raises:
            urllib.error.URLError: If the resource cannot be opened.
        """
        # The domain is written as dot-separated labels with an escaped dot.
        # A bare "." matches any character, which made text such as
        # "user@host word" count as an address; allowing "." inside the last
        # character class also swallowed a sentence-ending period.
        emailregex = re.compile(
            r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+",
            re.IGNORECASE,  # match case-insensitively
        )
        # The context manager closes the connection once the body is read.
        with request.urlopen(url) as response:
            # Address characters are all ASCII, so replacing undecodable bytes
            # cannot hide a match but does keep one bad byte from aborting
            # the whole scan.
            html = response.read().decode("utf-8", errors="replace")
        emaillist = emailregex.findall(html)
        if emaillist:  # print nothing when the page has no addresses
            print(emaillist)
        
        
    # Script entry point: runs only when the file is executed directly,
    # not when it is imported as a module.
    if __name__ == "__main__":
        # Hard-coded sample page to scan for e-mail addresses.
        url = "http://bbs.tianya.cn/post-140-393973-1.shtml"
        # Alternative: print matches line by line as the page streams in.
        # getEmailsByLine(url)
        # Read the whole page once and print all matches as a single list.
        getEmailsByAll(url)
    
    
  • 相关阅读:
    colormap
    tensorflow4
    tensorflow3
    attention 机制
    tensorflow2
    Android 再谈handler
    Android表格布局之设置边框
    Android AsyncTask异步加载WebAPI
    Android JPush极光推送应用
    Android规划周期任务
  • 原文地址:https://www.cnblogs.com/zxfei/p/12046104.html
Copyright © 2011-2022 走看看