zoukankan      html  css  js  c++  java
  • crawler_exa1

    编辑中...

    #! /usr/bin/env python
    # -*- coding:utf-8 -*-
    # Author: Tdcqma

    '''
    Web crawler, version 2017-09-20 21:16

    Downloads the first page of the NSFOCUS vulnerability list, keeps the
    entries dated today whose title mentions Apache Tomcat, WebLogic or Cisco,
    writes them to secInfo-lvmeng.txt and mails the same text through the
    163 SMTP server.
    '''

    import datetime
    import email.charset
    import re
    import smtplib
    import ssl
    import urllib.request

    url = "http://www.nsfocus.net/index.php?act=sec_bug&type_id=&os=&keyword=&page=1"

    request = urllib.request.Request(url)

    # Disable SSL certificate verification globally, for the case where an
    # https site is accessed.
    # NOTE(review): this overrides a private hook of the ssl module and turns
    # verification off for every https request made by this process; the URL
    # above is plain http, so confirm whether this is still needed.
    ssl._create_default_https_context = ssl._create_unverified_context

    # The context manager closes the HTTP response even if decoding fails.
    with urllib.request.urlopen(request) as response:
        data = response.read().decode('utf-8')

    # Keep today's date (ISO format, e.g. 2017-09-20) in the variable today.
    today = str(datetime.date.today())

    # Use a regular expression to pick the list items that start with today's
    # date. The name "pattern" avoids rebinding the builtin str.
    pattern = "<li><span>" + today + ".*"
    res = re.findall(pattern, data)

    # Build the mail header; the subject is encoded for utf-8.
    chst = email.charset.Charset(input_charset='utf-8')
    header = ("From: %s\nTo: %s\nSubject: %s\n\n" %
              ("from_mail@163.com",
               "to_mail@163.com",
               chst.header_encode("[爬虫安全通告-绿盟]")))

    # One list per product; the first element is the section title and the
    # matching vulnerability entries are appended after it.
    tomcat_sec = ["Apache Tomcat 漏洞告警(当前生产版本为7.0.68)\n\n"]
    Cisco_sec = ["Cisco 漏洞告警(当前生产版本为1.0.35)\n\n"]
    WebLogic_sec = ["WebLogic 漏洞告警(当前生产版本为10.33)\n\n"]

    # (keyword, section) pairs checked in this order; the first keyword found
    # in a title decides the section, so an entry lands in at most one list.
    categories = [
        ("Apache Tomca", tomcat_sec),
        ("WebLogic", WebLogic_sec),
        ("Cisco", Cisco_sec),
    ]

    for line in res:
        # NOTE(review): the fixed slice offsets presumably match the exact
        # markup of the list page (link path and title) -- verify against the
        # live HTML, they break silently if the page layout changes.
        sub_url = "http://www.nsfocus.net" + line[37:50]  # vulnerability link
        vul_title = line[52:-40]    # vulnerability title
        for keyword, section in categories:
            if keyword in vul_title:
                section.append(
                    today + " " + vul_title + "\n      >> " + sub_url + "\n\n")
                break

    msg = [tomcat_sec, WebLogic_sec, Cisco_sec]

    # Only sections that received at least one entry (title plus entries) are
    # reported. The text starts with a single space, as the original output did.
    secu_msg = " " + "".join(
        "".join(section) for section in msg if len(section) > 1)

    # Write the collected alerts to a local file; the with-block guarantees
    # the file is flushed and closed.
    with open("secInfo-lvmeng.txt", 'w', encoding='utf-8') as f:
        f.write(secu_msg)
    msg = secu_msg

    # Send the alerts as the mail body through the 163 SMTP server.
    # NOTE(review): the account and password are hard-coded placeholders and
    # the login is done without STARTTLS -- confirm before using real
    # credentials.
    email_con = header.encode('utf-8') + msg.encode('utf-8')
    with smtplib.SMTP("smtp.163.com") as smtp:
        smtp.login("from_mail@163.com", "from_mail_pass")
        smtp.sendmail('from_mail@163.com', 'to_mail@163.com', email_con)
        print('mail send success!')
        # Leaving the with-block issues QUIT and closes the connection.

    获取爬虫邮件截图:

     

  • 相关阅读:
    第五章 数据的共享与保护
    实验6
    实验5
    实验4 类与对象2)
    实验三 类与对象
    实验2
    2018—3-21第二章程序例题(2)
    第二章思维导图
    2018—3-18C++第二章程序例题
    汇编实验九
  • 原文地址:https://www.cnblogs.com/tdcqma/p/7565194.html
Copyright © 2011-2022 走看看