zoukankan      html  css  js  c++  java
  • 给cc爬取一下百度的榜单

    给cc爬取一下百度的榜单

    import requests
    from bs4 import BeautifulSoup
    from openpyxl import Workbook
    import smtplib
    from email.mime.text import MIMEText
    from email.mime.multipart import MIMEMultipart
    from email.utils import formataddr
    
    # Step 1: open Baidu and fetch the page HTML.

    headers = {
        "User-Agent":
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/78.0.3904.108 Safari/537.36'
    }

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get("https://www.baidu.com", headers=headers, timeout=10)
    # Fail early on an HTTP error status instead of parsing an error page.
    response.raise_for_status()
    html = response.content.decode("utf-8")
    bs = BeautifulSoup(html, "html.parser")

    # Step 2: extract the hot-search entries (link text and href) from the HTML.

    hot_wrapper = bs.find(id="hotsearch-content-wrapper")
    if hot_wrapper is None:
        # find() returns None when the element is absent; report that clearly
        # rather than failing with an AttributeError on the next call.
        raise RuntimeError('element "hotsearch-content-wrapper" not found in the Baidu page')
    six_hot_alink_list = hot_wrapper.find_all("a")
    my_hot_list = [[item.get_text(), item.get("href")] for item in six_hot_alink_list]


    # Step 3: save the entries to an Excel workbook, one row per entry.

    book = Workbook()
    sheet = book.create_sheet("baidu", 0)

    for item in my_hot_list:
        sheet.append(item)

    book.save("baidu_hot_six.xlsx")


    # Step 4: e-mail settings used when sending the results.

    mail_host = 'smtp.qq.com'
    mail_port = '465'
    login_sender = 'XXX@qq.com'
    # Placeholder credential: replace before running and keep real secrets out of the source.
    login_pass = 'XXX'
    # NOTE: this name shadows the builtin ``str``; it is kept unchanged because
    # other code in this file may read it as a module-level setting.
    str = "get_baidu_six_hot"
    sendName = "XX@qq.com"
    resName = "XXX@qq.com"
    title = "get_baidu_six_hot"
    
    def sendQQ(receivers):
        """Email the generated spreadsheet and this script as attachments.

        Builds a multipart message containing ``baidu_hot_six.xlsx`` and
        ``get_baidu_six_hot.py`` (both opened relative to the current working
        directory) and sends it over SMTP-over-SSL using the module-level
        ``mail_host``, ``mail_port``, ``login_sender`` and ``login_pass``
        settings. The sender display name and subject come from ``sendName``
        and ``title``.

        Args:
            receivers: List of recipient e-mail addresses.

        Prints a success message after sending, or a failure message when an
        ``smtplib.SMTPException`` is raised. Any other error (for example a
        missing attachment file or a connection-level ``OSError``) propagates
        to the caller.
        """
        # Imported locally so the function works without touching the file's import block.
        from email.mime.application import MIMEApplication

        # 'mixed' is the multipart subtype for a message carrying independent attachments.
        msg = MIMEMultipart('mixed')

        # Attach the Excel file and the script; both are binary-safe as
        # application/octet-stream parts (base64-encoded by MIMEApplication).
        for attachment_name in ('baidu_hot_six.xlsx', 'get_baidu_six_hot.py'):
            with open(attachment_name, 'rb') as attachment_file:
                part = MIMEApplication(attachment_file.read())
            # add_header quotes the filename parameter correctly.
            part.add_header('Content-Disposition', 'attachment', filename=attachment_name)
            msg.attach(part)

        msg['From'] = formataddr((sendName, login_sender))
        msg['To'] = ", ".join(receivers)
        # Subject line of the e-mail.
        msg['Subject'] = title

        try:
            # The context manager issues QUIT and closes the connection even
            # when login or sending fails.
            with smtplib.SMTP_SSL(mail_host, int(mail_port)) as server:
                server.login(login_sender, login_pass)
                server.sendmail(login_sender, receivers, msg.as_string())
        except smtplib.SMTPException:
            print("发送邮箱失败!")
        else:
            print("已发送到" + ", ".join(receivers) + "的邮箱中!")
    
    
    # Send the report e-mail to the listed recipients.
    sendQQ(receivers=['XXXX@qq.com', 'XXX@qq.com'])
  • 相关阅读:
    python 线程Queue 用法代码展示
    Python中的join()函数的用法
    python 中爬虫 content和text的区别
    免费代理ip爬虫分享
    django数据库的表已迁移的不能重新迁移的解决办法
    RuntimeError: Model class app_anme.models.User doesn't declare an explicit app_label and isn't in an application in INSTALLED_APPS.---python学习错误记录
    MYSQL查询操作 详细
    mysql数据库的基本操作命令总结
    http短连接与长连接简介
    浅谈http协议
  • 原文地址:https://www.cnblogs.com/andy0816/p/14769259.html
Copyright © 2011-2022 走看看