zoukankan      html  css  js  c++  java
  • 【python】python每天抓取一篇英语美文,发送到邮箱

    import requests,os,time
    from bs4 import BeautifulSoup
    import smtplib
    from email.mime.text import MIMEText
    
    # HTTP request headers passed to requests.get; the User-Agent string
    # identifies the client as a desktop Chrome browser.
    header = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
            ' AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
        ),
    }
    # Walk the index pages and hand the first not-yet-recorded essay to downloadEssay
    def findEssay(rootUrl,pages,recordsPath):
        """Find the first essay link that is not in *pages* and process it.

        Starting at *rootUrl*, every ``.node_list a`` link on the page is
        checked against *pages*.  The first unseen link is passed to
        ``downloadEssay``, added to *pages*, appended to the records file, and
        the function returns.  If every link on a page is already recorded,
        the second-to-last ``.page a`` link is followed as the next page.

        Args:
            rootUrl: URL of the index page to start from.
            pages: set of essay hrefs already handled; mutated in place.
            recordsPath: path of the text file that stores one href per line.

        Any exception is caught and reported with ``print``; nothing is raised.
        """
        from urllib.parse import urljoin

        pageUrl = rootUrl
        visited = set()  # index pages already scanned, to stop pagination loops
        try:
            while pageUrl not in visited:
                visited.add(pageUrl)
                response = requests.get(pageUrl, headers=header, timeout=30)
                # Feed the raw bytes to the parser: re-encoding response.text
                # fails when the server reports no encoding.
                soup = BeautifulSoup(response.content, 'html.parser')
                # Two levels up from the index page; downloadEssay prepends
                # this to the essay href.
                dirUrl = os.path.dirname(os.path.dirname(pageUrl))
                for essayTag in soup.select('.node_list a'):
                    essayUrl = essayTag.get('href')
                    if not essayUrl or essayUrl in pages:
                        continue
                    downloadEssay(dirUrl, essayUrl, essayTag.text)
                    pages.add(essayUrl)
                    with open(recordsPath, 'a') as attach:
                        attach.write(str(essayUrl) + '\n')
                    print('写入记录成功')
                    return
                pagerLinks = soup.select('.page a')
                if len(pagerLinks) < 2:
                    # No pagination link to follow: every visible essay is recorded.
                    print('没有找到未发送的文章')
                    return
                # The second-to-last pager link is treated as "next page".
                # urljoin builds a valid URL on every platform, unlike os.path.join.
                pageUrl = urljoin(pageUrl, pagerLinks[-2].get('href'))
        except Exception as e:
            print('根链接出现错误'+str(e))
    # Download one essay page and e-mail its paragraphs
    def downloadEssay(dirUrl,essayUrl,essayName):
        """Fetch an essay, extract its paragraphs and pass them to ``mailTo``.

        Args:
            dirUrl: URL prefix that is concatenated with *essayUrl*.
            essayUrl: href of the essay as found on the index page.
            essayName: essay title, forwarded to ``mailTo``.

        On any exception the essay is marked as handled and ``findEssay`` is
        called again so that another essay is tried.  NOTE: this failure path
        reads the module-level names ``pages``, ``recordsPath`` and ``rootUrl``
        that the ``__main__`` section defines, so it only works when the file
        is run as a script.
        """
        try:
            # Send the same browser-like headers as the index-page request.
            response = requests.get(dirUrl + essayUrl, headers=header, timeout=30)
            # Parse the raw bytes; re-encoding response.text fails when the
            # server reports no encoding.
            soup = BeautifulSoup(response.content, 'html.parser')
            paras = soup.select('#dede_content div')
            mailTo(essayName,paras)
        except Exception as e:
            print('下载文章失败 '+str(e))
            # Record the failing essay in memory as well as on disk; otherwise
            # the retry below would pick the same essay again and recurse forever.
            pages.add(essayUrl)
            with open(recordsPath, 'a+') as attach:
                attach.write(str(essayUrl) + '\n')
            findEssay(rootUrl, pages, recordsPath)
    # Build an HTML e-mail from the essay paragraphs and send it over SMTP
    def mailTo(essayName,paras):
        """Send the essay as an HTML e-mail through smtp.163.com.

        Args:
            essayName: essay title, used as the ``<h1>`` heading.
            paras: iterable of parsed tags; each tag's ``getText()`` becomes
                one ``<p>`` paragraph.

        The sender password is read interactively with ``input``.
        ``smtplib.SMTPException`` is caught and reported with ``print``; other
        errors (for example connection failures) propagate to the caller.
        """
        from html import escape

        # The text comes from a scraped page, so escape it before embedding it
        # in our own markup.
        content = ''.join(
            '<p>' + escape(para.getText()) + '</p>' for para in paras
        )
        # Sender address
        sender = '发件人@163.com'
        # Sender password
        pwd = input('Password: ')
        receivers = ['收件人1@qq.com','收件人2@qq.com']  # addresses that receive the mail

        # Body, recipients and sender of the message
        mail_message = ('<html><body><h1>' + escape(essayName) + '</h1>'
                        '<article>' + content + '</article>'
                        '</body></html>')
        message = MIMEText(mail_message, 'html', 'utf-8')  # HTML mail
        # Address lists in mail headers are comma-separated, not ';'-separated.
        message['To'] = ', '.join(receivers)
        message['From'] = sender

        # Subject: fixed prefix plus today's date as yymmdd
        subject = '今日美文' + time.strftime('%y%m%d')
        message['Subject'] = subject

        try:
            # SSL connection to the 163 mail server on port 465; the context
            # manager closes the connection even when login or sending fails.
            with smtplib.SMTP_SSL('smtp.163.com', 465) as smtpObj:
                smtpObj.login(sender, pwd)
                smtpObj.sendmail(sender, receivers, message.as_string())
                print('邮件发送成功!')
        except smtplib.SMTPException as e:
            print('邮件发送失败,失败原因:', e)
    
    if __name__ == '__main__':
        # Raw string: '\e' is not a valid escape sequence, and newer Python
        # versions warn about it in a normal string literal.
        recordsPath = r'C:\enEssaysToLH.txt'
        # Create the records file on first run so it can be read below.
        if not os.path.exists(recordsPath):
            with open(recordsPath,'w'):
                print('创建记录文件')
        # Load the hrefs handled on earlier runs, one per line; blank lines
        # are ignored.
        with open(recordsPath,'r') as readFile:
            pages = {line.rstrip() for line in readFile if line.strip()}
        # Index page where the search for an unsent essay starts.
        # recordsPath, pages and rootUrl stay module-level names because
        # downloadEssay reads them on its failure path.
        rootUrl = 'http://www.enread.com/essays/index.html'
        findEssay(rootUrl,pages,recordsPath)

    发送了很多次邮件,每次用英文做主题(subject)时,都会出现 554 错误;把邮件主题统一换成中文后,同一篇文章就能发送出去。这里可能涉及编码问题,待以后研究。

  • 相关阅读:
    设计模式:单一职责原则
    多线程的创建
    Android开发基础(java)14
    面向对象编程的思想(6)
    面向对象编程的思想(5)未完成
    面向对象编程的思想(4)
    面向对象编程的思想(3)
    面向对象编程的思想(2)
    面向对象编程的思想(1)
    GDB 命令详细解释
  • 原文地址:https://www.cnblogs.com/to-red/p/9774607.html
Copyright © 2011-2022 走看看