# 给cc爬取一下百度的榜单 (Scrape Baidu's hot-search list for cc.)
# Scrape the hot-search links shown on Baidu's home page, store them in an
# Excel workbook, and e-mail the workbook (plus this script) through QQ Mail.
#
# Pipeline:
#   1. fetch_hot_links()  - download https://www.baidu.com and extract the links
#   2. save_to_excel()    - write one (text, href) row per link to an .xlsx file
#   3. sendQQ()           - send the .xlsx and the .py file as attachments

import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText  # kept from the original import list; no longer used below
from email.utils import formataddr
from pathlib import Path

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook

BAIDU_URL = "https://www.baidu.com"
HOT_WRAPPER_ID = "hotsearch-content-wrapper"
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block indefinitely
XLSX_PATH = "baidu_hot_six.xlsx"
SCRIPT_PATH = "get_baidu_six_hot.py"  # resolved relative to the current working directory

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/78.0.3904.108 Safari/537.36'
}

# Mail settings. The values below are placeholders; never commit real credentials.
mail_host = 'smtp.qq.com'
mail_port = 465
login_sender = 'XXX@qq.com'
# NOTE(review): QQ Mail presumably expects an SMTP authorization code here
# rather than the account password - confirm before use.
login_pass = 'XXX'
sendName = "XX@qq.com"
resName = "XXX@qq.com"  # not used anywhere in this script
title = "get_baidu_six_hot"


def fetch_hot_links(url=BAIDU_URL):
    """Return ``[text, href]`` pairs for every link in the hot-search panel.

    Args:
        url: Page to download. Defaults to Baidu's home page.

    Returns:
        A list of two-item lists: the link text and its ``href`` attribute
        (``None`` when the anchor has no ``href``).

    Raises:
        requests.RequestException: The download failed or returned an HTTP
            error status.
        RuntimeError: The page has no element with id ``HOT_WRAPPER_ID``.
    """
    # Step 1: open Baidu and get the HTML.
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    html = response.content.decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")

    # Step 2: extract the hot-search links from the HTML.
    wrapper = soup.find(id=HOT_WRAPPER_ID)
    if wrapper is None:
        # find() returns None when the element is absent; fail with a clear
        # message instead of an AttributeError on None.
        raise RuntimeError(
            "element with id %r not found in %s" % (HOT_WRAPPER_ID, url)
        )
    return [[link.get_text(), link.get("href")] for link in wrapper.find_all("a")]


def save_to_excel(rows, path=XLSX_PATH):
    """Write *rows* to sheet ``baidu`` of a new workbook saved at *path*.

    Args:
        rows: Iterable of row sequences; each becomes one worksheet row.
        path: Destination ``.xlsx`` file.
    """
    # Step 3: save to Excel.
    book = Workbook()
    sheet = book.create_sheet("baidu", 0)
    for row in rows:
        sheet.append(row)
    book.save(path)


def _build_attachment(path):
    """Return a base64-encoded ``application/octet-stream`` part for *path*."""
    file_path = Path(path)
    # read_bytes() opens and closes the file, so no handle is leaked.
    part = MIMEApplication(file_path.read_bytes())
    # add_header quotes the filename parameter correctly.
    part.add_header("Content-Disposition", "attachment", filename=file_path.name)
    return part


def sendQQ(receivers):
    """E-mail the Excel workbook and this script to *receivers* via QQ Mail.

    Args:
        receivers: A list of recipient addresses. A single address given as
            a plain string is also accepted.

    Raises:
        OSError: An attachment file cannot be read. Network-level failures
            outside ``smtplib.SMTPException`` also propagate.

    SMTP protocol errors are caught and reported on stdout, not raised.
    """
    # Step 4: send the e-mail.
    if isinstance(receivers, str):
        # Joining a bare string would split it into single characters.
        receivers = [receivers]

    msg = MIMEMultipart()  # default subtype "mixed": a message with attachments
    msg.attach(_build_attachment(XLSX_PATH))    # Excel attachment
    msg.attach(_build_attachment(SCRIPT_PATH))  # .py attachment
    msg['From'] = formataddr((sendName, login_sender))
    msg['To'] = ", ".join(receivers)
    # Subject of the e-mail.
    msg['Subject'] = title

    try:
        # The context manager issues QUIT and closes the connection even when
        # login() or sendmail() raises.
        with smtplib.SMTP_SSL(mail_host, mail_port) as server:
            server.login(login_sender, login_pass)
            server.sendmail(login_sender, receivers, msg.as_string())
        print("已发送到" + ",".join(receivers) + "的邮箱中!")
    except smtplib.SMTPException as exc:
        # Keep the original best-effort behaviour but surface the reason.
        print("发送邮箱失败!", exc)


def main():
    """Run the full pipeline: scrape, save to Excel, then send the e-mail."""
    save_to_excel(fetch_hot_links())
    sendQQ(['XXXX@qq.com', 'XXX@qq.com'])


if __name__ == "__main__":
    main()