一共两个脚本:
第一个是【借书完毕以及借书信息有变更(续借等)】的时候需要执行的脚本:实现模拟登录,同时把最新的借书信息下载到本地文本。之所以没有让每天自动执行的脚本每次都重新爬取,是因为每爬取一遍就需要输入一遍验证码,自动检查也就没有意义了;现在只需在借书信息变更时输入一次验证码,把信息导出到文本上,以后直接读取就行了。
第二个脚本需要添加到启动计划中,每天自动执行(每天自动检查有没有超期书籍)。
# -*- coding: utf-8 -*-
"""Log in to the library site once and cache the current loans locally.

Run this script by hand after borrowing or renewing books.  It performs one
captcha-protected login, scrapes the loan list, appends every loan to a local
text file and e-mails a reminder for each loan that is due in fewer than
``REMIND_WITHIN_DAYS`` days (or is already overdue).
"""
__author__ = 'Oscar_Yang'

import datetime
import os
import random
import re
import smtplib
import subprocess
import sys
import time
from email.header import Header
from email.mime.text import MIMEText

import pytesseract
import requests
from bs4 import BeautifulSoup
from PIL import Image

# A reminder is sent when fewer than this many days remain before the due date.
REMIND_WITHIN_DAYS = 7

# Shared requests.Session; created by main() and read by login().
session = None


def login(name, password):
    """Log in with a manually typed captcha and return the loan-list page.

    Reads the module-level ``session``, which ``main`` must have created
    first; the captcha download, the login POST and the loan-list request
    all go through that one session.

    Args:
        name: Value sent as the ``number`` field of the login form.
        password: Value sent as the ``passwd`` field of the login form.

    Returns:
        The ``book_lst.php`` page parsed into a ``BeautifulSoup`` tree.
    """
    # A random number is appended as the query string of the captcha link.
    url = 'http://202.206.242.99//reader/captcha.php?' + str(random.random())

    with open('captcha.png', 'wb') as f:
        f.write(session.get(url).content)

    # Show the captcha in the platform's default image viewer so the user
    # does not have to look for the file; the command differs per OS.
    if sys.platform.find('darwin') >= 0:
        subprocess.call(['open', 'captcha.png'])
    elif sys.platform.find('linux') >= 0:
        subprocess.call(['xdg-open', 'captcha.png'])
    else:
        os.startfile('captcha.png')

    # The captcha is typed in by hand (OCR with pytesseract is not used).
    input_captcha = input('请输入验证码:')

    # The four fields of the login form.
    post_data = {
        'number': name,
        'passwd': password,
        'captcha': input_captcha,
        'select': 'cert_no'
    }
    login_url = 'http://202.206.242.99/reader/redr_verify.php'
    session.post(login_url, data=post_data)

    book_hist_url = 'http://202.206.242.99/reader/book_lst.php'
    content = session.get(book_hist_url).content.decode('utf-8')
    return BeautifulSoup(content, "lxml")


def get_data(soup):
    """Extract title, due date and detail link of every loan on the page.

    Args:
        soup: Parsed loan-list page as returned by ``login``.

    Returns:
        A list of dicts with the keys ``title``, ``deadline`` and
        ``item_url``, one per loan.
    """
    links = soup.select("a.blue")
    # The first <font> element is skipped; the remaining ones are paired
    # with the book links by position.
    deadlines = soup.select("font")[1:]
    return [
        {
            "title": link.text,
            "deadline": deadline.text.strip(),
            "item_url": "http://202.206.242.99/" + link["href"],
        }
        for link, deadline in zip(links, deadlines)
    ]


def get_detail_data(item_url):
    """Print the href values found in the ``sharing_zy`` elements of a page.

    Args:
        item_url: URL of a book detail page.
    """
    res = requests.get(item_url)
    res.encoding = "utf8"
    soup = BeautifulSoup(res.text, "lxml")
    intro = soup.find_all(class_="sharing_zy")
    print(re.findall(r'href="(.*?)"', str(intro)))


def send_email(deadline, title, item_url, day, name):
    """Send one reminder e-mail about a single loan.

    Args:
        deadline: Due date text shown in the message.
        title: Book title shown in the message.
        item_url: Link to the book's detail page, appended to the message.
        day: Number of days left until the due date.
        name: User name used in the From/To display headers.

    Raises:
        smtplib.SMTPException: If logging in or sending fails.
        OSError: If the SMTP server cannot be reached.
    """
    from_addr = '###'
    password = '###'
    to_addr = '###'
    smtp_server = '###'

    body = (
        'hello: 《{}》这本书还有{}天到期,deadline为{},尽快去还吧! '
        '注意:为了防止被识别为垃圾邮件,以下内容为自动添加,同时供您查看! '
        '点击链接查看图书详情{}'
    ).format(title, day, deadline, item_url)
    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = Header("{}请注意借书到期通知".format(name), 'utf-8')
    msg['To'] = Header("{}同学".format(name), 'utf-8')
    msg['Subject'] = Header('hello', 'utf-8')

    # The context manager closes the connection even when login/send fails.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())


def data1file(data):
    """Append one loan record to the local data file.

    Args:
        data: The record to store; it is written as ``str(data)``.
    """
    # NOTE(review): this path contains no separators - it looks as if the
    # backslashes of C:\Users\Oscar\Desktop\ were lost.  It is left
    # unchanged because the daily reminder script has to open the same path.
    path = r"C:UsersOscarDesktop数据.txt"
    with open(path, "a", encoding="utf-8") as file:
        # Each record goes on its own line, preceded by a newline, so the
        # daily reminder script can read the file back record by record.
        file.write("\n" + str(data))


def _days_until_due(deadline, today=None):
    """Return the days from ``today`` to ``deadline`` (negative if overdue).

    Args:
        deadline: Due date as ``YYYY-MM-DD`` text.
        today: Reference date; defaults to the current local date.

    Raises:
        ValueError: If ``deadline`` is not a ``YYYY-MM-DD`` date.
    """
    if today is None:
        today = datetime.date.today()
    due = datetime.datetime.strptime(deadline.strip(), "%Y-%m-%d").date()
    # Real calendar subtraction: correct across month lengths and year ends.
    return (due - today).days


def main():
    """Log in, store every current loan and mail reminders for urgent ones."""
    global session
    session = requests.Session()
    session.headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.82 Safari/537.36'
    }

    name = input("请输入用户名:")
    password = input("请输入密码:")
    soup = login(name, password)

    today = datetime.date.today()
    for base_data in get_data(soup):
        deadline = base_data["deadline"]    # due date
        title = base_data["title"]          # book title
        item_url = base_data["item_url"]    # detail page on the library site
        data1file({
            "deadline": deadline,
            "title": title,
            "item_url": item_url,
            "name": name
        })

        try:
            day = _days_until_due(deadline, today)
        except ValueError:
            # One unreadable date must not stop the remaining loans.
            print("skipping {!r}: unrecognised due date {!r}".format(
                title, deadline), file=sys.stderr)
            continue
        if day < REMIND_WITHIN_DAYS:
            send_email(deadline, title, item_url, day, name)


if __name__ == '__main__':
    main()
"""Daily check of the cached loan records; mails a reminder for urgent loans.

Meant to be run automatically once a day.  It does not contact the library
site: it reads the records that the login script stored in the local data
file and sends one e-mail for every loan that is due in fewer than
``REMIND_WITHIN_DAYS`` days (or is already overdue).
"""
import ast
import datetime
import os
import random
import re
import smtplib
import subprocess
import sys
import time
from email.header import Header
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup

# A reminder is sent when fewer than this many days remain before the due date.
REMIND_WITHIN_DAYS = 7

# Keys every stored loan record must contain.
_FIELDS = ("deadline", "title", "item_url", "name")

# One stored record: a dict literal without nested braces.
_RECORD_RE = re.compile(r"\{[^{}]*\}")


def send_email(deadline, title, item_url, day, name):
    """Send one reminder e-mail about a single loan.

    Args:
        deadline: Due date text shown in the message.
        title: Book title shown in the message.
        item_url: Link to the book's detail page, appended to the message.
        day: Number of days left until the due date.
        name: User name used in the From/To display headers.

    Raises:
        smtplib.SMTPException: If logging in or sending fails.
        OSError: If the SMTP server cannot be reached.
    """
    from_addr = '###@qq.com'
    password = '###'
    to_addr = '###@qq.com'
    smtp_server = 'smtp.qq.com'

    body = (
        'hello: 《{}》这本书还有{}天到期,deadline为{},尽快去还吧! '
        '注意:为了防止被识别为垃圾邮件,以下内容为自动添加,同时供您查看! '
        '点击链接查看图书详情{}'
    ).format(title, day, deadline, item_url)
    msg = MIMEText(body, 'plain', 'utf-8')
    msg['From'] = Header("还书通知:{}请注意".format(name), 'utf-8')
    msg['To'] = Header("{}".format(name), 'utf-8')
    msg['Subject'] = Header('hello', 'utf-8')

    # The context manager closes the connection even when login/send fails.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())


def _literal_dict(text):
    """Return ``text`` parsed as a dict literal, or None if it is not one."""
    try:
        value = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        return None
    return value if isinstance(value, dict) else None


def _parse_records(text):
    """Return every loan record (dict) found in the data-file contents.

    Blank and unparsable lines are ignored.  A line normally holds exactly
    one record; a line holding several records one after another is split
    into its individual dict literals.
    """
    records = []
    for line in text.splitlines():
        line = line.strip()
        if not line:
            continue
        record = _literal_dict(line)
        if record is not None:
            records.append(record)
            continue
        # Not a single dict literal: look for several records on the line.
        for chunk in _RECORD_RE.findall(line):
            record = _literal_dict(chunk)
            if record is not None:
                records.append(record)
    return records


def _days_until_due(deadline, today=None):
    """Return the days from ``today`` to ``deadline`` (negative if overdue).

    Args:
        deadline: Due date as ``YYYY-MM-DD`` text.
        today: Reference date; defaults to the current local date.

    Raises:
        ValueError: If ``deadline`` is not a ``YYYY-MM-DD`` date.
    """
    if today is None:
        today = datetime.date.today()
    due = datetime.datetime.strptime(deadline.strip(), "%Y-%m-%d").date()
    # Real calendar subtraction: correct across month lengths and year ends.
    return (due - today).days


def main():
    """Read the stored loan records and mail a reminder for each urgent one."""
    # NOTE(review): this path contains no separators - it looks as if the
    # backslashes of C:\Users\Oscar\Desktop\ were lost.  It is left
    # unchanged because the login script has to write to the same path.
    path = r"C:UsersOscarDesktop数据.txt"
    try:
        with open(path, encoding="utf8") as f:
            records = _parse_records(f.read())
    except FileNotFoundError:
        sys.exit("loan data file not found: {}; run the login script "
                 "first".format(path))

    today = datetime.date.today()
    seen = set()
    for record in records:
        try:
            deadline, title, item_url, name = (str(record[k]) for k in _FIELDS)
        except KeyError:
            continue  # incomplete record

        # The data file is only ever appended to, so identical records can
        # occur more than once; send a single reminder for them.
        key = (deadline, title, item_url, name)
        if key in seen:
            continue
        seen.add(key)

        try:
            day = _days_until_due(deadline, today)
        except ValueError:
            # One unreadable date must not stop the remaining loans.
            print("skipping {!r}: unrecognised due date {!r}".format(
                title, deadline), file=sys.stderr)
            continue
        if day < REMIND_WITHIN_DAYS:
            send_email(deadline, title, item_url, day, name)


if __name__ == '__main__':
    main()
最后效果