zoukankan      html  css  js  c++  java
  • 使用python对小说更新进行提醒

    总管写的书一直都很喜欢,从《雪中悍刀行》到《剑来》。
    其实我还是最喜欢那个雪中的鼠标垫,哈哈哈

    针对笔趣阁小说进行数据爬取

    上源码

    #filename=get_data.py
    # -*-coding:utf-8 -*-
    # BY WANGCC
    
    
    from bs4 import BeautifulSoup
    import urllib.request
    import os
    from send_mail import sms
    from ip_to_mysql import mysql_proxies
    import logger
    log = logger.Logger("debug")
    
    
    # Local file that stores the most recently seen chapter title.
    test_file = "{}.txt".format("剑来")
    def gain_html_content(url, timeout=30):
        """Download ``url`` through a proxy from the database and return the page text.

        Args:
            url: Address of the page to fetch.
            timeout: Socket timeout in seconds for the request. Optional; added
                so that a dead proxy cannot block the script forever.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            ValueError: If the string returned by ``mysql_proxies()`` has no
                scheme or no ``host:port`` part.
            urllib.error.URLError: If the request itself fails.
        """
        # Imported locally so the function does not depend on ``urllib.parse``
        # being reachable only as a side effect of ``import urllib.request``.
        from urllib.parse import urlsplit

        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
        }

        # The proxy is expected in the form "http://111.26.9.26:80".
        proxy_url = mysql_proxies()
        parts = urlsplit(proxy_url)
        if not parts.scheme or not parts.netloc:
            raise ValueError("malformed proxy address: %r" % (proxy_url,))

        # ProxyHandler maps a URL scheme to the "host:port" of the proxy.
        # NOTE(review): the key is the proxy's own scheme, so an "http" proxy is
        # only consulted for http:// targets - confirm this is intended when the
        # target URL is https.
        proxy_handler = urllib.request.ProxyHandler({parts.scheme: parts.netloc})
        opener = urllib.request.build_opener(proxy_handler)
        request = urllib.request.Request(url=url, headers=headers)

        # The context manager closes the connection even if reading fails.
        with opener.open(request, timeout=timeout) as response:
            log.info('获取代理成功,请求页面成功!')
            content = response.read().decode('utf-8')
        return content
    
    
    def get_chapter(content):
        """Extract the latest chapter title from the book page HTML.

        Args:
            content: HTML text of the page.

        Returns:
            The ``content`` attribute of the
            ``<meta property="og:novel:latest_chapter_name">`` tag.

        Raises:
            ValueError: If the page contains no such meta tag (for example when
                an error page was returned instead of the book page).
            KeyError: If the tag exists but has no ``content`` attribute.
        """
        soup = BeautifulSoup(content, "lxml")
        # The latest chapter name is published as an Open Graph meta tag.
        meta_tag = soup.find("meta", property="og:novel:latest_chapter_name")
        if meta_tag is None:
            raise ValueError(
                "meta tag 'og:novel:latest_chapter_name' not found in page"
            )
        return meta_tag['content']
    
    def readfile(content):
        """Return the text stored in the local record file.

        If the record file does not exist yet it is first created with
        ``content`` (via ``write2file``), so the value read back equals
        ``content`` on that first call.

        Args:
            content: Text used to seed the record file when it is missing.

        Returns:
            The full text of the record file.
        """
        file_missing = not os.path.exists(test_file)
        if file_missing:
            write2file(content)
            log.info('将当前内容写入文档,生成剑来.txt文档')

        with open(test_file, 'r', encoding='utf-8') as stored:
            saved_text = stored.read()
            log.info('读取剑来.txt文档')
        return saved_text
    
    
    def write2file(content):
        """Overwrite the local record file with ``content`` and log the write.

        Args:
            content: Text to store; the file is written as UTF-8.
        """
        with open(test_file, mode='w', encoding='utf-8') as out:
            out.write(content)

        log.info('将小说写入本地文件,生成剑来.txt文档')
    
    
    
    def main():
        """Check the book page for a new chapter and send a mail when one appears.

        The latest chapter title from the page is compared with the title
        stored in the local record file; when they differ the record is
        overwritten and a notification is sent through ``sms``.
        """
        tar_url = 'https://www.qu.la/book/31177/'
        page_html = gain_html_content(tar_url)
        log.info('页面下载完成')

        latest_title = get_chapter(page_html)
        stored_title = readfile(latest_title)

        # Nothing to do when the stored title already matches the page.
        if latest_title == stored_title:
            log.info("没更新呢!")
            return

        write2file(latest_title)
        sms(latest_title)
        log.info('发送邮件提醒')
    
    #main()
    
    # Run the update check only when this file is executed as a script,
    # not when it is imported.
    if __name__ == "__main__":
        main()
    
    发送邮件部分
    # -*-coding:utf-8 -*-
    # BY WANGCC
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    import logger
    log = logger.Logger("debug")
    
    
    # SMTP account settings; the values below are redacted placeholders.
    smtpserver = 'smtp.163.com'
    username = 'xxxxx@163.com'
    password = 'xxxxxx'
    sender = 'xxxx@163.com'
    # receiver='XXX@126.com'
    # Multiple recipients are given as a list.
    receiver = ['xxxxxxx@139.com','xxxxx@wo.cn']
    # Carrier-hosted mailboxes can be configured to send an SMS alert on new mail, so each message works like a text notification.
    
    def sms(contect):
        """Send the update notification e-mail with ``contect`` as its subject.

        The message is sent from ``sender`` to every address in ``receiver``
        through ``smtpserver`` on the SMTPS port 465.

        Args:
            contect: Text used as the mail subject.

        Raises:
            smtplib.SMTPException: If login or delivery fails.
            OSError: If the connection to the mail server cannot be opened.
        """
        print("input sms...")
        subject = contect

        # Subject, From and To are the headers shown by the mail client.
        msg = MIMEMultipart('mixed')
        msg['Subject'] = subject
        msg['From'] = 'wangcc <wangcc7777@163.com>'
        # RFC 5322 separates the addresses of a header with commas, not
        # semicolons.
        msg['To'] = ", ".join(receiver)

        # Plain-text body of the notification.
        text = "小说更新了!"
        text_plain = MIMEText(text, 'plain', 'utf-8')
        msg.attach(text_plain)

        # SMTP_SSL connects as soon as a host is given, so no separate
        # connect() call is needed. The context manager sends QUIT and closes
        # the socket even when login or delivery raises.
        with smtplib.SMTP_SSL(host=smtpserver, port=465) as smtp:
            # smtp.set_debuglevel(1) would print the whole SMTP dialogue.
            smtp.login(username, password)
            print("进入发送")
            smtp.sendmail(sender, receiver, msg.as_string())
            print('success....')
            s_receiver = str(receiver)
            log.info('发送提醒邮件给:' + s_receiver)
    
    # Manual check: send one test mail when this file is run directly.
    if __name__ == "__main__":
        sms('c测试~~')
    
    数据库连接
    # -*-coding:utf-8 -*-
    # BY WANGCC
    
    import pymysql,datetime
    import logger,random
    
    log = logger.Logger("debug")
    
    # Connection settings handed to pymysql.connect() by every function below.
    # The values are redacted placeholders and must be replaced before use.
    DB_CONFIG = {
        "host": "xxxxxxxx",
        # NOTE(review): bare placeholder; this is an undefined name until it is
        # replaced with the integer port number.
        "port": xxxxx,
        "user": "xxxx",
        "passwd": "111111111",
        "db": "xxxxx",
        "charset": "utf8"
    }
    
    def get_random():
        """Return a random integer between 1 and 9, both ends included."""
        return random.randrange(1, 10)
    
    def mysql(ip_list):
        """Insert every address of ``ip_list`` that is not yet in ``ip_original``.

        Each address is looked up first; only unknown addresses are inserted,
        together with the current timestamp. A failing insert is logged and
        rolled back, and processing continues with the next address.

        Args:
            ip_list: Iterable of proxy address strings such as
                ``'http://117.191.11.108:80'``.
        """
        db = pymysql.connect(**DB_CONFIG)
        try:
            cursor = db.cursor()
            date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Values are passed as query parameters so the driver escapes them;
            # the addresses come from outside and must not be spliced into SQL.
            check_sql = "select count(*) from ip_original where ip=%s"
            insert_sql = "insert into ip_original(ip,date)value (%s,%s)"
            for ip in ip_list:
                cursor.execute(check_sql, (ip,))
                if cursor.fetchone()[0] != 0:
                    log.info(ip+': is existence !!',)
                    continue
                try:
                    cursor.execute(insert_sql, (ip, date))
                    db.commit()
                except Exception as e:
                    # Render the statement for the log only; it is never executed
                    # in this form.
                    rendered = insert_sql % (repr(ip), repr(date))
                    log.info('执行sql-->'+rendered+'fail' + ' (' + str(e) + ')')
                    db.rollback()
                else:
                    log.info(ip+'insert to ip_original success!')
        finally:
            # Close the connection even when a query raises.
            db.close()
    
    #采集用一个ip代理
    def mysql_proxies():
        """Return one randomly chosen proxy that has already been checked.

        Up to ten random rows of ``ip_original`` with a non-NULL ``check_date``
        are fetched and one of them is picked.

        Returns:
            The second column of the chosen row (presumably the proxy address,
            e.g. ``'http://1.2.3.4:80'`` - confirm against the table schema).

        Raises:
            IndexError: If the query returns no row.
        """
        db = pymysql.connect(**DB_CONFIG)
        try:
            cursor = db.cursor()
            check_sql="SELECT * FROM ip_original where check_date is not NULL ORDER BY RAND() LIMIT 10 "
            cursor.execute(check_sql)
            rows = cursor.fetchmany(10)
        finally:
            # Close the connection even when the query raises.
            db.close()

        if not rows:
            raise IndexError("no checked proxy available in ip_original")
        # Choose among the rows actually returned: a fixed index in 1..9 skips
        # the first row and fails whenever fewer than ten rows come back.
        proxies = random.choice(rows)[1]
        print(proxies)
        return proxies
    
    #验证用一个ip代理
    def mysql_old():
        """Return one randomly chosen proxy, whether or not it was checked.

        Up to ten random rows of ``ip_original`` are fetched and one of them is
        picked.

        Returns:
            The second column of the chosen row (presumably the proxy address,
            e.g. ``'http://1.2.3.4:80'`` - confirm against the table schema).

        Raises:
            IndexError: If the table is empty.
        """
        db = pymysql.connect(**DB_CONFIG)
        try:
            cursor = db.cursor()
            check_sql="SELECT * FROM ip_original ORDER BY RAND() LIMIT 10 "
            cursor.execute(check_sql)
            rows = cursor.fetchmany(10)
        finally:
            # Close the connection even when the query raises.
            db.close()

        if not rows:
            raise IndexError("no proxy available in ip_original")
        # Choose among the rows actually returned: a fixed index in 1..9 skips
        # the first row and fails whenever fewer than ten rows come back.
        proxies = random.choice(rows)[1]
        print(proxies)
        return proxies
    
    
    
    #删除一条数据
    def mysql_delete(proxies):
        """Delete the row of ``ip_original`` whose ``ip`` equals ``proxies``.

        Args:
            proxies: Proxy address to remove.

        Returns:
            ``proxies`` unchanged.
        """
        db = pymysql.connect(**DB_CONFIG)
        try:
            cursor = db.cursor()
            # The address is passed as a query parameter so the driver escapes it.
            check_sql = "delete  from ip_original  where ip = %s"
            # Rendered for the log only; this string is never executed.
            log.info('delete ip-->' + check_sql % (repr(proxies),))
            cursor.execute(check_sql, (proxies,))
            db.commit()
        finally:
            # Close the connection even when the statement raises.
            db.close()
        return proxies
    
    #更新来源和验证时间
    def mysql_update(str_from,proxies_yuan):
        """Store the origin and the current check time for one proxy.

        Sets ``from_area`` to ``str_from`` and ``check_date`` to the current
        local time on the row of ``ip_original`` whose ``ip`` equals
        ``proxies_yuan``. A failing update is logged, printed and rolled back.

        Args:
            str_from: Value written to the ``from_area`` column.
            proxies_yuan: Proxy address identifying the row to update.
        """
        db = pymysql.connect(**DB_CONFIG)
        try:
            cursor = db.cursor()
            date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            # Values are passed as query parameters so the driver escapes them.
            update_sql = "update  ip_original set from_area=%s,check_date=%s where ip=%s"
            params = (str_from, date, proxies_yuan)
            # Rendered for display only; this string is never executed.
            print(update_sql % tuple(repr(p) for p in params))
            try:
                cursor.execute(update_sql, params)
                db.commit()
            except Exception as e:
                log.info(str_from+'failed')
                print(e)
                db.rollback()
            else:
                # Report success only after the commit went through.
                log.info(proxies_yuan+'---->'+str_from+'--> updata success!')
        finally:
            # Close the connection even when something above raises.
            db.close()
    
    # Manual check when this file is run directly: fetch one proxy from the database.
    if  __name__=="__main__":
        # Sample proxy addresses for seeding the table through mysql(); unused
        # while that call stays commented out.
        ip_list = ['http://117.191.11.108:80', 'http://134.209.15.143:8080', 'http://157.230.232.130:80',
                   'http://111.206.6.100:80', 'http://159.138.5.222:80', 'http://178.128.12.118:8080',
                   'http://83.142.126.147:80', 'http://150.109.55.190:83', 'http://165.227.62.167:8080',
                   'http://167.114.153.18:80', 'http://39.137.69.10:8080', 'http://111.206.6.101:80',
                   'http://165.227.29.189:8080', 'http://175.139.252.192:80', 'http://103.42.213.176:8080',
                   'http://211.23.149.29:80', 'http://211.23.149.28:80', 'http://47.94.57.119:80',
                   'http://175.139.252.194:80', 'http://47.94.217.37:80']
        #mysql(ip_list)
        number=mysql_proxies()
    

    思路

    每次爬取时,从数据库中随机抽取一个代理ip来使用,如果该代理不可用就将其删除。
    数据爬取后,把最新章节名保存在本地txt中,留着和下次爬取的结果作比对;如果不一致,说明小说已更新,则覆盖本地记录并发送邮件提醒。

  • 相关阅读:
    H5中获取图片中的主色调
    vue provide/inject响应式
    微信小程序入坑之路
    H5入坑之路
    uni-app状态栏相关问题
    uni-app使用iconfont相关
    uni-app入坑之路
    uni-app自定义导航栏(搜索框和按钮)
    浅析浏览器的渲染过程
    SAP ABAP MB51新增栏位字段
  • 原文地址:https://www.cnblogs.com/wangcc7/p/13648933.html
Copyright © 2011-2022 走看看