zoukankan      html  css  js  c++  java
  • 使用python对小说更新进行提醒

    总管写的书一直都很喜欢,从《雪中悍刀行》到《剑来》。
    其实我还是最喜欢那个雪中的鼠标垫,哈哈哈

    针对笔趣阁小说进行数据爬取

    上源码

    #filename=get_data.py
    # -*-coding:utf-8 -*-
    # BY WANGCC
    
    
    from bs4 import BeautifulSoup
    import urllib.request
    import os
    from send_mail import sms
    from ip_to_mysql import mysql_proxies
    import logger
    # Module-level logger built from the project-local ``logger`` module.
    log = logger.Logger("debug")
    
    
    # Local file that holds the last chapter title seen; read by readfile() and
    # overwritten by write2file().
    test_file="剑来" + ".txt"
    def gain_html_content(url):
        """Download ``url`` with a proxy taken from the database and return the page text.

        The proxy address is obtained from ``mysql_proxies()`` and is expected to
        have the form ``scheme://host:port`` (for example ``http://111.26.9.26:80``).

        Args:
            url: Address of the page to fetch.

        Returns:
            The response body decoded as UTF-8.

        Raises:
            ValueError: If the proxy address has no scheme or no host part.
            urllib.error.URLError: If the request cannot be completed.
        """
        from urllib.parse import urlsplit

        headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36"
        }

        # Parse the proxy address properly instead of splitting on ":" and
        # slicing off "//", which breaks on anything but scheme://host:port.
        proxy_url = str(mysql_proxies()).strip()
        proxy_parts = urlsplit(proxy_url)
        if not proxy_parts.scheme or not proxy_parts.netloc:
            raise ValueError("unexpected proxy address: %r" % (proxy_url,))

        # NOTE(review): ProxyHandler only applies a proxy to requests whose
        # scheme matches a key of this mapping, so with an "http" entry an
        # https:// URL is presumably fetched without the proxy -- confirm
        # whether that is intended.
        httpproxy_handler = urllib.request.ProxyHandler(
            {proxy_parts.scheme: proxy_parts.netloc}
        )
        opener = urllib.request.build_opener(httpproxy_handler)
        request = urllib.request.Request(url=url, headers=headers)

        # The timeout keeps a dead proxy from blocking forever; the ``with``
        # block closes the response even if reading or decoding fails.
        with opener.open(request, timeout=30) as response:
            log.info('获取代理成功,请求页面成功!')
            content = response.read().decode('utf-8')
        return content
    
    
    def get_chapter(content):
        """Extract the latest chapter title from the HTML of a book page.

        The title is read from the ``content`` attribute of the
        ``<meta property="og:novel:latest_chapter_name">`` tag.

        Args:
            content: HTML text of the page.

        Returns:
            The value of the meta tag's ``content`` attribute.

        Raises:
            ValueError: If the page contains no such meta tag.
        """
        soup = BeautifulSoup(content, "lxml")
        meta_tag = soup.find("meta", property="og:novel:latest_chapter_name")
        if meta_tag is None:
            # Fail with a clear message instead of a TypeError from indexing None.
            raise ValueError(
                "meta tag 'og:novel:latest_chapter_name' not found in page"
            )
        return meta_tag['content']
    
    def readfile(content):
        """Return the chapter title stored in the local record file.

        When the record file does not exist yet, it is first created from
        ``content`` through ``write2file`` and then read back.

        Args:
            content: Title to store if no record file exists.

        Returns:
            The full text of the record file.
        """
        record_missing = not os.path.exists(test_file)
        if record_missing:
            write2file(content)
            log.info('将当前内容写入文档,生成剑来.txt文档')

        with open(test_file, mode='r', encoding='utf-8') as record:
            stored_title = record.read()
            log.info('读取剑来.txt文档')
        return stored_title
    
    
    def write2file(content):
        """Overwrite the local record file with ``content`` (UTF-8) and log it.

        Args:
            content: Text to store in the record file.
        """
        with open(test_file, mode='w', encoding='utf-8') as record:
            record.write(content)

        log.info('将小说写入本地文件,生成剑来.txt文档')
    
    
    
    def main():
        """Check the book page for a new chapter title and notify when it changed.

        The page is downloaded, the latest chapter title is extracted and compared
        with the title stored locally. On a difference the stored title is
        replaced and ``sms`` is called with the new title.
        """
        tar_url = 'https://www.qu.la/book/31177/'
        page_html = gain_html_content(tar_url)
        log.info('页面下载完成')

        latest_title = get_chapter(page_html)
        stored_title = readfile(latest_title)

        # Nothing to do when the stored title already matches the page.
        if latest_title == stored_title:
            log.info("没更新呢!")
            return

        write2file(latest_title)
        sms(latest_title)
        log.info('发送邮件提醒')
    
    #main()
    
    # Run the update check only when this file is executed as a script.
    if __name__ == "__main__":
        main()
    
    发送邮件部分
    # -*-coding:utf-8 -*-
    # BY WANGCC
    import smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    import logger
    log = logger.Logger("debug")
    
    
    # SMTP server host and login credentials (the values shown look redacted).
    smtpserver = 'smtp.163.com'
    username = 'xxxxx@163.com'
    password = 'xxxxxx'
    # Sender address passed to smtp.sendmail().
    sender = 'xxxx@163.com'
    # receiver='XXX@126.com'
    # Several recipients are given as a list.
    receiver = ['xxxxxxx@139.com','xxxxx@wo.cn']
    # Using a mobile carrier's mailbox here allows SMS alerts for new mail to be
    # configured, so the e-mail behaves much like a text-message notification.
    
    def sms(contect):
        """Send the update-notification e-mail to every address in ``receiver``.

        Args:
            contect: Text used as the mail subject.

        Raises:
            smtplib.SMTPException: If login or delivery fails.
            OSError: If the connection to the SMTP server cannot be established.
        """
        print("input sms...")

        # Subject, sender and recipients set here are what the mail client displays.
        msg = MIMEMultipart('mixed')
        msg['Subject'] = contect
        msg['From'] = 'wangcc <wangcc7777@163.com>'
        # Address lists in mail headers are comma separated; ";" is not a
        # valid separator there.
        msg['To'] = ", ".join(receiver)

        # Plain-text body.
        msg.attach(MIMEText("小说更新了!", 'plain', 'utf-8'))

        # SMTP_SSL connects as soon as a host is given, so no extra connect()
        # call is needed; the ``with`` block sends QUIT even if login or
        # sendmail raises.
        with smtplib.SMTP_SSL(host=smtpserver, port=465) as smtp:
            smtp.login(username, password)
            print("进入发送")
            smtp.sendmail(sender, receiver, msg.as_string())
            print('success....')

        log.info('发送提醒邮件给:'+str(receiver))
    
    # Manual test: send one notification with a sample subject when run as a script.
    if __name__ == "__main__":
        sms('c测试~~')
    
    数据库连接
    # -*-coding:utf-8 -*-
    # BY WANGCC
    
    import pymysql,datetime
    import logger,random
    
    log = logger.Logger("debug")
    
    # Connection settings passed to pymysql.connect() by every function below.
    # NOTE(review): the values look redacted; ``xxxxx`` for "port" is a bare name,
    # not a literal, and must be replaced by the real integer port before running.
    DB_CONFIG = {
        "host": "xxxxxxxx",
        "port": xxxxx,
        "user": "xxxx",
        "passwd": "111111111",
        "db": "xxxxx",
        "charset": "utf8"
    }
    
    def get_random():
        """Return a random integer between 1 and 9 inclusive."""
        return random.choice(range(1, 10))
    
    def mysql(ip_list):
        """Insert every address of ``ip_list`` into ``ip_original`` unless it is already stored.

        Each address is looked up first; only addresses with no matching row are
        inserted, together with the current timestamp. A failed insert is logged
        and rolled back, and processing continues with the next address.

        Args:
            ip_list: Iterable of proxy address strings.
        """
        # Placeholders let the driver escape the values; the addresses may come
        # from scraped pages and must not be interpolated into the SQL text.
        check_sql = "select count(*) from ip_original where ip=%s"
        insert_sql = "insert into ip_original(ip,date) values (%s,%s)"

        db = pymysql.connect(
            host=DB_CONFIG["host"],
            port=DB_CONFIG["port"],
            user=DB_CONFIG["user"],
            passwd=DB_CONFIG["passwd"],
            db=DB_CONFIG["db"],
            charset=DB_CONFIG["charset"])
        try:
            cursor = db.cursor()
            date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            for ip in ip_list:
                cursor.execute(check_sql, (ip,))
                if cursor.fetchone()[0] != 0:
                    log.info(ip+': is existence !!')
                    continue
                try:
                    cursor.execute(insert_sql, (ip, date))
                    db.commit()
                    log.info(ip+'insert to ip_original success!')
                except Exception as e:
                    # Best effort: report the failure and go on with the next address.
                    log.info('执行sql-->'+insert_sql+'fail: '+str(ip)+' '+str(e))
                    db.rollback()
        finally:
            # Close the connection even if a query raises.
            db.close()
    
    # Pick one checked proxy for scraping.
    def mysql_proxies():
        """Return one randomly chosen proxy whose ``check_date`` is set.

        Up to ten random rows with a non-NULL ``check_date`` are read from
        ``ip_original`` and one of them is picked.

        Returns:
            The second column of the chosen row (presumably the proxy address --
            confirm against the table schema).

        Raises:
            IndexError: If the query returns no rows.
        """
        check_sql="SELECT * FROM ip_original where check_date is not NULL ORDER BY RAND() LIMIT 10 "
        db = pymysql.connect(
            host=DB_CONFIG["host"],
            port=DB_CONFIG["port"],
            user=DB_CONFIG["user"],
            passwd=DB_CONFIG["passwd"],
            db=DB_CONFIG["db"],
            charset=DB_CONFIG["charset"])
        try:
            cursor = db.cursor()
            cursor.execute(check_sql)
            rows = cursor.fetchmany(10)
        finally:
            # Close the connection even if the query raises.
            db.close()

        if not rows:
            raise IndexError("no checked proxy available in ip_original")
        # Choose among the rows actually returned: a fixed 1..9 index skips the
        # first row and fails whenever fewer than ten rows come back.
        proxies = random.choice(rows)[1]
        print(proxies)
        return proxies
    
    # Pick one proxy (checked or not) for validation.
    def mysql_old():
        """Return one randomly chosen proxy from ``ip_original``.

        Up to ten random rows are read from ``ip_original`` and one of them is
        picked.

        Returns:
            The second column of the chosen row (presumably the proxy address --
            confirm against the table schema).

        Raises:
            IndexError: If the table returns no rows.
        """
        check_sql="SELECT * FROM ip_original ORDER BY RAND() LIMIT 10 "
        db = pymysql.connect(
            host=DB_CONFIG["host"],
            port=DB_CONFIG["port"],
            user=DB_CONFIG["user"],
            passwd=DB_CONFIG["passwd"],
            db=DB_CONFIG["db"],
            charset=DB_CONFIG["charset"])
        try:
            cursor = db.cursor()
            cursor.execute(check_sql)
            rows = cursor.fetchmany(10)
        finally:
            # Close the connection even if the query raises.
            db.close()

        if not rows:
            raise IndexError("no proxy available in ip_original")
        # Choose among the rows actually returned: a fixed 1..9 index skips the
        # first row and fails whenever fewer than ten rows come back.
        proxies = random.choice(rows)[1]
        print(proxies)
        return proxies
    
    
    
    # Delete one proxy.
    def mysql_delete(proxies):
        """Delete the rows of ``ip_original`` whose ``ip`` equals ``proxies``.

        Args:
            proxies: Proxy address to remove.

        Returns:
            ``proxies`` unchanged.
        """
        # A placeholder lets the driver escape the value instead of
        # interpolating it into the SQL text.
        delete_sql = "delete  from ip_original  where ip = %s"
        db = pymysql.connect(
            host=DB_CONFIG["host"],
            port=DB_CONFIG["port"],
            user=DB_CONFIG["user"],
            passwd=DB_CONFIG["passwd"],
            db=DB_CONFIG["db"],
            charset=DB_CONFIG["charset"])
        try:
            cursor = db.cursor()
            log.info('delete ip-->'+delete_sql+' <-- '+str(proxies))
            cursor.execute(delete_sql, (proxies,))
            db.commit()
        finally:
            # Close the connection even if the statement raises.
            db.close()
        return proxies
    
    # Update the source area and the check time of a proxy.
    def mysql_update(str_from,proxies_yuan):
        """Set ``from_area`` and ``check_date`` for the row whose ``ip`` is ``proxies_yuan``.

        ``check_date`` is set to the current local time. A failed update is
        logged, printed and rolled back instead of being raised.

        Args:
            str_from: Value stored in the ``from_area`` column.
            proxies_yuan: Proxy address identifying the row to update.
        """
        date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Placeholders let the driver escape the values instead of
        # interpolating them into the SQL text.
        update_sql = "update  ip_original set from_area=%s,check_date=%s where ip=%s"
        params = (str_from, date, proxies_yuan)

        db = pymysql.connect(
            host=DB_CONFIG["host"],
            port=DB_CONFIG["port"],
            user=DB_CONFIG["user"],
            passwd=DB_CONFIG["passwd"],
            db=DB_CONFIG["db"],
            charset=DB_CONFIG["charset"])
        try:
            cursor = db.cursor()
            try:
                print(update_sql, params)
                cursor.execute(update_sql, params)
                db.commit()
                log.info(proxies_yuan+'---->'+str_from+'--> updata success!')
            except Exception as e:
                log.info(str_from+'failed')
                print(e)
                # Undo the partial change on failure.
                db.rollback()
        finally:
            # Close the connection even if something above raises.
            db.close()
    
    # Manual check when run as a script. ``ip_list`` is only consumed by the
    # commented-out mysql() call; the active statement just fetches one proxy.
    if  __name__=="__main__":
        ip_list = ['http://117.191.11.108:80', 'http://134.209.15.143:8080', 'http://157.230.232.130:80',
                   'http://111.206.6.100:80', 'http://159.138.5.222:80', 'http://178.128.12.118:8080',
                   'http://83.142.126.147:80', 'http://150.109.55.190:83', 'http://165.227.62.167:8080',
                   'http://167.114.153.18:80', 'http://39.137.69.10:8080', 'http://111.206.6.101:80',
                   'http://165.227.29.189:8080', 'http://175.139.252.192:80', 'http://103.42.213.176:8080',
                   'http://211.23.149.29:80', 'http://211.23.149.28:80', 'http://47.94.57.119:80',
                   'http://175.139.252.194:80', 'http://47.94.217.37:80']
        #mysql(ip_list)
        number=mysql_proxies()
    

    思路

    每次爬取时,从数据库中随机抽取一个代理ip来使用,如果该代理不可用就将其从数据库中删除。
    数据爬取后,把最新章节名存到本地txt,留着和下次爬取的结果作比对;如果不一致(说明小说有更新),则更新本地文件,并发送邮件提醒。

  • 相关阅读:
    SQL Server, Timeout expired.all pooled connections were in use and max pool size was reached
    javascript 事件调用顺序
    Best Practices for Speeding Up Your Web Site
    C语言程序设计 使用VC6绿色版
    破解SQL Prompt 3.9的几步操作
    Master page Path (MasterPage 路径)
    几个小型数据库的比较
    CSS+DIV 完美实现垂直居中的方法
    由Response.Redirect引发的"Thread was being aborted. "异常的处理方法
    Adsutil.vbs 在脚本攻击中的妙用
  • 原文地址:https://www.cnblogs.com/wangcc7/p/13648933.html
Copyright © 2011-2022 走看看