zoukankan      html  css  js  c++  java
  • python使用mysql

    连接 MySQL 做了个小练习:爬取 http://wufazhuce.com 上的问题、描述和答案,存到本地的数据库里。

    数据表结构:

    -- Target table for the scraper below: it inserts one row per question page.
    -- Every column is a nullable varchar; pages without a question block are
    -- stored with the literal '404' in title, description and answers.
    CREATE TABLE `questions` (
    `title` varchar(2000) DEFAULT NULL,
    -- NOTE: description is capped at 200 characters, unlike the other text columns.
    `description` varchar(200) DEFAULT NULL,
    `answers` varchar(2000) DEFAULT NULL,
    `url` varchar(2000) DEFAULT NULL,
    -- Page number taken from the question URL.
    `daynum` varchar(20) DEFAULT NULL
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8
    View Code

    代码:

    # author:
    
    import pymysql.cursors
    import requests
    from bs4 import BeautifulSoup
    
    # Crawl every question page on wufazhuce.com and store its title,
    # description and answer in the local `questions` table.
    con = pymysql.connect(host='192.168.86.130', user='root', password='letmein', db='0603simon', port=3306,
                          charset='utf8')

    # The driver fills in the placeholders, so every value is quoted and escaped
    # in one place instead of being spliced into the statement by hand.
    insert_sql = 'insert into questions (title,description,answers,url,daynum) values(%s,%s,%s,%s,%s)'
    # Answers are cut to this many characters before being stored.
    max_answer_len = 1800

    try:
        with con.cursor() as cur:
            for p_num in range(1, 1872):
                url = 'http://wufazhuce.com/question/%s' % p_num
                response = requests.get(url=url)
                response.encoding = response.apparent_encoding
                soup = BeautifulSoup(response.text, features="html.parser")
                tar = soup.find('div', class_='one-cuestion')
                if not tar:
                    # No question block on this page: record a placeholder row.
                    print('not tar')
                    row = ('404', '404', '404', url, str(p_num))
                else:
                    title = tar.find('h4').text.strip()
                    # The first 'cuestion-contenido' block is the description;
                    # the last of the remaining blocks is kept as the answer.
                    blocks = soup.find_all('div', class_='cuestion-contenido')
                    desc = blocks[0].text.strip() if blocks else ''
                    # With only one block there is no answer; store '' rather
                    # than repeating the description.
                    answer = blocks[-1].text.strip() if len(blocks) > 1 else ''
                    answer = answer[:max_answer_len]
                    row = (title, desc, answer, url, str(p_num))

                # mogrify renders the exact statement that execute will send.
                print(cur.mogrify(insert_sql, row))
                result = cur.execute(insert_sql, row)
                con.commit()
                print('执行结果:' + str(result))
    finally:
        # Release the connection even when a request or insert fails midway.
        con.close()
    View Code

    邮箱的使用

    # coding:utf-8
    def get_question():
        """Download the question page for the current day and extract its text.

        The page number starts at 2593 for 2019-09-08 and grows by one for
        every full 24 hours elapsed since then (local time).

        Returns:
            A dict with the keys 'title', 'desc' and 'answer'. When the page
            has no 'one-cuestion' div, the dict holds only 'title' set to ''.
        """
        import time

        import requests
        from bs4 import BeautifulSoup

        seconds_per_day = 24 * 60 * 60
        base_num = 2593
        base_ts = time.mktime(time.strptime('2019-09-08', '%Y-%m-%d'))
        now_ts = time.mktime(time.localtime())
        cur_num = base_num + int((now_ts - base_ts) / seconds_per_day)
        print(cur_num)

        page_url = 'http://wufazhuce.com/question/%s' % cur_num
        response = requests.get(url=page_url)
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, features="html.parser")

        question_div = soup.find('div', class_='one-cuestion')
        if not question_div:
            print('not tar')
            return {'title': ''}

        title = question_div.find('h4').text.strip()
        desc = soup.find('div', class_='cuestion-contenido').text.strip()

        # The first content block is the description; the last of the
        # remaining blocks is the answer, or '' when there are none.
        content_blocks = soup.find_all('div', class_='cuestion-contenido')
        answer = content_blocks[-1].text.strip() if len(content_blocks) > 1 else ''

        return {'title': title, 'desc': desc, 'answer': answer}
    
    
    def send_email(title, desc, content):
        """Send one question as an HTML e-mail through the 163.com SMTP server.

        Args:
            title: Used as the message subject.
            desc: Question description, shown after the "【描述】" label.
            content: Answer text, shown after the "【回答】" label.

        Returns:
            None. Any failure while building or sending the message is caught
            and printed, not raised.
        """
        import smtplib
        from email.mime.multipart import MIMEMultipart
        from email.mime.text import MIMEText
        from email.utils import formataddr
        from html import escape

        sender = 'xxxxx@163.com'  # sender mailbox account
        receive = 'xxxxx@qq.com'  # recipient mailbox account
        passwd = 'xxxxx'
        mailserver = 'smtp.163.com'
        port = 25

        try:
            msg = MIMEMultipart('related')
            msg['From'] = formataddr(["sender", sender])  # display name, address
            msg['To'] = formataddr(["receiver", receive])  # display name, address
            msg['Subject'] = title

            # The body is sent as HTML, so the text is escaped and the two
            # parts are separated by a real <br> tag.
            body = '【描述】:%s<br>\n【回答】:%s' % (escape(str(desc)), escape(str(content)))
            msg.attach(MIMEText(body, 'html', 'utf-8'))

            # The context manager closes the connection even when login or
            # sendmail raises.
            with smtplib.SMTP(mailserver, port) as server:
                server.login(sender, passwd)
                server.sendmail(sender, receive, msg.as_string())
            print('success')
        except Exception as e:
            # Best effort: report the problem and let the caller carry on.
            print(e)
    
    
    def main_to():
        """Fetch the current question and e-mail it when one was found.

        Returns:
            1 when get_question() returned a non-empty title and send_email()
            was called, otherwise 0.
        """
        question = get_question()
        if not question['title']:
            return 0
        send_email(question['title'], question['desc'], question['answer'])
        return 1
    
    
    from threading import Timer
    import time
    
    # Delay in seconds passed to the Timer below before it calls delayrun.
    timer_interval = 1
    
    
    def delayrun():
        """Print a short marker line; takes no arguments and returns None."""
        marker = 'running'
        print(marker)
    
    
    # Sleep three hours before doing anything else.
    first_time = 3 * 60 * 60
    time.sleep(first_time)
    print('先歇一下')

    # One-shot timer: runs delayrun once, timer_interval seconds after start().
    t = Timer(timer_interval, delayrun)
    t.start()

    wait_time = 60
    while True:
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        res = main_to()
        # After a successful send wait a full day; otherwise retry in a minute.
        wait_time = 60 * 60 * 24 if res else 60
        time.sleep(wait_time)
    View Code
  • 相关阅读:
    Robot Framework (十)html基础
    Robot Framework (九)Selenium的安装
    Robot Framework (八)循环&分支
    Robot Framework (七)Keyword 关键字
    约瑟夫环问题的两种解法(详解)
    msdn
    java同一个包中,类之间的的调用
    循环语句中break 与 continue的区别
    memset()函数
    DFS(深搜)算法
  • 原文地址:https://www.cnblogs.com/Simonsun002/p/9152944.html
Copyright © 2011-2022 走看看