zoukankan      html  css  js  c++  java
  • python爬虫之PyQuery

    # -*- coding: UTF-8 -*-
    from pyquery import PyQuery as pq
    import re
    from datetime import datetime,timedelta
    import pymysql
    import sys
    # Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
    # module is reloaded to get it back, then the implicit str/unicode codec is
    # switched to UTF-8. Python 3 has no reload() builtin and already uses
    # UTF-8 as its default encoding, so the hack is skipped there.
    if sys.version_info[0] < 3:
        reload(sys)
        sys.setdefaultencoding('utf8')
    
    
    def data_ana(item):
        """Return the text of *item* up to (not including) the first '('.

        *item* is any object whose ``text()`` method returns a string; the
        caller in this file passes PyQuery selections. When the text contains
        no '(' the whole text is returned.

        The result is a native ``str``: UTF-8 encoded bytes on Python 2 (same
        as before) and text on Python 3.
        """
        # Split before encoding. '(' is ASCII, so on Python 2 this yields the
        # same bytes as encoding first, while on Python 3 it avoids calling
        # bytes.split() with a str separator, which raises TypeError.
        head = item.text().split('(')[0]
        if isinstance(head, str):
            return head
        # Python 2 unicode object: keep handing UTF-8 bytes back to callers.
        return head.encode('utf-8')
    
    def community_daily(start_date,date,week,month):
        """Extract one day's community metrics from a saved report page.

        Reads ``email_shequ<MMDD>.htm`` (MMDD formatted from *start_date*,
        path relative to the working directory) and returns a flat list:

        1. the first 5-6 digit number found in the text of
           ``tr:eq(3) td:eq(0)``,
        2. for each metric in ``metric_rows`` below, the text of
           ``tr:eq(N) td:eq(1)`` cut at the first '(' (see ``data_ana``),
        3. *week*, *month* and *date*, passed through unchanged.

        The list is also printed before being returned.

        Raises:
            IndexError: if ``tr:eq(3) td:eq(0)`` holds no 5-6 digit number.
        """
        filename='email_shequ'+start_date.strftime('%m%d')+'.htm'
        doc = pq(filename=filename,encoding='utf-8')

        # r'\d{5,6}' -- with the backslash. Without it the pattern looks for
        # five or six literal "d" characters and never matches a number.
        tab_matches=re.findall(r'\d{5,6}',doc('tr:eq(3) td:eq(0)').text())
        if not tab_matches:
            # Same exception type a bare [0] would raise, but with context.
            raise IndexError('no 5-6 digit number in tr:eq(3) td:eq(0) of %s' % filename)
        community_tab=tab_matches[0]

        # (metric name, index used in the tr:eq(...) selector); the value is
        # always read from td:eq(1) of that row. Order defines the output order.
        metric_rows=(
            ('topic_posts_app',24),
            ('question_posts_app',26),
            ('share_posts_app',27),
            ('vote_posts_app',28),
            ('bycar_posts_app',29),
            ('posts_entry',37),
            ('specific_posts_entry',38),
            ('posts_publish',39),
        )
        metrics=[data_ana(doc('tr:eq(%d) td:eq(1)' % row_index))
                 for _name,row_index in metric_rows]

        # Named "row" rather than "list" so the builtin is not shadowed.
        row=[community_tab]+metrics+[week,month,date]
        # Parenthesised form prints the same thing on Python 2 and 3.
        print(row)
        return row
    
    if __name__=='__main__':
        # Usage: <script> START_DATE END_DATE   (both formatted YYYY-MM-DD)
        # One row is inserted per day from START_DATE up to, but not
        # including, END_DATE.
        if len(sys.argv)<3:
            sys.exit('usage: %s START_DATE END_DATE  (dates as YYYY-MM-DD)' % sys.argv[0])
        # strptime raises ValueError on malformed dates, as the manual
        # split/int parsing did.
        start_date=datetime.strptime(sys.argv[1],'%Y-%m-%d')
        end_date  =datetime.strptime(sys.argv[2],'%Y-%m-%d')

        community_daily_sql='''**** '''
        db_params = {'host':'localhost', 'user':'****', 'passwd':'****', 'db':'****', 'charset':'utf8'}
        conn = pymysql.connect(**db_params)
        # try/finally so the cursor and connection are released even when a
        # report file is missing or an insert fails part-way through the range.
        try:
            cursor = conn.cursor()
            try:
                current_date=start_date
                while current_date<end_date:
                    # Monday..Sunday of the week containing current_date,
                    # rendered as "MM/DD~MM/DD".
                    week_start=current_date-timedelta(current_date.weekday())
                    week_end=week_start+timedelta(days=6)
                    week=week_start.strftime('%m/%d')+'~'+week_end.strftime('%m/%d')
                    month=current_date.strftime('%Y/%m')
                    date=current_date.strftime('%Y-%m-%d')
                    community_daily_list=community_daily(current_date,date,week,month)
                    cursor.execute(community_daily_sql,community_daily_list)
                    current_date=current_date+timedelta(days=1)
                # Commit once, only after every day in the range was processed;
                # if anything above raises, this script commits nothing.
                conn.commit()
            finally:
                cursor.close()
        finally:
            conn.close()
  • 相关阅读:
    左右对齐Justify遇到的坑
    JS中的相等性判断===, ==, Object.is()
    JS调用栈的一些总结
    VueI18n
    【转】Webpack 快速上手(下)
    【转】Webpack 快速上手(中)
    【转】Webpack 快速上手(上)
    springboot打包排除指定jar包依赖
    prometheus+grafana搭建
    fbctf 安装部署出现的问题
  • 原文地址:https://www.cnblogs.com/wangbin2188/p/6555219.html
Copyright © 2011-2022 走看看