zoukankan      html  css  js  c++  java
  • 爬虫监控

    数据监控:

    import requests
    import urllib
    import time
    import pymongo
    
    # Created at module level on purpose: only names defined out here can be
    # imported by other modules (the monitor script imports sheet_qunar_zyx).
    client = pymongo.MongoClient(host='localhost', port=27017)
    book_qunar = client['qunar']
    sheet_qunar_zyx = book_qunar['qunar_zyx']
    
    
    
    # Fetch and store every listing page for one departure/destination pair
    def get_list(dep, item):
        """Download all listing pages for a route and save them to MongoDB.

        The first request (offset 0) is used to read the total route count.
        The listing is then walked in steps of 20, and the raw JSON of each
        page is inserted into ``sheet_qunar_zyx`` together with the current
        date, the route and the page offset.

        Args:
            dep: Departure city name; URL-quoted before being put in the URL.
            item: Destination query; URL-quoted before being put in the URL.

        Returns:
            None. Returns early without storing anything when the first
            response has no usable ``data.limit.routeCount`` value.
        """
        # quote() is defined in urllib.parse; reaching it through
        # urllib.request only works when something else imported that module.
        from urllib.parse import quote

        list_url = (
            'https://touch.dujia.qunar.com/list?modules=list,bookingInfo'
            '&dep={}&query={}&mtype=all&ddt=false'
            '&mobFunction=%E6%89%A9%E5%B1%95%E8%87%AA%E7%94%B1%E8%A1%8C'
            '&cfrom=zyx&it=FreetripTouchin&et=FreetripTouch&date='
            '&configDepNew=&needNoResult=true&originalquery={}'
            '&limit={},20&includeAD=true&qsact=search'
        )
        quoted_dep = quote(dep)
        quoted_item = quote(item)

        first_page = get_json(list_url.format(quoted_dep, quoted_item, quoted_item, 0))
        try:
            route_count = int(first_page['data']['limit']['routeCount'])
        except (KeyError, TypeError, ValueError):
            # Response without a readable route count: nothing to crawl.
            return

        for limit in range(0, route_count, 20):
            if limit == 0:
                # Offset 0 was already downloaded to read the route count.
                page = first_page
            else:
                page = get_json(
                    list_url.format(quoted_dep, quoted_item, quoted_item, limit))
            sheet_qunar_zyx.insert_one({
                'date': time.strftime('%Y-%m-%d', time.localtime()),
                'dep': dep,
                'arrive': item,
                'limit': limit,
                'result': page,
            })
    
    def connect_mongo():
        """Create a new client for localhost:27017 and return the 'qunar_zyx' collection of the 'qunar' database."""
        return pymongo.MongoClient('localhost', 27017)['qunar']['qunar_zyx']
    
    
    def get_json(url, timeout=10, delay=1):
        """GET ``url`` and return the response body decoded as JSON.

        Args:
            url: Address to request.
            timeout: Seconds to wait for the server before requests raises a
                timeout error. Without it a stalled connection would block
                the crawler indefinitely.
            delay: Seconds to sleep after each request, to throttle the
                crawl (the original fixed pause was 1 second).

        Returns:
            The decoded JSON document.

        Raises:
            requests.exceptions.RequestException: On connection errors or
                timeouts.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.get(url, timeout=timeout)
        time.sleep(delay)
        return response.json()
    
    if __name__ == "__main__":
        # quote() is defined in urllib.parse; reaching it through
        # urllib.request only works when something else imported that module.
        from urllib.parse import quote

        # Walk every departure city, collect its recommended destinations and
        # crawl the listing for each (departure, destination) pair.
        url = 'https://touch.dujia.qunar.com/depCities.qunar'
        dep_dict = get_json(url)
        for dep_item in dep_dict['data']:
            for dep in dep_dict['data'][dep_item]:
                url = 'https://m.dujia.qunar.com/golfz/sight/arriveRecommend?dep={}&exclude=&extensionImg=255,175'.format(quote(dep))
                arrive_dict = get_json(url)
                # A dict keeps first-seen order and drops duplicates in O(1)
                # per entry, unlike a membership test against a growing list.
                destinations = {}
                for arr_item in arrive_dict['data']:
                    for arr_item_1 in arr_item['subModules']:
                        for query in arr_item_1['items']:
                            destinations.setdefault(query['query'], None)
                for item in destinations:
                    get_list(dep, item)
    

    监控:

    from test import sheet_qunar_zyx
    import time
    
    # Monitor the database: print the number of stored documents every 10 seconds
    while True:
        # Cursor.count() was removed in PyMongo 4; count_documents({}) with an
        # empty filter counts every document in the collection.
        print(sheet_qunar_zyx.count_documents({}))
        time.sleep(10)
    
  • 相关阅读:
    7. Spring验证、数据绑定和类型转换
    J2EE应用与移动互联网-写在前头
    IT基础设施资源的实践----写在前头
    JavaScript随笔记(一)基础概念以及变量类型
    js函数表达式
    js面向对象(2)
    js面向对象
    viPlugin安装破解
    Ubuntu12.04 使用中遇到的问题
    关于sizeof
  • 原文地址:https://www.cnblogs.com/star-py-blog/p/13740148.html
Copyright © 2011-2022 走看看