zoukankan      html  css  js  c++  java
  • pyspider-崔庆才猫途鹰

    #!/usr/bin/env python
    # -*- encoding: utf-8 -*-
    # Created on 2020-04-07 08:14:57
    # Project: tripadvisor

    from pyspider.libs.base_handler import *

    import pymongo
    class Handler(BaseHandler):
        """pyspider handler that crawls a TripAdvisor page and stores results in MongoDB.

        Flow: ``on_start`` seeds the crawl, ``index_page`` schedules detail
        pages, ``detail_page`` extracts the fields, and ``on_result`` hands each
        non-empty result to ``save_to_mongo``.
        """

        crawl_config = {
        }

        # MongoDB connection shared by the handler; documents are written to the
        # 'trip' database.
        client = pymongo.MongoClient('localhost')
        db = client['trip']

        @every(minutes=24 * 60)
        def on_start(self):
            """Seed the crawl with the start URL (decorated to run every 24 hours)."""
            # Put the start URL in the crawl() call below.
            self.crawl(
                'https://www.tripadvisor.cn/Attraction_Review-g187147-d188150-Reviews-Musee_d_Orsay-Paris_Ile_de_France.html',
                callback=self.index_page,
                validate_cert=False,
            )

        @config(age=10 * 24 * 60 * 60)
        def index_page(self, response):
            """Schedule a detail-page crawl for each matched element that has an href."""
            # NOTE(review): the 'html' selector matches the document root, which
            # normally carries no href, so this loop likely schedules nothing.
            # A link selector (e.g. 'a[href^="http"]') was probably intended --
            # confirm against the target page before changing it.
            for each in response.doc('html').items():
                href = each.attr.href
                if not href:
                    # Nothing to follow for elements without a link target.
                    continue
                self.crawl(href, callback=self.detail_page, validate_cert=False)

        @config(priority=2)
        def detail_page(self, response):
            """Extract name, review count, address and title from a detail page.

            Returns:
                dict with the keys ``url``, ``name``, ``num``, ``address`` and
                ``title``; the text fields are whatever the CSS selectors match.
            """
            name = response.doc('.shelf_row_4 .name > a').text()
            num = response.doc('.shelf_row_4 .review_count').text()
            address = response.doc('.adjust > .title').text()
            return {
                "url": response.url,
                "name": name,
                "num": num,
                "address": address,
                "title": response.doc('title').text(),
            }

        def on_result(self, result):
            """Persist every non-empty result to MongoDB."""
            # NOTE(review): this override does not call the base-class on_result;
            # confirm that bypassing pyspider's own result handling is intended.
            if result:
                self.save_to_mongo(result)

        def save_to_mongo(self, result):
            """Insert ``result`` into the 'paris' collection and log the saved document."""
            # insert_one replaces Collection.insert, which was deprecated in
            # PyMongo 3 and removed in PyMongo 4.
            outcome = self.db['paris'].insert_one(result)
            if outcome.acknowledged:
                print("save to mongo", result)

    结果:

  • 相关阅读:
    系统手动激活
    scroll-into-view 简单demo
    css3 box-shadow 单边阴影
    js前端常用的方法
    css 箭头
    当前页面返回前一个页面时,执行前一个页面的方法
    小程序自定义组件
    vuejs2-生命周期
    Vuejs2-mock数据
    vuejs2+axios设置
  • 原文地址:https://www.cnblogs.com/Knight66666/p/12659485.html
Copyright © 2011-2022 走看看