zoukankan      html  css  js  c++  java
  • pyspider-崔庆才猫途鹰

    #!/usr/bin/env python
    # -*- encoding: utf-8 -*-
    # Created on 2020-04-07 08:14:57
    # Project: tripadvisor

    from pyspider.libs.base_handler import *

    import pymongo
    class Handler(BaseHandler):
        """pyspider handler for the ``tripadvisor`` project.

        Starts from a single TripAdvisor attraction page, follows the absolute
        links found on it, extracts a few fields from every followed page and
        stores each non-empty result in the ``paris`` collection of the
        ``trip`` MongoDB database.
        """

        crawl_config = {
        }

        # The client is created once, when the class body is executed, and is
        # shared by every callback through ``self.db``.
        client = pymongo.MongoClient('localhost')
        db = client['trip']

        @every(minutes=24 * 60)
        def on_start(self):
            """Schedule the seed URL; its response is handled by ``index_page``."""
            # Put the start URL in the crawl() call below.
            self.crawl('https://www.tripadvisor.cn/Attraction_Review-g187147-d188150-Reviews-Musee_d_Orsay-Paris_Ile_de_France.html', callback=self.index_page, validate_cert=False)

        @config(age=10 * 24 * 60 * 60)
        def index_page(self, response):
            """Schedule every absolute link on the page for ``detail_page``.

            The selector used to be ``'html'``; the ``<html>`` element carries
            no ``href`` attribute, so ``each.attr.href`` was always ``None``
            and no detail page was ever crawled. Anchors with an absolute
            ``href`` are selected instead.
            """
            for each in response.doc('a[href^="http"]').items():
                self.crawl(each.attr.href, callback=self.detail_page, validate_cert=False)

        @config(priority=2)
        def detail_page(self, response):
            """Extract the fields of interest from a followed page.

            Returns a dict with the page URL, the text matched by the name,
            review-count and address selectors, and the page ``<title>`` text.
            """
            name = response.doc('.shelf_row_4 .name > a').text()
            num = response.doc('.shelf_row_4 .review_count').text()
            address = response.doc('.adjust > .title').text()
            return {
                "url": response.url,
                "name": name,
                "num": num,
                "address": address,
                "title": response.doc('title').text(),
            }

        def on_result(self, result):
            """Persist a callback result, ignoring empty ones."""
            # NOTE(review): this override does not call the base class
            # on_result -- confirm that skipping its handling is intended.
            if result:
                self.save_to_mongo(result)

        def save_to_mongo(self, result):
            """Insert ``result`` into the ``paris`` collection and log it.

            ``insert_one`` replaces ``Collection.insert``, which is deprecated
            in PyMongo 3 and removed in PyMongo 4. It raises on failure, so
            the message below is printed only after a successful insert.
            """
            self.db['paris'].insert_one(result)
            print("save to mongo", result)

    结果:

  • 相关阅读:
    找到了2年前的一个微博小号
    Float Equal Problem
    有用的护肤品贴
    最近状态总结
    [Coursera]Machine Learning
    KMP算法(转载)
    [Leetcode] Median of Two Sorted Arrays
    [Algorithms(Princeton)] Week1
    [Algorithms(Princeton)] Week1
    [Leetcode] Binary Tree Maximum Path Sum
  • 原文地址:https://www.cnblogs.com/Knight66666/p/12659485.html
Copyright © 2011-2022 走看看