zoukankan      html  css  js  c++  java
  • scrapy-scheduler

     1 # scheduler的作用: 用于控制Request对象的存储和获取,并提供了过滤重复Request的功能。
     2 
     class Scheduler(object):
         """Control how Request objects are stored and retrieved.

         Also provides filtering of duplicate Requests through the supplied
         dupefilter.
         """

         def __init__(self, dupefilter, jobdir=None, dqclass=None, mqclass=None,
                      logunser=False, stats=None, pqclass=None):
             """Record the collaborators and queue classes used by the scheduler.

             Args:
                 dupefilter: Duplicate-request filter; stored as ``self.df``.
                 jobdir: Passed to ``self._dqdir`` to derive ``self.dqdir``.
                 dqclass: Disk (serialized) queue class, used to resume an
                     interrupted crawl.
                 mqclass: In-memory queue class.
                 logunser: Flag stored as ``self.logunser``; ``from_crawler``
                     fills it from the ``LOG_UNSERIALIZABLE_REQUESTS`` setting.
                 stats: Stats object; ``from_crawler`` passes ``crawler.stats``.
                 pqclass: Priority queue class.
             """
             self.df = dupefilter
             # NOTE(review): `_dqdir` is not part of this excerpt; presumably it
             # maps the job directory to the disk-queue directory -- confirm.
             self.dqdir = self._dqdir(jobdir)
             self.pqclass = pqclass  # priority queue
             self.dqclass = dqclass  # disk (serialized) queue, for resuming a crawl
             self.mqclass = mqclass  # in-memory queue
             self.logunser = logunser
             self.stats = stats

         @classmethod
         def from_crawler(cls, crawler):
             """Build a scheduler from the settings and stats of ``crawler``.

             Each class is obtained by passing a settings value to
             ``load_object``; the dupefilter instance is created with the
             loaded class's ``from_settings``.
             """
             settings = crawler.settings

             # Duplicate filter: load the configured class, then instantiate it.
             dupefilter = load_object(settings['DUPEFILTER_CLASS']).from_settings(settings)

             # Queue classes are handed over as classes, not instances.
             priority_queue_cls = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
             disk_queue_cls = load_object(settings['SCHEDULER_DISK_QUEUE'])
             memory_queue_cls = load_object(settings['SCHEDULER_MEMORY_QUEUE'])

             # SCHEDULER_DEBUG is the fallback when LOG_UNSERIALIZABLE_REQUESTS
             # is not set.
             debug_fallback = settings.getbool('SCHEDULER_DEBUG')
             log_unserializable = settings.getbool('LOG_UNSERIALIZABLE_REQUESTS', debug_fallback)

             return cls(
                 dupefilter,
                 jobdir=job_dir(settings),
                 logunser=log_unserializable,
                 stats=crawler.stats,
                 pqclass=priority_queue_cls,
                 dqclass=disk_queue_cls,
                 mqclass=memory_queue_cls,
             )

     

  • 相关阅读:
    Linux系统root密码修改
    网络通信
    运维平台cmdb开发-day1
    questions information
    Django Rest Framework
    Django-CBV和跨域请求伪造
    Flask学习
    会议室预定终章
    python的可变数据类型和不可变类型
    模拟admin组件自己开发stark组件之搜索和批量操作
  • 原文地址:https://www.cnblogs.com/liyugeng/p/7890149.html
Copyright © 2011-2022 走看看