zoukankan      html  css  js  c++  java
  • 在 Scrapy 中使用 MongoDB 管道

    pipelines.py

    import json
    from scrapy.conf import settings
    from pymongo import MongoClient
    
    class SunPipeline(object):
        """Item pipeline that dumps every scraped item to ``dongguan.json``.

        Each item is written on its own line as a JSON object followed by a
        comma. The file as a whole is therefore a sequence of comma-terminated
        records, not a single valid JSON document.
        """

        def open_spider(self, spider):
            """Open the output file for writing (truncating any previous content).

            Args:
                spider: The spider being opened; not used.
            """
            # ensure_ascii=False (see process_item) emits raw non-ASCII
            # characters, so the encoding is pinned instead of being left to
            # the platform default.
            self.file = open('dongguan.json', 'w', encoding='utf-8')

        def process_item(self, item, spider):
            """Serialize ``item`` to JSON and append it to the output file.

            Args:
                item: The scraped item; anything ``dict()`` can convert.
                spider: The spider that produced the item; not used.

            Returns:
                The same ``item`` object, unchanged.
            """
            str_data = json.dumps(dict(item), ensure_ascii=False) + ',\n'
            self.file.write(str_data)
            return item

        def close_spider(self, spider):
            """Close the output file.

            Args:
                spider: The spider being closed; not used.
            """
            self.file.close()
    
    
    class MongoPipeline(object):
        """Item pipeline that stores every scraped item in a MongoDB collection.

        Connection parameters are read from the ``MONGO_HOST``, ``MONGO_PORT``,
        ``MONGO_DBNAME`` and ``MONGO_COLNAME`` settings.
        """

        def __init__(self, host=None, port=None, dbname=None, colname=None):
            """Connect to MongoDB and select the target database and collection.

            Args:
                host: MongoDB host; defaults to the ``MONGO_HOST`` setting.
                port: MongoDB port; defaults to the ``MONGO_PORT`` setting.
                dbname: Database name; defaults to the ``MONGO_DBNAME`` setting.
                colname: Collection name; defaults to the ``MONGO_COLNAME``
                    setting.
            """
            # Any parameter not supplied falls back to the module-level
            # settings object, so constructing the pipeline with no arguments
            # keeps working.
            if host is None:
                host = settings['MONGO_HOST']
            if port is None:
                port = settings['MONGO_PORT']
            if dbname is None:
                dbname = settings['MONGO_DBNAME']
            if colname is None:
                colname = settings['MONGO_COLNAME']

            # Connect to the server.
            self.client = MongoClient(host, port)
            # Select the database.
            self.db = self.client[dbname]
            # Select the collection.
            self.col = self.db[colname]

        @classmethod
        def from_crawler(cls, crawler):
            """Build the pipeline from the settings attached to ``crawler``.

            Args:
                crawler: Object exposing a ``settings`` mapping with a ``get``
                    method.

            Returns:
                A connected ``MongoPipeline`` instance.
            """
            cfg = crawler.settings
            return cls(
                host=cfg.get('MONGO_HOST'),
                port=cfg.get('MONGO_PORT'),
                dbname=cfg.get('MONGO_DBNAME'),
                colname=cfg.get('MONGO_COLNAME'),
            )

        def process_item(self, item, spider):
            """Insert a copy of ``item`` into the collection.

            Args:
                item: The scraped item; anything ``dict()`` can convert.
                spider: The spider that produced the item; not used.

            Returns:
                The same ``item`` object that was passed in.
            """
            # Insert a plain-dict copy so the item handed on to later
            # pipelines is a separate object from the stored document.
            data = dict(item)

            # insert_one replaces the deprecated Collection.insert.
            self.col.insert_one(data)

            return item

        def close_spider(self, spider):
            """Close the database connection.

            Args:
                spider: The spider being closed; not used.
            """
            # Explicit hook instead of __del__: finalizers run at an
            # unpredictable time and would fail if __init__ never set
            # self.client.
            self.client.close()
    

      

    settings.py

    # Project identity.
    BOT_NAME = "Sun"

    # Where spiders live, and where newly generated spiders are placed.
    NEWSPIDER_MODULE = "Sun.spiders"
    SPIDER_MODULES = [NEWSPIDER_MODULE]

    # MongoDB connection parameters read by MongoPipeline.
    MONGO_HOST = "127.0.0.1"
    MONGO_PORT = 27017
    MONGO_DBNAME = "Sun"
    MONGO_COLNAME = "dongguan"

    # Enabled item pipelines, each mapped to its order value.
    ITEM_PIPELINES = {
        "Sun.pipelines.SunPipeline": 300,
        "Sun.pipelines.MongoPipeline": 301,
    }
    

      

  • 相关阅读:
    jvm 学习
    架构师
    关于javaScript堆、栈和队列
    ES6-对象的扩展-属性名表达式
    JS 中 ++i 和i++的区别
    递归算法讲解
    Ztree 仿淘宝树结构完美实现 移动 右键增删改
    jquery zTree异步加载实例
    【zTree】简单实例与异步加载实例
    win10中用命令行打开服务
  • 原文地址:https://www.cnblogs.com/andy9468/p/8300029.html
Copyright © 2011-2022 走看看