zoukankan      html  css  js  c++  java
  • 在 Scrapy 中使用 MongoDB 管道

    pipelines.py

    import json
    from scrapy.conf import settings
    from pymongo import MongoClient
    
    class SunPipeline(object):
        """Item pipeline that appends every scraped item to ``dongguan.json``.

        Each item is written as one JSON object followed by a comma and a
        newline, so the file is a sequence of comma-terminated JSON lines
        rather than a single valid JSON document.
        """

        def open_spider(self, spider):
            """Open the output file for writing (Scrapy spider-opened hook).

            The file is opened explicitly as UTF-8 because ``process_item``
            writes non-ASCII characters unescaped (``ensure_ascii=False``);
            relying on the platform default encoding can raise
            ``UnicodeEncodeError``.
            """
            self.file = open('dongguan.json', 'w', encoding='utf-8')

        def process_item(self, item, spider):
            """Write ``item`` as one JSON line and return it unchanged.

            Returning the item lets later pipelines keep processing it.
            """
            # The separator is a comma plus a newline escape; it must stay on
            # a single source line to be a valid string literal.
            str_data = json.dumps(dict(item), ensure_ascii=False) + ',\n'
            self.file.write(str_data)
            return item

        def close_spider(self, spider):
            """Close the output file (Scrapy spider-closed hook)."""
            self.file.close()
    
    
    class MongoPipeline(object):
        """Item pipeline that stores every scraped item in a MongoDB collection.

        Connection parameters are read from the module-level ``settings``
        object under the keys ``MONGO_HOST``, ``MONGO_PORT``,
        ``MONGO_DBNAME`` and ``MONGO_COLNAME``.
        """

        def __init__(self):
            # Read the database parameters from the project settings.
            host = settings['MONGO_HOST']
            port = settings['MONGO_PORT']
            dbname = settings['MONGO_DBNAME']
            colname = settings['MONGO_COLNAME']

            # Connect to the server, then select the database and collection.
            self.client = MongoClient(host, port)
            self.db = self.client[dbname]
            self.col = self.db[colname]

        def process_item(self, item, spider):
            """Insert a dict copy of ``item`` into the collection; return ``item``.

            A copy is inserted so that the ``_id`` key PyMongo adds to the
            inserted document does not leak into the item passed on to later
            pipelines.
            """
            data = dict(item)

            # ``insert_one`` replaces ``Collection.insert``, which was
            # deprecated in PyMongo 3 and removed in PyMongo 4.
            self.col.insert_one(data)

            return item

        def close_spider(self, spider):
            """Close the database connection (Scrapy spider-closed hook).

            Used instead of ``__del__`` so the connection is released at a
            deterministic point, and so that a failed ``__init__`` (where
            ``self.client`` was never assigned) does not raise during garbage
            collection.
            """
            self.client.close()
    

      

    settings.py

    # Project identity and the packages in which spiders live / are created.
    BOT_NAME = "Sun"

    SPIDER_MODULES = ["Sun.spiders"]
    NEWSPIDER_MODULE = "Sun.spiders"

    # MongoDB connection parameters (host, port, database and collection names).
    MONGO_HOST = "127.0.0.1"
    MONGO_PORT = 27017
    MONGO_DBNAME = "Sun"
    MONGO_COLNAME = "dongguan"

    # Enabled item pipelines, keyed by import path; the integer is the
    # pipeline's order value (Scrapy runs lower values first).
    ITEM_PIPELINES = {
        "Sun.pipelines.SunPipeline": 300,
        "Sun.pipelines.MongoPipeline": 301,
    }
    

      

  • 相关阅读:
    HTML5 JSDOM
    svn 基本操作
    Flex布局
    git上传布置代码 git优势
    jsonp, json区别
    require.js 模块化简单理解
    @vue/cli 3 安装搭建及 webpack 配置
    npm 常用命令 使用命令删除 node_modules 包
    package.json字段分析
    rem适配方案
  • 原文地址:https://www.cnblogs.com/andy9468/p/8300029.html
Copyright © 2011-2022 走看看