zoukankan      html  css  js  c++  java
  • Scrapy Pipeline 使用 Twisted 异步实现 MySQL 数据插入

    from twisted.enterprise import adbapi
    class MySQLAsyncPipeline:
        """Scrapy item pipeline that writes items to MySQL through Twisted's adbapi.

        Connection parameters are read from the spider's settings
        (``MYSQL_DB_NAME``, ``MYSQL_HOST``, ``MYSQL_PORT``, ``MYSQL_USER``,
        ``MYSQL_PASSWORD``); each one falls back to the default shown in
        ``open_spider`` when the setting is absent.
        """

        def open_spider(self, spider):
            """Create the adbapi connection pool when the spider is opened."""
            settings = spider.settings
            self.dbpool = adbapi.ConnectionPool(
                'MySQLdb',
                host=settings.get('MYSQL_HOST', 'localhost'),
                db=settings.get('MYSQL_DB_NAME', 'scrapy_default'),
                user=settings.get('MYSQL_USER', 'root'),
                passwd=settings.get('MYSQL_PASSWORD', 'root'),
                # getint() coerces a port supplied as a string (for example
                # from the command line) into the integer the driver expects.
                port=settings.getint('MYSQL_PORT', 3306),
                charset='utf8',
            )

        def close_spider(self, spider):
            """Close the connection pool when the spider is closed."""
            self.dbpool.close()

        def process_item(self, item, spider):
            """Schedule the insert for ``item`` and hand the item on unchanged.

            The insert is queued on the pool and this method returns right away
            without waiting for it. An errback is attached so that a failed
            insert is logged instead of being dropped silently.
            """
            deferred = self.dbpool.runInteraction(self.insert_db, item)
            deferred.addErrback(self._handle_error, item, spider)

            return item

        def _handle_error(self, failure, item, spider):
            """Log a failed insert together with the item that caused it."""
            spider.logger.error(
                'MySQL insert failed: %s (item: %r)',
                failure.getErrorMessage(),
                item,
            )

        def insert_db(self, tx, item):
            """Insert one item as a row of the ``books`` table.

            ``tx`` is the transaction/cursor object passed in by
            ``runInteraction``. The values are sent as query parameters, never
            interpolated into the SQL text.
            """
            values = (
                item['f1'],
                item['f2'],
            )

            # NOTE(review): no column list is given, so this relies on `books`
            # having exactly two columns in (f1, f2) order - confirm the schema.
            sql = 'INSERT INTO books VALUES (%s,%s)'
            tx.execute(sql, values)
    

    # Redis 入库

    import redis
    from scrapy import Item
    
    
    class RedisPipeline:
        """Scrapy item pipeline that stores each item as a Redis hash.

        Connection parameters are read from the spider's settings
        (``REDIS_HOST``, ``REDIS_PORT``, ``REDIS_DB_INDEX``). Items are written
        under the keys ``book:1``, ``book:2``, ... in the order they arrive.
        """

        def open_spider(self, spider):
            """Connect to Redis and reset the item counter."""
            settings = spider.settings
            db_host = settings.get('REDIS_HOST', 'localhost')
            # getint() coerces values supplied as strings (for example from the
            # command line) into integers.
            db_port = settings.getint('REDIS_PORT', 6379)
            db_index = settings.getint('REDIS_DB_INDEX', 0)

            self.db_conn = redis.StrictRedis(host=db_host, port=db_port, db=db_index)
            # NOTE(review): the counter restarts at 0 for every spider run, so a
            # later run writes to the same book:<n> keys as an earlier one.
            self.item_i = 0

        def close_spider(self, spider):
            """Disconnect the client's connection pool."""
            self.db_conn.connection_pool.disconnect()

        def process_item(self, item, spider):
            """Store ``item`` in Redis and hand it on unchanged."""
            self.insert_db(item)
            return item

        def insert_db(self, item):
            """Write ``item`` to the next ``book:<n>`` hash.

            Anything that is not already a plain dict (such as a ``scrapy.Item``)
            is converted with ``dict()`` first.
            """
            if not isinstance(item, dict):
                item = dict(item)

            self.item_i += 1
            # hset(mapping=...) replaces the deprecated hmset(); it requires
            # redis-py >= 3.5.
            self.db_conn.hset('book:%s' % self.item_i, mapping=item)
    

      

  • 相关阅读:
    linux 安装jdk
    linux 安装 Redis
    jvisualvm监控远程jvm的两种连接方式
    list转map(JDK8-Lambda表达式)
    循环删除list中的某一元素的三种方式
    SpringBoot-@ControllerAdvice 拦截异常并统一处理
    Spring-@ControllerAdvice 拦截异常并统一处理
    Tomcat系列
    thinkPHP5.1模型User设计
    uniapp后台api设计(微信user表)
  • 原文地址:https://www.cnblogs.com/mahailuo/p/11224107.html
Copyright © 2011-2022 走看看