zoukankan      html  css  js  c++  java
  • 爬取知名社区技术文章_pipelines_4

    对获取到的字段进行存储处理,并获取图片的下载路径

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    
    import pymysql
    import gevent
    import pymysql
    from gevent import monkey
    from scrapy.pipelines.images import ImagesPipeline
    import pymysql.cursors
    
    
    class JobboleImagerPipeline(ImagesPipeline):
        """
        Image pipeline that records where a downloaded image was stored.

        Once the image requests for an item have finished, the storage path
        reported for a successful download is copied into ``item['img_path']``.
        """

        def item_completed(self, results, item, info):
            """
            Copy the path of the downloaded image onto the item.

            :param results: iterable of ``(success, value)`` pairs. Per the
                Scrapy media-pipeline contract, ``value`` is a dict with a
                ``'path'`` key when ``success`` is true and a failure object
                otherwise.
            :param item: the scraped item; it is only modified when it has an
                ``'img_url'`` field.
            :param info: not used by this method.
            :return: the (possibly updated) item.
            """
            if 'img_url' in item:
                for ok, value in results:
                    # A failed download carries a failure object instead of a
                    # dict, so indexing it with ['path'] would raise. Skip it.
                    if not ok:
                        continue
                    # When several images succeed, the last path wins.
                    item['img_path'] = value['path']
            # 'img_path' is left unset when no download succeeded.
            return item
            
    
    # class SqlSave(object):
    #     """常规同步方式存入数据库"""
    #     def __init__(self):
    #         SQL_DBA = {
    #             'host': 'localhost',
    #             'db': 'jobole',
    #             'user': 'root',
    #             'password': 'password',
    #             'use_unicode': True,
    #             'charset': 'utf8'
    #         }
    #         self.conn = pymysql.connect(**SQL_DBA)
    #         self.cursor = self.conn.cursor()
    #
    #     def process_item(self, item, spider):
    #         sql = self.get_sql(item)
    #         print(sql)
    #         self.cursor.execute(sql)
    #         self.conn.commit()
    #
    #         return item
    #
    #     def get_sql(self, item):
    #         sql = """insert into article(cont_id, cont_url, title, publish_time, cont, img_url, img_path, like_num, collection_num, comment_num) value ('%s','%s','%s','%s','%s','%s','%s', %d, %d, %d)
    #         """ % (item['cont_id'], item['cont_url'],item['title'],item['publish_time'],item['cont'],item['img_url'][0],item['img_path'],item['link_num'],item['collection_num'],item['comment_num'],)
    #         return sql
    
    
    class SqlSave(object):
        """
        Item pipeline that inserts each scraped article into MySQL.

        The insert is run inside a gevent greenlet. A connection and a cursor
        are opened once in ``__init__`` and released in ``close_spider``.
        """

        def __init__(self):
            """Open the MySQL connection and cursor used for every insert."""
            # The connection parameters are hard-coded here. They could live in
            # settings.py instead and be read through a classmethod such as:
            #     @classmethod
            #     def from_settings(cls, settings):
            #         sql_dba = settings['SQL_DBA']
            #         return cls(sql_dba)
            # which would require __init__ to accept that extra argument.
            SQL_DBA = {
                'host': 'localhost',
                'db': 'jobole',
                'user': 'root',
                'password': 'password',
                'use_unicode': True,
                'charset': 'utf8'
            }
            self.conn = pymysql.connect(**SQL_DBA)
            self.cursor = self.conn.cursor()

        def process_item(self, item, spider):
            """
            Insert ``item`` into the ``article`` table and pass it on.

            :param item: scraped item providing the fields read in ``__go_sql``.
            :param spider: the running spider (unused).
            :return: the unchanged item, so later pipelines still receive it.
            """
            sql = self.__get_sql(item)
            # Run the insert in a greenlet. joinall waits for it, so the insert
            # has finished (or failed) before the item is returned.
            gevent.joinall([
                gevent.spawn(self.__go_sql, self.cursor, self.conn, sql, item),
            ])
            return item

        def close_spider(self, spider):
            """
            Release the cursor and the connection when the spider finishes.

            Errors raised while closing are printed rather than propagated,
            matching the best-effort error handling used for inserts.

            :param spider: the spider being closed (unused).
            """
            for resource in (self.cursor, self.conn):
                try:
                    resource.close()
                except Exception as e:
                    print(e)

        def __go_sql(self, cursor, conn, sql, item):
            """
            Execute the parameterized insert and commit it.

            Any error (missing item field, empty ``img_url`` list, database
            error) is printed and the transaction is rolled back; nothing is
            re-raised, so one bad item does not stop the crawl.

            :param cursor: DB-API cursor used to execute the statement.
            :param conn: connection that owns ``cursor``.
            :param sql: insert statement with ten ``%s`` placeholders.
            :param item: scraped item supplying the ten values.
            """
            try:
                # NOTE(review): the value read from item['link_num'] is stored
                # in the like_num column - confirm the item field name.
                cursor.execute(sql,
                               (item['cont_id'], item['cont_url'], item['title'], item['publish_time'],
                                item['cont'], item['img_url'][0], item['img_path'], item['link_num'],
                                item['collection_num'], item['comment_num']))
                conn.commit()
            except Exception as e:
                print(e)
                # Discard the failed statement so it cannot leak into the
                # transaction of the next item.
                try:
                    conn.rollback()
                except Exception as rollback_error:
                    print(rollback_error)

        def __get_sql(self, item):
            """
            Return the insert statement for the ``article`` table.

            :param item: unused; the statement is the same for every item.
            :return: SQL text with ten ``%s`` placeholders, one per column.
            """
            sql = """insert into
                      article(cont_id, cont_url, title, publish_time,
                      cont, img_url, img_path, like_num,
                      collection_num, comment_num)
                    value
                      (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"""
            return sql
    

      

  • 相关阅读:
    四则运算C语言程序
    雅思听听app
    What is 软件工程
    Python(Head First)学习笔记:六
    Python(Head First)学习笔记:五
    Python(Head First)学习笔记:四
    Python(Head First)学习笔记:三
    Python(Head First)学习笔记:二
    Python(Head First)学习笔记:一
    一名前端Web架构师的成长之路(转载)
  • 原文地址:https://www.cnblogs.com/2bjiujiu/p/7233321.html
Copyright © 2011-2022 走看看