zoukankan      html  css  js  c++  java
  • scrapy 数据存储到mysql和mongodb

    pipelines.py

    将数据存储到mysql数据库中

    import pymysql
    class HuVPipeline(object):
        """Scrapy item pipeline that writes each item as one row of the MySQL table ``hux``."""

        def process_item(self, item, spider):
            """Insert one scraped item into ``hux`` and hand the item on.

            Args:
                item: Mapping that provides the keys ``title``, ``username``,
                    ``yijuhua`` and ``otherStyleTime``.
                spider: The spider that produced the item (not used here).

            Returns:
                The same ``item``, unchanged.

            Raises:
                KeyError: If one of the four keys is missing from ``item``.
            """
            # Read the fields first so a missing key fails before a
            # connection is opened.
            row = (
                item['title'],
                item['username'],
                item['yijuhua'],
                item['otherStyleTime'],
            )
            # Positional placeholders: the driver escapes the values itself.
            sql = """INSERT INTO hux VALUES (%s, %s, %s, %s)"""

            # NOTE: a new connection is opened for every item, as in the
            # original snippet.
            conn = pymysql.connect(host='localhost', user='root', password='******', port=3306, db='mysql')
            try:
                # The cursor context manager closes the cursor on exit.
                with conn.cursor() as cursor:
                    cursor.execute(sql, row)
                conn.commit()
            finally:
                # Close the connection even when execute/commit raised, so a
                # failing item does not leak a connection.
                conn.close()
            return item

    更简单便捷的方式

    import pymysql
    
    class YiqingPipeline(object):
        """Scrapy item pipeline that upserts each item into the MySQL table ``umetrip_risk``."""

        def process_item(self, item, spider):
            """Insert the item, or update the existing row on a duplicate key.

            Args:
                item: Mapping that provides the keys ``province``, ``city``,
                    ``area``, ``street``, ``risk``, ``updatetime`` and
                    ``parsetime``.
                spider: The spider that produced the item (not used here).

            Returns:
                The same ``item``, unchanged.
            """
            # Named placeholders are left unquoted and the values are passed
            # separately, so the driver escapes them. Formatting the values
            # into the SQL text by hand breaks (or injects SQL) as soon as a
            # scraped value contains a quote character.
            sql = (
                "insert into umetrip_risk "
                "(province,city,area,street,risk,updatetime,parsetime) "
                "values (%(province)s,%(city)s,%(area)s,%(street)s,"
                "%(risk)s,%(updatetime)s,%(parsetime)s) "
                "ON DUPLICATE KEY UPDATE street=%(street)s,risk=%(risk)s,"
                "updatetime=%(updatetime)s,parsetime=%(parsetime)s;"
            )
            # Copy into a plain dict: the driver only treats real dict
            # instances as named parameters.
            params = dict(item)

            conn = pymysql.connect(host='localhost', user='root', password='******', port=3306, db='db55')
            try:
                # The cursor context manager closes the cursor on exit.
                with conn.cursor() as cursor:
                    cursor.execute(sql, params)
                conn.commit()
            finally:
                # Close the connection even when execute/commit raised.
                conn.close()
            return item
    

      

    进入 D:\mongo\bin 目录,输入以下命令:

    mongod --dbpath D:\mongo\data\db

    将数据存放到mongodb数据库中

    无密码连接

    import pymongo
    class HuVPipeline(object):
        """Scrapy item pipeline that stores each item as a document in MongoDB."""

        def __init__(self):
            # No arguments: connect with the driver's default settings,
            # without credentials.
            self.client = pymongo.MongoClient()
            # Database handle; MongoDB creates the database on first write.
            self.db = self.client['huxiuv3']

        def process_item(self, item, spider):
            """Insert the item into the ``hu_1`` collection and hand it on.

            Args:
                item: The scraped item; it is copied into a plain dict
                    before being stored.
                spider: The spider that produced the item (not used here).

            Returns:
                The same ``item``, unchanged.
            """
            # insert_one replaces the deprecated Collection.insert, which
            # was removed in PyMongo 4.
            self.db['hu_1'].insert_one(dict(item))
            return item

        def close_spider(self, spider):
            """Close the MongoDB client when the spider finishes."""
            self.client.close()
    

      

    import pymysql
    class HuVPipeline(object):
        """Scrapy item pipeline that writes each item as one row of the MySQL table ``hux``."""

        def process_item(self, item, spider):
            """Insert one scraped item into ``hux`` and hand the item on.

            Args:
                item: Mapping that provides the keys ``title``, ``username``,
                    ``yijuhua`` and ``otherStyleTime``.
                spider: The spider that produced the item (not used here).

            Returns:
                The same ``item``, unchanged.

            Raises:
                KeyError: If one of the four keys is missing from ``item``.
            """
            # Read the fields first so a missing key fails before a
            # connection is opened.
            row = (
                item['title'],
                item['username'],
                item['yijuhua'],
                item['otherStyleTime'],
            )
            # Positional placeholders: the driver escapes the values itself.
            sql = """INSERT INTO hux VALUES (%s, %s, %s, %s)"""

            conn = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='mysql')
            try:
                # The cursor context manager closes the cursor on exit.
                with conn.cursor() as cursor:
                    cursor.execute(sql, row)
                conn.commit()
            finally:
                # Close the connection even when execute/commit raised, so a
                # failing item does not leak a connection.
                conn.close()
            return item
  • 相关阅读:
    数据结构
    java web
    C++
    SQL(结构化查询语言)
    网站协议
    python
    爬虫
    select 多选
    List 去除重复数据的五种方式
    oracle锁表SID查询
  • 原文地址:https://www.cnblogs.com/zxg-1997/p/13854568.html
Copyright © 2011-2022 走看看