zoukankan      html  css  js  c++  java
  • pymongo

    config.py (创建)

    # Connection settings that spider.py pulls in via `from config import *`.
    # NOTE(review): 'prodect' looks like a typo for 'product'; it is kept
    # because renaming it would point the code at a different collection.
    MONGO_URL = 'localhost'      # MongoDB host
    MONGO_DB = 'taobao'          # database name
    MONGO_TABLE = 'prodect'      # collection name

    spider.py

    import pymongo
    from pymongo import MongoClient
    from config import *

    1、连接

    # Connect using a MongoDB URI that carries the credentials, host and port.
    client = MongoClient(host='mongodb://root:123@localhost:27017/')
    # Alternatives:
    # client = MongoClient('localhost', 27017)
    # client = MongoClient(MONGO_URL)

    2、use 数据库

    # Select the database by name; equivalent to attribute access: client.db2
    db = client['db2']
    # db = client[MONGO_DB]  # note: square brackets [ ], not parentheses

    3、查看库下所有的集合

    # Database.collection_names() was deprecated in PyMongo 3.7 and removed in
    # 4.0; list_collection_names() is its replacement. The name filter
    # reproduces include_system_collections=False by skipping "system.*".
    print(db.list_collection_names(filter={'name': {'$regex': r'^(?!system\.)'}}))

    4、创建集合

    # Handle to the 'userinfo' collection; equivalent to db.userinfo
    table_user = db['userinfo']

    5、增加数据

    import datetime

    # Two sample user documents with explicit integer "_id" values, a
    # timestamp taken at definition time, a list field and a nested document.
    user0 = dict(
        _id=1,
        name="egon",
        birth=datetime.datetime.now(),
        age=10,
        hobbies=['music', 'read', 'dancing'],
        addr=dict(country='China', city='BJ'),
    )

    user1 = dict(
        _id=2,
        name="alex",
        birth=datetime.datetime.now(),
        age=10,
        hobbies=['music', 'read', 'dancing'],
        addr=dict(country='China', city='weifang'),
    )
    View Code
    def save_to_mongo(result):
        """Insert one record into the ``MONGO_TABLE`` collection of ``db``.

        Args:
            result: the document (a dict) to store.

        Returns:
            None. Success or failure is reported on stdout; errors are not
            re-raised, so the caller keeps running after a failed insert.
        """
        try:
            # Collection.insert() is deprecated and removed in PyMongo 4;
            # insert_one() is the supported call for a single document.
            if db[MONGO_TABLE].insert_one(result):
                print("保存到数据库成功", result)
        except Exception as exc:
            # Best-effort storage: report the cause instead of hiding it.
            print("存储失败", exc)
        
    # res=table_user.insert_many([user0,user1]).inserted_ids
    # print(res)
    # print(table_user.count())

    6、查

    # Fetch a single document, then iterate over every document in the
    # collection. The collection handle defined in this script is
    # `table_user`; `table_user_collection` was never assigned and would
    # raise NameError.
    print(table_user.find_one())
    for item in table_user.find():
        print(item)
    # print(table_user.find_one({"_id":{"$gte":1},"name":'egon'}))

    7、改

    8、删

    # Delete the document whose primary key is 2. The sample documents are
    # keyed by "_id" (an "id" filter matches nothing), the collection handle
    # is `table_user`, and Collection.remove() is removed in PyMongo 4 in
    # favour of delete_one() / delete_many().
    table_user.delete_one({'_id': 2})

    9、scrapy框架pipeline

    class MongoPipeline(object):
        """Scrapy item pipeline that upserts each item into MongoDB.

        The connection URI and database name are read from the
        ``MONGO_URI`` and ``MONGO_DATABASE`` crawler settings. Each item is
        written to the collection named by its ``table_name`` attribute.
        """

        def __init__(self, mongo_uri, mongo_db):
            """Store the connection URI and database name for later use."""
            self.mongo_uri = mongo_uri
            self.mongo_db = mongo_db

        @classmethod
        def from_crawler(cls, crawler):
            """Build the pipeline from the crawler's settings."""
            return cls(
                mongo_uri=crawler.settings.get('MONGO_URI'),
                mongo_db=crawler.settings.get('MONGO_DATABASE')
            )

        def open_spider(self, spider):
            """Open the MongoDB client and select the configured database."""
            self.client = pymongo.MongoClient(self.mongo_uri)
            self.db = self.client[self.mongo_db]

        def close_spider(self, spider):
            """Close the MongoDB client."""
            self.client.close()

        def process_item(self, item, spider):
            """Upsert ``item`` keyed on its ``id`` field and return it.

            Args:
                item: mapping-like item exposing ``table_name`` and ``get``.
                spider: the spider that produced the item (unused).

            Returns:
                The same ``item``, so later pipelines can keep processing it.
            """
            # Collection.update() is deprecated and removed in PyMongo 4.
            # update_one(..., upsert=True) has the same effect as the old
            # update(filter, doc, True): modify one match or insert it.
            self.db[item.table_name].update_one(
                {'id': item.get('id')},
                {'$set': dict(item)},
                upsert=True,
            )
            return item
    scrapy Pipeline
    # Settings read by MongoPipeline.from_crawler via crawler.settings.get().
    MONGO_URI = 'localhost'       # passed to pymongo.MongoClient
    MONGO_DATABASE = 'weibo'      # database the pipeline writes to
    from pymongo import MongoClient
    
    class CustomPipeline(object):
        """Scrapy item pipeline that stores complete items in one collection.

        Connection details (HOST, PORT, USER, PWD) and the target DB / TABLE
        are read from the crawler settings.
        """

        def __init__(self,host,port,user,pwd,db,table):
            """Remember the connection parameters and target collection."""
            self.host=host
            self.port=port
            self.user=user
            self.pwd=pwd
            self.db=db
            self.table=table

        @classmethod
        def from_crawler(cls, crawler):
            """
            Scrapy checks (via getattr) whether from_crawler is defined and,
            if so, calls it to instantiate the pipeline.
            """
            settings = crawler.settings
            return cls(
                settings.get('HOST'),
                settings.get('PORT'),
                settings.get('USER'),
                settings.get('PWD'),
                settings.get('DB'),
                settings.get('TABLE'),
            )

        def open_spider(self,spider):
            """
            Runs once when the spider starts: open the MongoDB client.
            """
            # Function-scope import keeps this block self-contained.
            from urllib.parse import quote_plus

            # The user name and password must be percent-escaped, otherwise
            # characters such as '@', ':' or '/' corrupt the connection URI.
            self.client = MongoClient('mongodb://%s:%s@%s:%s' % (
                quote_plus(str(self.user)),
                quote_plus(str(self.pwd)),
                self.host,
                self.port,
            ))

        def close_spider(self,spider):
            """
            Runs once when the spider closes: close the MongoDB client.
            """
            self.client.close()

        def process_item(self, item, spider):
            """Persist ``item`` when every field is truthy, then return it.

            Args:
                item: mapping-like scraped item.
                spider: the spider that produced the item (unused).

            Returns:
                The same ``item``. Scrapy passes the return value on to the
                next pipeline, so returning None would break the chain.
            """
            d=dict(item)
            if all(d.values()):
                collection = self.client[self.db][self.table]
                # Collection.save() is removed in PyMongo 4. It replaced the
                # document with the same _id (upserting) or inserted a new
                # one; the two calls below spell that out.
                if '_id' in d:
                    collection.replace_one({'_id': d['_id']}, d, upsert=True)
                else:
                    collection.insert_one(d)
            return item
    pipeline_mongo
    # Settings read by CustomPipeline.from_crawler via crawler.settings.get().
    HOST = "127.0.0.1"     # MongoDB server address
    PORT = 27017           # MongoDB server port
    USER = "root"          # user name placed in the connection URI
    PWD = "123"            # password placed in the connection URI
    DB = "amazon"          # database name
    TABLE = "goods"        # collection name
    View Code









  • 相关阅读:
    正则表达式的与或非
    正则中需要转义的符号
    HTTP 错误状态码讯息
    HTTP协议详解
    TCP/IP、Http、Socket的区别
    CSS样式中标点符号的作用
    HighCharts: 设置时间图x轴的宽度
    可以尝试用Google Font API来摆脱网页字体的单调 仅仅抛砖引玉
    Oracle 权限(grant、revoke)
    网站加上图标
  • 原文地址:https://www.cnblogs.com/nick477931661/p/8953393.html
Copyright © 2011-2022 走看看