import pymongo


class SpiderPipeline(object):
    """Scrapy item pipeline that stores every scraped item in MongoDB.

    The connection URI and database name are read from the crawler
    settings ``MONGO_URI`` and ``MONGO_DB`` (see :meth:`from_crawler`).
    """

    # Name of the MongoDB collection that items are written to.
    collection_name = 'dunzi'

    def __init__(self, mongo_uri, mongo_db):
        """Remember the connection parameters; no connection is opened yet.

        Args:
            mongo_uri: MongoDB connection URI.
            mongo_db: Name of the database to write to.
        """
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db
        # Populated in open_spider(); initialised here so close_spider()
        # is safe even when open_spider() never ran or failed early.
        self.client = None
        self.db = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the pipeline from the crawler's settings."""
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DB'),
        )

    def open_spider(self, spider):
        """Create the MongoDB client and select the database.

        Called when the spider is opened.
        """
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        """Insert ``item`` into the collection and return it unchanged.

        ``Collection.insert()`` was deprecated in PyMongo 3.0 and removed
        in PyMongo 4.0, so ``insert_one()`` is used instead.
        """
        self.db[self.collection_name].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        """Close the MongoDB client, if one was opened.

        Called when the spider is closed.
        """
        if self.client is not None:
            self.client.close()
            self.client = None
            self.db = None