任务1:记住如何存储到Mysql、mongoDB数据库
'''
存储到Mysql
'''
import pymysql.cursors
class QuotePipeline(object):
def __init__(self):
self.connect = pymysql.connect(
host='localhost',
user='root',
password='',
database='quotes',
charset='utf8',
)
self.cursor = self.connect.cursor()
def process_item(self, item, spider):
item = dict(item)
table = 'quote'
keys = ','.join(item.keys())
values = ','.join(['%s'] * len(item))
sql = 'insert into {table}({keys}) values({values})'.format(table=table, keys=keys, values=values)
try:
if self.cursor.execute(sql, tuple(item.values())):
self.connect.commit()
print("Successful!")
except:
print("Failed!")
self.connect.rollback()
return item
def close_spider(self, spider):
self.cursor.close()
self.connect.close()
'''
存储到mongoDB
'''
import pymongo
class MongoPipeline(object):
# 表名字
collection = 'domo'
def __init__(self, mongo_uri, mongo_db):
self.mongo_uri = mongo_uri
self.mongo_db = mongo_db
@classmethod
# cls作为一个参数表示类本身
def from_crawler(cls, crawler):
return cls(
mongo_uri=crawler.settings.get('MONGO_URI'),
mongo_db=crawler.settings.get('MONGO_DB'),
)
def open_spider(self, spider):
self.client = pymongo.MongoClient(self.mongo_uri)
self.db = self.client[self.mongo_db]
def process_item(self, item, spider):
# 插入到mongo数据库
self.db[self.collection].insert(dict(item))
return item
def close_spider(self, spider):
self.client.close()