这里就不做详细讲解了,毕竟不是一两句话能说清楚的,所以我把代码和注释放到了这里,谢谢!
import json

import pymysql
from redis import Redis


# Write items to MySQL
class WangyiMysql(object):
    """Scrapy item pipeline that inserts every item into the MySQL ``news`` table.

    The connection is opened once in ``open_spider`` and released in
    ``close_spider``; a single cursor is created lazily and reused for all
    items.
    """

    conn = None
    cursor = None

    def open_spider(self, spider):
        """Open the MySQL connection when the spider starts."""
        self.conn = pymysql.Connection(
            host='127.0.0.1',
            port=3306,
            user='root',
            password='',
            db='spider',
            charset='utf8',
        )
        print(self.conn)

    def process_item(self, item, spider):
        """Insert the item's ``title`` and ``content`` as one row.

        A failed insert is printed and rolled back instead of aborting the
        crawl. The item is always returned so later pipelines receive it.

        Raises:
            KeyError: if the item has no ``title`` or ``content`` field.
        """
        # Read the fields outside the ``try`` so a missing key still surfaces
        # as an error instead of being swallowed with the database errors.
        params = (item['title'], item['content'])
        # Let the driver quote the values: scraped text may contain quotes,
        # which would break (or inject into) a string-formatted statement.
        sql = 'insert into news values (%s, %s)'
        if self.cursor is None:
            # Reuse one cursor instead of leaking a new one per item.
            self.cursor = self.conn.cursor()
        try:
            self.cursor.execute(sql, params)
            self.conn.commit()
        except Exception as e:
            print(e)
            self.conn.rollback()
        return item

    def close_spider(self, spider):
        """Release the cursor and the connection when the spider finishes."""
        # Either attribute is still None when the spider produced no items
        # or the connection was never opened.
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()


# Write items to Redis
class WangyiRedis(object):
    """Scrapy item pipeline that pushes every item onto the Redis list ``news``."""

    conn = None

    def open_spider(self, spider):
        """Create the Redis client when the spider starts."""
        self.conn = Redis(host='127.0.0.1', port=6379)
        print(self.conn)

    def process_item(self, item, spider):
        """Push the item, serialized as JSON, onto the ``news`` list.

        Returns the item so that the next pipeline still receives it.
        """
        # redis-py 3.0+ only accepts bytes/str/int/float values, so the item
        # is serialized to a JSON string before it is pushed.
        payload = json.dumps(dict(item), ensure_ascii=False)
        self.conn.lpush('news', payload)
        return item


# Write items to a file
class ChoutiproPipeline(object):
    """Scrapy item pipeline that appends every item to ``chouti.txt``."""

    fp = None

    def open_spider(self, spider):
        """Open the output file; this hook runs only once, at spider start."""
        print('开始爬虫......')
        self.fp = open('chouti.txt', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Write one ``title:content`` record for the item submitted by the spider.

        Returns the item so it is handed to the next pipeline class.
        """
        title = item['title']
        content = item['content']
        # NOTE(review): records are separated by a single space, not a
        # newline; confirm this is the intended file layout.
        self.fp.write(title + ':' + content + ' ')
        return item

    def close_spider(self, spider):
        """Close the output file when the spider finishes."""
        # fp is still None if open_spider never ran or failed.
        if self.fp is not None:
            self.fp.close()
如果你要这三项同时执行的话,记得在 settings 里面配置一下这个:
# Pipelines enabled for the project. Each key is the dotted path of a
# pipeline class and each value is its order: lower numbers run first.
# NOTE(review): the paths below are the author's examples; they must name
# classes that actually exist in the project's pipelines module.
ITEM_PIPELINES = {
    'first_blod.pipelines.FirstBlodPipeline': 300,
    'first_blod.pipelines.MysqlPileLine': 301,
    # Remember to write the class name: list every pipeline that should run.
}