目的:把提取到的内容存入 Item,以结构化(格式化)的形式输出;Item 的用法类似于字典
编写 items.py 文件(定义 Item 类)
class JobBoleArticleItem(scrapy.Item):
    """Item declaring the fields that are filled in for one article."""

    # A Field accepts a value of any data type.
    title = scrapy.Field()
    date = scrapy.Field()
    praise_num = scrapy.Field()
    content = scrapy.Field()
爬虫文件中引入item类
from Redbacktest.items import JobBoleArticleItem #按需更改
实例化
def parse_detail(self, response):
    """Populate a JobBoleArticleItem and yield it."""
    # Instantiate the item class imported from the project's items module.
    article_item = JobBoleArticleItem()

    # Assign the values to the item fields.
    # NOTE(review): title, date, praise_num and content are not defined in
    # this snippet; presumably they are extracted from `response` in code
    # that was omitted here -- confirm before running.
    article_item["title"] = title
    article_item["date"] = date
    article_item["praise_num"] = praise_num
    article_item["content"] = content

    # Yielding the item passes it on to the item pipeline.
    yield article_item
修改 settings.py 文件,配置 ITEM_PIPELINES 使 pipeline 生效
# Register the project's pipeline class so that yielded items are sent to it.
# The integer is the pipeline's order value (Scrapy runs lower values first).
ITEM_PIPELINES = {
    'Redbacktest.pipelines.RedbacktestPipeline': 300,
}
pipeline调试