pipelines.py
class Jiandanline(FilesPipeline):
    """Files pipeline that downloads every URL listed in ``item['file_urls']``.

    Download locations are placed under an ``images`` directory inside
    ``settings.FILES_STORE``.
    """

    def get_media_requests(self, item, info):
        """Yield one ``scrapy.Request`` per URL in ``item['file_urls']``."""
        for file_url in item['file_urls']:
            yield scrapy.Request(file_url)

    def item_completed(self, results, item, info):
        """Store the paths of the successful downloads on the item.

        Args:
            results: iterable of ``(ok, data)`` pairs; for successful
                entries ``data['path']`` is read.
            item: the item being processed; ``item['file_paths']`` is set.
            info: unused here.

        Returns:
            The same item, with ``file_paths`` filled in.

        Raises:
            DropItem: if no download for this item succeeded.
        """
        file_paths = [x['path'] for ok, x in results if ok]
        if not file_paths:
            raise DropItem("Item contains no files")
        item['file_paths'] = file_paths
        return item

    def file_path(self, request, response=None, info=None):
        """Return the storage path for the file fetched by ``request``.

        The path computed by the parent class is nested under
        ``<FILES_STORE>/images``; that directory is created on demand.
        """
        # Forward the caller's response/info instead of discarding them.
        path = super().file_path(request, response=response, info=info)
        file_store = os.path.join(settings.FILES_STORE, 'images')
        # makedirs(exist_ok=True) also creates a missing FILES_STORE and does
        # not fail when another call has just created the directory.
        os.makedirs(file_store, exist_ok=True)
        # NOTE(review): the result already includes FILES_STORE; Scrapy's file
        # store presumably joins the returned path onto FILES_STORE again --
        # confirm the final on-disk location is the intended one.
        return os.path.join(file_store, path)
settings.py
# Enable the file-download pipeline; the integer is its order value.
ITEM_PIPELINES = {
    'jiandandan.pipelines.Jiandanline': 2,
}

# Base directory for downloaded files.
# NOTE(review): there is no path separator after the drive letter, so on
# Windows this is relative to the current directory of drive F: -- confirm
# whether an absolute path was intended.
FILES_STORE = r'F:jiandan'
这算是我第一个运行成功的 Scrapy 爬虫吧,特别开心!