zoukankan html css js c++ java

Scrapy 请求传参

class MovieSpider(scrapy.Spider):
    """Spider that scrapes movie cards and completes them from detail pages.

    ``parse`` builds one ``MovieproItem`` per movie card on the listing page
    and passes it to ``parse_detail`` through ``Request.meta``;
    ``parse_detail`` fills in the fields read from the detail page and yields
    the finished item.
    """

    name = 'movie'
    allowed_domains = ['www.id97.com']
    start_urls = ['http://www.id97.com/']

    def parse(self, response):
        """Yield one detail-page request per movie card in *response*.

        Each request carries the partially filled item in ``meta['item']``.
        Cards without a detail link are logged and skipped so that a single
        malformed card does not abort processing of the rest of the page.
        """
        for card in response.xpath('//div[@class="col-xs-1-5 movie-item"]'):
            item = MovieproItem()
            item['name'] = card.xpath('.//h1/a/text()').extract_first()
            item['score'] = card.xpath('.//h1/em/text()').extract_first()
            # string(.) concatenates the text of the current node (".") and
            # all of its descendants into a single value.
            item['kind'] = (
                card.xpath('.//div[@class="otherinfo"]')
                .xpath('string(.)')
                .extract_first()
            )

            detail_href = card.xpath('./div/a/@href').extract_first()
            if not detail_href:
                # scrapy.Request rejects a missing URL; raising here would
                # stop the generator and drop every remaining card.
                self.logger.warning(
                    'Skipping movie %r on %s: no detail link found',
                    item['name'], response.url,
                )
                continue

            # urljoin leaves absolute URLs untouched and resolves relative
            # ones against the page URL (Request requires a scheme).
            item['detail_url'] = response.urljoin(detail_href)

            # Fetch the detail page and hand the item to the callback via
            # the request's meta dict.
            yield scrapy.Request(
                url=item['detail_url'],
                callback=self.parse_detail,
                meta={'item': item},
            )

    def parse_detail(self, response):
        """Fill in the detail-page fields of the item and yield it.

        The item is the one that ``parse`` attached to the request as
        ``meta['item']``. Fields whose XPath matches nothing are set to
        ``None`` (the ``extract_first`` default).
        """
        # Retrieve the item that travelled with the request.
        item = response.meta['item']
        # NOTE(review): this path selects an <a> that is a direct child of
        # <tr>; confirm against the page markup, since links normally sit
        # inside a <td>.
        item['actor'] = response.xpath('//div[@class="row"]//table/tr[1]/a/text()').extract_first()
        item['time'] = response.xpath('//div[@class="row"]//table/tr[7]/td[2]/text()').extract_first()
        item['long'] = response.xpath('//div[@class="row"]//table/tr[8]/td[2]/text()').extract_first()
        # Yielding the item submits it to the item pipeline.
        yield item

查看全文

相关阅读:
echart图表--雷达图表的动态数据max
访问github个人博客时*.github.io 拒绝了我们的连接请求，错误码403
js数组对象去重
 关于vuex的demo
原生js实现正方体旋转
 实现分页加载，加载更多(按钮类型），滚动加载的方式
 this指向
 记录三段式布局
 vue 动态路由和路由模式
 vue cli 配置反向代理

原文地址：https://www.cnblogs.com/ls1997/p/10874556.html