zoukankan html css js c++ java

Scrapy Request 方法

# -*- coding: utf-8 -*-
import scrapy


class TestSpider(scrapy.Spider):
    """Crawl the yeves.cn home page and follow every article link.

    ``parse`` reads each article's title and link from the start page and
    schedules one request per article, carrying the title along in
    ``Request.meta``.  ``parse_detail`` then reads the creation time and the
    category from the article page and yields them together with the title.
    """

    name = 'test'
    allowed_domains = ['yeves.cn']
    start_urls = ['https://yeves.cn/']
    # Template for turning a root-relative path into an absolute URL.  Kept
    # for any code that still reads it; ``parse`` resolves links with
    # ``response.urljoin`` instead.
    base_domain = 'https://yeves.cn{}'

    def parse(self, response):
        """Yield one detail-page request per article found on the page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: A request for the article page, with the
            article title stored under ``meta["title"]``.
        """
        # Candidate containers holding the titles and links on the home page.
        articles = response.xpath('//*[@id="article"]//div')

        for article in articles:
            link = article.xpath('./div/article/div/header/h2/a')
            title = link.xpath('./text()').extract_first()
            href = link.xpath('./@href').extract_first()
            if title is None or href is None:
                # Not an article container (or an incomplete one): skip it.
                continue

            # urljoin copes with absolute, root-relative and relative hrefs,
            # whereas plain string formatting only works for root-relative
            # paths.
            yield scrapy.Request(
                response.urljoin(href),
                callback=self.parse_detail,
                meta={"title": title},  # hand the title to the detail callback
            )

    def parse_detail(self, respone):
        """Build the item for a single article page.

        Args:
            respone: The downloaded article page (the parameter name is
                kept as-is); ``respone.meta["title"]`` is the title
                captured by ``parse``.

        Yields:
            dict: ``title``, ``created_at`` and ``category``; a value is
            ``None`` when the corresponding node is not found.
        """
        title = respone.meta.get('title')
        self.logger.debug('Parsing detail page %s (title=%r)', respone.url, title)

        header = '/html/body/section/div/div/header/div'
        detail = {
            'title': title,
            'created_at': respone.xpath(
                header + '/span[1]/time/text()').extract_first(),
            'category': respone.xpath(
                header + '/span[2]/a/text()').extract_first(),
        }
        yield detail

查看全文

相关阅读:
[HNOI2002]营业额统计
 HDU 1374
HDU 3345
HDU 2089
Graham扫描法
 Codeforces 1144D Deduction Queries 并查集
 Codeforces 916E Jamie and Tree 线段树
 Codeforces 1167F Scalar Queries 树状数组
 Codeforces 1167E Range Deleting
Codeforces 749E Inversions After Shuffle 树状数组 + 数学期望

原文地址：https://www.cnblogs.com/php-linux/p/12522364.html