zoukankan html css js c++ java

爬取虎牙标题、作者、热度

# -*- coding: utf-8 -*-
import scrapy
from huyaAll1.items import Huyaall1Item


class HuyaSpider(scrapy.Spider):
    """Spider that collects title, author and popularity from a Huya live list.

    The first page is fetched from ``start_urls`` and parsed as HTML by
    ``parse``. Further pages are requested through the ``url`` template and
    handed to ``parse_othor``.
    """

    name = 'huya'
    start_urls = ['https://www.huya.com/g/xingxiu']

    # Generic URL template for the follow-up pages; ``%d`` is the page number.
    url = "https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId=1663&tagAll=0&page=%d"

    def parse(self, response):
        """Yield one item per ``<li>`` of the live list, then request more pages.

        Args:
            response: The downloaded response for a URL in ``start_urls``.

        Yields:
            A ``Huyaall1Item`` with ``title``, ``author`` and ``hot`` for each
            list entry, followed by one ``scrapy.Request`` per page in 2..4.
        """
        li_list = response.xpath('//*[@id="js-live-list"]/li')
        for li in li_list:
            # extract_first() returns None when the node is missing, so a
            # malformed entry produces an item with None fields, not an error.
            title = li.xpath('./a[2]/text()').extract_first()
            author = li.xpath('./span/span[1]/i/text()').extract_first()
            hot = li.xpath('./span/span[2]/i[2]/text()').extract_first()

            # Instantiate the item object and fill in the scraped fields.
            item = Huyaall1Item()
            item['title'] = title
            item['author'] = author
            item['hot'] = hot
            yield item

        # Manually issue the requests for the remaining pages.
        for page in range(2, 5):
            # Substitute the loop's page number; a constant here would request
            # the same page on every iteration.
            new_url = self.url % page
            # Send a GET request whose response goes to parse_othor.
            yield scrapy.Request(url=new_url, callback=self.parse_othor)

    # Callbacks are modelled on parse and must accept the same parameters.
    def parse_othor(self, response):
        """Print the raw body of a follow-up page response.

        Args:
            response: The response for a URL built from the ``url`` template.
        """
        print(response.text)

查看全文

相关阅读:
C++ 多态性之虚函数&抽象类&纯虚函数
 转帖：iOS UIWindow & UIWindowLevel
转帖：iOS UINavigationController 对象
 iOS 数据持久化 NSUserDefault
----百度地图api----
----改写superheros的json以及上传到github----
----XMLHttpRequestAPI简单介绍----
----event flow----
----Arrow functions----
----constructor 与 object----

原文地址：https://www.cnblogs.com/zhang-da/p/12432110.html