zoukankan      html  css  js  c++  java
  • 爬取虎牙标题、作者、热度

    # -*- coding: utf-8 -*-
    import scrapy
    from huyaAll1.items import Huyaall1Item


    class HuyaSpider(scrapy.Spider):
        """Spider that scrapes title, author and popularity from a Huya live list.

        The first page is the HTML page listed in ``start_urls`` and is parsed
        by :meth:`parse`. Further pages are requested manually through the
        paginated ``cache.php`` endpoint in ``url`` and handed to
        :meth:`parse_othor`.
        """

        name = 'huya'
        # allowed_domains = ['www.xxx.com']
        start_urls = ['https://www.huya.com/g/xingxiu']

        # Generic URL template for the paginated endpoint; ``%d`` is the page number.
        url = "https://www.huya.com/cache.php?m=LiveList&do=getLiveListByPage&gameId=1663&tagAll=0&page=%d"

        def parse(self, response):
            """Parse the first page and schedule requests for the following pages.

            Yields one ``Huyaall1Item`` per ``<li>`` found under the
            ``js-live-list`` element, then one ``scrapy.Request`` for each of
            pages 2 through 4 of the paginated endpoint.
            """
            li_list = response.xpath('//*[@id="js-live-list"]/li')
            for li in li_list:
                # extract_first() returns None when the XPath matches nothing,
                # so any of these fields may be None.
                title = li.xpath('./a[2]/text()').extract_first()
                author = li.xpath('./span/span[1]/i/text()').extract_first()
                hot = li.xpath('./span/span[2]/i[2]/text()').extract_first()
                # Instantiate an item object and fill in the scraped fields.
                item = Huyaall1Item()
                item['title'] = title
                item['author'] = author
                item['hot'] = hot
                yield item

            # Manually send requests for the remaining pages. The page number
            # must come from the loop variable; hard-coding 1 here would request
            # the same page on every iteration.
            for page in range(2, 5):
                new_url = self.url % page
                # Issue a GET request handled by a separate callback.
                yield scrapy.Request(url=new_url, callback=self.parse_othor)

        # Callback modelled on parse; it must accept the same ``response`` parameter.
        def parse_othor(self, response):
            """Print the raw body of a paginated response."""
            print(response.text)
  • 相关阅读:
    JS原生带小白点轮播图
    JS原生轮播图
    Vue.js小案例(2)
    Vue.js小案例(1)
    Vuejs入门级简单实例
    Vue.js简单入门
    微信登录oauth2.0
    PHP四维数组、三维数组封装遍历
    常用linux命令30个
    好架构是进化来的,不是设计来的
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12432110.html
Copyright © 2011-2022 走看看