zoukankan      html  css  js  c++  java
  • 用 Python3 + Requests 单线程爬取英雄联盟皮肤

    """
    Hero_LOL 和王者荣耀类似
    """
    import requests
    import re
    import json
    import os
    import threading
    
    
    def hero(hero_name, hero_num):
        """Download the skin images of every hero into the ``LOL/`` directory.

        For each hero id, skin indices ``0 .. 14`` are requested in order and
        probing stops at the first index whose response is not HTTP 200.

        :param hero_name: hero names, in the same order as ``hero_num``; used
            to build the output file names.
        :param hero_num: hero ids as strings; each id is embedded in the image
            URL.
        :return: None
        """
        # Common URL prefix shared by every skin image.
        h_l = "https://ossweb-img.qq.com/images/lol/web201310/skin/big"
        # Assumed upper bound on the number of skins per hero.
        max_skins = 15
        out_dir = "LOL"
        # open() does not create missing directories, so make sure it exists.
        os.makedirs(out_dir, exist_ok=True)

        print(len(hero_num))
        # ``num`` is the zero-based position of the hero, used for progress output.
        for num, (name, hero_id) in enumerate(zip(hero_name, hero_num)):
            for sk_num in range(max_skins):
                # The skin index is a three-digit, zero-padded suffix; plain
                # "00" + str(sk_num) would yield four digits from index 10 on.
                hsl = h_l + str(hero_id) + str(sk_num).zfill(3) + ".jpg"
                # A timeout keeps a stalled connection from hanging the crawl.
                hl = requests.get(hsl, timeout=10)
                if hl.status_code != 200:
                    # First missing index: treat the hero as finished.
                    break
                filename = out_dir + "/" + str(name) + str(sk_num) + ".jpg"
                print("此时正在下载:" + filename+" 这是第"+str(num+1)+"个英雄")
                with open(filename, "wb") as f:
                    f.write(hl.content)
    
    
    def _parse_hero_keys(js_text):
        """Extract the ``keys`` mapping from the text of champion.js.

        :param js_text: decoded source text of champion.js.
        :return: dict built from the JSON object that follows ``keys":`` and
            precedes ``,"data``.
        :raises ValueError: if no such section is present in ``js_text``.
        """
        # [\s\S] matches any character, newlines included; the lazy quantifier
        # stops at the first following ',"data'.
        match = re.search(r'keys":([\s\S]*?),"data', js_text)
        if match is None:
            raise ValueError('no "keys" section found in champion.js')
        # NOTE: the original code passed this remote text to eval(); it is
        # parsed as JSON instead so downloaded content is never executed.
        return json.loads(match.group(1))


    def main():
        """Fetch the hero list from champion.js and download every hero's skins.

        :return: None
        :raises ValueError: if the hero list cannot be located in the response.
        """
        # Script that carries the hero data.
        hero_url = "https://lol.qq.com/biz/hero/champion.js"
        # Browser-like User-Agent header sent with the request.
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
        response = requests.get(hero_url, headers=header, timeout=10)
        # Fail early on an HTTP error instead of parsing an error page.
        response.raise_for_status()
        # Only the "keys" section is used, so undecodable bytes elsewhere in
        # the payload are replaced rather than aborting the run.
        text = response.content.decode("GBK", errors="replace")

        res = _parse_hero_keys(text)
        hero_num = list(res)            # dict keys, passed to hero() as ids
        hero_name = list(res.values())  # dict values, passed to hero() as names

        hero(hero_name, hero_num)


    if __name__ == '__main__':
        main()
  • 相关阅读:
    套件测试
    注解实战aftersuite和beforesuite
    注解实战Beforeclass和Afterclass
    Centos7下安装Mongodb
    java的算法实现冒泡
    注解实战BeforeMethed和afterMethed
    前端 HTML的规范
    前端 HTML标签介绍
    前端 HTML文档 详解
    前端 HTML 简介
  • 原文地址:https://www.cnblogs.com/wxzbk/p/10983976.html
Copyright © 2011-2022 走看看