zoukankan      html  css  js  c++  java
  • 用 Python3 + Requests 爬取王者荣耀皮肤:单线程爬取与多线程爬取

    先获取英雄列表 herolist.json(其中 ename 为英雄编号,cname 为英雄名字):https://pvp.qq.com/web201605/js/herolist.json
    皮肤URL规律:https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/英雄编号/英雄编号-bigskin-皮肤编号.jpg

    其他规律去别的博客看

    单线程爬取

    import requests
    import json
    import time
    
    def hero(hero_name, hero_num, h_l):
        """Download every available skin image for the given heroes.

        Args:
            hero_name: Hero display names, used to build the output file names.
            hero_num: Hero ids, index-aligned with ``hero_name``; used to build
                the image URLs.
            h_l: Base URL of the skin images, ending with "/".

        Each image is saved as ``Hero/<name><skin number>.jpg``.
        """
        import os  # local import so this block does not depend on the file header

        # Create the output folder up front so open() below cannot fail merely
        # because "Hero" does not exist yet.
        os.makedirs("Hero", exist_ok=True)

        # Pair each id with its name directly instead of keeping a manual counter.
        for name, i in zip(hero_name, hero_num):
            # Skin numbers start at 1 (there is no skin 0); a hero is assumed
            # to have at most 10 skins, so probe 1..10 inclusive.
            for sk_num in range(1, 11):
                hsl = h_l + str(i) + "/" + str(i) + "-bigskin-" + str(sk_num) + ".jpg"
                # A timeout keeps one stalled connection from hanging the crawl.
                hl = requests.get(hsl, timeout=10)
                if hl.status_code != 200:
                    # The first non-200 answer (e.g. 404) is taken to mean
                    # this hero has no further skins.
                    break
                # Progress output so the user can see which file is being saved.
                print("此时正在下载:" + str(name) + str(sk_num) + "\n")
                with open("Hero/" + str(name) + str(sk_num) + ".jpg", "wb") as f:
                    f.write(hl.content)
    
    def main():
        """Fetch the hero list, then download the skins of every hero sequentially."""
        skin_base = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"
        list_url = "https://pvp.qq.com/web201605/js/herolist.json"
        # Send a browser-like User-Agent with the hero list request.
        browser_headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
        entries = requests.get(list_url, headers=browser_headers).json()

        # Each entry provides the hero's name ("cname") and number ("ename").
        names = [entry["cname"] for entry in entries]
        numbers = [entry["ename"] for entry in entries]

        # Report how many heroes were found before starting the download.
        print("HeroNumber:" + str(len(names)))
        hero(names, numbers, skin_base)
    
    
    # Start the single-threaded crawl only when this file is run as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':  
    
        main()
    

    多线程爬取,效率超高,在爬数据的时候简直是利器!

    import requests
    import json
    import threading
    import time
    
    def hero_1(hero_name, hero_num, h_l):
        """Download every available skin image for one slice of heroes.

        Intended as a thread target: each worker receives its own slice.

        Args:
            hero_name: Hero display names, used to build the output file names.
            hero_num: Hero ids, index-aligned with ``hero_name``; used to build
                the image URLs.
            h_l: Base URL of the skin images, ending with "/".

        Each image is saved as ``Hero/<name><skin number>.jpg``.
        """
        import os  # local import so this block does not depend on the file header

        # exist_ok=True makes this safe when several workers reach it together.
        os.makedirs("Hero", exist_ok=True)

        # Pair each id with its name directly instead of keeping a manual counter.
        for name, i in zip(hero_name, hero_num):
            # Skin numbers start at 1 (there is no skin 0); probe skins 1..14.
            for sk_num in range(1, 15):
                hsl = h_l + str(i) + "/" + str(i) + "-bigskin-" + str(sk_num) + ".jpg"
                # A timeout keeps one stalled connection from blocking this worker forever.
                hl = requests.get(hsl, timeout=10)
                if hl.status_code != 200:
                    # The first non-200 answer is taken to mean this hero has
                    # no further skins.
                    break
                print("此时正在下载:" + str(name) + str(sk_num) + "\n")
                with open("Hero/" + str(name) + str(sk_num) + ".jpg", "wb") as f:
                    f.write(hl.content)
    
    
    def main():
        """Fetch the hero list and download all skins with up to three threads.

        The heroes are split into contiguous, roughly equal slices and each
        slice is handed to its own ``hero_1`` worker thread. The function
        returns once every worker has finished.
        """
        url = "https://pvp.qq.com/web201605/js/herolist.json"
        header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'}
        response = requests.get(url, headers=header)
        hero_list = response.json()
        h_l = "https://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/"

        # Extract the hero names and numbers.
        hero_name = [x["cname"] for x in hero_list]
        hero_num = [x["ename"] for x in hero_list]
        print("HeroNumber:" + str(len(hero_name)))

        # Size the slices from the real hero count. A fixed count of 93 raises
        # IndexError on a shorter list and silently skips heroes on a longer one.
        worker_count = 3
        total = len(hero_num)
        chunk = max(1, -(-total // worker_count))  # ceiling division, never 0

        # range() yields only start offsets of non-empty slices, so an empty
        # hero list starts no threads and a short list starts fewer than three.
        workers = [
            threading.Thread(
                target=hero_1,
                args=(hero_name[start:start + chunk], hero_num[start:start + chunk], h_l),
            )
            for start in range(0, total, chunk)
        ]
        for worker in workers:
            worker.start()
        # Wait for every worker so that main() returns only when all downloads
        # are complete.
        for worker in workers:
            worker.join()
    
    
    # Start the multi-threaded crawl only when this file is run as a script,
    # not when it is imported as a module.
    if __name__ == '__main__':
    
        main()
    

      

  • 相关阅读:
    IIs6基础上发布WebApi注意事项
    VS2010下开发WebApi 基本步骤
    C# 两个datatable中的数据快速比较返回交集或差集
    myeclipse快捷键使用
    Java中时间
    数组排序后插入
    所有的jsp页面都放到WEB-INF目录
    jsp内置对象浅谈
    JSP九大内置对象的作用和用法总结?
    JSP九大内置对象及四个作用域
  • 原文地址:https://www.cnblogs.com/wxzbk/p/10981098.html
Copyright © 2011-2022 走看看