zoukankan      html  css  js  c++  java
  • s6tu

    # -*- coding: utf-8 -*-
    # @Time    : 2018/03/30 15:20
    # @Author  : cxa
    # @File    : liuuchnagtu.py
    # @Software: PyCharm
    import requests
    from fake_useragent import UserAgent as UA
    from lxml import html
    import os
    import threading
    import traceback
    import time
    import random
    class GetImage():
        """Crawl the s6tu.com "popular images" listing and save every image locally.

        Listing pages are fetched one after another; the image URLs found on a
        page are split into a few chunks and each chunk is downloaded by its own
        thread into the ``setu`` directory (relative to the working directory).
        """

        # Seconds to wait for any single HTTP request before giving up, so a
        # stalled connection cannot hang the crawl forever.
        REQUEST_TIMEOUT = 30

        def __init__(self):
            # Listing URL template; ``{}`` is filled with the 1-based page number.
            self.url = "http://www.s6tu.com/explore/popular/?list=images&sort=likes_desc&page={}"
            # XPath selecting the ``src`` attribute of every image on a listing page.
            self.imgpath = "//div[@class='list-item-image fixed-size']/a/img/@src"
            self.headers = {
                'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
                # Header names must not contain spaces.
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
                'Connection': 'Keep-Alive',
                'User-Agent': UA().random,
                'Host': 'www.s6tu.com',
            }

        @staticmethod
        def _chunk_ranges(total, parts=3):
            """Split ``range(total)`` into at most ``parts`` contiguous slices.

            Returns a list of ``(start, end)`` pairs (``end`` exclusive) that
            together cover ``0..total``. Returns an empty list when ``total`` is
            not positive, so callers never build a ``range`` with a zero step.
            """
            if total <= 0:
                return []
            # Ceiling division: step is always >= 1 and yields <= ``parts`` slices.
            step = -(-total // parts)
            return [(start, min(start + step, total))
                    for start in range(0, total, step)]

        def _download_page(self, session, page):
            """Fetch listing page ``page`` and download all images found on it.

            Returns ``False`` when the page was fetched successfully but contains
            no images, ``True`` otherwise (including a non-200 response, which is
            reported and followed by a short random pause).
            """
            resp = session.get(self.url.format(page), headers=self.headers,
                               timeout=self.REQUEST_TIMEOUT)
            if resp.status_code != requests.codes.ok:
                print("errro")
                time.sleep(random.randint(1, 5))
                return True

            root = html.fromstring(resp.text)
            # Dropping the ".md" infix presumably turns a medium-size thumbnail
            # URL into the full-size image URL -- TODO confirm against the site.
            full_size = [src.replace(".md.", ".") for src in root.xpath(self.imgpath)]
            if not full_size:
                return False

            workers = []
            for start, end in self._chunk_ranges(len(full_size)):
                worker = threading.Thread(target=self.getimglist,
                                          args=(full_size, start, end))
                workers.append(worker)
                worker.start()
            for worker in workers:
                worker.join()
            print("get one page over")
            return True

        def get_oen_page(self):
            """Walk listing pages 1..998 and download the images of each page.

            A network failure on one page is reported and the crawl moves on to
            the next page after a short pause. A page without images is treated
            as the end of the listing. Any other error is reported with its
            traceback and ends the crawl; nothing is raised to the caller.
            """
            try:
                os.makedirs("setu", exist_ok=True)
                # One session for the whole crawl: connections are reused and the
                # session is closed when the crawl ends.
                with requests.session() as session:
                    for page in range(1, 999):
                        try:
                            if not self._download_page(session, page):
                                print("no images on page {}, stopping".format(page))
                                break
                        except requests.exceptions.RequestException:
                            print("error,here  is details:{}".format(traceback.format_exc()))
                            time.sleep(random.randint(1, 5))
            except Exception:
                # Not a bare ``except``: Ctrl-C / SystemExit must still propagate.
                print("error,here  is details:{}".format(traceback.format_exc()))

        def getimglist(self, newlist, start, end):
            """Download ``newlist[start:end]`` into the ``setu`` directory.

            ``end`` may exceed ``len(newlist)``; it is clamped. Each file is named
            after the last path component of its URL. A failed download is
            reported and skipped so the remaining images are still fetched.
            """
            # The configured Host header names the listing site; drop it so
            # requests derives the correct Host from each image URL.
            headers = {key: value for key, value in self.headers.items()
                       if key.lower() != 'host'}
            for imgurl in newlist[start:min(end, len(newlist))]:
                print(imgurl)
                try:
                    resp = requests.get(imgurl, headers=headers,
                                        timeout=self.REQUEST_TIMEOUT)
                except requests.exceptions.RequestException as exc:
                    print("download failed for {}: {}".format(imgurl, exc))
                    continue
                if resp.status_code != requests.codes.ok:
                    # Do not save an error page under an image file name.
                    print("download failed for {}: HTTP {}".format(imgurl, resp.status_code))
                    continue
                with open(os.path.join("setu", os.path.basename(imgurl)), "wb") as fs:
                    fs.write(resp.content)
    
    # Script entry point: build the crawler and start walking the listing pages.
    if __name__ == "__main__":
        crawler = GetImage()
        crawler.get_oen_page()
      
    

      

  • 相关阅读:
    shell脚本 加密备份MySQL数据库
    C#在Linux下获取文件夹信息(所在磁盘总大小,使用空间,已用空间,使用率)
    bootstrap--- 两种bootstrap multiselect组件大比拼
    C# 文件重命名
    C#中一些常用的正则表达式
    C# 文件压缩加解密
    Python 由__dict__和dir()引发的一些思考
    python3随机生成中文字符
    Django自定义过滤器中is_safe和need_autoescape两个参数的理解
    Python格式化字符串--format
  • 原文地址:https://www.cnblogs.com/c-x-a/p/8676669.html
Copyright © 2011-2022 走看看