zoukankan      html  css  js  c++  java
  • s6tu

    # -*- coding: utf-8 -*-
    # @Time    : 2018/03/30 15:20
    # @Author  : cxa
    # @File    : liuuchnagtu.py
    # @Software: PyCharm
    import requests
    from fake_useragent import UserAgent as UA
    from lxml import html
    import os
    import threading
    import traceback
    import time
    import random
    class GetImage():
        """Crawl the s6tu.com "popular images" listing and save every image found.

        Listing pages are fetched one at a time; the image URLs of each page are
        split into a few slices that are downloaded concurrently by worker
        threads into the local ``setu`` directory.
        """

        # Seconds to wait for any single HTTP request before giving up, so a
        # stalled connection cannot hang the crawl forever.
        REQUEST_TIMEOUT = 30

        def __init__(self):
            # Listing URL template; ``{}`` is replaced by the page number.
            self.url = "http://www.s6tu.com/explore/popular/?list=images&sort=likes_desc&page={}"
            # XPath selecting the thumbnail ``src`` of every image on a listing page.
            self.imgpath = "//div[@class='list-item-image fixed-size']/a/img/@src"
            self.headers = {
                'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
                # The original key was 'Accept - Encoding' (with spaces), which
                # is not a valid header name.
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
                'Connection': 'Keep-Alive',
                'User-Agent': UA().random,
                'Host': 'www.s6tu.com',
            }

        @staticmethod
        def _chunk_ranges(total, parts=3):
            """Split ``range(total)`` into at most ``parts`` contiguous slices.

            Returns a list of ``(start, end)`` pairs (``end`` exclusive) that
            together cover every index exactly once. An empty list is returned
            when ``total`` is not positive. The slice size is never zero, so
            short lists (fewer items than ``parts``) are handled.
            """
            if total <= 0:
                return []
            size = max(1, -(-total // parts))  # ceiling division
            return [(lo, min(lo + size, total)) for lo in range(0, total, size)]

        def get_oen_page(self, start_page=1, end_page=999):
            """Download the images of listing pages ``start_page`` .. ``end_page - 1``.

            For each page that answers with HTTP 200 the thumbnail URLs are
            converted to full-size URLs (the ``.md.`` infix is dropped) and
            handed to worker threads running :meth:`getimglist`. Pages that
            answer with another status are skipped after a random 1-5 second
            pause. Any unexpected exception ends the crawl and its traceback is
            printed.
            """
            try:
                os.makedirs("setu", exist_ok=True)
                # One session for the whole crawl: connections are reused and
                # the pool is closed when the crawl ends.
                with requests.session() as session:
                    for page in range(start_page, end_page):
                        req = session.get(
                            self.url.format(page),
                            headers=self.headers,
                            timeout=self.REQUEST_TIMEOUT,
                        )
                        if req.status_code != requests.codes.ok:
                            print("errro")
                            time.sleep(random.randint(1, 5))
                            continue

                        root = html.fromstring(req.text)
                        imglist = root.xpath(self.imgpath)
                        newlist = [src.replace(".md.", ".") for src in imglist]

                        workers = [
                            threading.Thread(target=self.getimglist, args=(newlist, lo, hi))
                            for lo, hi in self._chunk_ranges(len(newlist))
                        ]
                        for worker in workers:
                            worker.start()
                        for worker in workers:
                            worker.join()
                        print("get one page over")
            except Exception:
                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
                # SystemExit still propagate.
                print("error,here  is details:{}".format(traceback.format_exc()))

        def getimglist(self, newlist, start, end):
            """Download ``newlist[start:end]`` into the ``setu`` directory.

            ``end`` is clamped to ``len(newlist)``. Each file is named after the
            last path component of its URL. A failed or non-200 download is
            reported and skipped so the remaining images are still fetched.
            """
            if end > len(newlist):
                end = len(newlist)
            # Let requests derive ``Host`` from each image URL; the fixed
            # listing host would be wrong for images served from another host.
            img_headers = {
                key: value for key, value in self.headers.items()
                if key.lower() != 'host'
            }
            for imgurl in newlist[start:end]:
                print(imgurl)
                try:
                    req = requests.get(imgurl, headers=img_headers, timeout=self.REQUEST_TIMEOUT)
                    if req.status_code != requests.codes.ok:
                        # Do not store an error page under an image file name.
                        print("skip {}: HTTP {}".format(imgurl, req.status_code))
                        continue
                    with open(os.path.join("setu", os.path.basename(imgurl)), "wb") as fs:
                        fs.write(req.content)
                except Exception:
                    print("download failed for {}:{}".format(imgurl, traceback.format_exc()))
    
    if __name__ == "__main__":
        # Script entry point: build the crawler, then run the full crawl.
        crawler = GetImage()
        crawler.get_oen_page()
      
    

      

  • 相关阅读:
    软件工程基础之二——阅读《软件工程基础》的问题
    软件工程基础之一——个人介绍与计划
    个人介绍
    sudoku
    GitHub地址
    疑问②
    概览提问①
    jsp内置对象
    tomcat的环境变量配置
    构造方法的重载代码
  • 原文地址:https://www.cnblogs.com/c-x-a/p/8676669.html
Copyright © 2011-2022 走看看