zoukankan      html  css  js  c++  java
  • 多进程爬取页面图片

    详细见代码,在代码段有相应注释:

    import os
    import random
    import re
    from multiprocessing import Pool

    import requests
    
    
    class dImg():
        """Download images from pixabay search-result pages.

        The user supplies a search term and a page count.  Pages are fetched
        either one after another (``run`` / ``download``) or in a pool of
        worker processes (``processRun`` / ``multiDownPage``).  Every image
        found on a page is written to ``IMAGE_DIR`` under a random numeric
        file name.

        Attributes:
            count: Number of images saved so far.
            userSearch: Search term sent to pixabay.
            userNum: Number of result pages to download.
        """

        # Directory (relative to the working directory) that receives images.
        IMAGE_DIR = './image'
        # Seconds to wait for any single HTTP request; without a timeout a
        # stalled connection would block the process forever.
        REQUEST_TIMEOUT = 15
        # Upper bound on worker processes.  The work is network-bound, so one
        # process per page stops paying off (and can exhaust the machine) when
        # the user asks for many pages.
        MAX_WORKERS = 16
        # Group 1 captures the image URL (a src value starting with "http"),
        # group 2 captures the title attribute.  re.S lets ".*?" span lines.
        IMG_PATTERN = re.compile(
            r'<div class="item".*?<img.*?src="(http.*?)".*?title="(.*?)">', re.S)

        def __init__(self):
            self.count = 0
            self.userSearch = ''
            self.userNum = 0

        def _readUserInput(self):
            """Ask for the search term and page count and store them on self.

            Raises:
                ValueError: If the page count typed by the user is not an
                    integer.
            """
            self.userSearch = input('请输入你想要下载的图片:')
            self.userNum = int(input('你想要下载多少页面图片:'))

        def run(self):
            """Prompt the user, then download all pages sequentially."""
            self._readUserInput()
            self.download()

        def processRun(self):
            """Prompt the user, then download all pages with a process pool."""
            self._readUserInput()
            self.multiDownPage()

        def download(self):
            """Download pages 1..userNum one after another in this process."""
            for item in range(self.userNum):
                self.downloadPage(item)

        def _saveImage(self, content):
            """Write image bytes to a fresh random file and return its number.

            The file is opened in exclusive-create mode so that an (unlikely)
            name collision picks a new number instead of silently overwriting
            an image that was already downloaded.
            """
            while True:
                ranNum = random.randint(0, 100000000)
                path = '{}/{}.jpg'.format(self.IMAGE_DIR, ranNum)
                try:
                    # Binary mode: the payload is raw image data.
                    with open(path, 'xb') as f:
                        f.write(content)
                except FileExistsError:
                    continue
                return ranNum

        def downloadPage(self, item):
            """Download every image listed on one search-result page.

            Args:
                item: Zero-based page index; the site's pages are one-based,
                    so page ``item + 1`` is requested.

            Returns:
                The number of images saved from this page.  Network failures
                are reported on stdout and skipped rather than raised, so one
                bad page or image does not abort the whole run.
            """
            page = item + 1
            try:
                # Passing the query through ``params`` lets requests URL-encode
                # the search term (spaces, '&', non-ASCII text).
                res = requests.get(
                    'https://pixabay.com/zh/photos/',
                    params={'q': self.userSearch, 'pagi': page},
                    timeout=self.REQUEST_TIMEOUT)
                res.raise_for_status()
            except requests.RequestException as err:
                print('第%s页请求失败:%s' % (page, err))
                return 0

            # The target directory may not exist yet on a fresh checkout.
            os.makedirs(self.IMAGE_DIR, exist_ok=True)

            saved = 0
            for imgUrl, _title in self.IMG_PATTERN.findall(res.text):
                try:
                    imgContent = requests.get(imgUrl, timeout=self.REQUEST_TIMEOUT)
                    # Avoid storing an HTML error body under a .jpg name.
                    imgContent.raise_for_status()
                except requests.RequestException as err:
                    print('图片下载失败:%s (%s)' % (imgUrl, err))
                    continue
                ranNum = self._saveImage(imgContent.content)
                print('已下载完:%s图' % ranNum)
                self.count = self.count + 1
                saved += 1
            return saved

        def multiDownPage(self):
            """Download pages 1..userNum concurrently in worker processes.

            Each worker operates on a pickled copy of this object, so its
            ``count`` increments are not visible here; the per-page totals
            returned by ``downloadPage`` are added to ``self.count`` instead.
            """
            # Pool() rejects a process count below 1, so handle "nothing to
            # do" before creating it.
            if self.userNum < 1:
                print('所有图片下载完毕')
                return

            workers = min(self.userNum, self.MAX_WORKERS)
            with Pool(workers) as p:
                # Queue one non-blocking task per page; args must be a tuple,
                # hence the trailing comma.
                pending = [p.apply_async(self.downloadPage, args=(i,))
                           for i in range(self.userNum)]
                p.close()
                # Block until every queued page has been processed.
                p.join()

            for page, task in enumerate(pending, start=1):
                try:
                    self.count += task.get()
                except Exception as err:
                    # Top-level boundary: report the failed page and carry on
                    # with the results of the others.
                    print('第%s页下载出错:%s' % (page, err))
            print('所有图片下载完毕')
    
    
    if __name__ == '__main__':
        # Entry point: build the downloader and start the multi-process
        # variant.  Call run() instead of processRun() for the sequential,
        # single-process variant.
        downloader = dImg()
        downloader.processRun()
  • 相关阅读:
    2020/5/8
    2020/5/8
    2020/5/6
    2020/4/30
    2020/4/29
    2020/4/28
    2020/4/27
    KMP算法详解
    博客搬家声明
    洛谷P2831 NOIP2016 愤怒的小鸟
  • 原文地址:https://www.cnblogs.com/Dark-fire-liehuo/p/9757130.html
Copyright © 2011-2022 走看看