zoukankan      html  css  js  c++  java
  • Python 批量爬取美女图片

    爬取妹子图目标网址:http://jandan.net/ooxx

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import os
    from concurrent.futures import ThreadPoolExecutor
    from urllib.parse import urljoin

    import requests
    from bs4 import BeautifulSoup
    
    
    class MeiZi:
        """Crawler that downloads every image linked from jandan.net/ooxx.

        Starting at ``self.url`` it follows the "previous-comment-page" link
        from page to page. Every image link found on a page is handed to a
        thread pool and saved under ``self.path`` as ``1.jpg``, ``2.jpg``, ...
        in the order the links are encountered.
        """

        # Seconds to wait on a single HTTP request. Without a timeout,
        # ``requests`` can block forever on a stalled connection.
        TIMEOUT = 10

        def __init__(self):
            self.url = 'http://jandan.net/ooxx'
            self.path = os.path.join(os.getcwd(), 'img')
            self.count = 1
            self.pool = ThreadPoolExecutor(20)
            self.headers = {
                'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
            }

        def _absolute_url(self, href):
            """Resolve ``href`` against the page currently being crawled.

            Handles protocol-relative (``//host/x``), absolute and
            site-relative links alike, instead of blindly prefixing ``http:``.
            """
            return urljoin(self.url, str(href))

        def img_dir(self):
            """Create the output directory if it does not exist yet."""
            # exist_ok avoids the check-then-create race of isdir() + makedirs().
            os.makedirs(self.path, exist_ok=True)

        def download(self, url, path):
            """Fetch ``url`` and write the response body to ``path``.

            Raises:
                requests.RequestException: on connection errors, timeouts or
                    a 4xx/5xx response.
                OSError: if ``path`` cannot be written.
            """
            response = requests.get(url=url, headers=self.headers, timeout=self.TIMEOUT)
            # Reject error responses so an error page is never saved as an image.
            response.raise_for_status()
            with open(path, 'wb') as f1:
                f1.write(response.content)
            print('%s 成功' % path)

        def _safe_download(self, url, path):
            """Run ``download`` inside the pool, reporting failures.

            An exception raised in a worker is stored on a future that nobody
            reads, so it is caught and printed here instead of being lost.
            """
            try:
                self.download(url, path)
            except (requests.RequestException, OSError) as exc:
                print('%s 失败: %s' % (path, exc))

        def a_link(self, a_list):
            """Queue a download for every link in ``a_list``.

            ``a_list`` is an iterable of anchor elements exposing ``.get('href')``.
            Anchors without an ``href`` are skipped and do not consume a file
            number.
            """
            for li in a_list:
                href = li.get('href')
                if not href:
                    continue
                url = self._absolute_url(href)
                path = os.path.join(self.path, "%s.jpg" % self.count)
                self.pool.submit(self._safe_download, url, path)
                self.count += 1

        def run(self):
            """Crawl from ``self.url`` through every previous page.

            ``self.url`` is advanced to each page as it is visited. The crawl
            stops when a page has no "previous-comment-page" link or links
            back to a page already seen.

            Raises:
                requests.RequestException: if a listing page cannot be fetched.
            """
            self.img_dir()
            visited = set()
            # Iterate rather than recurse: one stack frame per page would hit
            # the interpreter's recursion limit on a long archive.
            while self.url not in visited:
                visited.add(self.url)
                response = requests.get(url=self.url, headers=self.headers, timeout=self.TIMEOUT)
                response.raise_for_status()
                soup = BeautifulSoup(response.text, 'lxml')
                page = soup.find('a', class_="previous-comment-page")
                a_list = soup.select(".commentlist > li > div > div > div > p > a")
                self.a_link(a_list)
                href = page.get('href') if page else None
                if not href:
                    break
                self.url = self._absolute_url(href)
            return None
    
    
    if __name__ == '__main__':
        # Script entry point: build the crawler and start crawling at once.
        MeiZi().run()
  • 相关阅读:
    hdu 1042 N!
    hdu 1002 A + B Problem II
    c++大数模板
    hdu 1004 Let the Balloon Rise
    hdu 4027 Can you answer these queries?
    poj 2823 Sliding Window
    hdu 3074 Multiply game
    hdu 1394 Minimum Inversion Number
    hdu 5199 Gunner
    九度oj 1521 二叉树的镜像
  • 原文地址:https://www.cnblogs.com/HByang/p/12655060.html
Copyright © 2011-2022 走看看