zoukankan      html  css  js  c++  java
  • 爬天极网线程池和进程池.py

    #导入多线程模块:
    import threading
    import os
    import requests # 发送请求
    import time
    from bs4 import BeautifulSoup # 解析文本
    #导入线程池执行器和进程池执行器:
    from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
    #导入获取CPU的数量cpu_count模块的功能:
    from multiprocessing import cpu_count
    base_path = os.path.dirname(os.path.abspath(__file__))
    img_path = os.path.join(base_path, 'img')

    def func(num):
        """Download the images of every gallery linked from one listing page.

        Fetches ``http://pic.yesky.com/c/6_20491_{num}.shtml``, walks each
        ``<dd>`` entry inside the ``div.lb_box`` element, and for every gallery
        link found creates a directory under ``img_path`` (named after the link
        text) and saves each image of the gallery's ``div.overview`` element
        into it.

        The function is best-effort: a failure on one page, gallery, or image
        is reported with ``print`` and the remaining work continues, so it is
        safe to run as a fire-and-forget pool task.

        Args:
            num: Page number substituted into the listing URL.

        Returns:
            None. Results are written to disk.
        """
        # Without a timeout a stalled server would block a pool worker forever.
        timeout = 10

        try:
            response = requests.get(
                f'http://pic.yesky.com/c/6_20491_{num}.shtml', timeout=timeout)
        except requests.RequestException as e:
            print(f'failed to fetch listing page {num}: {e}')
            return

        soup = BeautifulSoup(response.text, 'html.parser')
        div_obj = soup.find(name='div', attrs={"class": "lb_box"})
        if div_obj is None:
            # Page layout differs from what is expected (or an error page came back).
            print(f'no "lb_box" container found on {response.url}')
            return

        for dd in div_obj.find_all(name='dd'):  # one <dd> per gallery
            a_obj = dd.find('a')
            if a_obj is None or not a_obj.get('href'):
                continue

            # One directory per gallery, named after the link text.
            dir_path = os.path.join(img_path, a_obj.text)
            try:
                # makedirs(exist_ok=True) also creates the parent image directory
                # and cannot race with another worker creating the same path.
                os.makedirs(dir_path, exist_ok=True)
                a_response = requests.get(a_obj.get('href'), timeout=timeout)
            except (OSError, requests.RequestException) as e:
                print(f'skipping gallery {a_obj.get("href")}: {e}')
                continue

            a_response.encoding = 'GBK'  # decode the gallery page text as GBK
            soup2 = BeautifulSoup(a_response.text, 'html.parser')
            div_obj2 = soup2.find(name='div', attrs={"class": "overview"})
            print(response.url)
            if div_obj2 is None:
                continue

            for img in div_obj2.find_all(name='img'):
                img_src = img.get("src")
                if not img_src:
                    continue
                file_path = os.path.join(dir_path, img_src.rsplit('/', 1)[-1])
                try:
                    # NOTE(review): presumably swaps the thumbnail size token for
                    # the large rendition; confirm against the site's URL scheme.
                    img_response = requests.get(
                        img_src.replace('113x113', '740x-'), timeout=timeout)
                    # Do not save an HTML error page under an image file name.
                    img_response.raise_for_status()
                    with open(file_path, 'wb') as f:
                        f.write(img_response.content)
                except (OSError, requests.RequestException) as e:
                    # Report and move on so one bad image does not abort the gallery.
                    print(f'failed to save {img_src}: {e}')

    if __name__ == '__main__':
        # Script entry point: scrape listing pages 1-5 concurrently and report
        # the elapsed wall-clock time.
        start = time.time()

        # A thread pool sized to the CPU count is used here. To compare against
        # processes, swap in ProcessPoolExecutor(max_workers=cpu_count()); the
        # rest of the block stays the same.
        with ThreadPoolExecutor(max_workers=cpu_count()) as t:
            futures = [t.submit(func, i) for i in range(1, 6)]
        # Leaving the `with` block waits for every task, like shutdown().

        # An exception raised inside a submitted task is stored on its future
        # and would otherwise disappear silently, so surface it here.
        for page, future in enumerate(futures, start=1):
            error = future.exception()
            if error is not None:
                print(f'page {page} failed: {error!r}')

        print("执行时间:{}".format(time.time() - start))
  • 相关阅读:
    牛客多校(2020第十场)E Game
    牛客多校(2020第十场)A Permutation
    牛客多校(2020第十场)A Permutation
    牛客多校(2020第九场)A Groundhog and 2-Power Representation
    牛客多校(2020第九场)A Groundhog and 2-Power Representation
    牛客多校(2020第九场)F Groundhog Looking Dowdy
    牛客多校(2020第九场)F Groundhog Looking Dowdy
    隐式转换
    正则(草稿)
    setimout
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12210152.html
Copyright © 2011-2022 走看看