zoukankan      html  css  js  c++  java
  • 爬天极网进程池.py

    import os
    import requests
    # 导入进程:
    from multiprocessing import Pool
    from bs4 import BeautifulSoup


    # 定义下载图片功能:
    def download_img(url, dirname=""):
        """Download the file at ``url`` and save it inside ``dirname``.

        The local file name is the last ``/``-separated segment of ``url``.

        Args:
            url: Address of the image to fetch.
            dirname: Existing directory to write into. The default ``""``
                means the current working directory.

        Raises:
            requests.RequestException: If the request fails, times out, or the
                server answers with an HTTP error status.
            OSError: If the target file cannot be written.
        """
        # A timeout keeps one stalled connection from blocking a worker forever.
        res = requests.request("get", url, timeout=30)
        # Fail loudly instead of saving an HTML error page as an image file.
        res.raise_for_status()
        filename = url.split("/")[-1]
        # os.path.join leaves a bare file name untouched when dirname is empty;
        # plain concatenation produced "/<filename>", i.e. the filesystem root.
        with open(os.path.join(dirname, filename), "wb") as f:
            f.write(res.content)
        print(f"{dirname}{filename}下载成功!")


    # 定义找到大图片:
    def find_big_img(url):
        """Return the ``src`` of the large image shown on the page at ``url``.

        The page is fetched and parsed, the ``div`` with class
        ``l_effect_img_mid`` is located, and the ``src`` attribute of the first
        ``img`` inside it is returned.

        An ``AttributeError`` propagates if the ``div`` or the ``img`` is not
        present, because ``find`` returns ``None`` in that case.
        """
        page = requests.request("get", url)
        soup = BeautifulSoup(page.content, "html.parser")
        container = soup.find(name="div", attrs={"class": "l_effect_img_mid"})
        return container.find("img").get("src")


    def get_page_count(url):
        """Fetch ``url`` and query the ``div`` whose class is ``flym``.

        NOTE(review): the result of the final ``find_all`` call is discarded and
        nothing is returned, so this function currently always returns ``None``.
        It looks unfinished; confirm the intended return value before use.
        """
        response = requests.request("get", url)
        soup = BeautifulSoup(response.content, "html.parser")
        flym_div = soup.find(name="div", attrs={"class": "flym"})
        flym_div.find_all(name="")


    # Listing URL prefix passed to run(), which appends "_<num>.shtml" to it.
    baseurl = "http://pic.yesky.com/c/6_18332"


    def run(url, num):
        """Download every album listed on page ``num`` of a gallery listing.

        The listing page is fetched from ``f"{url}_{num}.shtml"``. For each
        ``<dd>`` entry inside the ``lb_box`` div, a directory named after the
        entry link's ``title`` is created, the large image on the album's first
        page is downloaded, and then the large image of every other page linked
        from the album's ``overview`` div is downloaded into the same directory.

        Args:
            url: Listing URL prefix, without the ``_<num>.shtml`` suffix.
            num: Listing page number.
        """
        res = requests.request("get", f"{url}_{num}.shtml")
        bs = BeautifulSoup(res.text, "html.parser")
        entries = bs.find(name="div", attrs={"class": "lb_box"}).find_all("dd")
        print(entries)

        for entry in entries:
            anchor = entry.find("a")
            dirname = anchor.get("title")
            # exist_ok avoids the check-then-create race: another pool worker
            # may create the same directory between an isdir() test and mkdir().
            os.makedirs(dirname, exist_ok=True)
            link = anchor.get("href")

            # The album's first page is parsed here directly (instead of via
            # find_big_img) because the same document is needed again below
            # for the overview links.
            res1 = requests.request("get", link)
            bs1 = BeautifulSoup(res1.content, "html.parser")
            div_obj = bs1.find(name="div", attrs={"class": "l_effect_img_mid"})
            current_img_url = div_obj.find("img").get("src")
            download_img(current_img_url, dirname)

            div_overview = bs1.find(name="div", attrs={"class": "overview"})
            # Separate loop names: the original reused ``i`` and overwrote the
            # ``url`` parameter inside this loop.
            for page_anchor in div_overview.find_all("a"):
                page_url = page_anchor.get("href")
                # Skip anchors without a target and the first page, which was
                # already downloaded above.
                if not page_url or page_url == link:
                    continue
                download_img(find_big_img(page_url), dirname)


    if __name__ == '__main__':
        # Start a pool of 5 worker processes.
        pool = Pool(5)
        # Submit listing pages 1 through 7, one task per page.
        for i in range(1, 8):
            # apply_async(func, args) returns immediately. Without an
            # error_callback, an exception raised inside run() is stored on the
            # discarded AsyncResult and never shown, so report it here.
            # ``page=i`` binds the current page number at definition time.
            pool.apply_async(
                run,
                (baseurl, i),
                error_callback=lambda exc, page=i: print(
                    f"page {page} failed: {exc!r}"
                ),
            )
        # No more tasks will be submitted; wait for all workers to finish.
        pool.close()
        pool.join()
    效果如下:



  • 相关阅读:
    linux zip命令 tar命令 【压缩、解压缩】参数列表:
    理解 uptime 的:“平均负载”? 如何模拟测试
    mark_Linux_wc
    我应该怎么学习SAP?
    SAP 销售订单交货对成本中心记账
    从华为“鸿蒙”备胎看IT项目建设
    什么样的系统算是坑
    写在Logg SAP项目上线之际
    SAP系统邮件功能配置
    警惕SAP项目被“中间商赚差价”
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12209850.html
Copyright © 2011-2022 走看看