zoukankan      html  css  js  c++  java
  • 爬天极网进程池.py

    import os
    import requests
    # 导入进程:
    from multiprocessing import Pool
    from bs4 import BeautifulSoup


    # 定义下载图片功能:
    def download_img(url, dirname="", timeout=30):
        """Download the resource at *url* and save it under *dirname*.

        The file name is the last ``/``-separated segment of *url*.

        Args:
            url: Address of the image to fetch.
            dirname: Directory to write into; the empty default means the
                current working directory.
            timeout: Seconds to wait for the server before giving up.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        res = requests.request("get", url, timeout=timeout)
        # Fail loudly instead of saving an HTML error page as an "image".
        res.raise_for_status()
        filename = url.split("/")[-1]
        # os.path.join keeps an empty dirname relative to the working directory,
        # whereas dirname + "/" + filename would point at the filesystem root.
        with open(os.path.join(dirname, filename), "wb") as f:
            f.write(res.content)
        print(f"{dirname}{filename}下载成功!")


    # 定义找到大图片:
    def find_big_img(url):
        """Return the ``src`` of the image inside the page's ``l_effect_img_mid`` div.

        Fetches *url*, parses the response body as HTML, and reads the ``src``
        attribute of the first ``<img>`` found in the first ``<div>`` whose
        class is ``l_effect_img_mid``.
        """
        page = requests.request("get", url)
        soup = BeautifulSoup(page.content, "html.parser")
        container = soup.find(name="div", attrs={"class": "l_effect_img_mid"})
        return container.find("img").get("src")


    def get_page_count(url):
        """Fetch *url* and query the elements inside its ``flym`` div.

        NOTE(review): the result of the lookup is discarded and there is no
        ``return`` statement, so this always yields ``None``. The function
        looks unfinished -- confirm the intended selector before relying on it.
        """
        response = requests.request("get", url)
        soup = BeautifulSoup(response.content, "html.parser")
        pager = soup.find(name="div", attrs={"class": "flym"})
        pager.find_all(name="")


    # Common URL prefix of the listing pages; run() appends "_<num>.shtml" to it.
    baseurl = "http://pic.yesky.com/c/6_18332"


    def run(url, num):
        """Download every gallery listed on one index page.

        Fetches ``{url}_{num}.shtml`` and walks the ``<dd>`` entries inside the
        ``lb_box`` div. For each entry a directory named after the first link's
        ``title`` is created, the large image of the linked page is saved there,
        and then the large image of every other page linked from that page's
        ``overview`` div is saved as well.

        Args:
            url: URL prefix of the index pages.
            num: Page number appended to *url*.
        """
        res = requests.request("get", f"{url}_{num}.shtml")
        bs = BeautifulSoup(res.text, "html.parser")
        entries = bs.find(name="div", attrs={"class": "lb_box"}).find_all("dd")
        print(entries)

        for entry in entries:
            anchor = entry.find("a")
            dirname = anchor.get("title")
            # Several pool workers may hit the same title at once; exist_ok
            # avoids the isdir()/mkdir() race that raises FileExistsError.
            os.makedirs(dirname, exist_ok=True)
            link = anchor.get("href")

            # The first page of the gallery is parsed here directly because its
            # "overview" div is needed below as well.
            res1 = requests.request("get", link)
            bs1 = BeautifulSoup(res1.content, "html.parser")
            div_obj = bs1.find(name="div", attrs={"class": "l_effect_img_mid"})
            current_img_url = div_obj.find("img").get("src")
            download_img(current_img_url, dirname)

            div_overview = bs1.find(name="div", attrs={"class": "overview"})
            for thumb in div_overview.find_all("a"):
                page_url = thumb.get("href")
                # The gallery's own first page was already downloaded above.
                if page_url == link:
                    continue
                download_img(find_big_img(page_url), dirname)


    if __name__ == '__main__':
        # Start five worker processes.
        pool = Pool(5)
        # Schedule index pages 1 through 7 without blocking.
        for page in range(1, 8):
            pool.apply_async(
                run,
                (baseurl, page),
                # Without a callback, an exception raised inside a worker is
                # stored on the discarded AsyncResult and never surfaces.
                error_callback=lambda exc, page=page: print(
                    f"page {page} failed: {exc!r}"
                ),
            )
        # No more tasks will be submitted; wait for the workers to finish.
        pool.close()
        pool.join()
    效果如下:



  • 相关阅读:
    C++笔记(2018/2/6)
    2017级面向对象程序设计寒假作业1
    谁是你的潜在朋友
    A1095 Cars on Campus (30)(30 分)
    A1083 List Grades (25)(25 分)
    A1075 PAT Judge (25)(25 分)
    A1012 The Best Rank (25)(25 分)
    1009 说反话 (20)(20 分)
    A1055 The World's Richest(25 分)
    A1025 PAT Ranking (25)(25 分)
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12209850.html
Copyright © 2011-2022 走看看