zoukankan      html  css  js  c++  java
  • 爬天极网进程池.py

    import os
    import requests
    # Import the process pool (multiprocessing.Pool):
    from multiprocessing import Pool
    from bs4 import BeautifulSoup


    # Image download helper:
    def download_img(url, dirname=""):
        """Fetch *url* with an HTTP GET and save the response body to disk.

        The file name is the last ``/``-separated segment of *url*. When
        *dirname* is empty (the default) the file is written to the current
        working directory.

        Args:
            url: Address of the image to download.
            dirname: Directory to write the file into; ``""`` means the
                current working directory.
        """
        res = requests.request("get", url)
        filename = url.split("/")[-1]
        # os.path.join drops an empty dirname, so the default no longer
        # resolves to "/<filename>" at the filesystem root.
        with open(os.path.join(dirname, filename), "wb") as f:
            f.write(res.content)
        print(f"{dirname}{filename}下载成功!")


    # Locate the full-size image on a detail page:
    def find_big_img(url):
        """Return the ``src`` of the image shown on the page at *url*.

        The page is fetched with an HTTP GET and parsed with ``html.parser``;
        the image is the first ``img`` inside the ``div`` whose class is
        ``l_effect_img_mid``.

        Args:
            url: Address of the page to inspect.

        Returns:
            The value of the image tag's ``src`` attribute.
        """
        response = requests.request("get", url)
        page = BeautifulSoup(response.content, "html.parser")
        container = page.find(name="div", attrs={"class": "l_effect_img_mid"})
        return container.find("img").get("src")


    def get_page_count(url):
        """Fetch *url* and query the ``div`` whose class is ``flym``.

        NOTE(review): the query result is discarded and the function returns
        None, so it does not yet yield a page count -- it looks unfinished.
        No caller is visible in this file.

        Args:
            url: Address of the page to inspect.
        """
        response = requests.request("get", url)
        page = BeautifulSoup(response.content, "html.parser")
        flym_div = page.find(name="div", attrs={"class": "flym"})
        flym_div.find_all(name="")


    # URL prefix of the list pages; run() appends "_<num>.shtml" to it.
    baseurl = "http://pic.yesky.com/c/6_18332"


    def run(url, num):
        """Download the images reachable from one list page.

        Fetches ``{url}_{num}.shtml`` and, for every ``dd`` entry inside the
        ``lb_box`` div:

        1. creates a directory named after the ``title`` of the entry's link,
        2. downloads the big image shown on the linked page, and
        3. follows every other link in that page's ``overview`` div and
           downloads the big image of each.

        Args:
            url: List-page URL prefix (without the ``_<num>.shtml`` suffix).
            num: Page number appended to *url*.
        """
        res = requests.request("get", f"{url}_{num}.shtml")
        bs = BeautifulSoup(res.text, "html.parser")
        lst = bs.find(name="div", attrs={"class": "lb_box"}).find_all("dd")
        print(lst)

        for entry in lst:
            anchor = entry.find("a")
            dirname = anchor.get("title")
            # exist_ok avoids the check-then-create race: this function runs
            # in several pool workers at once, and two of them may try to
            # create the same directory.
            os.makedirs(dirname, exist_ok=True)

            link = anchor.get("href")
            res1 = requests.request("get", link)
            bs1 = BeautifulSoup(res1.content, "html.parser")

            # Big image of the linked page itself.
            div_obj = bs1.find(name="div", attrs={"class": "l_effect_img_mid"})
            current_img_url = div_obj.find("img").get("src")
            download_img(current_img_url, dirname)

            # Remaining pages linked from the overview strip.
            div_overview = bs1.find(name="div", attrs={"class": "overview"})
            for thumb in div_overview.find_all("a"):
                thumb_link = thumb.get("href")
                # The page at `link` was already handled above.
                if thumb_link == link:
                    continue
                download_img(find_big_img(thumb_link), dirname)


    if __name__ == '__main__':
        # Start a pool of 5 worker processes.
        pool = Pool(5)
        # Queue list pages 1 through 7 without blocking. The AsyncResult
        # handles are kept so that worker failures can be reported below;
        # apply_async drops exceptions silently if its result is never read.
        pending = [pool.apply_async(run, (baseurl, page)) for page in range(1, 8)]
        pool.close()
        pool.join()
        # All tasks have finished after join(); get() re-raises whatever
        # exception a worker hit.
        for page, result in enumerate(pending, start=1):
            try:
                result.get()
            except Exception as exc:
                print(f"page {page} failed: {exc!r}")
    效果如下:



  • 相关阅读:
    <转>反调试技巧总结原理和实现
    MFC CListCtrl 表格
    <转>汇编指令
    c++ builder 简单读、分析网页数据
    <转>CProcessData : A template class to ease up SendMessage calls across processes
    <转>Running console applications silently
    遍历电脑打印机、设置默认打印机、EnumPrinters ,SetDefaultPrinter,GetDefaultPrinter
    <转>运算符巧妙原理解析
    遍历 进程
    Enterprise Library5.0 Unity 试用.
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12209850.html
Copyright © 2011-2022 走看看