zoukankan      html  css  js  c++  java
  • python进程池爬取下载美女图片(xpath)--lowbiprogrammer

    # -*- coding: utf-8 -*-
    import multiprocessing
    import os
    from urllib.parse import urljoin, urlparse

    import requests
    from lxml import etree
    from retrying import retry
    # Build the pool of listing-page URLs: one per (category, page) pair,
    # categories 16-25 with pages 0-24 of each.
    urllist = [
        "http://www.zhuangxiule.cn/c{}p{}/".format(category, page)
        for category in range(16, 26)
        for page in range(25)
    ]
    @retry(stop_max_attempt_number=3)
    def get_data(url):
        """Download every image linked from one listing page.

        Fetches the listing page at ``url``, follows each titled entry to its
        detail page, and saves every ``<img>`` found there into ``f:/img/``
        under the file name taken from the image URL's path.

        Args:
            url: Absolute URL of a listing page.

        Raises:
            requests.RequestException: If an HTTP request fails or times out.
            OSError: If the target directory or an image file cannot be written.

        Any exception makes the ``retry`` decorator run the whole function
        again (at most three attempts, per ``stop_max_attempt_number``) before
        it reaches the caller.
        """
        # One limit (seconds) for every request, so no call can hang a worker.
        timeout = 3
        path = "f:/img/"

        response = requests.get(url, timeout=timeout)
        listing = etree.HTML(response.content)
        if listing is None:
            # The parser may yield no tree for an empty body; nothing to crawl.
            return

        # Match the entries of the listing: each carries a title and a link to
        # its detail page.
        entries = listing.xpath(
            "//div[@class='main']/dl[@class='list-left public-box']/*"
        )
        for entry in entries:
            title = entry.xpath("./a/span/text()")
            if not title:
                continue
            print(title)

            hrefs = entry.xpath("./a/@href")
            if not hrefs:
                # A title without a link has no detail page to fetch.
                continue
            # Resolve against the listing URL so relative links work too.
            detail_url = urljoin(url, hrefs[0])

            # Request the detail page and collect its image addresses.
            detail_response = requests.get(detail_url, timeout=timeout)
            detail = etree.HTML(detail_response.content)
            if detail is None:
                continue

            # exist_ok avoids the check-then-create race between pool workers.
            os.makedirs(path, exist_ok=True)

            for src in detail.xpath("//img/@src"):
                image_url = urljoin(detail_url, src)
                parsed = urlparse(image_url)
                if parsed.scheme not in ("http", "https"):
                    # e.g. inline "data:" images, which requests cannot fetch.
                    continue
                # Use only the URL path so query strings never end up in the
                # file name.
                filename = os.path.basename(parsed.path)
                if not filename:
                    # URL ends with "/": there is no file name to save under.
                    continue

                image = requests.get(image_url, timeout=timeout)
                if not image.ok:
                    # Do not store an HTTP error page as if it were an image.
                    continue
                with open(path + filename, "wb") as f:
                    f.write(image.content)
    if __name__ == '__main__':
        # Create a pool of five worker processes.
        pool = multiprocessing.Pool(5)
        try:
            # Apply get_data to every listing URL; map blocks until all
            # pages have been processed.
            pool.map(get_data, urllist)
        finally:
            # Always shut the workers down cleanly, even when map re-raises
            # an error from a page that kept failing.
            pool.close()
            pool.join()
  • 相关阅读:
    文件的操作
    encode,decode,str,bytes
    字符串操作
    suse12安装mysql8.16
    VMware配置共享磁盘安装RAC
    Linux过滤文本并显示过滤文字的上下文
    Linux服务器卸载mysql指南
    oracle 各版本各日志存放位置
    impdp按用户导入
    数据泵expdp定时备份
  • 原文地址:https://www.cnblogs.com/xcsg/p/10138163.html
Copyright © 2011-2022 走看看