zoukankan      html  css  js  c++  java
  • 爬天极网美女图片缩略图:

    import os

    # HTTP client for fetching pages and images.
    import requests
    # HTML parser for locating the gallery/thumbnail elements.
    from bs4 import BeautifulSoup

    # Scrape thumbnail galleries from pic.yesky.com: one sub-folder per
    # gallery under ./img1, one file per thumbnail image.

    # Directory containing this script.
    base_path = os.path.dirname(os.path.abspath(__file__))
    # Root folder that will hold one sub-directory per gallery.
    img1_path = os.path.join(base_path, "img1")

    # Fetch the gallery index page.
    response = requests.get("http://pic.yesky.com/c/6_20491_1.shtml")
    soup = BeautifulSoup(response.text, "html.parser")

    # The gallery list lives inside <div class="lb_box">.
    div_obj = soup.find(name="div", attrs={"class": "lb_box"})
    # Each <dd> wraps one gallery entry (link + title).
    list_dd = div_obj.find_all(name="dd")

    for dd in list_dd:
        # The <a> holds both the gallery title (text) and its detail-page URL (href).
        a_obj = dd.find("a")
        # One folder per gallery, named after the link text.
        dir_path = os.path.join(img1_path, a_obj.text)
        # makedirs creates the intermediate img1/ directory too if it is
        # missing (os.mkdir would raise FileNotFoundError in that case),
        # and exist_ok makes re-runs idempotent.
        os.makedirs(dir_path, exist_ok=True)

        # Follow the gallery link and parse its detail page.
        a_response = requests.get(a_obj.get("href"))
        # Detail pages are GBK-encoded; set encoding before reading .text.
        a_response.encoding = "gbk"
        soup2 = BeautifulSoup(a_response.text, "html.parser")

        # All thumbnails of one gallery sit inside <div class="overview">.
        div_obj2 = soup2.find(name="div", attrs={"class": "overview"})
        img_list = div_obj2.find_all(name="img")
        for img in img_list:
            img_src = img.get("src")
            # Download the image bytes.
            img_response = requests.get(img_src)
            # File name = last path component of the image URL.
            file_path = os.path.join(dir_path, img_src.rsplit("/", 1)[-1])
            with open(file_path, "wb") as f:
                f.write(img_response.content)
    效果如下:

  • 相关阅读:
    JavaScript语言和jQuery技术1
    JSP2
    JavaScript语言和jQuery技术2
    MYSQL2
    JSP1
    JSP5
    JSP3
    Spring框架
    JSP4
    MYSQL3(查询)
  • 原文地址:https://www.cnblogs.com/zhang-da/p/12208018.html
Copyright © 2011-2022 走看看