zoukankan html css js c++ java

最简单的爬虫——小白

import requests
from lxml import etree
import os
# Crawl the wallpaper listing pages of pic.netbian.com, follow every
# thumbnail link to its detail page and save the picture found there.

# Listing-page URL template; {} is filled with the page index.
start_url = "http://pic.netbian.com/index_{}.html"
# Site root that scraped href / src values are appended to.
base_url = 'http://pic.netbian.com/'
# Folder the downloaded pictures are written into.
save_dir = './不知火'
# Browser-like User-Agent sent with every request; built once instead of
# once per listing page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'}
# Seconds to wait on any single request; without a timeout a stalled
# connection would block the crawl forever.
timeout = 30
# Running count of saved pictures (1-based, shown in the progress message).
num = 1

# open() does not create missing folders, so make sure the target exists.
os.makedirs(save_dir, exist_ok=True)

# NOTE(review): range(1, 1169) visits indexes 1..1168. The original comment
# says the crawl ends up "one page short" of 1-1169 -- confirm whether the
# upper bound or the first index is the cause before changing it.
for page in range(1, 1169):
    url = start_url.format(page)

    response = requests.get(url, headers=headers, timeout=timeout)
    if not response.ok:
        # Error page instead of a listing: nothing to scrape here.
        continue
    # Pages are decoded as GBK (the original did this to avoid garbled
    # text); undecodable bytes are replaced rather than aborting the run.
    etree_html = etree.HTML(response.content.decode('gbk', errors='replace'))
    # class="slist" ul li a href -> links to the per-picture detail pages
    detail_hrefs = etree_html.xpath('//div[@ class="slist"]/ul/li/a/@href')
    for href in detail_hrefs:
        # Strip a leading slash so the joined URL has no '//' in its path.
        detail_url = base_url + href.lstrip('/')

        detail_response = requests.get(detail_url, headers=headers, timeout=timeout)
        if not detail_response.ok:
            continue
        detail_html = etree.HTML(
            detail_response.content.decode('gbk', errors='replace'))
        # class="photo-pic" a img title
        titles = detail_html.xpath('//div[@ class="photo-pic"]/a/img/@title')
        # class="photo-pic" a img src
        sources = detail_html.xpath('//div[@ class="photo-pic"]/a/img/@src')
        for title, src in zip(titles, sources):
            img_url = base_url + src.lstrip('/')
            img_response = requests.get(img_url, headers=headers, timeout=timeout)
            if not img_response.ok:
                # Do not save an HTML error page under a .png name.
                continue
            # A path separator inside the scraped title would point the
            # write at a different (usually missing) directory.
            safe_title = title.replace('/', '_').replace('\\', '_')
            file_path = os.path.join(save_dir, '{}.png'.format(safe_title))
            with open(file_path, 'wb') as f:
                f.write(img_response.content)
            print('已完成{}下载，		第{}张图片'.format(title, num))
            num += 1

查看全文

相关阅读:
js中的call和apply方法
 前端Cookie与Session的区别
 js中的this
Python基础语法
 Python基础安装
 Python基础字符串、列表、元组、字典
 java回调
 java内存分配与溢出
 “眉毛导航”——SiteMapPath控件的使用（ASP.NET）
Photoshop制作雪碧图技巧

原文地址：https://www.cnblogs.com/LQ970811/p/11821199.html