zoukankan html css js c++ java

爬天极网多线程.py

# Import the threading module:
import threading
import os
import requests  # 发送请求
from bs4 import BeautifulSoup   # 解析文本
# Absolute path of the directory that contains this script.
base_path = os.path.dirname(os.path.abspath(__file__))
# Downloaded images are stored under an 'img' folder inside that directory.
img_path = os.path.join(base_path, 'img')

def func(a, timeout=10):
    """Download the images of every gallery linked from one listing page.

    Fetches ``http://pic.yesky.com/c/6_20491_{a}.shtml``, finds each gallery
    link inside the ``div.lb_box`` element, creates a folder named after the
    link text under ``img_path``, and saves every image found in the
    gallery's ``div.overview`` element into that folder.

    The function is best-effort: a failure on one gallery or one image is
    reported on stdout and skipped so the remaining downloads still run.

    Args:
        a: Page number interpolated into the listing URL.
        timeout: Seconds to wait on each HTTP request before giving up.
            Without a timeout a stalled connection would block the calling
            thread indefinitely.

    Returns:
        None.
    """
    list_url = f'http://pic.yesky.com/c/6_20491_{a}.shtml'
    try:
        response = requests.get(list_url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f'[page {a}] cannot fetch {list_url}: {e}')
        return

    soup = BeautifulSoup(response.text, 'html.parser')  # parse the listing page
    div_obj = soup.find(name='div', attrs={"class": "lb_box"})  # container holding the gallery entries
    if div_obj is None:
        print(f'[page {a}] no div.lb_box found in {list_url}')
        return

    for dd in div_obj.find_all(name='dd'):  # one <dd> per gallery
        a_obj = dd.find('a')
        if a_obj is None or not a_obj.get('href'):
            continue
        gallery_url = a_obj.get('href')

        # The folder is named after the link text. makedirs also creates the
        # parent 'img' folder and tolerates another thread creating the same
        # folder at the same time.
        dir_path = os.path.join(img_path, a_obj.text)
        try:
            os.makedirs(dir_path, exist_ok=True)
        except OSError as e:
            print(f'[page {a}] cannot create {dir_path}: {e}')
            continue

        try:
            a_response = requests.get(gallery_url, timeout=timeout)
            a_response.raise_for_status()
        except requests.RequestException as e:
            print(f'[page {a}] cannot fetch gallery {gallery_url}: {e}')
            continue
        # Decode the gallery page as GBK (presumably the encoding the site
        # serves; without this the text would be decoded with a guessed one).
        a_response.encoding = 'GBK'
        soup2 = BeautifulSoup(a_response.text, 'html.parser')
        div_obj2 = soup2.find(name='div', attrs={"class": "overview"})
        if div_obj2 is None:
            print(f'[page {a}] no div.overview found in {gallery_url}')
            continue

        for img in div_obj2.find_all(name='img'):
            img_src = img.get("src")
            if not img_src:
                continue
            # Swap the thumbnail size token in the URL for the large variant.
            full_src = img_src.replace('113x113', '740x-')
            # The file name is the last path segment of the thumbnail URL.
            file_path = os.path.join(dir_path, img_src.rsplit('/', 1)[-1])
            try:
                img_response = requests.get(full_src, timeout=timeout)
                # Do not save an HTTP error body as if it were an image.
                img_response.raise_for_status()
                with open(file_path, 'wb') as f:
                    f.write(img_response.content)
            except (requests.RequestException, OSError) as e:
                print(f'[page {a}] cannot save {full_src}: {e}')

# Launch one worker thread per listing page, pages 1 through 5.
for page in range(1, 6):
    # Thread takes its positional arguments as a tuple, hence the trailing comma.
    threading.Thread(target=func, args=(page,)).start()

效果如下：

查看全文

相关阅读:
there can be only one TIMESTAMP column with CURRENT_TIMESTAMP in DEFAULT or ON UPDATE clause
使用Mybatis-Generator自动生成Dao、Model、Mapping相关文件
 ActiveMQ使用示例之Queue
JMS基本概念之一
 @ActiveMQ简单介绍以及安装
 Spring中 @Autowired注解与@Resource注解的区别
 classpath: 和classpath*:的区别
 Mybatis整合Spring
@MyBatis主键返回
 Intellij Idea @Autowired取消提示

原文地址：https://www.cnblogs.com/zhang-da/p/12209992.html