zoukankan html css js c++ java

asyncio 简单使用

import asyncio


async def func(i, s):
    """Announce that coroutine number ``i`` has started, then sleep ``s`` seconds.

    :param i: index of the coroutine, used only in the printed message.
    :param s: number of seconds to sleep without blocking the event loop.
    """
    message = f'第{i}个协程启动了'
    print(message)
    pause = asyncio.sleep(s)
    await pause


async def main():
    """Run 100 ``func`` coroutines concurrently and wait until all finish.

    Each coroutine is started with its index and a 2-second sleep.

    :return: None
    """
    # asyncio.wait() no longer accepts bare coroutine objects (TypeError on
    # Python 3.11+); gather() wraps each coroutine in a task itself.
    await asyncio.gather(*(func(i, 2) for i in range(100)))


if __name__ == '__main__':
    # event_loop = asyncio.get_event_loop()
    # event_loop.run_until_complete(main())
    # The two commented-out lines above are equivalent to the single call below.
    asyncio.run(main())

asyncio 爬虫

import os

import asyncio
import aiohttp
import aiofiles

from lxml import etree


async def details(url, path):
    """Download one chapter page and save its text as ``<chapter title>.txt``.

    The chapter title is read from the ``<h1>`` inside the ``chaptertitle``
    div and the body from the text nodes of the ``BookText`` div.

    :param url: address of the chapter page to fetch.
    :param path: directory the text file is written into.
    :raises IndexError: if the page contains no chapter title element.
    """
    async with aiohttp.ClientSession() as request:
        # ssl=False disables certificate verification; it is the current
        # spelling of the deprecated verify_ssl=False argument.
        async with request.get(url, ssl=False) as response:
            detail = await response.text(encoding='utf8')

    # The body has been read in full, so parsing no longer needs the connection.
    tree = etree.HTML(detail)
    # Chapter title; '/' is stripped because the title becomes a file name.
    chaptertitle = tree.xpath('.//div[contains(@class,"chaptertitle")]/h1/text()')[0].replace('/', '')
    # Join the paragraphs with newlines and drop the pair of ideographic
    # spaces (U+3000) used as paragraph indentation.
    paragraphs = tree.xpath('.//div[@id="BookText"]/text()')
    BookText = '\n'.join(paragraphs).replace('\u3000\u3000', '')
    txt_path = os.path.join(path, f'{chaptertitle}.txt')
    async with aiofiles.open(txt_path, 'w', encoding='utf8') as f:
        await f.write(BookText)
    print(chaptertitle, url, '下载完成')


async def home():
    """
    Fetch the book index page and download every chapter it lists.

    Each ``<span>`` inside the ``booklist`` div is handled in page order:

    * a span whose ``class`` attribute is exactly ``v`` is a volume heading;
      a ``data/<title>`` directory is created under the current working
      directory and becomes the target for the chapters that follow;
    * any other span is a chapter link, downloaded with ``details`` into the
      current volume directory. Chapters that appear before the first volume
      heading are skipped.

    :return: None
    """
    url = "https://www.zanghaihua.org/guichuideng/"
    async with aiohttp.ClientSession() as request:
        # ssl=False disables certificate verification; it is the current
        # spelling of the deprecated verify_ssl=False argument.
        async with request.get(url, ssl=False) as response:
            html = await response.text(encoding='utf8')

    # The index has been read in full, so its session can be closed before
    # the chapter downloads start.
    tree = etree.HTML(html)
    booklist = tree.xpath('.//div[contains(@class,"booklist")]/span')
    task_lst = []
    dir_path = None
    for book in booklist:
        if 'v' in book.xpath('@class'):
            # Volume heading: create its output directory.
            title = book.xpath('./a/text()')[0]
            dir_path = os.path.join(os.path.abspath('.'), 'data', title)
            os.makedirs(dir_path, exist_ok=True)
        else:
            # Plain chapter link.
            href = book.xpath('./a/@href')[0]
            if dir_path:
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so each download is wrapped in a Task.
                task_lst.append(asyncio.create_task(details(href, dir_path)))

    # asyncio.wait() raises ValueError on an empty collection.
    if task_lst:
        await asyncio.wait(task_lst)


async def main():
    """Entry coroutine of the crawler script; it simply awaits ``home()``."""
    await home()


if __name__ == '__main__':
    import time

    # Time the whole crawl and print the elapsed seconds.
    began = time.time()
    asyncio.run(main())
    elapsed = time.time() - began
    print(elapsed)

查看全文

相关阅读:
VS2017+MPI10.0安装与环境配置
 数组翻转（非reverse）
判断一个字符串中出现次数最多的字符，统计这个次数
 Mock模拟数据，前后端分离
 js放大镜
 360度全景图片
 照片墙应用
 KindEditor配置和使用
 关于IIS的错误 312 (net::ERR_UNSAFE_PORT)：未知错误
 从C到C++：命令行参数

原文地址：https://www.cnblogs.com/wtil/p/15023262.html