zoukankan      html  css  js  c++  java
  • asyncio 简单使用

    import asyncio
    
    
    async def func(i, s):
        """Announce that coroutine number ``i`` has started, then sleep.

        :param i: index of the coroutine, used only in the printed message.
        :param s: number of seconds to sleep via ``asyncio.sleep``.
        """
        started_message = f'第{i}个协程启动了'
        print(started_message)
        pause = asyncio.sleep(s)
        await pause
    
    
    async def main():
        """Start 100 ``func`` coroutines concurrently and wait for all of them.

        Each coroutine is called with its index and a 2 second sleep.
        """
        # asyncio.wait() no longer accepts bare coroutine objects (deprecated
        # in Python 3.8, a TypeError since 3.11), so each coroutine is wrapped
        # in a Task before being waited on.
        tasks = [asyncio.create_task(func(i, 2)) for i in range(100)]
        await asyncio.wait(tasks)
    
    
    if __name__ == '__main__':
        # event_loop = asyncio.get_event_loop()
        # event_loop.run_until_complete(main())
        # The two commented-out lines above do the same job as the single call below.
        asyncio.run(main())

     asyncio 爬虫

    import os
    
    import asyncio
    import aiohttp
    import aiofiles
    
    from lxml import etree
    
    
    async def details(url, path):
        """Download one chapter page and save its text as ``<title>.txt``.

        The chapter title is read from the ``h1`` inside the element whose
        class contains ``chaptertitle``; the body is the text nodes directly
        under the element with id ``BookText``, joined with newlines.

        :param url: address of the chapter page to fetch.
        :param path: directory the text file is written into (not created here).
        :raises IndexError: if the page contains no chapter title heading.
        """
        async with aiohttp.ClientSession() as request:
            # ``ssl=False`` disables certificate verification; it is the
            # current spelling of the deprecated ``verify_ssl=False``.
            async with request.get(url, ssl=False) as response:
                detail = await response.text(encoding='utf8')
        # The page text is fully read, so parsing and writing happen after the
        # HTTP session has been released.
        tree = etree.HTML(detail)
        # '/' is removed from the title so it cannot act as a path separator
        # in the file name.
        chaptertitle = tree.xpath('.//div[contains(@class,"chaptertitle")]/h1/text()')[0].replace('/', '')
        paragraphs = tree.xpath('.//div[@id="BookText"]/text()')
        # U+3000 is the ideographic (full-width) space; pairs of them are
        # stripped - presumably they are the paragraph indentation.
        BookText = '\n'.join(paragraphs).replace('\u3000\u3000', '')
        txt_path = os.path.join(path, f'{chaptertitle}.txt')
        async with aiofiles.open(txt_path, 'w', encoding='utf8') as f:
            await f.write(BookText)
        print(chaptertitle, url, '下载完成')
    
    
    async def home():
        """Fetch the book index page and download every chapter it lists.

        The spans under the ``booklist`` element are walked in document order.
        A span whose ``class`` attribute is ``v`` is treated as a volume title
        and a directory ``./data/<title>`` is created for it. Any other span
        is treated as a chapter link and is downloaded into the most recently
        created volume directory; chapter links that appear before the first
        title are skipped.

        :return: None
        :raises IndexError: if a span has no ``a`` child to read the title or
            link from.
        """
        url = "https://www.zanghaihua.org/guichuideng/"
        async with aiohttp.ClientSession() as request:
            # ``ssl=False`` disables certificate verification; it is the
            # current spelling of the deprecated ``verify_ssl=False``.
            async with request.get(url, ssl=False) as response:
                html = await response.text(encoding='utf8')
        # The index page is fully read, so its session is released before the
        # chapter downloads are fanned out.
        tree = etree.HTML(html)
        booklist = tree.xpath('.//div[contains(@class,"booklist")]/span')
        task_lst = []
        dir_path = None
        for book in booklist:
            if 'v' in book.xpath('@class'):
                # Volume title: start a new output directory.
                title = book.xpath('./a/text()')[0]
                dir_path = os.path.join(os.path.abspath('.'), 'data', title)
                os.makedirs(dir_path, exist_ok=True)
            else:
                # Plain chapter link.
                href = book.xpath('./a/@href')[0]
                if dir_path:
                    # asyncio.wait() no longer accepts bare coroutines
                    # (deprecated in 3.8, a TypeError since 3.11), so each
                    # download is wrapped in a Task.
                    task_lst.append(asyncio.create_task(details(href, dir_path)))
        # asyncio.wait() raises ValueError on an empty collection, so skip it
        # when the index yielded no chapters. Note that wait() does not
        # re-raise exceptions from failed downloads.
        if task_lst:
            await asyncio.wait(task_lst)
    
    
    async def main():
        """Entry coroutine: await the ``home`` crawl until it finishes."""
        await home()
    
    
    if __name__ == '__main__':
        import time

        # perf_counter() is a monotonic clock intended for measuring
        # durations; time.time() can jump when the system clock is adjusted.
        start = time.perf_counter()
        asyncio.run(main())
        # Print the elapsed run time in seconds.
        print(time.perf_counter() - start)
  • 相关阅读:
    beanstalkd 安装和配置
    vm虚拟机用批处理启动和关闭
    Windows设置VMware开机自动启动,虚拟机也启动
    批处理脚本学习笔记1--vmware虚拟机启停控制
    Shell中uname命令查看系统内核、版本
    SHELL脚本里执行的东西需要多次回车确认,怎么实现自动回车确认?
    eclipse下搭建shell脚本编辑器--安装开发shell的eclipse插件shelled
    如何进行shell脚本正确性测试
    robot framework
    loadrunner参数化数据分配方法
  • 原文地址:https://www.cnblogs.com/wtil/p/15023262.html
Copyright © 2011-2022 走看看