zoukankan      html  css  js  c++  java
  • asyncio 简单使用

    import asyncio
    
    
    async def func(i, s):
        """Announce that coroutine number ``i`` has started, then sleep.

        :param i: index of the coroutine, used only in the printed message
        :param s: number of seconds to sleep
        :return: None
        """
        start_message = f'第{i}个协程启动了'
        print(start_message)
        # Awaiting the sleep hands control back to the event loop so that the
        # other coroutines can run while this one is waiting.
        await asyncio.sleep(s)
    
    
    async def main():
        """Run 100 ``func`` coroutines concurrently and wait until all finish.

        Every coroutine sleeps for 2 seconds; because they run concurrently the
        whole batch takes roughly 2 seconds instead of 200.

        :return: None
        """
        # asyncio.wait() no longer accepts bare coroutine objects (deprecated in
        # Python 3.8, TypeError since 3.11). gather() wraps each coroutine in a
        # Task itself and waits for all of them.
        await asyncio.gather(*(func(i, 2) for i in range(100)))
    
    
    if __name__ == "__main__":
        # asyncio.run(main()) is the one-line equivalent of the older two-step
        # form:
        #     event_loop = asyncio.get_event_loop()
        #     event_loop.run_until_complete(main())
        asyncio.run(main())

     asyncio爬虫

    import os
    
    import asyncio
    import aiohttp
    import aiofiles
    
    from lxml import etree
    
    
    async def details(url, path):
        """Download one chapter page and save its text as ``<title>.txt``.

        :param url: address of the chapter page
        :param path: existing directory the text file is written into
        :return: None
        """
        async with aiohttp.ClientSession() as request:
            # ``ssl=False`` replaces the deprecated ``verify_ssl=False``;
            # certificate verification stays disabled exactly as before.
            async with request.get(url, ssl=False) as response:
                detail = await response.text(encoding='utf8')

        tree = etree.HTML(detail)
        # Chapter title; '/' is removed because it is a path separator and the
        # title is used as the file name.
        titles = tree.xpath('.//div[contains(@class,"chaptertitle")]/h1/text()')
        if not titles:
            # Not a chapter page (or the layout changed): skip it instead of
            # failing with a bare IndexError.
            print(url, '未找到章节标题,已跳过')
            return
        chaptertitle = titles[0].replace('/', '')

        # One text node per paragraph: join them with newlines and strip the
        # leading pair of ideographic spaces (U+3000) used as paragraph indent.
        paragraphs = tree.xpath('.//div[@id="BookText"]/text()')
        book_text = '\n'.join(paragraphs).replace('\u3000\u3000', '')

        txt_path = os.path.join(path, f'{chaptertitle}.txt')
        async with aiofiles.open(txt_path, 'w', encoding='utf8') as f:
            await f.write(book_text)
        print(chaptertitle, url, '下载完成')
    
    
    async def home():
        """
        Fetch the book index page and download every chapter it links to.

        The index is a flat list of ``<span>`` elements. A span whose class is
        ``v`` is a volume title and creates ``./data/<volume title>``; every
        other span is a chapter link, downloaded into the directory of the most
        recent volume title via ``details``. Chapter links that appear before
        the first volume title are ignored.
        :return: None
        """
        url = "https://www.zanghaihua.org/guichuideng/"
        async with aiohttp.ClientSession() as request:
            # ``ssl=False`` replaces the deprecated ``verify_ssl=False``.
            async with request.get(url, ssl=False) as response:
                html = await response.text(encoding='utf8')

        tree = etree.HTML(html)
        booklist = tree.xpath('.//div[contains(@class,"booklist")]/span')

        hrefs = []
        task_lst = []
        dir_path = None
        for book in booklist:
            if 'v' in book.xpath('@class'):
                # Volume title: following chapters go into its directory.
                title = book.xpath('./a/text()')[0]
                dir_path = os.path.join(os.path.abspath('.'), 'data', title)
                os.makedirs(dir_path, exist_ok=True)
                continue

            # Plain chapter entry; spans without a link have nothing to fetch.
            links = book.xpath('./a/@href')
            if links and dir_path:
                hrefs.append(links[0])
                task_lst.append(details(links[0], dir_path))

        if not task_lst:
            return

        # asyncio.wait() rejects bare coroutines on Python 3.11+, so use
        # gather(). return_exceptions=True keeps the crawl best-effort: one
        # failed chapter does not cancel the others, and failures are reported
        # instead of being silently dropped.
        results = await asyncio.gather(*task_lst, return_exceptions=True)
        for href, result in zip(hrefs, results):
            if isinstance(result, BaseException):
                print(href, '下载失败', repr(result))
    
    
    async def main():
        """Entry coroutine of the crawler: run ``home`` to completion."""
        await home()
    
    
    if __name__ == "__main__":
        import time

        # Time the whole crawl and print the elapsed wall-clock seconds.
        began_at = time.time()
        asyncio.run(main())
        elapsed = time.time() - began_at
        print(elapsed)
  • 相关阅读:
    linux sed的使用
    linux 服务的操作
    Js apply方法详解,及其apply()方法的妙用
    call()方法和apply()方法
    javascript中的深拷贝和浅拷贝
    移动web适配利器-rem
    js 函数讲解
    try…catch 结构
    Git使用之(pathspec master did not match any file(s) known to git)
    微信小程序使用函数的三种方法
  • 原文地址:https://www.cnblogs.com/wtil/p/15023262.html
Copyright © 2011-2022 走看看