zoukankan      html  css  js  c++  java
  • 并发编程——线程池演示

    线程池演示

    from concurrent.futures import ThreadPoolExecutor
    import time
    
    # The pool can create at most 100 worker threads.
    pool = ThreadPoolExecutor(100)
    
    
    def task(line, delay=10):
        """Print ``line`` and then block the calling thread for ``delay`` seconds.

        The sleep simulates a slow, blocking job.

        Args:
            line: Value to print; the script submits integers from ``range``.
            delay: Seconds to sleep after printing. Defaults to 10, the value
                that was previously hard-coded, so existing callers are
                unaffected.
        """
        print(line)
        time.sleep(delay)
    
    
    if __name__ == '__main__':
        # Queue every job up front; the pool was built with 100 workers, so
        # jobs beyond that limit wait until a worker becomes free.
        job_total = 1000
        for line in range(job_total):
            pool.submit(task, line)
    
    
    
    通过同步(串行)方式爬取某个网站的小视频
    import requests
    import re
    # import os
    # import uuid
    #
    #
    # # 1.发送请求,获取响应数据
    # def get_page(url):
    #     response = requests.get(url)
    #     if response.status_code == 200:
    #         return response
    #
    #
    # # 2.解析并提取主页id号
    # def parse_page(response):
    #     '''
    #     https://www.pearvideo.com/video_1630253
    #     https://www.pearvideo.com/video_1630042
    #     '''
    #     # 将所有电影的详情页id号,匹配获取,并放到列表中
    #     id_list = re.findall('href="video_(.*?)"', response.text, re.S)
    #     # print(len(id_list))
    #     id_list = list(set(id_list))
    #     # print(len(id_list))
    #     return id_list
    #
    #
    # def parse_detail(response):
    #     '''
    #     srcUrl="https://video.pearvideo.com/mp4/adshort/20191206/cont-1630253-14671892_adpkg-ad_hd.mp4"
    #     srcUrl="(.*?)"
    #     '''
    #     mp4_url = re.findall('srcUrl="(.*?)"', response.text, re.S)
    #     # print(mp4_url, 111111)
    #     if mp4_url:
    #         return mp4_url[0]
    #
    #
    # # 3.保存数据
    # def save_movie(movie_url):
    #     response = get_page(movie_url)
    #
    #     movie_dir = r'D:项目路径python13期day30梨视频'
    #     movie_path = os.path.join(
    #         movie_dir, str(uuid.uuid4()) + '.mp4'
    #     )
    #     # print(movie_path)
    #     with open(movie_path, 'wb') as f:
    #         for line in response.iter_content():
    #             f.write(line)
    #
    #
    # if __name__ == '__main__':
    #     response = get_page('https://www.pearvideo.com/')
    #
    #     # 解析提取所有电影详情页id号
    #     id_list = parse_page(response)
    #     # print(id_list)
    #
    #     # 循环拼接详情页链接
    #     for id_num in id_list:
    #         url = f'https://www.pearvideo.com/video_{id_num}'
    #         # print(url)
    #
    #         # 往详情页发送请求,
    #         detail_response = get_page(url)
    #         # print(detail_response.text)
    #
    #         # # 解析电影详情页,并提取视频的存放的地址
    #         mp4_url = parse_detail(detail_response)
    #         print(mp4_url)
    #
    #         # # 发送请求获取视频真实数据
    #         # movie_response = get_page(mp4_url)
    #
    #         # response.content
    #         save_movie(mp4_url)
    
    
    
    # 异步爬取梨视频
    import requests
    import re
    import os
    import uuid
    
    from concurrent.futures import ThreadPoolExecutor
    # Shared thread pool (at most 100 workers); page fetches and video
    # downloads are both submitted to it.
    pool = ThreadPoolExecutor(100)
    
    
    # 1. Send the request and return the response data
    def get_page(url, timeout=10):
        """Send a GET request to ``url`` and return the response on HTTP 200.

        Args:
            url: Address to fetch.
            timeout: Seconds ``requests`` waits on the server before raising
                an exception. Without a timeout a stalled server would block
                the calling thread (here: a pool worker) indefinitely.

        Returns:
            The ``requests`` response object when the status code is 200,
            otherwise ``None``.
        """
        print(f'发送get请求: {url}')
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response
        # Any other status code: signal failure explicitly; callers must be
        # prepared to receive None.
        return None
    
    
    # 2. Parse the home page and extract the video ids
    def parse_page(response):
        """Extract the unique video ids linked from a page.

        Links in the page look like ``href="video_1630253"`` (the matching
        detail page is https://www.pearvideo.com/video_1630253); the text
        after ``video_`` is captured as the id.

        Args:
            response: Object exposing the page HTML as ``.text``, or ``None``
                when the page could not be fetched.

        Returns:
            List of id strings without duplicates, in order of first
            appearance. Empty when ``response`` is ``None`` or nothing
            matches.
        """
        if response is None:
            # get_page returns None for non-200 answers; nothing to parse.
            return []

        id_list = re.findall('href="video_(.*?)"', response.text, re.S)
        # dict.fromkeys drops duplicates while keeping a deterministic order
        # (a plain set() would shuffle the ids between runs).
        return list(dict.fromkeys(id_list))
    
    
    # Parse a detail page and extract the video link
    def parse_detail(res):
        """Done-callback: pull the video URL out of a fetched detail page.

        The detail page embeds the link as
        ``srcUrl="https://video.pearvideo.com/mp4/adshort/20191206/cont-1630253-14671892_adpkg-ad_hd.mp4"``.
        When such a link is found, downloading it is handed to the pool as a
        ``save_movie`` task.

        Args:
            res: Completed future whose ``result()`` is the return value of
                ``get_page``: a response object, or ``None`` when the page
                did not answer with HTTP 200. If the fetch itself raised,
                ``result()`` re-raises that exception here.
        """
        res2 = res.result()
        print(res2)
        if res2 is None:
            # The fetch did not yield a page; there is nothing to parse.
            return

        movie_url = re.findall('srcUrl="(.*?)"', res2.text, re.S)
        print(movie_url)
        if movie_url:
            # Only the first match is used.
            pool.submit(save_movie, movie_url[0])
    
    
    # 3. Save the data
    def save_movie(movie_url):
        """Download ``movie_url`` and store it as a uniquely named ``.mp4`` file.

        The body is streamed to disk in 64 KiB chunks instead of being
        buffered whole and written one byte at a time (the default chunk size
        of ``iter_content`` is 1). Nothing is written when the server does
        not answer with HTTP 200.

        Args:
            movie_url: Direct URL of the video file.
        """
        # NOTE(review): this path contains no separators; they were presumably
        # lost when the snippet was published -- confirm the intended directory.
        movie_dir = r'D:项目路径python13期day30梨视频'
        movie_path = os.path.join(movie_dir, str(uuid.uuid4()) + '.mp4')

        # Fetching the data is the IO-bound part. stream=True defers the body
        # download to iter_content; the with-block releases the connection
        # even if writing fails; the timeout keeps a stalled server from
        # pinning a pool worker forever.
        with requests.get(movie_url, stream=True, timeout=30) as response:
            if response.status_code != 200:
                # Do not save an error page under an .mp4 name.
                return

            # Create the target directory on demand instead of failing in open().
            os.makedirs(movie_dir, exist_ok=True)
            with open(movie_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    f.write(chunk)
    
    
    if __name__ == '__main__':
        import datetime

        # Fetch the home page synchronously and collect the ids it links to.
        home_page = get_page('https://www.pearvideo.com/')

        for video_id in parse_page(home_page):
            # Detail page of one video.
            detail_url = f'https://www.pearvideo.com/video_{video_id}'

            # Fetch the detail page on the pool. Once get_page finishes, the
            # future is passed to parse_detail, which reads get_page's return
            # value through future.result().
            future = pool.submit(get_page, detail_url)
            future.add_done_callback(parse_detail)

        # Printed as soon as all fetches are queued, not when they complete.
        print(datetime.datetime.now())
        # Original author's note, kept verbatim (presumably a timing
        # comparison; units not stated): 21:54 ---> 18:45
    
    我把月亮戳到天上 天就是我的 我把脚踩入地里 地就是我的 我亲吻你 你就是我的
  • 相关阅读:
    移动端默认meta标签
    css3 变形(transform)、转换(transition)和动画(animation)
    Java_ToolKit用法
    Java_I/O输入输出_实现当用户输入姓名和密码时,将每一个姓名和密码加在文件中,如果用户输入done,就结束程序。
    Java_I/O输入输出_实现读取文件时出现一个表示读取进度的进度条。可以使用java.swing包提供的输入流类ProgressMonitorInputStream
    Java_Swing程序设计_尝试开发一个登陆窗体,包括用户名、密码以及提交按钮和重置按钮,当用户输入用户名my,密码love时,弹出登陆成功提示对话框。
    Eclipse通过jdbc连接oracle数据库
    Java_I/O输入输出_使用输入输出流读取文件,将一段文字加密后存入文件,然后读取,将加密前与后的文件输出
    asp.net缓存机制
    深入理解abstract class和interface
  • 原文地址:https://www.cnblogs.com/zhulipeng-1998/p/12863900.html
Copyright © 2011-2022 走看看