pycuda learning notes: CUDA function initialization must come after CUDA memory initialization, otherwise an error is raised

    Reference:

    https://www.cnblogs.com/devilmaycry812839668/p/15348610.html

    Recently I have been reading the WarpDrive code. The code that runs on CUDA is wired up through the pycuda library, which makes it easy to call CUDA code from a Python environment. While using it, though, I noticed something: the CUDA functions must be initialized after the CUDA memory space is initialized, otherwise an error is raised.

    Code (this version runs correctly):

    import numpy as np
    
    from warp_drive.managers.data_manager import CUDADataManager
    from warp_drive.managers.function_manager import (
        CUDAFunctionManager, CUDALogController, CUDASampler, CUDAEnvironmentReset
    )
    from warp_drive.utils.data_feed import DataFeed
    
    
    source_code = """
    // A function to demonstrate how to manipulate data on the GPU.
    // This function increments each element of the random data array we pushed to the GPU earlier.
    // Each index corresponding to (env_id, agent_id) in the array is incremented by "agent_id + env_id".
    // Everything inside the if() block runs in parallel for each agent and environment.
    //
    extern "C"{
        __global__ void cuda_increment(                               
                float* data,                                  
                int num_agents                                       
        )                                                            
        {                                                            
            int env_id = blockIdx.x;                                 
            int agent_id = threadIdx.x;                             
            if (agent_id < num_agents){                              
                int array_index = env_id * num_agents + agent_id;
                int increment = env_id + agent_id;
                data[array_index] += increment;
            }                                                            
        }   
    }
    """
    
    
    
    
    from timeit import Timer
    
    
    def push_random_data_and_increment_timer(
            num_runs=1,
            num_envs=2,
            num_agents=3,
            source_code=None
    ):
    
        assert source_code is not None
    
        def push_random_data(num_agents, num_envs):
            # Initialize the CUDA data manager
            cuda_data_manager = CUDADataManager(
                num_agents=num_agents,
                num_envs=num_envs,
                episode_length=100
            )
    
            # Create random data
            random_data = np.random.rand(num_envs, num_agents)
    
            # Push data from host to device
            data_feed = DataFeed()
            data_feed.add_data(
                name="random_data",
                data=random_data,
            )
            data_feed.add_data(
                name="num_agents",
                data=num_agents
            )
            cuda_data_manager.push_data_to_device(data_feed)
    
            return cuda_data_manager
    
    
        # Initialize the CUDA function manager
        def cuda_func_init():
            cuda_function_manager = CUDAFunctionManager(
                num_agents=num_agents, #cuda_data_manager.meta_info("n_agents"),
                num_envs=num_envs #cuda_data_manager.meta_info("n_envs")
            )
    
            # Load source code and initialize function
            cuda_function_manager.load_cuda_from_source_code(
                source_code,
                default_functions_included=False
            )
            cuda_function_manager.initialize_functions(["cuda_increment"])
            increment_function = cuda_function_manager._get_function("cuda_increment")
    
            return cuda_function_manager, increment_function
    
    
        def increment_data(cuda_data_manager, cuda_function_manager, increment_function):
            increment_function(
                cuda_data_manager.device_data("random_data"),
                cuda_data_manager.device_data("num_agents"),
                block=cuda_function_manager.block,
                grid=cuda_function_manager.grid
            )
    
    
        # set variable
        # cuda_data_manager = push_random_data(num_agents, num_envs)
    
        # cuda function init
        # cuda_function_manager, increment_function = cuda_func_init()
    
        # cuda function run
        # increment_data(cuda_data_manager, cuda_function_manager, increment_function)
    
    
        data_push_time = Timer(lambda: push_random_data(num_agents, num_envs)).timeit(number=num_runs)
    
    
        cuda_data_manager = push_random_data(num_agents, num_envs)
        cuda_function_manager, increment_function = cuda_func_init()
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)
        print(cuda_data_manager.pull_data_from_device('random_data'))
    
        return {
            "data push times": data_push_time,
            "code run time": program_run_time
        }
    
    
    
    
    num_runs = 1000
    times = {}
    
    for scenario in [
        (1, 1),
        (1, 100),
        (1, 1000),
        (100, 1000),
        (1000, 1000)
    ]:
        num_envs, num_agents = scenario
        times.update(
            {
                f"envs={num_envs}, agents={num_agents}":
                push_random_data_and_increment_timer(
                    num_runs,
                    num_envs,
                    num_agents,
                    source_code
                )
            }
        )
    
    
    print(f"Times for {num_runs} function calls")
    print("*"*40)
    for key, value in times.items():
        print(f"{key:30}: mean data push times: {value['data push times']:10.5}s,	 mean increment times: {value['code run time']:10.5}s")
    
    
    '''
    print(cuda_data_manager._meta_info)
    print(cuda_data_manager._host_data)
    print(cuda_data_manager._device_data_pointer)
    print(cuda_data_manager._scalar_data_list)
    print(cuda_data_manager._reset_data_list)
    print(cuda_data_manager._log_data_list)
    print(cuda_data_manager._device_data_via_torch)
    print(cuda_data_manager._shared_constants)
    print(cuda_data_manager._shape)
    print(cuda_data_manager._dtype)
    
    print(tensor_on_device)
    time.sleep(300)
    
    '''
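
    The decisive ordering in this working version is in these two lines from the listing above: the data manager (and with it the CUDA memory) is brought up first, and the functions second.

        cuda_data_manager = push_random_data(num_agents, num_envs)    # memory / data manager first
        cuda_function_manager, increment_function = cuda_func_init()  # function manager second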

    Code that triggers the error:

    import numpy as np
    
    from warp_drive.managers.data_manager import CUDADataManager
    from warp_drive.managers.function_manager import (
        CUDAFunctionManager, CUDALogController, CUDASampler, CUDAEnvironmentReset
    )
    from warp_drive.utils.data_feed import DataFeed
    
    
    source_code = """
    // A function to demonstrate how to manipulate data on the GPU.
    // This function increments each element of the random data array we pushed to the GPU earlier.
    // Each index corresponding to (env_id, agent_id) in the array is incremented by "agent_id + env_id".
    // Everything inside the if() block runs in parallel for each agent and environment.
    //
    extern "C"{
        __global__ void cuda_increment(                               
                float* data,                                  
                int num_agents                                       
        )                                                            
        {                                                            
            int env_id = blockIdx.x;                                 
            int agent_id = threadIdx.x;                             
            if (agent_id < num_agents){                              
                int array_index = env_id * num_agents + agent_id;
                int increment = env_id + agent_id;
                data[array_index] += increment;
            }                                                            
        }   
    }
    """
    
    
    
    
    from timeit import Timer
    
    
    def push_random_data_and_increment_timer(
            num_runs=1,
            num_envs=2,
            num_agents=3,
            source_code=None
    ):
    
        assert source_code is not None
    
        def push_random_data(num_agents, num_envs):
            # Initialize the CUDA data manager
            cuda_data_manager = CUDADataManager(
                num_agents=num_agents,
                num_envs=num_envs,
                episode_length=100
            )
    
            # Create random data
            random_data = np.random.rand(num_envs, num_agents)
    
            # Push data from host to device
            data_feed = DataFeed()
            data_feed.add_data(
                name="random_data",
                data=random_data,
            )
            data_feed.add_data(
                name="num_agents",
                data=num_agents
            )
            cuda_data_manager.push_data_to_device(data_feed)
    
            return cuda_data_manager
    
    
        # Initialize the CUDA function manager
        def cuda_func_init():
            cuda_function_manager = CUDAFunctionManager(
                num_agents=num_agents, #cuda_data_manager.meta_info("n_agents"),
                num_envs=num_envs #cuda_data_manager.meta_info("n_envs")
            )
    
            # Load source code and initialize function
            cuda_function_manager.load_cuda_from_source_code(
                source_code,
                default_functions_included=False
            )
            cuda_function_manager.initialize_functions(["cuda_increment"])
            increment_function = cuda_function_manager._get_function("cuda_increment")
    
            return cuda_function_manager, increment_function
    
    
        def increment_data(cuda_data_manager, cuda_function_manager, increment_function):
            increment_function(
                cuda_data_manager.device_data("random_data"),
                cuda_data_manager.device_data("num_agents"),
                block=cuda_function_manager.block,
                grid=cuda_function_manager.grid
            )
    
    
        # set variable
        # cuda_data_manager = push_random_data(num_agents, num_envs)
    
        # cuda function init
        # cuda_function_manager, increment_function = cuda_func_init()
    
        # cuda function run
        # increment_data(cuda_data_manager, cuda_function_manager, increment_function)
    
    
        #data_push_time = Timer(lambda: push_random_data(num_agents, num_envs)).timeit(number=num_runs)
    
    
        cuda_function_manager, increment_function = cuda_func_init()  ###
        cuda_data_manager = push_random_data(num_agents, num_envs)    ###
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)
        print(cuda_data_manager.pull_data_from_device('random_data'))
    
        return {
            "data push times": 0, #data_push_time,
            "code run time": program_run_time
        }
    
    
    
    
    num_runs = 1000
    times = {}
    
    for scenario in [
        (1, 1),
        (1, 100),
        (1, 1000),
        (100, 1000),
        (1000, 1000)
    ]:
        num_envs, num_agents = scenario
        times.update(
            {
                f"envs={num_envs}, agents={num_agents}":
                push_random_data_and_increment_timer(
                    num_runs,
                    num_envs,
                    num_agents,
                    source_code
                )
            }
        )
    
    
    print(f"Times for {num_runs} function calls")
    print("*"*40)
    for key, value in times.items():
        print(f"{key:30}: mean data push times: {value['data push times']:10.5}s,	 mean increment times: {value['code run time']:10.5}s")
    
    
    '''
    print(cuda_data_manager._meta_info)
    print(cuda_data_manager._host_data)
    print(cuda_data_manager._device_data_pointer)
    print(cuda_data_manager._scalar_data_list)
    print(cuda_data_manager._reset_data_list)
    print(cuda_data_manager._log_data_list)
    print(cuda_data_manager._device_data_via_torch)
    print(cuda_data_manager._shared_constants)
    print(cuda_data_manager._shape)
    print(cuda_data_manager._dtype)
    
    print(tensor_on_device)
    time.sleep(300)
    
    '''
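
    The only change from the working version is that these two steps are swapped, so the function manager is created before any CUDA memory exists:

        cuda_function_manager, increment_function = cuda_func_init()  # functions first: triggers the error
        cuda_data_manager = push_random_data(num_agents, num_envs)    # memory second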

    Error message:

    Traceback (most recent call last):
      File "/home/xxxxxx/warp-drive/devil_make/tutorial-1-warp_drive_basics.py", line 145, in <module>
        source_code
      File "/home/xxxxxx/warp-drive/devil_make/tutorial-1-warp_drive_basics.py", line 116, in push_random_data_and_increment_timer
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)
      File "/home/xxxxxx/anaconda3/envs/warp_drive/lib/python3.7/timeit.py", line 177, in timeit
        timing = self.inner(it, self.timer)
      File "<timeit-src>", line 6, in inner
      File "/home/xxxxxx/warp-drive/devil_make/tutorial-1-warp_drive_basics.py", line 116, in <lambda>
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)
      File "/home/xxxxxx/warp-drive/devil_make/tutorial-1-warp_drive_basics.py", line 97, in increment_data
        grid=cuda_function_manager.grid
      File "/home/xxxxxx/anaconda3/envs/warp_drive/lib/python3.7/site-packages/pycuda/driver.py", line 480, in function_call
        func._set_block_shape(*block)
    pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid resource handle

    From this we can see that, when using pycuda, initializing the CUDA functions before any CUDA memory has been initialized raises the error:

    pycuda._driver.LogicError: cuFuncSetBlockShape failed: invalid resource handle
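
    My guess at the cause (I have not verified this against the pycuda internals): the kernel handle that get_function() returns is bound to whichever CUDA context is current at that moment. In WarpDrive it appears to be the data manager that fully brings up the CUDA side, so compiling the functions first leaves the handle attached to a context other than the one used for the launch, and pycuda fails when it tries to set the block shape. For comparison, here is a minimal, self-contained pycuda sketch (no WarpDrive) of the safe order, where pycuda.autoinit creates the context before anything else runs:

        import numpy as np
        import pycuda.autoinit          # create the CUDA context before anything else
        import pycuda.driver as drv
        from pycuda.compiler import SourceModule

        # Compile the kernel only after the context exists.
        mod = SourceModule("""
        __global__ void cuda_increment(float *data, int num_agents)
        {
            int env_id = blockIdx.x;
            int agent_id = threadIdx.x;
            if (agent_id < num_agents) {
                data[env_id * num_agents + agent_id] += env_id + agent_id;
            }
        }
        """)
        increment = mod.get_function("cuda_increment")

        # Allocate / push the data, then launch: one block per env, one thread per agent.
        num_envs, num_agents = 2, 3
        data = np.random.rand(num_envs, num_agents).astype(np.float32)
        increment(drv.InOut(data), np.int32(num_agents),
                  block=(num_agents, 1, 1), grid=(num_envs, 1))
        print(data)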

    If the CUDA memory is initialized before the CUDA functions are, the error does not occur:

    Code:

    import numpy as np
    
    from warp_drive.managers.data_manager import CUDADataManager
    from warp_drive.managers.function_manager import (
        CUDAFunctionManager, CUDALogController, CUDASampler, CUDAEnvironmentReset
    )
    from warp_drive.utils.data_feed import DataFeed
    
    
    source_code = """
    // A function to demonstrate how to manipulate data on the GPU.
    // This function increments each the random data array we pushed to the GPU before.
    // Each index corresponding to (env_id, agent_id) in the array is incremented by "agent_id + env_id".
    // Everything inside the if() loop runs in parallel for each agent and environment.
    //
    extern "C"{
        __global__ void cuda_increment(                               
                float* data,                                  
                int num_agents                                       
        )                                                            
        {                                                            
            int env_id = blockIdx.x;                                 
            int agent_id = threadIdx.x;                             
            if (agent_id < num_agents){                              
                int array_index = env_id * num_agents + agent_id;
                int increment = env_id + agent_id;
                data[array_index] += increment;
            }                                                            
        }   
    }
    """
    
    
    
    
    from timeit import Timer
    
    
    def push_random_data_and_increment_timer(
            num_runs=1,
            num_envs=2,
            num_agents=3,
            source_code=None
    ):
    
        assert source_code is not None
    
        def push_random_data(num_agents, num_envs):
            # Initialize the CUDA data manager
            cuda_data_manager = CUDADataManager(
                num_agents=num_agents,
                num_envs=num_envs,
                episode_length=100
            )
    
            # Create random data
            random_data = np.random.rand(num_envs, num_agents)
    
            # Push data from host to device
            data_feed = DataFeed()
            data_feed.add_data(
                name="random_data",
                data=random_data,
            )
            data_feed.add_data(
                name="num_agents",
                data=num_agents
            )
            cuda_data_manager.push_data_to_device(data_feed)
    
            return cuda_data_manager
    
    
        # Initialize the CUDA function manager
        def cuda_func_init():
            cuda_function_manager = CUDAFunctionManager(
                num_agents=num_agents, #cuda_data_manager.meta_info("n_agents"),
                num_envs=num_envs #cuda_data_manager.meta_info("n_envs")
            )
    
            # Load source code and initialize function
            cuda_function_manager.load_cuda_from_source_code(
                source_code,
                default_functions_included=False
            )
            cuda_function_manager.initialize_functions(["cuda_increment"])
            increment_function = cuda_function_manager._get_function("cuda_increment")
    
            return cuda_function_manager, increment_function
    
    
        def increment_data(cuda_data_manager, cuda_function_manager, increment_function):
            increment_function(
                cuda_data_manager.device_data("random_data"),
                cuda_data_manager.device_data("num_agents"),
                block=cuda_function_manager.block,
                grid=cuda_function_manager.grid
            )
    
    
        # set variable
        # cuda_data_manager = push_random_data(num_agents, num_envs)
    
        # cuda function init
        # cuda_function_manager, increment_function = cuda_func_init()
    
        # cuda function run
        # increment_data(cuda_data_manager, cuda_function_manager, increment_function)
    
    
        #data_push_time = Timer(lambda: push_random_data(num_agents, num_envs)).timeit(number=num_runs)
    
    
        cuda_data_manager = push_random_data(num_agents, num_envs)    ###
        cuda_function_manager, increment_function = cuda_func_init()  ###
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)
        print(cuda_data_manager.pull_data_from_device('random_data'))
    
        return {
            "data push times": '0', #data_push_time,
            "code run time": program_run_time
        }
    
    
    
    
    num_runs = 1000
    times = {}
    
    for scenario in [
        (1, 1),
        (1, 100),
        (1, 1000),
        (100, 1000),
        (1000, 1000)
    ]:
        num_envs, num_agents = scenario
        times.update(
            {
                f"envs={num_envs}, agents={num_agents}":
                push_random_data_and_increment_timer(
                    num_runs,
                    num_envs,
                    num_agents,
                    source_code
                )
            }
        )
    
    
    print(f"Times for {num_runs} function calls")
    print("*"*40)
    for key, value in times.items():
        print(f"{key:30}: mean data push times: {value['data push times']:10.5}s,	 mean increment times: {value['code run time']:10.5}s")
    
    
    '''
    print(cuda_data_manager._meta_info)
    print(cuda_data_manager._host_data)
    print(cuda_data_manager._device_data_pointer)
    print(cuda_data_manager._scalar_data_list)
    print(cuda_data_manager._reset_data_list)
    print(cuda_data_manager._log_data_list)
    print(cuda_data_manager._device_data_via_torch)
    print(cuda_data_manager._shared_constants)
    print(cuda_data_manager._shape)
    print(cuda_data_manager._dtype)
    
    print(tensor_on_device)
    time.sleep(300)
    
    '''

    Remarkably, it does not matter how much memory is allocated before the CUDA function initialization; any allocation, no matter how small, prevents the error. That is the strangest part of this behavior.

    The following code demonstrates this:

    import numpy as np
    
    from warp_drive.managers.data_manager import CUDADataManager
    from warp_drive.managers.function_manager import (
        CUDAFunctionManager, CUDALogController, CUDASampler, CUDAEnvironmentReset
    )
    from warp_drive.utils.data_feed import DataFeed
    
    
    source_code = """
    // A function to demonstrate how to manipulate data on the GPU.
    // This function increments each element of the random data array we pushed to the GPU earlier.
    // Each index corresponding to (env_id, agent_id) in the array is incremented by "agent_id + env_id".
    // Everything inside the if() block runs in parallel for each agent and environment.
    //
    extern "C"{
        __global__ void cuda_increment(                               
                float* data,                                  
                int num_agents                                       
        )                                                            
        {                                                            
            int env_id = blockIdx.x;                                 
            int agent_id = threadIdx.x;                             
            if (agent_id < num_agents){                              
                int array_index = env_id * num_agents + agent_id;
                int increment = env_id + agent_id;
                data[array_index] += increment;
            }                                                            
        }   
    }
    """
    
    
    
    
    from timeit import Timer
    
    
    def push_random_data_and_increment_timer(
            num_runs=1,
            num_envs=2,
            num_agents=3,
            source_code=None
    ):
    
        assert source_code is not None
    
        def push_random_data(num_agents, num_envs):
            # Initialize the CUDA data manager
            cuda_data_manager = CUDADataManager(
                num_agents=num_agents,
                num_envs=num_envs,
                episode_length=100
            )
    
            # Create random data
            random_data = np.random.rand(num_envs, num_agents)
    
            # Push data from host to device
            data_feed = DataFeed()
            data_feed.add_data(
                name="random_data",
                data=random_data,
            )
            data_feed.add_data(
                name="num_agents",
                data=num_agents
            )
            cuda_data_manager.push_data_to_device(data_feed)
    
            return cuda_data_manager
    
    
        # Initialize the CUDA function manager
        def cuda_func_init():
            cuda_function_manager = CUDAFunctionManager(
                num_agents=num_agents, #cuda_data_manager.meta_info("n_agents"),
                num_envs=num_envs #cuda_data_manager.meta_info("n_envs")
            )
    
            # Load source code and initialize function
            cuda_function_manager.load_cuda_from_source_code(
                source_code,
                default_functions_included=False
            )
            cuda_function_manager.initialize_functions(["cuda_increment"])
            increment_function = cuda_function_manager._get_function("cuda_increment")
    
            return cuda_function_manager, increment_function
    
    
        def increment_data(cuda_data_manager, cuda_function_manager, increment_function):
            increment_function(
                cuda_data_manager.device_data("random_data"),
                cuda_data_manager.device_data("num_agents"),
                block=cuda_function_manager.block,
                grid=cuda_function_manager.grid
            )
    
    
        # set variable
        # cuda_data_manager = push_random_data(num_agents, num_envs)
    
        # cuda function init
        # cuda_function_manager, increment_function = cuda_func_init()
    
        # cuda function run
        # increment_data(cuda_data_manager, cuda_function_manager, increment_function)
    
    
        #data_push_time = Timer(lambda: push_random_data(num_agents, num_envs)).timeit(number=num_runs)
    
        push_random_data(1, 1)
    
        cuda_function_manager, increment_function = cuda_func_init()  ###
        cuda_data_manager = push_random_data(num_agents, num_envs)    ###
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)
        print(cuda_data_manager.pull_data_from_device('random_data'))
    
        return {
            "data push times": '0', #data_push_time,
            "code run time": program_run_time
        }
    
    
    
    
    num_runs = 1000
    times = {}
    
    for scenario in [
        (1, 1),
        (1, 100),
        (1, 1000),
        (100, 1000),
        (1000, 1000)
    ]:
        num_envs, num_agents = scenario
        times.update(
            {
                f"envs={num_envs}, agents={num_agents}":
                push_random_data_and_increment_timer(
                    num_runs,
                    num_envs,
                    num_agents,
                    source_code
                )
            }
        )
    
    
    print(f"Times for {num_runs} function calls")
    print("*"*40)
    for key, value in times.items():
        print(f"{key:30}: mean data push times: {value['data push times']:10.5}s,	 mean increment times: {value['code run time']:10.5}s")
    
    
    '''
    print(cuda_data_manager._meta_info)
    print(cuda_data_manager._host_data)
    print(cuda_data_manager._device_data_pointer)
    print(cuda_data_manager._scalar_data_list)
    print(cuda_data_manager._reset_data_list)
    print(cuda_data_manager._log_data_list)
    print(cuda_data_manager._device_data_via_torch)
    print(cuda_data_manager._shared_constants)
    print(cuda_data_manager._shape)
    print(cuda_data_manager._dtype)
    
    print(tensor_on_device)
    time.sleep(300)
    
    '''

    The core code:

        push_random_data(1, 1)
    
        cuda_function_manager, increment_function = cuda_func_init()  ###
        cuda_data_manager = push_random_data(num_agents, num_envs)    ###
        program_run_time = Timer(lambda: increment_data(cuda_data_manager, cuda_function_manager, increment_function)).timeit(number=num_runs)

    The line below performs the CUDA memory allocation. Even a small allocation like this lets the subsequent CUDA function initialization run normally; if there is no CUDA memory allocation at all before the CUDA function initialization, the error is raised.

    push_random_data(1, 1)

    CUDA memory initialization (the memory allocation step):

        push_random_data(1, 1)


    CUDA function initialization:
    cuda_function_manager, increment_function = cuda_func_init() ###

    CUDA function execution:

    increment_data(cuda_data_manager, cuda_function_manager, increment_function)
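
    Since any allocation works, a plausible (untested) lighter-weight warm-up would be to create the CUDA context directly before touching CUDAFunctionManager at all, for example with a throwaway one-element PyTorch tensor. This assumes, as the _device_data_via_torch attribute listed above hints, that CUDADataManager allocates its device memory through PyTorch:

        import torch

        # Hypothetical alternative to push_random_data(1, 1): allocate a throwaway
        # one-element GPU tensor purely to force CUDA context creation before the
        # CUDA functions are compiled and initialized.
        _ = torch.zeros(1, device="cuda")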