zoukankan      html  css  js  c++  java
  • NVML查询显卡信息

            前段时间做英伟达硬解的时候,显卡总是莫名挂掉,后来发现是因为显卡温度过高才挂掉的。这几天发现CUDA中带有NVML工具包,可以用来查看显卡信息,nvidia-smi也是基于这个工具包实现的。

            使用的CUDA版本为CUDA 8.0 。

    1.给程序添加NVML

            安装CUDA之后可以找到如下:

    image

    图1.NVML的例子

    这里面包含的是NVML的一个例子。我的系统是64位的,可以找到NVML的lib和头文件如下:

    image

    图2.NVML的lib文件

    image

    图3.NVML头文件

    在工程中包含NVML。我新建的是CUDA 8.0 Runtime工程:因为NVML包含在CUDA中,建CUDA 8.0 Runtime工程可以省去CUDA的配置工作。工程建立方法参见《VS2013 VC++的.cpp文件调用CUDA的.cu文件中的函数》。CUDA 8.0为默认安装,系统为win10 64位。

            在程序中直接包含NVML的头文件和lib文件即可:

    #include "nvml.h"
    
    #pragma  comment(lib,"nvml.lib")

    注意64位系统应该建立x64工程,因为在安装的CUDA中没有win32的nvml.lib。

    2.NVML查询显卡信息

            常用函数:

            ·nvmlInit()函数初始化NVML;

            ·nvmlDeviceGetCount(unsigned int *deviceCount)函数可以获得显卡数;

            ·nvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t *device)获取设备;

            ·nvmlDeviceGetName(nvmlDevice_t device, char *name, unsigned int length)查询设备的名称;

            ·nvmlDeviceGetPciInfo(nvmlDevice_t device, nvmlPciInfo_t *pci)获取PCI信息,对这个函数的重要性,例子中是这么说的

                // pci.busId is very useful to know which device physically you're talking to
                // Using PCI identifier you can also match nvmlDevice handle to CUDA device.

            ·nvmlDeviceGetComputeMode(nvmlDevice_t device, nvmlComputeMode_t *mode)得到显卡当前所处的模式,模式有以下几种:

    // Compute modes of a device: the value type that nvmlDeviceGetComputeMode()
    // writes through its mode pointer and that nvmlDeviceSetComputeMode() takes.
    typedef enum nvmlComputeMode_enum
    {
        NVML_COMPUTEMODE_DEFAULT           = 0,  //!< Default compute mode -- multiple contexts per device
        NVML_COMPUTEMODE_EXCLUSIVE_THREAD  = 1,  //!< Support Removed
        NVML_COMPUTEMODE_PROHIBITED        = 2,  //!< Compute-prohibited mode -- no contexts per device
        NVML_COMPUTEMODE_EXCLUSIVE_PROCESS = 3,  //!< Compute-exclusive-process mode -- only one context per device, usable from multiple threads at a time
       
        // Keep this last
        NVML_COMPUTEMODE_COUNT
    } nvmlComputeMode_t;

            ·nvmlDeviceSetComputeMode(nvmlDevice_t device, nvmlComputeMode_t mode)可以修改显卡的模式;

            ·nvmlDeviceGetTemperatureThreshold(nvmlDevice_t device, nvmlTemperatureThresholds_t thresholdType, unsigned int *temp)查询温度阈值,具体有两种:

    // Threshold kinds that can be passed as the thresholdType argument of
    // nvmlDeviceGetTemperatureThreshold().
    typedef enum nvmlTemperatureThresholds_enum
    {
        NVML_TEMPERATURE_THRESHOLD_SHUTDOWN = 0,    // Temperature at which the GPU will shut down for HW protection
        NVML_TEMPERATURE_THRESHOLD_SLOWDOWN = 1,    // Temperature at which the GPU will begin slowdown
        // Keep this last
        NVML_TEMPERATURE_THRESHOLD_COUNT
    } nvmlTemperatureThresholds_t;

    当温度达到NVML_TEMPERATURE_THRESHOLD_SHUTDOWN 参数获取的温度时,显卡将自动关闭以保护硬件;当温度达到NVML_TEMPERATURE_THRESHOLD_SLOWDOWN参数获取的温度时,显卡的性能将下降。

            ·nvmlDeviceGetTemperature(nvmlDevice_t device, nvmlTemperatureSensors_t sensorType, unsigned int *temp)获取显卡当前温度;

            ·nvmlDeviceGetUtilizationRates(nvmlDevice_t device, nvmlUtilization_t *utilization)获取设备的使用率(原注释:Retrieves the current utilization rates for the device's major subsystems。不知道理解错了没有),使用率包括以下:

    // Result structure filled in by nvmlDeviceGetUtilizationRates(); both
    // fields are percentages measured over the past sample period.
    typedef struct nvmlUtilization_st
    {
        unsigned int gpu;                //!< Percent of time over the past sample period during which one or more kernels was executing on the GPU
        unsigned int memory;             //!< Percent of time over the past sample period during which global (device) memory was being read or written
    } nvmlUtilization_t;

            ·nvmlDeviceGetMemoryInfo(nvmlDevice_t device, nvmlMemory_t *memory)    Retrieves the amount of used, free and total memory available on the device, in bytes。

            ·nvmlDeviceGetBAR1MemoryInfo(nvmlDevice_t device, nvmlBAR1Memory_t *bar1Memory)   Gets Total, Available and Used size of BAR1 memory.(不知道这种与上一种有什么区别,有待后续学习)

           ·nvmlDeviceGetComputeRunningProcesses(nvmlDevice_t device, unsigned int *infoCount, nvmlProcessInfo_t *infos)    Get information about processes with a compute context on a device。应该是获取当前在使用显卡的程序信息。

            ·nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock)   Retrieves the maximum clock speeds for the device。包括以下:

    // Clock domains that can be passed as the type argument of
    // nvmlDeviceGetMaxClockInfo() and nvmlDeviceGetClockInfo().
    typedef enum nvmlClockType_enum
    {
        NVML_CLOCK_GRAPHICS  = 0,        //!< Graphics clock domain
        NVML_CLOCK_SM        = 1,        //!< SM clock domain
        NVML_CLOCK_MEM       = 2,        //!< Memory clock domain
        NVML_CLOCK_VIDEO     = 3,        //!< Video encoder/decoder clock domain
       
        // Keep this last
        NVML_CLOCK_COUNT //<! Count of clock types
    } nvmlClockType_t;

            ·nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock)   Retrieves the current clock speeds for the device.上面是获取最大的,这个是获取当前的。

    代码示例:

    #include "cuda_kernels.h"
    
    #include "nvml.h"
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <windows.h>
    #include <winbase.h>
    #include <tlhelp32.h>
    #include <psapi.h>
    #pragma comment(lib,"kernel32.lib")
    #pragma comment(lib,"advapi32.lib")
    
    #pragma  comment(lib,"nvml.lib")
    
    // Maps an NVML compute mode to a short, human-readable label.
    // Any value that is not one of the four named modes yields "Unknown".
    // The returned pointer refers to a string literal; callers must not free it.
    const char * convertToComputeModeString(nvmlComputeMode_t mode)
    {
        if (mode == NVML_COMPUTEMODE_DEFAULT)
        {
            return "Default";
        }
        if (mode == NVML_COMPUTEMODE_EXCLUSIVE_THREAD)
        {
            return "Exclusive_Thread";
        }
        if (mode == NVML_COMPUTEMODE_PROHIBITED)
        {
            return "Prohibited";
        }
        if (mode == NVML_COMPUTEMODE_EXCLUSIVE_PROCESS)
        {
            return "Exclusive Process";
        }
        return "Unknown";
    }
    
    int main()
    {
        cuAdd();
    
        nvmlReturn_t result;
        unsigned int device_count, i;
    
        // First initialize NVML library
        result = nvmlInit();
        if (NVML_SUCCESS != result)
        {
            printf("Failed to initialize NVML: %s
    ", nvmlErrorString(result));
    
            printf("Press ENTER to continue...
    ");
            getchar();
            return 1;
        }
    
        result = nvmlDeviceGetCount(&device_count);
        if (NVML_SUCCESS != result)
        {
            printf("Failed to query device count: %s
    ", nvmlErrorString(result));
            goto Error;
        }
        printf("Found %d device%s
    
    ", device_count, device_count != 1 ? "s" : "");
    
        printf("Listing devices:
    ");
        while (true)
        {
            for (i = 0; i < device_count; i++)
            {
                nvmlDevice_t device;
                char name[NVML_DEVICE_NAME_BUFFER_SIZE];
                nvmlPciInfo_t pci;
                nvmlComputeMode_t compute_mode;
    
                // Query for device handle to perform operations on a device
                // You can also query device handle by other features like:
                // nvmlDeviceGetHandleBySerial
                // nvmlDeviceGetHandleByPciBusId
                result = nvmlDeviceGetHandleByIndex(i, &device);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get handle for device %i: %s
    ", i, nvmlErrorString(result));
                    goto Error;
                }
    
                result = nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get name of device %i: %s
    ", i, nvmlErrorString(result));
                    goto Error;
                }
    
                // pci.busId is very useful to know which device physically you're talking to
                // Using PCI identifier you can also match nvmlDevice handle to CUDA device.
                result = nvmlDeviceGetPciInfo(device, &pci);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get pci info for device %i: %s
    ", i, nvmlErrorString(result));
                    goto Error;
                }
    
                printf("%d. %s [%s]
    ", i, name, pci.busId);
    
                // This is a simple example on how you can modify GPU's state
                result = nvmlDeviceGetComputeMode(device, &compute_mode);
                if (NVML_ERROR_NOT_SUPPORTED == result)
                    printf("	 This is not CUDA capable device
    ");
                else if (NVML_SUCCESS != result)
                {
                    printf("Failed to get compute mode for device %i: %s
    ", i, nvmlErrorString(result));
                    goto Error;
                }
                else
                {
                    // try to change compute mode
                    printf("	 Changing device's compute mode from '%s' to '%s'
    ",
                        convertToComputeModeString(compute_mode),
                        convertToComputeModeString(NVML_COMPUTEMODE_PROHIBITED));
    
                    result = nvmlDeviceSetComputeMode(device, NVML_COMPUTEMODE_PROHIBITED);
                    if (NVML_ERROR_NO_PERMISSION == result)
                        printf("		 Need root privileges to do that: %s
    ", nvmlErrorString(result));
                    else if (NVML_ERROR_NOT_SUPPORTED == result)
                        printf("		 Compute mode prohibited not supported. You might be running on
    "
                        "		 windows in WDDM driver model or on non-CUDA capable GPU.
    ");
                    else if (NVML_SUCCESS != result)
                    {
                        printf("		 Failed to set compute mode for device %i: %s
    ", i, nvmlErrorString(result));
                        goto Error;
                    }
                    else
                    {
                        printf("	 Restoring device's compute mode back to '%s'
    ",
                            convertToComputeModeString(compute_mode));
                        result = nvmlDeviceSetComputeMode(device, compute_mode);
                        if (NVML_SUCCESS != result)
                        {
                            printf("		 Failed to restore compute mode for device %i: %s
    ", i, nvmlErrorString(result));
                            goto Error;
                        }
                    }
                }
    
                printf("
    ");
                printf("----- 温度 ----- 
    ");
                unsigned int temperature_threshold = 100;
                result = nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_SHUTDOWN, &temperature_threshold);
                if (NVML_SUCCESS != result)
                {
                    printf("device %i Failed to get NVML_TEMPERATURE_THRESHOLD_SHUTDOWN: %s
    ", i, nvmlErrorString(result));
                }
                else
                    printf("截止温度: %d 摄氏度  (Temperature at which the GPU will shut down for HW protection)
    ", temperature_threshold);
    
                result = nvmlDeviceGetTemperatureThreshold(device, NVML_TEMPERATURE_THRESHOLD_SLOWDOWN, &temperature_threshold);
                if (NVML_SUCCESS != result)
                {
                    printf("device %i Failed NVML_TEMPERATURE_THRESHOLD_SLOWDOWN: %s
    ", i, nvmlErrorString(result));
                }
                else
                    printf("上限温度: %d 摄氏度  (Temperature at which the GPU will begin slowdown)
    ", temperature_threshold);
    
                unsigned int temperature = 0;
                result = nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature);
                if (NVML_SUCCESS != result)
                {
                    printf("device %i NVML_TEMPERATURE_GPU Failed: %s
    ", i, nvmlErrorString(result));
                }
                else
                    printf("当前温度: %d 摄氏度 
    ", temperature);
    
                //使用率
                printf("
    ");
                nvmlUtilization_t utilization;
                result = nvmlDeviceGetUtilizationRates(device, &utilization);
                if (NVML_SUCCESS != result)
                {
                    printf(" device %i nvmlDeviceGetUtilizationRates Failed : %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("----- 使用率 ----- 
    ");
                    printf("GPU 使用率: %lld %% 
    ", utilization.gpu);
                    printf("显存使用率: %lld %% 
    ", utilization.memory);
                }
    
                //FB memory
                printf("
    ");
                nvmlMemory_t memory;
                result = nvmlDeviceGetMemoryInfo(device, &memory);
                if (NVML_SUCCESS != result)
                {
                    printf("device %i nvmlDeviceGetMemoryInfo Failed : %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("------ FB memory ------- 
    ");
                    printf("Total installed FB memory: %lld bytes 
    ", memory.total);
                    printf("Unallocated FB memory: %lld bytes 
    ", memory.free);
                    printf("Allocated FB memory: %lld bytes 
    ", memory.used);
                }
    
                //BAR1 memory
                printf("
    ");
                nvmlBAR1Memory_t bar1Memory;
                result = nvmlDeviceGetBAR1MemoryInfo(device, &bar1Memory);
                if (NVML_SUCCESS != result)
                {
                    printf("device %i  nvmlDeviceGetBAR1MemoryInfo Failed : %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("------ BAR1 memory ------- 
    ");
                    printf("Total BAR1 memory: %lld bytes 
    ", bar1Memory.bar1Total);
                    printf("Unallocated BAR1 memory: %lld bytes 
    ", bar1Memory.bar1Free);
                    printf("Allocated BAR1 memory: %lld bytes 
    ", bar1Memory.bar1Used);
                }
    
                //Information about running compute processes on the GPU
                printf("
    ");
                unsigned int infoCount;
                nvmlProcessInfo_t infos[999];
                result = nvmlDeviceGetComputeRunningProcesses(device, &infoCount, infos);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get ComputeRunningProcesses for device %i: %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    HANDLE handle; //定义CreateToolhelp32Snapshot系统快照句柄       
                    handle = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);//获得系统快照句柄     
                    PROCESSENTRY32 *info; //定义PROCESSENTRY32结构字指     
                    //PROCESSENTRY32 结构的 dwSize 成员设置成 sizeof(PROCESSENTRY32)      
                    info = new PROCESSENTRY32;
                    info->dwSize = sizeof(PROCESSENTRY32);
                    //调用一次     Process32First 函数,从快照中获取进程列表     
                    Process32First(handle, info);
                    //重复调用 Process32Next,直到函数返回 FALSE 为止    
    
                    printf("------ Information about running compute processes on the GPU ------- 
    ");
                    for (int i = 0; i < infoCount; i++)
                    {
                        printf("PID: %d  显存占用:%lld bytes   ", infos[i].pid, infos[i].usedGpuMemory);
    
                        while (Process32Next(handle, info) != FALSE)
                        {
                            if (info->th32ProcessID == infos[i].pid)
                            {
                                //printf("  %s
    ", info->szExeFile);
    
                                HANDLE hProcess = NULL;
                                //打开目标进程  
                                hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, info->th32ProcessID);
                                if (hProcess == NULL) {
                                    printf("
    Open Process fAiled:%d
    ", GetLastError());
                                    break;
                                }
    
                                char strFilePath[MAX_PATH];
                                GetModuleFileNameEx(hProcess, NULL, strFilePath, MAX_PATH);
                                printf(" %s
    ", strFilePath);
    
                                CloseHandle(hProcess);
    
                                break;
                            }
                        }
                    }
                    
                    delete info;
                    CloseHandle(handle);
                }
    
                //BAR1 memory
                printf("
    ");
                printf("------ Clocks ------- 
    "); 
                unsigned int max_clock;
                result = nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_GRAPHICS, &max_clock);
                if (NVML_SUCCESS != result)
                {
                    printf("device %i   nvmlDeviceGetMaxClockInfo Failed : %s
    ", i, nvmlErrorString(result));
                }
    
                unsigned int clock;
                result = nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get NVML_CLOCK_GRAPHICS info for device %i: %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("GRAPHICS: %6d Mhz   max clock :%d  
    ", clock, max_clock);
                }
    
                result = nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_SM, &max_clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get max NVML_CLOCK_SM for device %i: %s
    ", i, nvmlErrorString(result));
                }
    
                result = nvmlDeviceGetClockInfo(device, NVML_CLOCK_SM, &clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get current NVML_CLOCK_SM for device %i: %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("      SM: %6d Mhz   max clock :%d   
    ", clock, max_clock);
                }
    
                result = nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_MEM, &max_clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get max NVML_CLOCK_MEM for device %i: %s
    ", i, nvmlErrorString(result));
                }
    
                result = nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get current NVML_CLOCK_MEM for device %i: %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("     MEM: %6d Mhz   max clock :%d   
    ", clock, max_clock);
                }
    
                result = nvmlDeviceGetMaxClockInfo(device, NVML_CLOCK_VIDEO, &max_clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get max NVML_CLOCK_VIDEO for device %i: %s
    ", i, nvmlErrorString(result));
                }
    
                result = nvmlDeviceGetClockInfo(device, NVML_CLOCK_VIDEO, &clock);
                if (NVML_SUCCESS != result)
                {
                    printf("Failed to get current NVML_CLOCK_VIDEO for device %i: %s
    ", i, nvmlErrorString(result));
                }
                else
                {
                    printf("   VIDEO: %6d Mhz   max clock :%d   
    ", clock, max_clock);
                }
            }
    
            printf("-------------------------------------------------------------------- 
    ");
    
            Sleep(1000);
        }
    
    Error:
        result = nvmlShutdown();
        if (NVML_SUCCESS != result)
            printf("Failed to shutdown NVML: %s
    ", nvmlErrorString(result));
    
        system("pause");
    
        return 0;
    }

    虽然我已经把nvml.dll拷贝到运行目录,程序应该可以正常运行了,但还是顺便做一下nvidia-smi的环境配置。参考《NVIDIA 显卡信息(CUDA信息的查看)》,我把其中的内容复制到下面:

    1. nvidia-smi 查看显卡信息

    nvidia-smi 指的是 NVIDIA System Management Interface;

    在安装完成 NVIDIA 显卡驱动之后,对于 windows 用户而言,cmd 命令行界面还无法识别 nvidia-smi 命令,需要将相关环境变量添加进去。如将 NVIDIA 显卡驱动安装在默认位置,nvidia-smi 命令所在的完整路径应当为:

    C:\Program Files\NVIDIA Corporation\NVSMI

    也即将上述路径添加进 Path 系统环境变量中。

    2. 查看 CUDA 信息

    • CUDA 的版本:
      • 进入命令行:nvcc -V

    3.运行结果

    image

    图4.GeForce 940M查询结果

    P4测试

    图5.Tesla P4查询结果

            NVML对GeForce 940M的支持不怎么好,对Tesla P4支持得比较好。


    工程源码:http://download.csdn.net/download/qq_33892166/9841800

  • 相关阅读:
    死循环(endless loop)
    while循环和do-while循环语句
    99乘法表(for循环嵌套)
    代码块和作用域
    Break语句和Continue语句
    习题两则的简化(利用for循环)
    for循环语句
    条件判断语句(if-else)
    习题两则(自增操作符和数据类型相关知识点)
    字符集、编码和字符串
  • 原文地址:https://www.cnblogs.com/betterwgo/p/6858806.html
Copyright © 2011-2022 走看看