From d77183cbc1016d7ecbdecf160ba4ef47d732e8bc Mon Sep 17 00:00:00 2001 From: XMRig Date: Sun, 17 Sep 2017 21:33:43 +0300 Subject: [PATCH] Fixed CUDA and NVML device id inconsistency. --- src/Options.cpp | 1 + src/nvidia/NvmlApi.cpp | 41 +++++++++++++++++++++++++++++++++++++-- src/nvidia/NvmlApi.h | 7 +++++++ src/nvidia/cryptonight.h | 3 +++ src/nvidia/cuda_extra.cu | 3 +++ src/workers/GpuThread.cpp | 15 ++++++++++++-- src/workers/GpuThread.h | 9 +++++++++ src/workers/Workers.cpp | 6 ++++-- 8 files changed, 79 insertions(+), 6 deletions(-) diff --git a/src/Options.cpp b/src/Options.cpp index 444ec24..f55dc9b 100644 --- a/src/Options.cpp +++ b/src/Options.cpp @@ -337,6 +337,7 @@ Options::Options(int argc, char **argv) : } } + NvmlApi::bind(m_threads); m_ready = true; } diff --git a/src/nvidia/NvmlApi.cpp b/src/nvidia/NvmlApi.cpp index ea37d99..33f680c 100644 --- a/src/nvidia/NvmlApi.cpp +++ b/src/nvidia/NvmlApi.cpp @@ -27,6 +27,7 @@ #include "nvidia/NvmlApi.h" +#include "workers/GpuThread.h" static uv_lib_t nvmlLib; @@ -43,6 +44,8 @@ static nvmlReturn_t(*pNvmlDeviceGetPowerUsage)(nvmlDevice_t device, unsigned int static nvmlReturn_t(*pNvmlDeviceGetFanSpeed)(nvmlDevice_t device, unsigned int* speed) = nullptr; static nvmlReturn_t(*pNvmlDeviceGetClockInfo)(nvmlDevice_t device, nvmlClockType_t type, unsigned int* clock) = nullptr; static nvmlReturn_t(*pNvmlSystemGetNVMLVersion)(char *version, unsigned int length) = nullptr; +static nvmlReturn_t(*pNvmlDeviceGetCount)(unsigned int *deviceCount) = nullptr; +static nvmlReturn_t(*pNvmlDeviceGetPciInfo)(nvmlDevice_t device, nvmlPciInfo_t *pci) = nullptr; bool NvmlApi::init() @@ -70,6 +73,8 @@ bool NvmlApi::init() uv_dlsym(&nvmlLib, "nvmlDeviceGetFanSpeed", reinterpret_cast(&pNvmlDeviceGetFanSpeed)); uv_dlsym(&nvmlLib, "nvmlDeviceGetClockInfo", reinterpret_cast(&pNvmlDeviceGetClockInfo)); uv_dlsym(&nvmlLib, "nvmlSystemGetNVMLVersion", reinterpret_cast(&pNvmlSystemGetNVMLVersion)); + uv_dlsym(&nvmlLib, "nvmlDeviceGetCount_v2", reinterpret_cast(&pNvmlDeviceGetCount)); + uv_dlsym(&nvmlLib, "nvmlDeviceGetPciInfo_v2", reinterpret_cast(&pNvmlDeviceGetPciInfo)); m_available = pNvmlInit() == NVML_SUCCESS; @@ -93,7 +98,7 @@ void NvmlApi::release() bool NvmlApi::health(int id, Health &health) { - if (!isAvailable()) { + if (id == -1 || !isAvailable()) { return false; } @@ -119,7 +124,7 @@ bool NvmlApi::health(int id, Health &health) pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock); } - return false; + return true; } @@ -127,3 +132,35 @@ const char *NvmlApi::version() { return nvmlVerion; } + + +void NvmlApi::bind(const std::vector &threads) +{ + if (!isAvailable() || !pNvmlDeviceGetCount || !pNvmlDeviceGetHandleByIndex || !pNvmlDeviceGetPciInfo) { + return; + } + + unsigned int count = 0; + if (pNvmlDeviceGetCount(&count) != NVML_SUCCESS) { + return; + } + + for (unsigned int i = 0; i < count; i++) { + nvmlDevice_t device; + if (pNvmlDeviceGetHandleByIndex(i, &device) != NVML_SUCCESS) { + continue; + } + + nvmlPciInfo_t pci; + if (pNvmlDeviceGetPciInfo(device, &pci) != NVML_SUCCESS) { + continue; + } + + for (GpuThread *thread : threads) { + if (thread->pciBusID() == pci.bus && thread->pciDeviceID() == pci.device && thread->pciDomainID() == pci.domain) { + thread->setNvmlId(i); + break; + } + } + } +} diff --git a/src/nvidia/NvmlApi.h b/src/nvidia/NvmlApi.h index 9624d28..20ce61d 100644 --- a/src/nvidia/NvmlApi.h +++ b/src/nvidia/NvmlApi.h @@ -25,9 +25,15 @@ #define __NVML_H__ +#include + + #include "nvidia/Health.h" +class GpuThread; + + class NvmlApi { public: @@ -36,6 +42,7 @@ public: static bool health(int id, Health &health); static const char *version(); + static void bind(const std::vector &threads); static inline bool isAvailable() { return m_available; } diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h index 045416e..18f9a38 100644 --- a/src/nvidia/cryptonight.h +++ b/src/nvidia/cryptonight.h @@ -13,6 +13,9 @@ typedef struct { int device_bsleep; int device_clockRate; int device_memoryClockRate; + int device_pciBusID; + int device_pciDeviceID; + int device_pciDomainID; uint32_t *d_input; uint32_t inputlen; diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu index c775916..ce8b91e 100644 --- a/src/nvidia/cuda_extra.cu +++ b/src/nvidia/cuda_extra.cu @@ -317,6 +317,9 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx) ctx->device_arch[1] = props.minor; ctx->device_clockRate = props.clockRate; ctx->device_memoryClockRate = props.memoryClockRate; + ctx->device_pciBusID = props.pciBusID; + ctx->device_pciDeviceID = props.pciDeviceID; + ctx->device_pciDomainID = props.pciDomainID; // set all evice option those marked as auto (-1) to a valid value if(ctx->device_blocks == -1) diff --git a/src/workers/GpuThread.cpp b/src/workers/GpuThread.cpp index 2059162..fbaa495 100644 --- a/src/workers/GpuThread.cpp +++ b/src/workers/GpuThread.cpp @@ -35,6 +35,10 @@ GpuThread::GpuThread() : m_clockRate(0), m_index(0), m_memoryClockRate(0), + m_nvmlId(-1), + m_pciBusID(0), + m_pciDeviceID(0), + m_pciDomainID(0), m_smx(0), m_threadId(0), m_threads(0) @@ -52,6 +56,10 @@ GpuThread::GpuThread(const nvid_ctx &ctx) : m_clockRate(ctx.device_clockRate), m_index(ctx.device_id), m_memoryClockRate(ctx.device_memoryClockRate), + m_nvmlId(-1), + m_pciBusID(ctx.device_pciBusID), + m_pciDeviceID(ctx.device_pciDeviceID), + m_pciDomainID(ctx.device_pciDomainID), m_smx(ctx.device_mpcount), m_threadId(0), m_threads(ctx.device_threads) @@ -94,9 +102,12 @@ bool GpuThread::init() m_blocks = ctx.device_blocks; m_smx = ctx.device_mpcount; - m_clockRate = ctx.device_clockRate; + m_clockRate = ctx.device_clockRate; m_memoryClockRate = ctx.device_memoryClockRate; - + m_pciBusID = ctx.device_pciBusID; + m_pciDeviceID = ctx.device_pciDeviceID; + m_pciDomainID = ctx.device_pciDomainID; + return true; } diff --git a/src/workers/GpuThread.h b/src/workers/GpuThread.h index f626105..acc68d7 100644 --- a/src/workers/GpuThread.h +++ b/src/workers/GpuThread.h @@ -50,6 +50,10 @@ public: inline int clockRate() const { return m_clockRate; } inline int index() const { return m_index; } inline int memoryClockRate() const { return m_memoryClockRate; } + inline int nvmlId() const { return m_nvmlId; } + inline int pciBusID() const { return m_pciBusID; } + inline int pciDeviceID() const { return m_pciDeviceID; } + inline int pciDomainID() const { return m_pciDomainID; } inline int smx() const { return m_smx; } inline int threadId() const { return m_threadId; } inline int threads() const { return m_threads; } @@ -58,6 +62,7 @@ public: inline void setBlocks(int blocks) { m_blocks = blocks; } inline void setBSleep(int bsleep) { m_bsleep = bsleep; } inline void setIndex(int index) { m_index = index; } + inline void setNvmlId(int id) { m_nvmlId = id; } inline void setThreadId(int threadId) { m_threadId = threadId; } inline void setThreads(int threads) { m_threads = threads; } @@ -71,6 +76,10 @@ private: int m_clockRate; int m_index; int m_memoryClockRate; + int m_nvmlId; + int m_pciBusID; + int m_pciDeviceID; + int m_pciDomainID; int m_smx; int m_threadId; int m_threads; diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp index 869e408..c612b0c 100644 --- a/src/workers/Workers.cpp +++ b/src/workers/Workers.cpp @@ -104,7 +104,9 @@ void Workers::printHealth() Health health; for (const GpuThread *thread : Options::i()->threads()) { - NvmlApi::health(thread->index(), health); + if (!NvmlApi::health(thread->nvmlId(), health)) { + continue; + } const uint32_t temp = health.temperature; @@ -307,7 +309,7 @@ void Workers::onTick(uv_timer_t *handle) std::vector records; Health health; for (const GpuThread *thread : Options::i()->threads()) { - NvmlApi::health(thread->index(), health); + NvmlApi::health(thread->nvmlId(), health); records.push_back(health); }