From d77183cbc1016d7ecbdecf160ba4ef47d732e8bc Mon Sep 17 00:00:00 2001
From: XMRig <support@xmrig.com>
Date: Sun, 17 Sep 2017 21:33:43 +0300
Subject: [PATCH] Fixed CUDA and NVML device id inconsistency.

---
 src/Options.cpp           |  1 +
 src/nvidia/NvmlApi.cpp    | 41 +++++++++++++++++++++++++++++++++++++--
 src/nvidia/NvmlApi.h      |  7 +++++++
 src/nvidia/cryptonight.h  |  3 +++
 src/nvidia/cuda_extra.cu  |  3 +++
 src/workers/GpuThread.cpp | 15 ++++++++++++--
 src/workers/GpuThread.h   |  9 +++++++++
 src/workers/Workers.cpp   |  6 ++++--
 8 files changed, 79 insertions(+), 6 deletions(-)
diff --git a/src/Options.cpp b/src/Options.cpp
index 444ec24..f55dc9b 100644
--- a/src/Options.cpp
+++ b/src/Options.cpp
@@ -337,6 +337,7 @@ Options::Options(int argc, char **argv) :
         }
     }
 
+    NvmlApi::bind(m_threads);
     m_ready = true;
 }
 
diff --git a/src/nvidia/NvmlApi.cpp b/src/nvidia/NvmlApi.cpp
index ea37d99..33f680c 100644
--- a/src/nvidia/NvmlApi.cpp
+++ b/src/nvidia/NvmlApi.cpp
@@ -27,6 +27,7 @@
 
 
 #include "nvidia/NvmlApi.h"
+#include "workers/GpuThread.h"
 
 
 static uv_lib_t nvmlLib;
@@ -43,6 +44,8 @@ static nvmlReturn_t(*pNvmlDeviceGetPowerUsage)(nvmlDevice_t device, unsigned int
 static nvmlReturn_t(*pNvmlDeviceGetFanSpeed)(nvmlDevice_t device, unsigned int* speed) = nullptr;
 static nvmlReturn_t(*pNvmlDeviceGetClockInfo)(nvmlDevice_t device, nvmlClockType_t type, unsigned int* clock) = nullptr;
 static nvmlReturn_t(*pNvmlSystemGetNVMLVersion)(char *version, unsigned int length) = nullptr;
+static nvmlReturn_t(*pNvmlDeviceGetCount)(unsigned int *deviceCount) = nullptr;
+static nvmlReturn_t(*pNvmlDeviceGetPciInfo)(nvmlDevice_t device, nvmlPciInfo_t *pci) = nullptr;
 
 
 bool NvmlApi::init()
@@ -70,6 +73,8 @@ bool NvmlApi::init()
     uv_dlsym(&nvmlLib, "nvmlDeviceGetFanSpeed", reinterpret_cast<void**>(&pNvmlDeviceGetFanSpeed));
     uv_dlsym(&nvmlLib, "nvmlDeviceGetClockInfo", reinterpret_cast<void**>(&pNvmlDeviceGetClockInfo));
     uv_dlsym(&nvmlLib, "nvmlSystemGetNVMLVersion", reinterpret_cast<void**>(&pNvmlSystemGetNVMLVersion));
+    uv_dlsym(&nvmlLib, "nvmlDeviceGetCount_v2", reinterpret_cast<void**>(&pNvmlDeviceGetCount));
+    uv_dlsym(&nvmlLib, "nvmlDeviceGetPciInfo_v2", reinterpret_cast<void**>(&pNvmlDeviceGetPciInfo));
 
     m_available = pNvmlInit() == NVML_SUCCESS;
 
@@ -93,7 +98,7 @@ void NvmlApi::release()
 
 bool NvmlApi::health(int id, Health &health)
 {
-    if (!isAvailable()) {
+    if (id == -1 || !isAvailable()) {
         return false;
     }
 
@@ -119,7 +124,7 @@ bool NvmlApi::health(int id, Health &health)
         pNvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &health.memClock);
     }
 
-    return false;
+    return true;
 }
 
 
@@ -127,3 +132,35 @@ const char *NvmlApi::version()
 {
     return nvmlVerion;
 }
+
+
+// Assigns each NVML device index to the first GPU thread sharing its PCI address.
+void NvmlApi::bind(const std::vector<GpuThread*> &threads)
+{
+    const bool ready = isAvailable() && pNvmlDeviceGetCount && pNvmlDeviceGetHandleByIndex && pNvmlDeviceGetPciInfo;
+    if (!ready) {
+        return;
+    }
+
+    unsigned int total = 0;
+    if (pNvmlDeviceGetCount(&total) != NVML_SUCCESS) {
+        return;
+    }
+
+    for (unsigned int nvmlIndex = 0; nvmlIndex < total; ++nvmlIndex) {
+        nvmlDevice_t handle;
+        nvmlPciInfo_t info;
+        if (pNvmlDeviceGetHandleByIndex(nvmlIndex, &handle) != NVML_SUCCESS || pNvmlDeviceGetPciInfo(handle, &info) != NVML_SUCCESS) {
+            continue; // device handle or PCI info unavailable, try the next NVML index
+        }
+
+        for (GpuThread *candidate : threads) {
+            if (candidate->pciBusID() != info.bus || candidate->pciDeviceID() != info.device || candidate->pciDomainID() != info.domain) {
+                continue;
+            }
+
+            candidate->setNvmlId(nvmlIndex);
+            break; // only the first thread matching this PCI address is bound
+        }
+    }
+}
diff --git a/src/nvidia/NvmlApi.h b/src/nvidia/NvmlApi.h
index 9624d28..20ce61d 100644
--- a/src/nvidia/NvmlApi.h
+++ b/src/nvidia/NvmlApi.h
@@ -25,9 +25,15 @@
 #define __NVML_H__
 
 
+#include <vector>
+
+
 #include "nvidia/Health.h"
 
 
+class GpuThread;
+
+
 class NvmlApi
 {
 public:
@@ -36,6 +42,7 @@ public:
 
     static bool health(int id, Health &health);
     static const char *version();
+    static void bind(const std::vector<GpuThread*> &threads);
 
     static inline bool isAvailable() { return m_available; }
 
diff --git a/src/nvidia/cryptonight.h b/src/nvidia/cryptonight.h
index 045416e..18f9a38 100644
--- a/src/nvidia/cryptonight.h
+++ b/src/nvidia/cryptonight.h
@@ -13,6 +13,9 @@ typedef struct {
 	int device_bsleep;
     int device_clockRate;
     int device_memoryClockRate;
+    int device_pciBusID;
+    int device_pciDeviceID;
+    int device_pciDomainID;
 
 	uint32_t *d_input;
 	uint32_t inputlen;
diff --git a/src/nvidia/cuda_extra.cu b/src/nvidia/cuda_extra.cu
index c775916..ce8b91e 100644
--- a/src/nvidia/cuda_extra.cu
+++ b/src/nvidia/cuda_extra.cu
@@ -317,6 +317,9 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
 	ctx->device_arch[1] = props.minor;
     ctx->device_clockRate = props.clockRate;
     ctx->device_memoryClockRate = props.memoryClockRate;
+    ctx->device_pciBusID = props.pciBusID;
+    ctx->device_pciDeviceID = props.pciDeviceID;
+    ctx->device_pciDomainID = props.pciDomainID;
 
 	// set all evice option those marked as auto (-1) to a valid value
 	if(ctx->device_blocks == -1)
diff --git a/src/workers/GpuThread.cpp b/src/workers/GpuThread.cpp
index 2059162..fbaa495 100644
--- a/src/workers/GpuThread.cpp
+++ b/src/workers/GpuThread.cpp
@@ -35,6 +35,10 @@ GpuThread::GpuThread() :
     m_clockRate(0),
     m_index(0),
     m_memoryClockRate(0),
+    m_nvmlId(-1),
+    m_pciBusID(0),
+    m_pciDeviceID(0),
+    m_pciDomainID(0),
     m_smx(0),
     m_threadId(0),
     m_threads(0)
@@ -52,6 +56,10 @@ GpuThread::GpuThread(const nvid_ctx &ctx) :
     m_clockRate(ctx.device_clockRate),
     m_index(ctx.device_id),
     m_memoryClockRate(ctx.device_memoryClockRate),
+    m_nvmlId(-1),
+    m_pciBusID(ctx.device_pciBusID),
+    m_pciDeviceID(ctx.device_pciDeviceID),
+    m_pciDomainID(ctx.device_pciDomainID),
     m_smx(ctx.device_mpcount),
     m_threadId(0),
     m_threads(ctx.device_threads)
@@ -94,9 +102,12 @@ bool GpuThread::init()
     m_blocks  = ctx.device_blocks;
     m_smx     = ctx.device_mpcount;
 
-    m_clockRate = ctx.device_clockRate;
+    m_clockRate       = ctx.device_clockRate;
     m_memoryClockRate = ctx.device_memoryClockRate;
-    
+    m_pciBusID        = ctx.device_pciBusID;
+    m_pciDeviceID     = ctx.device_pciDeviceID;
+    m_pciDomainID     = ctx.device_pciDomainID;
+
     return true;
 }
 
diff --git a/src/workers/GpuThread.h b/src/workers/GpuThread.h
index f626105..acc68d7 100644
--- a/src/workers/GpuThread.h
+++ b/src/workers/GpuThread.h
@@ -50,6 +50,10 @@ public:
     inline int clockRate() const          { return m_clockRate; } 
     inline int index() const              { return m_index; }
     inline int memoryClockRate() const    { return m_memoryClockRate; }
+    inline int nvmlId() const             { return m_nvmlId; }
+    inline int pciBusID() const           { return m_pciBusID; }
+    inline int pciDeviceID() const        { return m_pciDeviceID; }
+    inline int pciDomainID() const        { return m_pciDomainID; }
     inline int smx() const                { return m_smx; }
     inline int threadId() const           { return m_threadId; }
     inline int threads() const            { return m_threads; }
@@ -58,6 +62,7 @@ public:
     inline void setBlocks(int blocks)     { m_blocks = blocks; }
     inline void setBSleep(int bsleep)     { m_bsleep = bsleep; }
     inline void setIndex(int index)       { m_index = index; }
+    inline void setNvmlId(int id)         { m_nvmlId = id; }
     inline void setThreadId(int threadId) { m_threadId = threadId; }
     inline void setThreads(int threads)   { m_threads = threads; }
 
@@ -71,6 +76,10 @@ private:
     int m_clockRate;
     int m_index;
     int m_memoryClockRate;
+    int m_nvmlId;
+    int m_pciBusID;
+    int m_pciDeviceID;
+    int m_pciDomainID;
     int m_smx;
     int m_threadId;
     int m_threads;
diff --git a/src/workers/Workers.cpp b/src/workers/Workers.cpp
index 869e408..c612b0c 100644
--- a/src/workers/Workers.cpp
+++ b/src/workers/Workers.cpp
@@ -104,7 +104,9 @@ void Workers::printHealth()
 
     Health health;
     for (const GpuThread *thread : Options::i()->threads()) {
-        NvmlApi::health(thread->index(), health);
+        if (!NvmlApi::health(thread->nvmlId(), health)) {
+            continue;
+        }
 
         const uint32_t temp = health.temperature;
 
@@ -307,7 +309,7 @@ void Workers::onTick(uv_timer_t *handle)
         std::vector<Health> records;
         Health health;
         for (const GpuThread *thread : Options::i()->threads()) {
-            NvmlApi::health(thread->index(), health);
+            NvmlApi::health(thread->nvmlId(), health);
             records.push_back(health);
         }