From a500f537d8281ad32e2eb07016b9de78c946ec50 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Thu, 11 Jan 2018 20:32:06 +0100 Subject: [PATCH] Added a RetrieveParameters function to inspect tuning parameters --- CHANGELOG | 3 ++- include/clblast.h | 5 +++++ include/clblast_cuda.h | 5 +++++ scripts/generator/generator.py | 2 +- src/api_common.cpp | 28 ++++++++++++++++++++++++++++ src/database/database.hpp | 1 + 6 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index e4205894..83ba7b07 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,7 @@ Development (next version) - Improved compilation time by splitting the tuning database into multiple compilation units - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) +- Added the RetrieveParameters function to the API to be able to inspect the tuning parameters - Added a strided-batched (not part of the BLAS standard) routine, faster but less generic compared to the existing xGEMMBATCHED routines: * SGEMMSTRIDEDBATCHED/DGEMMSTRIDEDBATCHED/CGEMMSTRIDEDBATCHED/ZGEMMSTRIDEDBATCHED/HGEMMSTRIDEDBATCHED @@ -70,7 +71,7 @@ Version 0.11.0 - Replaced the R graph scripts with Python/Matplotlib scripts - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) -- Added the OverrideParameters function to the API to be able to supply custom tuning parmeters +- Added the OverrideParameters function to the API to be able to supply custom tuning parameters - Added triangular solver (level-2 & level-3) routines: * STRSV/DTRSV/CTRSV/ZTRSV (experimental, un-optimized) * STRSM/DTRSM/CTRSM/ZTRSM (experimental, un-optimized) diff --git a/include/clblast.h b/include/clblast.h index 8e3e64da..c4ff5290 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -682,6 +682,11 @@ StatusCode PUBLIC_API FillCache(const cl_device_id device); // 
================================================================================================= +// Retrieves current tuning parameters for a specific device-precision-kernel combination +StatusCode PUBLIC_API RetrieveParameters(const cl_device_id device, const std::string &kernel_name, + const Precision precision, + std::unordered_map<std::string,size_t> &parameters); + // Overrides tuning parameters for a specific device-precision-kernel combination. The next time // the target routine is called it will re-compile and use the new parameters from then on. StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name, diff --git a/include/clblast_cuda.h b/include/clblast_cuda.h index b0cb9aa8..ed348efe 100644 --- a/include/clblast_cuda.h +++ b/include/clblast_cuda.h @@ -654,6 +654,11 @@ StatusCode PUBLIC_API FillCache(const CUdevice device); // ================================================================================================= +// Retrieves current tuning parameters for a specific device-precision-kernel combination +StatusCode PUBLIC_API RetrieveParameters(const CUdevice device, const std::string &kernel_name, + const Precision precision, + std::unordered_map<std::string,size_t> &parameters); + // Overrides tuning parameters for a specific device-precision-kernel combination. The next time // the target routine is called it will re-compile and use the new parameters from then on. 
StatusCode PUBLIC_API OverrideParameters(const CUdevice device, const std::string &kernel_name, diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 528e61dd..b77b861e 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -47,7 +47,7 @@ FILES = [ "/src/clblast_cuda.cpp", ] HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21] -FOOTER_LINES = [36, 56, 27, 38, 6, 6, 6, 9, 2, 36, 55] +FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55] HEADER_LINES_DOC = 0 FOOTER_LINES_DOC = 63 diff --git a/src/api_common.cpp b/src/api_common.cpp index 0d387cd9..4e08f1ef 100644 --- a/src/api_common.cpp +++ b/src/api_common.cpp @@ -112,6 +112,34 @@ StatusCode FillCache(const RawDeviceID device) { // ================================================================================================= +// Retrieves the current tuning parameters for this device-precision-kernel combination +StatusCode RetrieveParameters(const RawDeviceID device, const std::string &kernel_name, + const Precision precision, + std::unordered_map<std::string,size_t> &parameters) { + try { + + // Retrieves the device name + const auto device_cpp = Device(device); + const auto platform_id = device_cpp.PlatformID(); + const auto device_name = GetDeviceName(device_cpp); + + // Retrieves the database values + auto in_cache = false; + auto database = DatabaseCache::Instance().Get(DatabaseKeyRef{platform_id, device, precision, kernel_name}, &in_cache); + if (!in_cache) { + log_debug("Searching database for kernel '" + kernel_name + "'"); + database = Database(device_cpp, kernel_name, precision, {}); + } + + // Retrieves the parameters + for (const auto &parameter: database.GetParameters()) { + parameters[parameter.first] = parameter.second; + } + + } catch (...) 
{ return DispatchException(); } + return StatusCode::kSuccess; +} + // Overrides the tuning parameters for this device-precision-kernel combination StatusCode OverrideParameters(const RawDeviceID device, const std::string &kernel_name, const Precision precision, diff --git a/src/database/database.hpp b/src/database/database.hpp index 8e53e013..1db2c286 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -56,6 +56,7 @@ class Database { // Retrieves the values or names of all the parameters std::string GetValuesString() const; std::vector<std::string> GetParameterNames() const; + const database::Parameters& GetParameters() const { return *parameters_; } private: // Search method functions, returning a set of parameters (possibly empty)