Added a RetrieveParameters function to inspect tuning parameters
parent
389919faec
commit
a500f537d8
|
@ -10,6 +10,7 @@ Development (next version)
|
||||||
- Improved compilation time by splitting the tuning database into multiple compilation units
|
- Improved compilation time by splitting the tuning database into multiple compilation units
|
||||||
- Various minor fixes and enhancements
|
- Various minor fixes and enhancements
|
||||||
- Added tuned parameters for various devices (see README)
|
- Added tuned parameters for various devices (see README)
|
||||||
|
- Added the RetrieveParameters function to the API to be able to inspect the tuning parameters
|
||||||
- Added a strided-batched (not part of the BLAS standard) routine, faster but less generic compared
|
- Added a strided-batched (not part of the BLAS standard) routine, faster but less generic compared
|
||||||
to the existing xGEMMBATCHED routines:
|
to the existing xGEMMBATCHED routines:
|
||||||
* SGEMMSTRIDEDBATCHED/DGEMMSTRIDEDBATCHED/CGEMMSTRIDEDBATCHED/ZGEMMSTRIDEDBATCHED/HGEMMSTRIDEDBATCHED
|
* SGEMMSTRIDEDBATCHED/DGEMMSTRIDEDBATCHED/CGEMMSTRIDEDBATCHED/ZGEMMSTRIDEDBATCHED/HGEMMSTRIDEDBATCHED
|
||||||
|
@ -70,7 +71,7 @@ Version 0.11.0
|
||||||
- Replaced the R graph scripts with Python/Matplotlib scripts
|
- Replaced the R graph scripts with Python/Matplotlib scripts
|
||||||
- Various minor fixes and enhancements
|
- Various minor fixes and enhancements
|
||||||
- Added tuned parameters for various devices (see README)
|
- Added tuned parameters for various devices (see README)
|
||||||
- Added the OverrideParameters function to the API to be able to supply custom tuning parmeters
|
- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
|
||||||
- Added triangular solver (level-2 & level-3) routines:
|
- Added triangular solver (level-2 & level-3) routines:
|
||||||
* STRSV/DTRSV/CTRSV/ZTRSV (experimental, un-optimized)
|
* STRSV/DTRSV/CTRSV/ZTRSV (experimental, un-optimized)
|
||||||
* STRSM/DTRSM/CTRSM/ZTRSM (experimental, un-optimized)
|
* STRSM/DTRSM/CTRSM/ZTRSM (experimental, un-optimized)
|
||||||
|
|
|
@ -682,6 +682,11 @@ StatusCode PUBLIC_API FillCache(const cl_device_id device);
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
// Retrieves current tuning parameters for a specific device-precision-kernel combination
|
||||||
|
StatusCode PUBLIC_API RetrieveParameters(const cl_device_id device, const std::string &kernel_name,
|
||||||
|
const Precision precision,
|
||||||
|
std::unordered_map<std::string,size_t> &parameters);
|
||||||
|
|
||||||
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
||||||
// the target routine is called it will re-compile and use the new parameters from then on.
|
// the target routine is called it will re-compile and use the new parameters from then on.
|
||||||
StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name,
|
StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name,
|
||||||
|
|
|
@ -654,6 +654,11 @@ StatusCode PUBLIC_API FillCache(const CUdevice device);
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
// Retrieves current tuning parameters for a specific device-precision-kernel combination
|
||||||
|
StatusCode PUBLIC_API RetrieveParameters(const CUdevice device, const std::string &kernel_name,
|
||||||
|
const Precision precision,
|
||||||
|
std::unordered_map<std::string,size_t> &parameters);
|
||||||
|
|
||||||
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
||||||
// the target routine is called it will re-compile and use the new parameters from then on.
|
// the target routine is called it will re-compile and use the new parameters from then on.
|
||||||
StatusCode PUBLIC_API OverrideParameters(const CUdevice device, const std::string &kernel_name,
|
StatusCode PUBLIC_API OverrideParameters(const CUdevice device, const std::string &kernel_name,
|
||||||
|
|
|
@ -47,7 +47,7 @@ FILES = [
|
||||||
"/src/clblast_cuda.cpp",
|
"/src/clblast_cuda.cpp",
|
||||||
]
|
]
|
||||||
HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21]
|
HEADER_LINES = [123, 21, 126, 24, 29, 41, 29, 65, 32, 95, 21]
|
||||||
FOOTER_LINES = [36, 56, 27, 38, 6, 6, 6, 9, 2, 36, 55]
|
FOOTER_LINES = [41, 56, 27, 38, 6, 6, 6, 9, 2, 41, 55]
|
||||||
HEADER_LINES_DOC = 0
|
HEADER_LINES_DOC = 0
|
||||||
FOOTER_LINES_DOC = 63
|
FOOTER_LINES_DOC = 63
|
||||||
|
|
||||||
|
|
|
@ -112,6 +112,34 @@ StatusCode FillCache(const RawDeviceID device) {
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
// Retrieves the current tuning parameters for this device-precision-kernel combination
|
||||||
|
StatusCode RetrieveParameters(const RawDeviceID device, const std::string &kernel_name,
|
||||||
|
const Precision precision,
|
||||||
|
std::unordered_map<std::string,size_t> &parameters) {
|
||||||
|
try {
|
||||||
|
|
||||||
|
// Retrieves the device name
|
||||||
|
const auto device_cpp = Device(device);
|
||||||
|
const auto platform_id = device_cpp.PlatformID();
|
||||||
|
const auto device_name = GetDeviceName(device_cpp);
|
||||||
|
|
||||||
|
// Retrieves the database values
|
||||||
|
auto in_cache = false;
|
||||||
|
auto database = DatabaseCache::Instance().Get(DatabaseKeyRef{platform_id, device, precision, kernel_name}, &in_cache);
|
||||||
|
if (!in_cache) {
|
||||||
|
log_debug("Searching database for kernel '" + kernel_name + "'");
|
||||||
|
database = Database(device_cpp, kernel_name, precision, {});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Retrieves the parameters
|
||||||
|
for (const auto &parameter: database.GetParameters()) {
|
||||||
|
parameters[parameter.first] = parameter.second;
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (...) { return DispatchException(); }
|
||||||
|
return StatusCode::kSuccess;
|
||||||
|
}
|
||||||
|
|
||||||
// Overrides the tuning parameters for this device-precision-kernel combination
|
// Overrides the tuning parameters for this device-precision-kernel combination
|
||||||
StatusCode OverrideParameters(const RawDeviceID device, const std::string &kernel_name,
|
StatusCode OverrideParameters(const RawDeviceID device, const std::string &kernel_name,
|
||||||
const Precision precision,
|
const Precision precision,
|
||||||
|
|
|
@ -56,6 +56,7 @@ class Database {
|
||||||
// Retrieves the values or names of all the parameters
|
// Retrieves the values or names of all the parameters
|
||||||
std::string GetValuesString() const;
|
std::string GetValuesString() const;
|
||||||
std::vector<std::string> GetParameterNames() const;
|
std::vector<std::string> GetParameterNames() const;
|
||||||
|
const database::Parameters& GetParameters() const { return *parameters_; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Search method functions, returning a set of parameters (possibly empty)
|
// Search method functions, returning a set of parameters (possibly empty)
|
||||||
|
|
Loading…
Reference in New Issue