mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-07 12:23:46 +02:00
Merge branch 'device_name_slow_on_nvidia_gpu'
This commit is contained in:
commit
0d8313708c
|
@ -4,6 +4,7 @@ Development (next version)
|
||||||
- The tuning database now has a dictionary to translate vendor/device names to a common set
|
- The tuning database now has a dictionary to translate vendor/device names to a common set
|
||||||
- The tuners can now distinguish between different AMD GPU board names of the same architecture
|
- The tuners can now distinguish between different AMD GPU board names of the same architecture
|
||||||
- The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian')
|
- The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian')
|
||||||
|
- Improved performance for small problems on NVIDIA hardware by caching the device name
|
||||||
- Further improved compilation time of database.cpp
|
- Further improved compilation time of database.cpp
|
||||||
- Added a small diagnostics helper executable
|
- Added a small diagnostics helper executable
|
||||||
- Various minor fixes and enhancements
|
- Various minor fixes and enhancements
|
||||||
|
|
|
@ -93,9 +93,9 @@ extern template Program ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
||||||
class Database;
|
class Database;
|
||||||
|
|
||||||
// The key struct for the cache of database maps.
|
// The key struct for the cache of database maps.
|
||||||
// Order of fields: precision, device_name, kernel_name (smaller fields first)
|
// Order of fields: platform_id, device_id, precision, kernel_name (smaller fields first)
|
||||||
typedef std::tuple<Precision, std::string, std::string> DatabaseKey;
|
typedef std::tuple<cl_platform_id, cl_device_id, Precision, std::string> DatabaseKey;
|
||||||
typedef std::tuple<const Precision &, const std::string &, const std::string &> DatabaseKeyRef;
|
typedef std::tuple<const cl_platform_id &, const cl_device_id &, const Precision &, const std::string &> DatabaseKeyRef;
|
||||||
|
|
||||||
typedef Cache<DatabaseKey, Database> DatabaseCache;
|
typedef Cache<DatabaseKey, Database> DatabaseCache;
|
||||||
|
|
||||||
|
|
|
@ -2492,11 +2492,12 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern
|
||||||
|
|
||||||
// Retrieves the device name
|
// Retrieves the device name
|
||||||
const auto device_cpp = Device(device);
|
const auto device_cpp = Device(device);
|
||||||
const auto device_name = device_cpp.Name();
|
const auto platform_id = device_cpp.Platform();
|
||||||
|
const auto device_name = GetDeviceName(device_cpp);
|
||||||
|
|
||||||
// Retrieves the current database values to verify whether the new ones are complete
|
// Retrieves the current database values to verify whether the new ones are complete
|
||||||
auto in_cache = false;
|
auto in_cache = false;
|
||||||
const auto current_database = DatabaseCache::Instance().Get(DatabaseKeyRef{ precision, device_name, kernel_name }, &in_cache);
|
const auto current_database = DatabaseCache::Instance().Get(DatabaseKeyRef{platform_id, device, precision, kernel_name}, &in_cache);
|
||||||
if (!in_cache) { return StatusCode::kInvalidOverrideKernel; }
|
if (!in_cache) { return StatusCode::kInvalidOverrideKernel; }
|
||||||
for (const auto &current_param : current_database.GetParameterNames()) {
|
for (const auto &current_param : current_database.GetParameterNames()) {
|
||||||
if (parameters.find(current_param) == parameters.end()) {
|
if (parameters.find(current_param) == parameters.end()) {
|
||||||
|
@ -2530,8 +2531,8 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern
|
||||||
const auto database = Database(device_cpp, kernel_name, precision, database_entries);
|
const auto database = Database(device_cpp, kernel_name, precision, database_entries);
|
||||||
|
|
||||||
// Removes the old database entry and stores the new one in the cache
|
// Removes the old database entry and stores the new one in the cache
|
||||||
DatabaseCache::Instance().Remove(DatabaseKey{ precision, device_name, kernel_name });
|
DatabaseCache::Instance().Remove(DatabaseKey{platform_id, device, precision, kernel_name});
|
||||||
DatabaseCache::Instance().Store(DatabaseKey{ precision, device_name, kernel_name }, Database(database));
|
DatabaseCache::Instance().Store(DatabaseKey{platform_id, device, precision, kernel_name}, Database(database));
|
||||||
|
|
||||||
} catch (...) { return DispatchException(); }
|
} catch (...) { return DispatchException(); }
|
||||||
return StatusCode::kSuccess;
|
return StatusCode::kSuccess;
|
||||||
|
|
|
@ -230,6 +230,7 @@ class Device {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Methods to retrieve device information
|
// Methods to retrieve device information
|
||||||
|
cl_platform_id Platform() const { return GetInfo<cl_platform_id>(CL_DEVICE_PLATFORM); }
|
||||||
std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
|
std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
|
||||||
size_t VersionNumber() const
|
size_t VersionNumber() const
|
||||||
{
|
{
|
||||||
|
|
|
@ -60,7 +60,7 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
|
||||||
event_(event),
|
event_(event),
|
||||||
context_(queue_.GetContext()),
|
context_(queue_.GetContext()),
|
||||||
device_(queue_.GetDevice()),
|
device_(queue_.GetDevice()),
|
||||||
device_name_(device_.Name()),
|
platform_(device_.Platform()),
|
||||||
db_(kernel_names) {
|
db_(kernel_names) {
|
||||||
|
|
||||||
InitDatabase(userDatabase);
|
InitDatabase(userDatabase);
|
||||||
|
@ -72,13 +72,13 @@ void Routine::InitDatabase(const std::vector<database::DatabaseEntry> &userDatab
|
||||||
|
|
||||||
// Queries the cache to see whether or not the kernel parameter database is already there
|
// Queries the cache to see whether or not the kernel parameter database is already there
|
||||||
bool has_db;
|
bool has_db;
|
||||||
db_(kernel_name) = DatabaseCache::Instance().Get(DatabaseKeyRef{ precision_, device_name_, kernel_name },
|
db_(kernel_name) = DatabaseCache::Instance().Get(DatabaseKeyRef{ platform_, device_(), precision_, kernel_name },
|
||||||
&has_db);
|
&has_db);
|
||||||
if (has_db) { continue; }
|
if (has_db) { continue; }
|
||||||
|
|
||||||
// Builds the parameter database for this device and routine set and stores it in the cache
|
// Builds the parameter database for this device and routine set and stores it in the cache
|
||||||
db_(kernel_name) = Database(device_, kernel_name, precision_, userDatabase);
|
db_(kernel_name) = Database(device_, kernel_name, precision_, userDatabase);
|
||||||
DatabaseCache::Instance().Store(DatabaseKey{ precision_, device_name_, kernel_name },
|
DatabaseCache::Instance().Store(DatabaseKey{ platform_, device_(), precision_, kernel_name },
|
||||||
Database{ db_(kernel_name) });
|
Database{ db_(kernel_name) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -100,8 +100,9 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
||||||
|
|
||||||
// Queries the cache to see whether or not the binary (device-specific) is already there. If it
|
// Queries the cache to see whether or not the binary (device-specific) is already there. If it
|
||||||
// is, a program is created and stored in the cache
|
// is, a program is created and stored in the cache
|
||||||
|
const auto device_name = GetDeviceName(device_);
|
||||||
bool has_binary;
|
bool has_binary;
|
||||||
auto binary = BinaryCache::Instance().Get(BinaryKeyRef{ precision_, routine_name_, device_name_ },
|
auto binary = BinaryCache::Instance().Get(BinaryKeyRef{ precision_, routine_name_, device_name },
|
||||||
&has_binary);
|
&has_binary);
|
||||||
if (has_binary) {
|
if (has_binary) {
|
||||||
program_ = Program(device_, context_, binary);
|
program_ = Program(device_, context_, binary);
|
||||||
|
@ -171,7 +172,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
||||||
// Prints details of the routine to compile in case of debugging in verbose mode
|
// Prints details of the routine to compile in case of debugging in verbose mode
|
||||||
#ifdef VERBOSE
|
#ifdef VERBOSE
|
||||||
printf("[DEBUG] Compiling routine '%s-%s' for device '%s'\n",
|
printf("[DEBUG] Compiling routine '%s-%s' for device '%s'\n",
|
||||||
routine_name_.c_str(), ToString(precision_).c_str(), device_name_.c_str());
|
routine_name_.c_str(), ToString(precision_).c_str(), device_name.c_str());
|
||||||
const auto start_time = std::chrono::steady_clock::now();
|
const auto start_time = std::chrono::steady_clock::now();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -188,7 +189,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store the compiled binary and program in the cache
|
// Store the compiled binary and program in the cache
|
||||||
BinaryCache::Instance().Store(BinaryKey{ precision_, routine_name_, device_name_ },
|
BinaryCache::Instance().Store(BinaryKey{ precision_, routine_name_, device_name },
|
||||||
program_.GetIR());
|
program_.GetIR());
|
||||||
|
|
||||||
ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_name_ },
|
ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_name_ },
|
||||||
|
|
|
@ -75,9 +75,7 @@ class Routine {
|
||||||
EventPointer event_;
|
EventPointer event_;
|
||||||
const Context context_;
|
const Context context_;
|
||||||
const Device device_;
|
const Device device_;
|
||||||
|
const cl_platform_id platform_;
|
||||||
// OpenCL device properties
|
|
||||||
const std::string device_name_;
|
|
||||||
|
|
||||||
// Compiled program (either retrieved from cache or compiled in slow path)
|
// Compiled program (either retrieved from cache or compiled in slow path)
|
||||||
Program program_;
|
Program program_;
|
||||||
|
|
|
@ -141,7 +141,7 @@ Tester<T,U>::Tester(const std::vector<std::string> &arguments, const bool silent
|
||||||
}
|
}
|
||||||
|
|
||||||
// Prints the header
|
// Prints the header
|
||||||
fprintf(stdout, "* Running on OpenCL device '%s'.\n", device_.Name().c_str());
|
fprintf(stdout, "* Running on OpenCL device '%s'.\n", GetDeviceName(device_).c_str());
|
||||||
fprintf(stdout, "* Starting tests for the %s'%s'%s routine.",
|
fprintf(stdout, "* Starting tests for the %s'%s'%s routine.",
|
||||||
kPrintMessage.c_str(), name.c_str(), kPrintEnd.c_str());
|
kPrintMessage.c_str(), name.c_str(), kPrintEnd.c_str());
|
||||||
|
|
||||||
|
|
|
@ -83,6 +83,9 @@ void OpenCLDiagnostics(int argc, char *argv[]) {
|
||||||
printf("* queue.GetContext() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetContext();} ));
|
printf("* queue.GetContext() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetContext();} ));
|
||||||
printf("* queue.GetDevice() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetDevice();} ));
|
printf("* queue.GetDevice() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetDevice();} ));
|
||||||
printf("* device.Name() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Name();} ));
|
printf("* device.Name() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Name();} ));
|
||||||
|
printf("* device.Vendor() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Vendor();} ));
|
||||||
|
printf("* device.Version() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Version();} ));
|
||||||
|
printf("* device.Platform() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Platform();} ));
|
||||||
printf("* Buffer<float>(context, 1024) %.4lf ms\n", TimeFunction(kNumRuns, [&](){Buffer<float>(context, 1024);} ));
|
printf("* Buffer<float>(context, 1024) %.4lf ms\n", TimeFunction(kNumRuns, [&](){Buffer<float>(context, 1024);} ));
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
|
|
Loading…
Reference in a new issue