Fixes inability to run GEMM on multiple identical GPUs (issue #155)
parent
9c703a6021
commit
628e1e8cce
|
@ -80,8 +80,8 @@ extern template std::string BinaryCache::Get(const BinaryKeyRef &, bool *) const
|
|||
|
||||
// The key struct for the cache of compiled OpenCL programs (context- and device-dependent)
|
||||
// Order of fields: context, device_id, precision, routine_name (smaller fields first)
|
||||
typedef std::tuple<cl_context, Precision, std::string> ProgramKey;
|
||||
typedef std::tuple<const cl_context &, const Precision &, const std::string &> ProgramKeyRef;
|
||||
typedef std::tuple<cl_context, cl_device_id, Precision, std::string> ProgramKey;
|
||||
typedef std::tuple<const cl_context &, const cl_device_id &, const Precision &, const std::string &> ProgramKeyRef;
|
||||
|
||||
typedef Cache<ProgramKey, Program> ProgramCache;
|
||||
|
||||
|
|
|
@ -2470,7 +2470,7 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern
|
|||
// Clears the existing program & binary cache for routines with the target kernel
|
||||
const auto routine_names = Routine::routines_by_kernel.at(kernel_name);
|
||||
for (const auto &routine_name : routine_names) {
|
||||
ProgramCache::Instance().RemoveBySubset<1, 2>(ProgramKey{nullptr, precision, routine_name});
|
||||
// NOTE(review): ProgramKey now has four fields (context, device, precision, routine_name).
// If RemoveBySubset's template arguments are tuple indices, <1, 2> now selects the device and
// precision fields rather than precision and routine_name -- confirm this is intended.
ProgramCache::Instance().RemoveBySubset<1, 2>(ProgramKey{nullptr, device, precision, routine_name});
|
||||
BinaryCache::Instance().Remove(BinaryKey{precision, routine_name, device_name});
|
||||
}
|
||||
|
||||
|
|
|
@ -87,7 +87,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
|||
|
||||
// Queries the cache to see whether or not the program (context- and device-specific) is already there
|
||||
bool has_program;
|
||||
program_ = ProgramCache::Instance().Get(ProgramKeyRef{ context_(), precision_, routine_name_ },
|
||||
program_ = ProgramCache::Instance().Get(ProgramKeyRef{ context_(), device_(), precision_, routine_name_ },
|
||||
&has_program);
|
||||
if (has_program) { return; }
|
||||
|
||||
|
@ -106,7 +106,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
|||
if (has_binary) {
|
||||
program_ = Program(device_, context_, binary);
|
||||
program_.Build(device_, options);
|
||||
ProgramCache::Instance().Store(ProgramKey{ context_(), precision_, routine_name_ },
|
||||
ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_name_ },
|
||||
Program{ program_ });
|
||||
return;
|
||||
}
|
||||
|
@ -185,7 +185,7 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
|||
BinaryCache::Instance().Store(BinaryKey{ precision_, routine_name_, device_name_ },
|
||||
program_.GetIR());
|
||||
|
||||
ProgramCache::Instance().Store(ProgramKey{ context_(), precision_, routine_name_ },
|
||||
ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_name_ },
|
||||
Program{ program_ });
|
||||
|
||||
// Prints the elapsed compilation time in case of debugging in verbose mode
|
||||
|
|
Loading…
Reference in New Issue