Widened Apple OpenCL check, added way to debug too-large-workgroups issue

CLBlast-288-local-memory-optional-for-direct-GEMM
Cedric Nugteren 2018-05-30 22:59:04 +02:00
parent a8bb0c9f3c
commit ff4d5558a6
2 changed files with 4 additions and 2 deletions

View File

@@ -99,7 +99,8 @@ Database::Database(const Device &device, const std::string &kernel_name,
if (device.Type() == "CPU") {
const auto extensions = device.Capabilities();
const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true;
-if (is_apple) {
+const auto is_likely_apple = device.MaxWorkGroupSize() <= 32;
+if (is_apple || is_likely_apple) {
databases.push_front(apple_cpu_fallback);
}
}

View File

@@ -38,7 +38,8 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
auto local_size = size_t{1};
for (auto &item: local) { local_size *= item; }
if (local_size > device.MaxWorkGroupSize()) {
-throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal);
+throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal,
+ToString(local_size) + " is larger than " + ToString(device.MaxWorkGroupSize()));
}
// Make sure the global thread sizes are at least equal to the local sizes