Widened the Apple OpenCL CPU check (also matches devices with a max work-group size <= 32); the too-large-workgroup error now reports the requested and maximum sizes

2018-05-30 22:59:04 +02:00 · 2018-05-30 22:59:04 +02:00 · ff4d5558a6
parent a8bb0c9f3c
commit ff4d5558a6
2 changed files with 4 additions and 2 deletions
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -99,7 +99,8 @@ Database::Database(const Device &device, const std::string &kernel_name,
    if (device.Type() == "CPU") {
      const auto extensions = device.Capabilities();
      const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true;
-      if (is_apple) {
+      const auto is_likely_apple = device.MaxWorkGroupSize() <= 32;
+      if (is_apple || is_likely_apple) {
        databases.push_front(apple_cpu_fallback);
      }
    }
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -38,7 +38,8 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
    auto local_size = size_t{1};
    for (auto &item: local) { local_size *= item; }
    if (local_size > device.MaxWorkGroupSize()) {
-      throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal);
+      throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal,
+                             ToString(local_size) + " is larger than " + ToString(device.MaxWorkGroupSize()));
    }

    // Make sure the global thread sizes are at least equal to the local sizes