diff --git a/src/database/database.cpp b/src/database/database.cpp index 60851fe7..fca3102d 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -99,7 +99,8 @@ Database::Database(const Device &device, const std::string &kernel_name, if (device.Type() == "CPU") { const auto extensions = device.Capabilities(); const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true; - if (is_apple) { + const auto is_likely_apple = device.MaxWorkGroupSize() <= 32; + if (is_apple || is_likely_apple) { databases.push_front(apple_cpu_fallback); } } diff --git a/src/routines/common.cpp b/src/routines/common.cpp index e81c7873..d3c402bd 100644 --- a/src/routines/common.cpp +++ b/src/routines/common.cpp @@ -38,7 +38,8 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device, auto local_size = size_t{1}; for (auto &item: local) { local_size *= item; } if (local_size > device.MaxWorkGroupSize()) { - throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal); + throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal, + ToString(local_size) + " is larger than " + ToString(device.MaxWorkGroupSize())); } // Make sure the global thread sizes are at least equal to the local sizes