Fixed a failing TRSM test when running on a CPU with Apple OpenCL

pull/269/head
Cedric Nugteren 2018-03-15 21:09:52 +01:00
parent 7a756cbce7
commit 52791bf355
2 changed files with 3 additions and 3 deletions

View File

@@ -76,7 +76,7 @@ R"(
// =================================================================================================
#if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM)
-__kernel __attribute__((reqd_work_group_size(8, 8, 1)))
+__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
void FillMatrix(const int m, const int n, const int ld, const int offset,
__global real* restrict dest, const real_arg arg_value) {
const real value = GetRealArg(arg_value);

View File

@@ -89,8 +89,8 @@ void FillMatrix(Queue &queue, const Device &device,
kernel.SetArgument(3, static_cast<int>(offset));
kernel.SetArgument(4, dest());
kernel.SetArgument(5, GetRealArg(constant_value));
-auto local = std::vector<size_t>{8, 8};
-auto global = std::vector<size_t>{Ceil(m, 8), Ceil(n, 8)};
+auto local = std::vector<size_t>{16, 1};
+auto global = std::vector<size_t>{Ceil(m, 16), n};
RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}