Fixed a TRSM test that failed when running on a CPU device with Apple's OpenCL
parent
7a756cbce7
commit
52791bf355
|
@ -76,7 +76,7 @@ R"(
|
|||
// =================================================================================================
|
||||
#if defined(ROUTINE_INVERT) || defined(ROUTINE_TRSM)
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(8, 8, 1)))
|
||||
__kernel __attribute__((reqd_work_group_size(16, 1, 1)))
|
||||
void FillMatrix(const int m, const int n, const int ld, const int offset,
|
||||
__global real* restrict dest, const real_arg arg_value) {
|
||||
const real value = GetRealArg(arg_value);
|
||||
|
|
|
@ -89,8 +89,8 @@ void FillMatrix(Queue &queue, const Device &device,
|
|||
kernel.SetArgument(3, static_cast<int>(offset));
|
||||
kernel.SetArgument(4, dest());
|
||||
kernel.SetArgument(5, GetRealArg(constant_value));
|
||||
auto local = std::vector<size_t>{8, 8};
|
||||
auto global = std::vector<size_t>{Ceil(m, 8), Ceil(n, 8)};
|
||||
auto local = std::vector<size_t>{16, 1};
|
||||
auto global = std::vector<size_t>{Ceil(m, 16), n};
|
||||
RunKernel(kernel, queue, device, global, local, event, waitForEvents);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue