Fixed an if-statement in the direct GEMM kernel causing a bug with specific sets of input parameters

pull/176/head
Cedric Nugteren 2017-06-30 21:57:41 +02:00
parent 52881f3864
commit 4cf516cfec
2 changed files with 5 additions and 2 deletions

View File

@@ -2,6 +2,7 @@
Development (next version)
- Fixed a bug in the TRSM routine for alpha != 1
- Fixed a bug in the cache related to multi-device contexts (thanks to 'kpot')
- Fixed a bug in the direct version of the GEMM kernel
- Fixed several warnings for MSVC and Clang
- Performance reports are now external at https://cnugteren.github.io/clblast
- Greatly improved compilation time of database.cpp

View File

@@ -255,7 +255,8 @@ inline void GlobalToLocalCheckedA(const __global real* restrict agms, __local re
int idk = (a_transpose) ? kg + GetGroupID0()*WGD : kg + kwg;
// Loads the data from global memory into the local memory
-int condition = (a_transpose) ? idm < kSizeK : idm < kSizeM;
+int condition = (a_transpose) ? (idm < kSizeK) && (idk < kSizeM) :
+(idm < kSizeM) && (idk < kSizeK);
if (condition) {
real result = agms[idk*a_ld + idm + a_offset];
if (a_conjugate) { COMPLEX_CONJUGATE(result); }
@@ -293,7 +294,8 @@ inline void GlobalToLocalCheckedB(const __global real* restrict bgms, __local re
int idk = (b_transpose) ? kg + GetGroupID1()*WGD : kg + kwg;
// Loads the data from global memory into the local memory
-int condition = (b_transpose) ? idn < kSizeK : idn < kSizeN;
+int condition = (b_transpose) ? (idn < kSizeK) && (idk < kSizeN) :
+(idn < kSizeN) && (idk < kSizeK);
if (condition) {
real result = bgms[idk*b_ld + idn + b_offset];
if (b_conjugate) { COMPLEX_CONJUGATE(result); }