Fixed half-precision bugs in HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM related to incorrect constants

pull/143/head
Cedric Nugteren 2017-02-27 21:00:04 +01:00
parent 4284fcd940
commit 00281dad26
6 changed files with 10 additions and 9 deletions

View File

@@ -4,6 +4,7 @@ Development version (next release)
- Fixed a bug that caused the binary to be re-created even if it was already in the cache
- Fixed a bug when using offsets in the direct version of the GEMM kernels
- Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs
- Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
- Tests now also exit with an error code when OpenCL errors or compilation errors occur
- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
- Various minor fixes and enhancements

View File

@@ -52,9 +52,9 @@ void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
auto fast_kernels = false;
try {
MatVec(layout, a_transpose,
n, n, static_cast<T>(1),
n, n, ConstantOne<T>(),
a_buffer, a_offset, a_ld,
scratch_buffer, x_offset, x_inc, static_cast<T>(0),
scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
x_buffer, x_offset, x_inc,
fast_kernels, fast_kernels,
parameter, false, k, 0);

View File

@@ -52,9 +52,9 @@ void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
auto fast_kernels = false;
try {
MatVec(layout, a_transpose,
n, n, static_cast<T>(1),
n, n, ConstantOne<T>(),
ap_buffer, ap_offset, n,
scratch_buffer, x_offset, x_inc, static_cast<T>(0),
scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
x_buffer, x_offset, x_inc,
fast_kernels, fast_kernels,
parameter, true, 0, 0);

View File

@@ -52,9 +52,9 @@ void Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
auto fast_kernels = false;
try {
MatVec(layout, a_transpose,
n, n, static_cast<T>(1),
n, n, ConstantOne<T>(),
a_buffer, a_offset, a_ld,
scratch_buffer, x_offset, x_inc, static_cast<T>(0),
scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
x_buffer, x_offset, x_inc,
fast_kernels, fast_kernels,
parameter, false, 0, 0);

View File

@@ -149,7 +149,7 @@ void Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Tran
eventWaitList.push_back(eventKernel1);
// Swaps the arguments for matrices A and B, and sets 'beta' to 1
auto one = static_cast<T>(1);
auto one = ConstantOne<T>();
kernel.SetArgument(3, GetRealArg(one));
kernel.SetArgument(4, b_temp());
kernel.SetArgument(5, a_temp());

View File

@@ -101,7 +101,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian
alpha,
temp_triangular, 0, k,
b_buffer_copy, b_offset, b_ld,
static_cast<T>(0.0),
ConstantZero<T>(),
b_buffer, b_offset, b_ld);
}
@@ -113,7 +113,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian
alpha,
b_buffer_copy, b_offset, b_ld,
temp_triangular, 0, k,
static_cast<T>(0.0),
ConstantZero<T>(),
b_buffer, b_offset, b_ld);
} catch (BLASError &e) {
// A and B are now reversed, so also reverse the error codes returned from the Xgemm routine