Fixed half-precision bugs in HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM related to incorrect constants
parent
4284fcd940
commit
00281dad26
|
@ -4,6 +4,7 @@ Development version (next release)
|
|||
- Fixed a bug that required re-creating the binary even if it was in the cache
|
||||
- Fixed a bug when using offsets in the direct version of the GEMM kernels
|
||||
- Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs
|
||||
- Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
|
||||
- Tests now also exit with an error code when OpenCL errors or compilation errors occur
|
||||
- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
|
||||
- Various minor fixes and enhancements
|
||||
|
|
|
@ -52,9 +52,9 @@ void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
|
|||
auto fast_kernels = false;
|
||||
try {
|
||||
MatVec(layout, a_transpose,
|
||||
n, n, static_cast<T>(1),
|
||||
n, n, ConstantOne<T>(),
|
||||
a_buffer, a_offset, a_ld,
|
||||
scratch_buffer, x_offset, x_inc, static_cast<T>(0),
|
||||
scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
|
||||
x_buffer, x_offset, x_inc,
|
||||
fast_kernels, fast_kernels,
|
||||
parameter, false, k, 0);
|
||||
|
|
|
@ -52,9 +52,9 @@ void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
|
|||
auto fast_kernels = false;
|
||||
try {
|
||||
MatVec(layout, a_transpose,
|
||||
n, n, static_cast<T>(1),
|
||||
n, n, ConstantOne<T>(),
|
||||
ap_buffer, ap_offset, n,
|
||||
scratch_buffer, x_offset, x_inc, static_cast<T>(0),
|
||||
scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
|
||||
x_buffer, x_offset, x_inc,
|
||||
fast_kernels, fast_kernels,
|
||||
parameter, true, 0, 0);
|
||||
|
|
|
@ -52,9 +52,9 @@ void Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
|
|||
auto fast_kernels = false;
|
||||
try {
|
||||
MatVec(layout, a_transpose,
|
||||
n, n, static_cast<T>(1),
|
||||
n, n, ConstantOne<T>(),
|
||||
a_buffer, a_offset, a_ld,
|
||||
scratch_buffer, x_offset, x_inc, static_cast<T>(0),
|
||||
scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
|
||||
x_buffer, x_offset, x_inc,
|
||||
fast_kernels, fast_kernels,
|
||||
parameter, false, 0, 0);
|
||||
|
|
|
@ -149,7 +149,7 @@ void Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Tran
|
|||
eventWaitList.push_back(eventKernel1);
|
||||
|
||||
// Swaps the arguments for matrices A and B, and sets 'beta' to 1
|
||||
auto one = static_cast<T>(1);
|
||||
auto one = ConstantOne<T>();
|
||||
kernel.SetArgument(3, GetRealArg(one));
|
||||
kernel.SetArgument(4, b_temp());
|
||||
kernel.SetArgument(5, a_temp());
|
||||
|
|
|
@ -101,7 +101,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian
|
|||
alpha,
|
||||
temp_triangular, 0, k,
|
||||
b_buffer_copy, b_offset, b_ld,
|
||||
static_cast<T>(0.0),
|
||||
ConstantZero<T>(),
|
||||
b_buffer, b_offset, b_ld);
|
||||
}
|
||||
|
||||
|
@ -113,7 +113,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian
|
|||
alpha,
|
||||
b_buffer_copy, b_offset, b_ld,
|
||||
temp_triangular, 0, k,
|
||||
static_cast<T>(0.0),
|
||||
ConstantZero<T>(),
|
||||
b_buffer, b_offset, b_ld);
|
||||
} catch (BLASError &e) {
|
||||
// A and B are now reversed, so also reverse the error codes returned from the Xgemm routine
|
||||
|
|
Loading…
Reference in New Issue