mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-15 19:05:44 +02:00
Fixed a bug in the xGEMM routine related to the event being set incorrectly
This commit is contained in:
parent
9e36b3b20d
commit
716d7c67d9
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
Development version (next release)
|
Development version (next release)
|
||||||
- Improved performance of large power-of-2 xGEMM kernels for AMD GPUs
|
- Improved performance of large power-of-2 xGEMM kernels for AMD GPUs
|
||||||
|
- Fixed a bug in the xGEMM routine related to the event being set incorrectly
|
||||||
|
|
||||||
Version 0.7.0
|
Version 0.7.0
|
||||||
- Added exports to be able to create a DLL on Windows (thanks to Marco Hutter)
|
- Added exports to be able to create a DLL on Windows (thanks to Marco Hutter)
|
||||||
|
|
|
@ -184,12 +184,13 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
|
||||||
|
|
||||||
// Launches the kernel
|
// Launches the kernel
|
||||||
auto eventKernel = Event();
|
auto eventKernel = Event();
|
||||||
status = RunKernel(kernel, global, local, eventKernel.pointer(), eventWaitList);
|
auto eventPointer = (!c_no_temp) ? eventKernel.pointer() : event_;
|
||||||
|
status = RunKernel(kernel, global, local, eventPointer, eventWaitList);
|
||||||
if (ErrorIn(status)) { return status; }
|
if (ErrorIn(status)) { return status; }
|
||||||
eventWaitList.push_back(eventKernel);
|
|
||||||
|
|
||||||
// Runs the post-processing kernel if needed
|
// Runs the post-processing kernel if needed
|
||||||
if (!c_no_temp) {
|
if (!c_no_temp) {
|
||||||
|
eventWaitList.push_back(eventKernel);
|
||||||
status = PadCopyTransposeMatrix(event_, eventWaitList,
|
status = PadCopyTransposeMatrix(event_, eventWaitList,
|
||||||
m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
|
m_ceiled, n_ceiled, m_ceiled, 0, c_temp,
|
||||||
c_one, c_two, c_ld, c_offset, c_buffer,
|
c_one, c_two, c_ld, c_offset, c_buffer,
|
||||||
|
|
|
@ -334,7 +334,7 @@ bool TestSimilarity(const T val1, const T val2) {
|
||||||
|
|
||||||
// Set the allowed error margin for floating-point comparisons
|
// Set the allowed error margin for floating-point comparisons
|
||||||
constexpr auto kErrorMarginRelative = T(0.025);
|
constexpr auto kErrorMarginRelative = T(0.025);
|
||||||
constexpr auto kErrorMarginAbsolute = T(1.0e-4);
|
constexpr auto kErrorMarginAbsolute = T(1.0e-3);
|
||||||
|
|
||||||
// Shortcut, handles infinities
|
// Shortcut, handles infinities
|
||||||
if (val1 == val2) {
|
if (val1 == val2) {
|
||||||
|
|
Loading…
Reference in a new issue