Merge pull request #312 from CNugteren/CLBlast-311-missing-event-in-trsv-trsm
Missing events in TRSV and TRSM (pull/316/head)
commit
c2c1e5fa95
|
@ -729,9 +729,10 @@ class Buffer {
|
|||
}
|
||||
|
||||
// Copies the contents of this buffer into another device buffer
|
||||
void CopyToAsync(const Queue &queue, const size_t size, const Buffer<T> &destination) const {
|
||||
void CopyToAsync(const Queue &queue, const size_t size, const Buffer<T> &destination,
|
||||
EventPointer event = nullptr) const {
|
||||
CheckError(clEnqueueCopyBuffer(queue(), *buffer_, destination(), 0, 0, size*sizeof(T), 0,
|
||||
nullptr, nullptr));
|
||||
nullptr, event));
|
||||
}
|
||||
void CopyTo(const Queue &queue, const size_t size, const Buffer<T> &destination) const {
|
||||
CopyToAsync(queue, size, destination);
|
||||
|
|
|
@ -33,7 +33,8 @@ void Xtrsv<T>::Substitution(const Layout layout, const Triangle triangle,
|
|||
const size_t n,
|
||||
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_inc,
|
||||
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
EventPointer event) {
|
||||
|
||||
if (n > db_["TRSV_BLOCK_SIZE"]) { throw BLASError(StatusCode::kUnexpectedError); };
|
||||
|
||||
|
@ -69,9 +70,7 @@ void Xtrsv<T>::Substitution(const Layout layout, const Triangle triangle,
|
|||
// Launches the kernel
|
||||
const auto local = std::vector<size_t>{db_["TRSV_BLOCK_SIZE"]};
|
||||
const auto global = std::vector<size_t>{Ceil(n, db_["TRSV_BLOCK_SIZE"])};
|
||||
auto event = Event();
|
||||
RunKernel(kernel, queue_, device_, global, local, event.pointer());
|
||||
event.WaitForCompletion();
|
||||
RunKernel(kernel, queue_, device_, global, local, event);
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
@ -146,14 +145,16 @@ void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle,
|
|||
}
|
||||
|
||||
// Runs the triangular substitution for the block size
|
||||
auto sub_event = Event();
|
||||
Substitution(layout, triangle, a_transpose, diagonal, block_size,
|
||||
a_buffer, a_offset + col + col*a_ld, a_ld,
|
||||
b_buffer, b_offset + col*b_inc, b_inc,
|
||||
x_buffer, x_offset + col*x_inc, x_inc);
|
||||
x_buffer, x_offset + col*x_inc, x_inc, sub_event.pointer());
|
||||
sub_event.WaitForCompletion();
|
||||
}
|
||||
|
||||
// Retrieves the results
|
||||
x_buffer.CopyTo(queue_, x_size, b_buffer);
|
||||
x_buffer.CopyToAsync(queue_, x_size, b_buffer, event_);
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -32,6 +32,7 @@ class Xtrsv: public Xgemv<T> {
|
|||
using Xgemv<T>::device_;
|
||||
using Xgemv<T>::db_;
|
||||
using Xgemv<T>::program_;
|
||||
using Xgemv<T>::event_;
|
||||
using Xgemv<T>::DoGemv;
|
||||
|
||||
// Constructor
|
||||
|
@ -50,7 +51,8 @@ class Xtrsv: public Xgemv<T> {
|
|||
const size_t n,
|
||||
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_inc,
|
||||
const Buffer<T> &x_buffer, const size_t offset_x, const size_t x_inc);
|
||||
const Buffer<T> &x_buffer, const size_t offset_x, const size_t x_inc,
|
||||
EventPointer event);
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -246,7 +246,7 @@ void Xtrsm<T>::TrsmColMajor(const Side side, const Triangle triangle,
|
|||
}
|
||||
|
||||
// Retrieves the results
|
||||
x_buffer.CopyTo(queue_, b_size, b_buffer);
|
||||
x_buffer.CopyToAsync(queue_, b_size, b_buffer, event_);
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -31,6 +31,7 @@ class Xtrsm: public Xgemm<T> {
|
|||
using Xgemm<T>::device_;
|
||||
using Xgemm<T>::db_;
|
||||
using Xgemm<T>::program_;
|
||||
using Xgemm<T>::event_;
|
||||
using Xgemm<T>::DoGemm;
|
||||
|
||||
// Constructor
|
||||
|
|
Loading…
Reference in New Issue