mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-02 12:26:57 +02:00
Made buffers of batched routines read/write (was: read-only)
This commit is contained in:
parent
f349731d54
commit
b1270f04b8
|
@ -668,6 +668,9 @@ class Buffer {
|
||||||
|
|
||||||
// Copies from host to device: writing the device buffer asynchronously
|
// Copies from host to device: writing the device buffer asynchronously
|
||||||
void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) {
|
void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) {
|
||||||
|
if (access_ == BufferAccess::kReadOnly) {
|
||||||
|
throw LogicError("Buffer: writing to a read-only buffer");
|
||||||
|
}
|
||||||
if (GetSize() < (offset+size)*sizeof(T)) {
|
if (GetSize() < (offset+size)*sizeof(T)) {
|
||||||
throw LogicError("Buffer: target device buffer is too small");
|
throw LogicError("Buffer: target device buffer is too small");
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,9 +59,9 @@ void XaxpyBatched<T>::DoAxpyBatched(const size_t n, const std::vector<T> &alphas
|
||||||
x_offsets_int[batch] = static_cast<int>(x_offsets[batch]);
|
x_offsets_int[batch] = static_cast<int>(x_offsets[batch]);
|
||||||
y_offsets_int[batch] = static_cast<int>(y_offsets[batch]);
|
y_offsets_int[batch] = static_cast<int>(y_offsets[batch]);
|
||||||
}
|
}
|
||||||
auto x_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto x_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto y_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto y_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto alphas_device = Buffer<T>(context_, BufferAccess::kReadOnly, batch_count);
|
auto alphas_device = Buffer<T>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
x_offsets_device.Write(queue_, batch_count, x_offsets_int);
|
x_offsets_device.Write(queue_, batch_count, x_offsets_int);
|
||||||
y_offsets_device.Write(queue_, batch_count, y_offsets_int);
|
y_offsets_device.Write(queue_, batch_count, y_offsets_int);
|
||||||
alphas_device.Write(queue_, batch_count, alphas);
|
alphas_device.Write(queue_, batch_count, alphas);
|
||||||
|
|
|
@ -100,8 +100,8 @@ void XgemmBatched<T>::DoGemmBatched(const Layout layout, const Transpose a_trans
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upload the scalar arguments to the device
|
// Upload the scalar arguments to the device
|
||||||
auto alphas_device = Buffer<T>(context_, BufferAccess::kReadOnly, batch_count);
|
auto alphas_device = Buffer<T>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto betas_device = Buffer<T>(context_, BufferAccess::kReadOnly, batch_count);
|
auto betas_device = Buffer<T>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
alphas_device.Write(queue_, batch_count, alphas);
|
alphas_device.Write(queue_, batch_count, alphas);
|
||||||
betas_device.Write(queue_, batch_count, betas);
|
betas_device.Write(queue_, batch_count, betas);
|
||||||
|
|
||||||
|
@ -200,8 +200,8 @@ void XgemmBatched<T>::BatchedGemmIndirect(const size_t m, const size_t n, const
|
||||||
// to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
|
// to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
|
||||||
// case nothing has to be done, these kernels can be skipped.
|
// case nothing has to be done, these kernels can be skipped.
|
||||||
if (!a_no_temp) {
|
if (!a_no_temp) {
|
||||||
auto a_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto a_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto a_offsets_i_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto a_offsets_i_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
a_offsets_device.Write(queue_, batch_count, a_offsets);
|
a_offsets_device.Write(queue_, batch_count, a_offsets);
|
||||||
a_offsets_i_device.Write(queue_, batch_count, a_offsets_i);
|
a_offsets_i_device.Write(queue_, batch_count, a_offsets_i);
|
||||||
auto eventProcessA = Event();
|
auto eventProcessA = Event();
|
||||||
|
@ -214,8 +214,8 @@ void XgemmBatched<T>::BatchedGemmIndirect(const size_t m, const size_t n, const
|
||||||
|
|
||||||
// As above, but now for matrix B
|
// As above, but now for matrix B
|
||||||
if (!b_no_temp) {
|
if (!b_no_temp) {
|
||||||
auto b_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto b_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto b_offsets_i_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto b_offsets_i_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
b_offsets_device.Write(queue_, batch_count, b_offsets);
|
b_offsets_device.Write(queue_, batch_count, b_offsets);
|
||||||
b_offsets_i_device.Write(queue_, batch_count, b_offsets_i);
|
b_offsets_i_device.Write(queue_, batch_count, b_offsets_i);
|
||||||
auto eventProcessB = Event();
|
auto eventProcessB = Event();
|
||||||
|
@ -227,8 +227,8 @@ void XgemmBatched<T>::BatchedGemmIndirect(const size_t m, const size_t n, const
|
||||||
}
|
}
|
||||||
|
|
||||||
// As above, but now for matrix C
|
// As above, but now for matrix C
|
||||||
auto c_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto c_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto c_offsets_i_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto c_offsets_i_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
if (!c_no_temp) {
|
if (!c_no_temp) {
|
||||||
c_offsets_device.Write(queue_, batch_count, c_offsets);
|
c_offsets_device.Write(queue_, batch_count, c_offsets);
|
||||||
c_offsets_i_device.Write(queue_, batch_count, c_offsets_i);
|
c_offsets_i_device.Write(queue_, batch_count, c_offsets_i);
|
||||||
|
@ -297,9 +297,9 @@ void XgemmBatched<T>::BatchedGemmDirect(const size_t m, const size_t n, const si
|
||||||
const size_t batch_count) {
|
const size_t batch_count) {
|
||||||
|
|
||||||
// Uploads the offsets to the device
|
// Uploads the offsets to the device
|
||||||
auto a_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto a_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto b_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto b_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
auto c_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
|
auto c_offsets_device = Buffer<int>(context_, BufferAccess::kReadWrite, batch_count);
|
||||||
a_offsets_device.Write(queue_, batch_count, a_offsets);
|
a_offsets_device.Write(queue_, batch_count, a_offsets);
|
||||||
b_offsets_device.Write(queue_, batch_count, b_offsets);
|
b_offsets_device.Write(queue_, batch_count, b_offsets);
|
||||||
c_offsets_device.Write(queue_, batch_count, c_offsets);
|
c_offsets_device.Write(queue_, batch_count, c_offsets);
|
||||||
|
|
Loading…
Reference in a new issue