From b1270f04b89c3271aca11594501f7e997848e394 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Tue, 17 Oct 2017 19:56:47 +0200 Subject: [PATCH] Made buffers of batched routines read/write (was: read-only) --- src/clpp11.hpp | 3 +++ src/routines/levelx/xaxpybatched.cpp | 6 +++--- src/routines/levelx/xgemmbatched.cpp | 22 +++++++++++----------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/clpp11.hpp b/src/clpp11.hpp index 97045644..2335caef 100644 --- a/src/clpp11.hpp +++ b/src/clpp11.hpp @@ -668,6 +668,9 @@ class Buffer { // Copies from host to device: writing the device buffer a-synchronously void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) { + if (access_ == BufferAccess::kReadOnly) { + throw LogicError("Buffer: writing to a read-only buffer"); + } if (GetSize() < (offset+size)*sizeof(T)) { throw LogicError("Buffer: target device buffer is too small"); } diff --git a/src/routines/levelx/xaxpybatched.cpp b/src/routines/levelx/xaxpybatched.cpp index 0b755ccf..52c27b78 100644 --- a/src/routines/levelx/xaxpybatched.cpp +++ b/src/routines/levelx/xaxpybatched.cpp @@ -59,9 +59,9 @@ void XaxpyBatched::DoAxpyBatched(const size_t n, const std::vector &alphas x_offsets_int[batch] = static_cast(x_offsets[batch]); y_offsets_int[batch] = static_cast(y_offsets[batch]); } - auto x_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto y_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto alphas_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); + auto x_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto y_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto alphas_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); x_offsets_device.Write(queue_, batch_count, x_offsets_int); y_offsets_device.Write(queue_, batch_count, y_offsets_int); alphas_device.Write(queue_, batch_count, alphas); diff --git a/src/routines/levelx/xgemmbatched.cpp b/src/routines/levelx/xgemmbatched.cpp index 4e9f0004..8a015e97 100644 --- a/src/routines/levelx/xgemmbatched.cpp +++ b/src/routines/levelx/xgemmbatched.cpp @@ -100,8 +100,8 @@ void XgemmBatched::DoGemmBatched(const Layout layout, const Transpose a_trans } // Upload the scalar arguments to the device - auto alphas_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto betas_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); + auto alphas_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto betas_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); alphas_device.Write(queue_, batch_count, alphas); betas_device.Write(queue_, batch_count, betas); @@ -200,8 +200,8 @@ void XgemmBatched::BatchedGemmIndirect(const size_t m, const size_t n, const // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In // case nothing has to be done, these kernels can be skipped. if (!a_no_temp) { - auto a_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto a_offsets_i_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); + auto a_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto a_offsets_i_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); a_offsets_device.Write(queue_, batch_count, a_offsets); a_offsets_i_device.Write(queue_, batch_count, a_offsets_i); auto eventProcessA = Event(); @@ -214,8 +214,8 @@ void XgemmBatched::BatchedGemmIndirect(const size_t m, const size_t n, const // As above, but now for matrix B if (!b_no_temp) { - auto b_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto b_offsets_i_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); + auto b_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto b_offsets_i_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); b_offsets_device.Write(queue_, batch_count, b_offsets); b_offsets_i_device.Write(queue_, batch_count, b_offsets_i); auto eventProcessB = Event(); @@ -227,8 +227,8 @@ void XgemmBatched::BatchedGemmIndirect(const size_t m, const size_t n, const } // As above, but now for matrix C - auto c_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto c_offsets_i_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); + auto c_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto c_offsets_i_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); if (!c_no_temp) { c_offsets_device.Write(queue_, batch_count, c_offsets); c_offsets_i_device.Write(queue_, batch_count, c_offsets_i); @@ -297,9 +297,9 @@ void XgemmBatched::BatchedGemmDirect(const size_t m, const size_t n, const si const size_t batch_count) { // Uploads the offsets to the device - auto a_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto b_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); - auto c_offsets_device = Buffer(context_, BufferAccess::kReadOnly, batch_count); + auto a_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto b_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); + auto c_offsets_device = Buffer(context_, BufferAccess::kReadWrite, batch_count); a_offsets_device.Write(queue_, batch_count, a_offsets); b_offsets_device.Write(queue_, batch_count, b_offsets); c_offsets_device.Write(queue_, batch_count, c_offsets);