From 6e5f558746eec09eda6132754649419430a86f41 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 30 Mar 2016 15:31:45 +0200 Subject: [PATCH] Made event an optional argument in the CLBlast C++ API --- include/clblast.h | 82 +++++++++++++++++----------------- include/internal/clpp11.h | 3 -- scripts/generator/generator.py | 10 ++--- scripts/generator/routine.py | 4 +- 4 files changed, 48 insertions(+), 51 deletions(-) diff --git a/include/clblast.h b/include/clblast.h index 70a3b5bc..2d03b096 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -92,21 +92,21 @@ template StatusCode Swap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL template StatusCode Scal(const size_t n, const T alpha, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY template StatusCode Copy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY template @@ -114,7 +114,7 @@ StatusCode Axpy(const size_t n, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Dot product of two vectors: SDOT/DDOT template @@ -122,7 +122,7 @@ StatusCode Dot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Dot product of two complex vectors: CDOTU/ZDOTU template @@ -130,7 +130,7 @@ StatusCode Dotu(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC template @@ -138,7 +138,7 @@ StatusCode Dotc(const size_t n, cl_mem dot_buffer, const size_t dot_offset, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // ================================================================================================= // BLAS level-2 (matrix-vector) routines @@ -153,7 +153,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV template @@ -164,7 +164,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian matrix-vector multiplication: CHEMV/ZHEMV template @@ -175,7 +175,7 @@ StatusCode Hemv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV template @@ -186,7 +186,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV template @@ -197,7 +197,7 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric matrix-vector multiplication: SSYMV/DSYMV template @@ -208,7 +208,7 @@ StatusCode Symv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric banded matrix-vector multiplication: SSBMV/DSBMV template @@ -219,7 +219,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV template @@ -230,7 +230,7 @@ StatusCode Spmv(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const T beta, cl_mem y_buffer, const size_t y_offset, const size_t y_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV template @@ -238,7 +238,7 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV template @@ -246,7 +246,7 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV template @@ -254,7 +254,7 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV template @@ -262,7 +262,7 @@ StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV template @@ -270,7 +270,7 @@ StatusCode Tbsv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const size_t k, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV template @@ -278,7 +278,7 @@ StatusCode Tpsv(const Layout layout, const Triangle triangle, const Transpose a_ const size_t n, const cl_mem ap_buffer, const size_t ap_offset, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // General rank-1 matrix update: SGER/DGER template @@ -288,7 +288,7 @@ StatusCode Ger(const Layout layout, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // General rank-1 complex matrix update: CGERU/ZGERU template @@ -298,7 +298,7 @@ StatusCode Geru(const Layout layout, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // General rank-1 complex conjugated matrix update: CGERC/ZGERC template @@ -308,7 +308,7 @@ StatusCode Gerc(const Layout layout, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian rank-1 matrix update: CHER/ZHER template @@ -317,7 +317,7 @@ StatusCode Her(const Layout layout, const Triangle triangle, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian packed rank-1 matrix update: CHPR/ZHPR template @@ -326,7 +326,7 @@ StatusCode Hpr(const Layout layout, const Triangle triangle, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian rank-2 matrix update: CHER2/ZHER2 template @@ -336,7 +336,7 @@ StatusCode Her2(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 template @@ -346,7 +346,7 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric rank-1 matrix update: SSYR/DSYR template @@ -355,7 +355,7 @@ StatusCode Syr(const Layout layout, const Triangle triangle, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric packed rank-1 matrix update: SSPR/DSPR template @@ -364,7 +364,7 @@ StatusCode Spr(const Layout layout, const Triangle triangle, const T alpha, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem ap_buffer, const size_t ap_offset, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric rank-2 matrix update: SSYR2/DSYR2 template @@ -374,7 +374,7 @@ StatusCode Syr2(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric packed rank-2 matrix update: SSPR2/DSPR2 template @@ -384,7 +384,7 @@ StatusCode Spr2(const Layout layout, const Triangle triangle, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem ap_buffer, const size_t ap_offset, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // ================================================================================================= // BLAS level-3 (matrix-matrix) routines @@ -399,7 +399,7 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM template @@ -410,7 +410,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Hermitian matrix-matrix multiplication: CHEMM/ZHEMM template @@ -421,7 +421,7 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK template @@ -431,7 +431,7 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Rank-K update of a hermitian matrix: CHERK/ZHERK template @@ -441,7 +441,7 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K template @@ -452,7 +452,7 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const T beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Rank-2K update of a hermitian matrix: CHER2K/ZHER2K template @@ -463,7 +463,7 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const U beta, cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM template @@ -472,7 +472,7 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM template @@ -481,7 +481,7 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c const T alpha, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - cl_command_queue* queue, cl_event* event); + cl_command_queue* queue, cl_event* event = nullptr); // ================================================================================================= } // namespace clblast diff --git a/include/internal/clpp11.h b/include/internal/clpp11.h index a705c6b7..aac66396 100644 --- a/include/internal/clpp11.h +++ b/include/internal/clpp11.h @@ -75,9 +75,6 @@ class Event { // Constructor based on the regular OpenCL data-type explicit Event(cl_event* event): event_(event) { } - // Constructor based on a non-existant event - explicit Event(): event_(nullptr) { } - // Retrieves the elapsed time of the last recorded event. Note that no error checking is done on // the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation: // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 8ff5e130..5163b1ca 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -151,7 +151,7 @@ def clblast_h(routines): result = "" for routine in routines: result += "\n// "+routine.description+": "+routine.ShortNames()+"\n" - result += routine.RoutineHeaderCPP(12)+";\n" + result += routine.RoutineHeaderCPP(12, " = nullptr")+";\n" return result # The C++ API implementation (.cc) @@ -161,9 +161,9 @@ def clblast_cc(routines): indent1 = " "*(20 + routine.Length()) result += "\n// "+routine.description+": "+routine.ShortNames()+"\n" if routine.implemented: - result += routine.RoutineHeaderCPP(12)+" {\n" + result += routine.RoutineHeaderCPP(12, "")+" {\n" result += " auto queue_cpp = Queue(*queue);\n" - result += " auto event_cpp = Event(*event);\n" + result += " auto event_cpp = Event(event);\n" result += " auto routine = X"+routine.name+"<"+routine.template.template+">(queue_cpp, event_cpp);\n" result += " auto status = routine.SetUp();\n" result += " if (status != StatusCode::kSuccess) { return status; }\n" @@ -247,8 +247,8 @@ files = [ path_clblast+"/src/clblast_c.cc", path_clblast+"/test/wrapper_clblas.h", ] -header_lines = [84, 64, 88, 24, 22] -footer_lines = [6, 3, 5, 2, 6] +header_lines = [84, 64, 93, 22, 22] +footer_lines = [6, 3, 9, 2, 6] # Checks whether the command-line arguments are valid; exists otherwise for f in files: diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index 60b9fcc5..1086cecc 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -308,12 +308,12 @@ class Routine(): # ============================================================================================== # Retrieves the C++ templated definition for a routine - def RoutineHeaderCPP(self, spaces): + def RoutineHeaderCPP(self, spaces, default_event): indent = " "*(spaces + self.Length()) result = "template <"+self.template.name+">\n" result += "StatusCode "+self.name.capitalize()+"(" result += (",\n"+indent).join([a for a in self.ArgumentsDef(self.template)]) - result += ",\n"+indent+"cl_command_queue* queue, cl_event* event)" + result += ",\n"+indent+"cl_command_queue* queue, cl_event* event"+default_event+")" return result # As above, but now without variable names