Made event an optional argument in the CLBlast C++ API

This commit is contained in:
Cedric Nugteren 2016-03-30 15:31:45 +02:00 committed by cnugteren
parent 6f561abada
commit 6e5f558746
4 changed files with 48 additions and 51 deletions

View file

@ -92,21 +92,21 @@ template <typename T>
StatusCode Swap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL
template <typename T>
StatusCode Scal(const size_t n,
const T alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY
template <typename T>
StatusCode Copy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY
template <typename T>
@ -114,7 +114,7 @@ StatusCode Axpy(const size_t n,
const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Dot product of two vectors: SDOT/DDOT
template <typename T>
@ -122,7 +122,7 @@ StatusCode Dot(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Dot product of two complex vectors: CDOTU/ZDOTU
template <typename T>
@ -130,7 +130,7 @@ StatusCode Dotu(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC
template <typename T>
@ -138,7 +138,7 @@ StatusCode Dotc(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@ -153,7 +153,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV
template <typename T>
@ -164,7 +164,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian matrix-vector multiplication: CHEMV/ZHEMV
template <typename T>
@ -175,7 +175,7 @@ StatusCode Hemv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV
template <typename T>
@ -186,7 +186,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
template <typename T>
@ -197,7 +197,7 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric matrix-vector multiplication: SSYMV/DSYMV
template <typename T>
@ -208,7 +208,7 @@ StatusCode Symv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric banded matrix-vector multiplication: SSBMV/DSBMV
template <typename T>
@ -219,7 +219,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric packed matrix-vector multiplication: SSPMV/DSPMV
template <typename T>
@ -230,7 +230,7 @@ StatusCode Spmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV
template <typename T>
@ -238,7 +238,7 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV
template <typename T>
@ -246,7 +246,7 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV
template <typename T>
@ -254,7 +254,7 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV
template <typename T>
@ -262,7 +262,7 @@ StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV
template <typename T>
@ -270,7 +270,7 @@ StatusCode Tbsv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV
template <typename T>
@ -278,7 +278,7 @@ StatusCode Tpsv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// General rank-1 matrix update: SGER/DGER
template <typename T>
@ -288,7 +288,7 @@ StatusCode Ger(const Layout layout,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// General rank-1 complex matrix update: CGERU/ZGERU
template <typename T>
@ -298,7 +298,7 @@ StatusCode Geru(const Layout layout,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// General rank-1 complex conjugated matrix update: CGERC/ZGERC
template <typename T>
@ -308,7 +308,7 @@ StatusCode Gerc(const Layout layout,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian rank-1 matrix update: CHER/ZHER
template <typename T>
@ -317,7 +317,7 @@ StatusCode Her(const Layout layout, const Triangle triangle,
const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian packed rank-1 matrix update: CHPR/ZHPR
template <typename T>
@ -326,7 +326,7 @@ StatusCode Hpr(const Layout layout, const Triangle triangle,
const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian rank-2 matrix update: CHER2/ZHER2
template <typename T>
@ -336,7 +336,7 @@ StatusCode Her2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian packed rank-2 matrix update: CHPR2/ZHPR2
template <typename T>
@ -346,7 +346,7 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric rank-1 matrix update: SSYR/DSYR
template <typename T>
@ -355,7 +355,7 @@ StatusCode Syr(const Layout layout, const Triangle triangle,
const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric packed rank-1 matrix update: SSPR/DSPR
template <typename T>
@ -364,7 +364,7 @@ StatusCode Spr(const Layout layout, const Triangle triangle,
const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric rank-2 matrix update: SSYR2/DSYR2
template <typename T>
@ -374,7 +374,7 @@ StatusCode Syr2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric packed rank-2 matrix update: SSPR2/DSPR2
template <typename T>
@ -384,7 +384,7 @@ StatusCode Spr2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@ -399,7 +399,7 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM
template <typename T>
@ -410,7 +410,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
template <typename T>
@ -421,7 +421,7 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK
template <typename T>
@ -431,7 +431,7 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Rank-K update of a Hermitian matrix: CHERK/ZHERK
template <typename T>
@ -441,7 +441,7 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K
template <typename T>
@ -452,7 +452,7 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Rank-2K update of a Hermitian matrix: CHER2K/ZHER2K
template <typename T, typename U>
@ -463,7 +463,7 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const U beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM
template <typename T>
@ -472,7 +472,7 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c
const T alpha,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM
template <typename T>
@ -481,7 +481,7 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
const T alpha,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event);
cl_command_queue* queue, cl_event* event = nullptr);
// =================================================================================================
} // namespace clblast

View file

@ -75,9 +75,6 @@ class Event {
// Constructor based on the regular OpenCL data-type
explicit Event(cl_event* event): event_(event) { }
// Constructor based on a non-existent event
explicit Event(): event_(nullptr) { }
// Retrieves the elapsed time of the last recorded event. Note that no error checking is done on
// the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation:
// http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx

View file

@ -151,7 +151,7 @@ def clblast_h(routines):
result = ""
for routine in routines:
result += "\n// "+routine.description+": "+routine.ShortNames()+"\n"
result += routine.RoutineHeaderCPP(12)+";\n"
result += routine.RoutineHeaderCPP(12, " = nullptr")+";\n"
return result
# The C++ API implementation (.cc)
@ -161,9 +161,9 @@ def clblast_cc(routines):
indent1 = " "*(20 + routine.Length())
result += "\n// "+routine.description+": "+routine.ShortNames()+"\n"
if routine.implemented:
result += routine.RoutineHeaderCPP(12)+" {\n"
result += routine.RoutineHeaderCPP(12, "")+" {\n"
result += " auto queue_cpp = Queue(*queue);\n"
result += " auto event_cpp = Event(*event);\n"
result += " auto event_cpp = Event(event);\n"
result += " auto routine = X"+routine.name+"<"+routine.template.template+">(queue_cpp, event_cpp);\n"
result += " auto status = routine.SetUp();\n"
result += " if (status != StatusCode::kSuccess) { return status; }\n"
@ -247,8 +247,8 @@ files = [
path_clblast+"/src/clblast_c.cc",
path_clblast+"/test/wrapper_clblas.h",
]
header_lines = [84, 64, 88, 24, 22]
footer_lines = [6, 3, 5, 2, 6]
header_lines = [84, 64, 93, 22, 22]
footer_lines = [6, 3, 9, 2, 6]
# Checks whether the command-line arguments are valid; exits otherwise
for f in files:

View file

@ -308,12 +308,12 @@ class Routine():
# ==============================================================================================
# Retrieves the C++ templated definition for a routine
def RoutineHeaderCPP(self, spaces):
def RoutineHeaderCPP(self, spaces, default_event):
indent = " "*(spaces + self.Length())
result = "template <"+self.template.name+">\n"
result += "StatusCode "+self.name.capitalize()+"("
result += (",\n"+indent).join([a for a in self.ArgumentsDef(self.template)])
result += ",\n"+indent+"cl_command_queue* queue, cl_event* event)"
result += ",\n"+indent+"cl_command_queue* queue, cl_event* event"+default_event+")"
return result
# As above, but now without variable names