Made event an optional argument in the CLBlast C++ API

This commit is contained in:
Cedric Nugteren 2016-03-30 15:31:45 +02:00 committed by cnugteren
parent 6f561abada
commit 6e5f558746
4 changed files with 48 additions and 51 deletions

View file

@ -92,21 +92,21 @@ template <typename T>
StatusCode Swap(const size_t n, StatusCode Swap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL // Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL
template <typename T> template <typename T>
StatusCode Scal(const size_t n, StatusCode Scal(const size_t n,
const T alpha, const T alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY // Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY
template <typename T> template <typename T>
StatusCode Copy(const size_t n, StatusCode Copy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY // Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY
template <typename T> template <typename T>
@ -114,7 +114,7 @@ StatusCode Axpy(const size_t n,
const T alpha, const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Dot product of two vectors: SDOT/DDOT // Dot product of two vectors: SDOT/DDOT
template <typename T> template <typename T>
@ -122,7 +122,7 @@ StatusCode Dot(const size_t n,
cl_mem dot_buffer, const size_t dot_offset, cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Dot product of two complex vectors: CDOTU/ZDOTU // Dot product of two complex vectors: CDOTU/ZDOTU
template <typename T> template <typename T>
@ -130,7 +130,7 @@ StatusCode Dotu(const size_t n,
cl_mem dot_buffer, const size_t dot_offset, cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC // Dot product of two complex vectors, one conjugated: CDOTC/ZDOTC
template <typename T> template <typename T>
@ -138,7 +138,7 @@ StatusCode Dotc(const size_t n,
cl_mem dot_buffer, const size_t dot_offset, cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// ================================================================================================= // =================================================================================================
// BLAS level-2 (matrix-vector) routines // BLAS level-2 (matrix-vector) routines
@ -153,7 +153,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV // General banded matrix-vector multiplication: SGBMV/DGBMV/CGBMV/ZGBMV
template <typename T> template <typename T>
@ -164,7 +164,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian matrix-vector multiplication: CHEMV/ZHEMV // Hermitian matrix-vector multiplication: CHEMV/ZHEMV
template <typename T> template <typename T>
@ -175,7 +175,7 @@ StatusCode Hemv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV // Hermitian banded matrix-vector multiplication: CHBMV/ZHBMV
template <typename T> template <typename T>
@ -186,7 +186,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV // Hermitian packed matrix-vector multiplication: CHPMV/ZHPMV
template <typename T> template <typename T>
@ -197,7 +197,7 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric matrix-vector multiplication: SSYMV/DSYMV // Symmetric matrix-vector multiplication: SSYMV/DSYMV
template <typename T> template <typename T>
@ -208,7 +208,7 @@ StatusCode Symv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric banded matrix-vector multiplication: SSBMV/DSBMV // Symmetric banded matrix-vector multiplication: SSBMV/DSBMV
template <typename T> template <typename T>
@ -219,7 +219,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric packed matrix-vector multiplication: SSPMV/DSPMV // Symmetric packed matrix-vector multiplication: SSPMV/DSPMV
template <typename T> template <typename T>
@ -230,7 +230,7 @@ StatusCode Spmv(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const T beta, const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc, cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV // Triangular matrix-vector multiplication: STRMV/DTRMV/CTRMV/ZTRMV
template <typename T> template <typename T>
@ -238,7 +238,7 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV // Triangular banded matrix-vector multiplication: STBMV/DTBMV/CTBMV/ZTBMV
template <typename T> template <typename T>
@ -246,7 +246,7 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t k, const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV // Triangular packed matrix-vector multiplication: STPMV/DTPMV/CTPMV/ZTPMV
template <typename T> template <typename T>
@ -254,7 +254,7 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t n,
const cl_mem ap_buffer, const size_t ap_offset, const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV // Solves a triangular system of equations: STRSV/DTRSV/CTRSV/ZTRSV
template <typename T> template <typename T>
@ -262,7 +262,7 @@ StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV // Solves a banded triangular system of equations: STBSV/DTBSV/CTBSV/ZTBSV
template <typename T> template <typename T>
@ -270,7 +270,7 @@ StatusCode Tbsv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t k, const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV // Solves a packed triangular system of equations: STPSV/DTPSV/CTPSV/ZTPSV
template <typename T> template <typename T>
@ -278,7 +278,7 @@ StatusCode Tpsv(const Layout layout, const Triangle triangle, const Transpose a_
const size_t n, const size_t n,
const cl_mem ap_buffer, const size_t ap_offset, const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// General rank-1 matrix update: SGER/DGER // General rank-1 matrix update: SGER/DGER
template <typename T> template <typename T>
@ -288,7 +288,7 @@ StatusCode Ger(const Layout layout,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// General rank-1 complex matrix update: CGERU/ZGERU // General rank-1 complex matrix update: CGERU/ZGERU
template <typename T> template <typename T>
@ -298,7 +298,7 @@ StatusCode Geru(const Layout layout,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// General rank-1 complex conjugated matrix update: CGERC/ZGERC // General rank-1 complex conjugated matrix update: CGERC/ZGERC
template <typename T> template <typename T>
@ -308,7 +308,7 @@ StatusCode Gerc(const Layout layout,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian rank-1 matrix update: CHER/ZHER // Hermitian rank-1 matrix update: CHER/ZHER
template <typename T> template <typename T>
@ -317,7 +317,7 @@ StatusCode Her(const Layout layout, const Triangle triangle,
const T alpha, const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian packed rank-1 matrix update: CHPR/ZHPR // Hermitian packed rank-1 matrix update: CHPR/ZHPR
template <typename T> template <typename T>
@ -326,7 +326,7 @@ StatusCode Hpr(const Layout layout, const Triangle triangle,
const T alpha, const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset, cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian rank-2 matrix update: CHER2/ZHER2 // Hermitian rank-2 matrix update: CHER2/ZHER2
template <typename T> template <typename T>
@ -336,7 +336,7 @@ StatusCode Her2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian packed rank-2 matrix update: CHPR2/ZHPR2 // Hermitian packed rank-2 matrix update: CHPR2/ZHPR2
template <typename T> template <typename T>
@ -346,7 +346,7 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset, cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric rank-1 matrix update: SSYR/DSYR // Symmetric rank-1 matrix update: SSYR/DSYR
template <typename T> template <typename T>
@ -355,7 +355,7 @@ StatusCode Syr(const Layout layout, const Triangle triangle,
const T alpha, const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric packed rank-1 matrix update: SSPR/DSPR // Symmetric packed rank-1 matrix update: SSPR/DSPR
template <typename T> template <typename T>
@ -364,7 +364,7 @@ StatusCode Spr(const Layout layout, const Triangle triangle,
const T alpha, const T alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset, cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric rank-2 matrix update: SSYR2/DSYR2 // Symmetric rank-2 matrix update: SSYR2/DSYR2
template <typename T> template <typename T>
@ -374,7 +374,7 @@ StatusCode Syr2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld, cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric packed rank-2 matrix update: SSPR2/DSPR2 // Symmetric packed rank-2 matrix update: SSPR2/DSPR2
template <typename T> template <typename T>
@ -384,7 +384,7 @@ StatusCode Spr2(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset, cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// ================================================================================================= // =================================================================================================
// BLAS level-3 (matrix-matrix) routines // BLAS level-3 (matrix-matrix) routines
@ -399,7 +399,7 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta, const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM // Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM
template <typename T> template <typename T>
@ -410,7 +410,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta, const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Hermitian matrix-matrix multiplication: CHEMM/ZHEMM // Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
template <typename T> template <typename T>
@ -421,7 +421,7 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta, const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK // Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK
template <typename T> template <typename T>
@ -431,7 +431,7 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
const T beta, const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Rank-K update of a hermitian matrix: CHERK/ZHERK // Rank-K update of a hermitian matrix: CHERK/ZHERK
template <typename T> template <typename T>
@ -441,7 +441,7 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
const T beta, const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K // Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K
template <typename T> template <typename T>
@ -452,7 +452,7 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const T beta, const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Rank-2K update of a hermitian matrix: CHER2K/ZHER2K // Rank-2K update of a hermitian matrix: CHER2K/ZHER2K
template <typename T, typename U> template <typename T, typename U>
@ -463,7 +463,7 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
const U beta, const U beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld, cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM // Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM
template <typename T> template <typename T>
@ -472,7 +472,7 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c
const T alpha, const T alpha,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM // Solves a triangular system of equations: STRSM/DTRSM/CTRSM/ZTRSM
template <typename T> template <typename T>
@ -481,7 +481,7 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
const T alpha, const T alpha,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld, cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event); cl_command_queue* queue, cl_event* event = nullptr);
// ================================================================================================= // =================================================================================================
} // namespace clblast } // namespace clblast

View file

@ -75,9 +75,6 @@ class Event {
// Constructor based on the regular OpenCL data-type // Constructor based on the regular OpenCL data-type
explicit Event(cl_event* event): event_(event) { } explicit Event(cl_event* event): event_(event) { }
// Constructor based on a non-existent event
explicit Event(): event_(nullptr) { }
// Retrieves the elapsed time of the last recorded event. Note that no error checking is done on // Retrieves the elapsed time of the last recorded event. Note that no error checking is done on
// the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation: // the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation:
// http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx

View file

@ -151,7 +151,7 @@ def clblast_h(routines):
result = "" result = ""
for routine in routines: for routine in routines:
result += "\n// "+routine.description+": "+routine.ShortNames()+"\n" result += "\n// "+routine.description+": "+routine.ShortNames()+"\n"
result += routine.RoutineHeaderCPP(12)+";\n" result += routine.RoutineHeaderCPP(12, " = nullptr")+";\n"
return result return result
# The C++ API implementation (.cc) # The C++ API implementation (.cc)
@ -161,9 +161,9 @@ def clblast_cc(routines):
indent1 = " "*(20 + routine.Length()) indent1 = " "*(20 + routine.Length())
result += "\n// "+routine.description+": "+routine.ShortNames()+"\n" result += "\n// "+routine.description+": "+routine.ShortNames()+"\n"
if routine.implemented: if routine.implemented:
result += routine.RoutineHeaderCPP(12)+" {\n" result += routine.RoutineHeaderCPP(12, "")+" {\n"
result += " auto queue_cpp = Queue(*queue);\n" result += " auto queue_cpp = Queue(*queue);\n"
result += " auto event_cpp = Event(*event);\n" result += " auto event_cpp = Event(event);\n"
result += " auto routine = X"+routine.name+"<"+routine.template.template+">(queue_cpp, event_cpp);\n" result += " auto routine = X"+routine.name+"<"+routine.template.template+">(queue_cpp, event_cpp);\n"
result += " auto status = routine.SetUp();\n" result += " auto status = routine.SetUp();\n"
result += " if (status != StatusCode::kSuccess) { return status; }\n" result += " if (status != StatusCode::kSuccess) { return status; }\n"
@ -247,8 +247,8 @@ files = [
path_clblast+"/src/clblast_c.cc", path_clblast+"/src/clblast_c.cc",
path_clblast+"/test/wrapper_clblas.h", path_clblast+"/test/wrapper_clblas.h",
] ]
header_lines = [84, 64, 88, 24, 22] header_lines = [84, 64, 93, 22, 22]
footer_lines = [6, 3, 5, 2, 6] footer_lines = [6, 3, 9, 2, 6]
# Checks whether the command-line arguments are valid; exits otherwise # Checks whether the command-line arguments are valid; exits otherwise
for f in files: for f in files:

View file

@ -308,12 +308,12 @@ class Routine():
# ============================================================================================== # ==============================================================================================
# Retrieves the C++ templated definition for a routine # Retrieves the C++ templated definition for a routine
def RoutineHeaderCPP(self, spaces): def RoutineHeaderCPP(self, spaces, default_event):
indent = " "*(spaces + self.Length()) indent = " "*(spaces + self.Length())
result = "template <"+self.template.name+">\n" result = "template <"+self.template.name+">\n"
result += "StatusCode "+self.name.capitalize()+"(" result += "StatusCode "+self.name.capitalize()+"("
result += (",\n"+indent).join([a for a in self.ArgumentsDef(self.template)]) result += (",\n"+indent).join([a for a in self.ArgumentsDef(self.template)])
result += ",\n"+indent+"cl_command_queue* queue, cl_event* event)" result += ",\n"+indent+"cl_command_queue* queue, cl_event* event"+default_event+")"
return result return result
# As above, but now without variable names # As above, but now without variable names