Fixed some small issues regarding PR#253
parent
e3384be0d0
commit
bff64917bd
44
doc/api.md
44
doc/api.md
|
@ -2236,50 +2236,6 @@ CLBlastStatusCode CLBlastHgemm(const CLBlastLayout layout, const CLBlastTranspos
|
|||
cl_command_queue* queue, cl_event* event)
|
||||
```
|
||||
|
||||
C API with temporary buffer (the user allocates and passes `temp_buffer`, with the size provided by `xGemmTempBufferSize()`):
|
||||
```
|
||||
CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const float alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const float beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
|
||||
CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const double alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const double beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
|
||||
CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_float2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_float2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
|
||||
CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_double2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_double2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
|
||||
CLBlastStatusCode CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_half alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_half beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
|
||||
```
|
||||
|
||||
Arguments to GEMM:
|
||||
|
||||
* `const Layout layout`: Data-layout of the matrices, either `Layout::kRowMajor` (101) for row-major layout or `Layout::kColMajor` (102) for column-major data-layout.
|
||||
|
|
|
@ -1539,87 +1539,87 @@ CLBlastStatusCode PUBLIC_API CLBlastHgemmStridedBatched(const CLBlastLayout layo
|
|||
// =================================================================================================
|
||||
// General matrix-matrix multiplication with temporary buffer from user (optional, for advanced users): SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM
|
||||
CLBlastStatusCode PUBLIC_API CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const float alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const float beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const float alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const float beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
CLBlastStatusCode PUBLIC_API CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const double alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const double beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const double alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const double beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
CLBlastStatusCode PUBLIC_API CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_float2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_float2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_float2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_float2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
CLBlastStatusCode PUBLIC_API CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_double2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_double2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_double2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_double2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
CLBlastStatusCode PUBLIC_API CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_half alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_half beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_half alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_half beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
|
||||
|
||||
// =================================================================================================
|
||||
// Retrieves the required size of the temporary buffer for the GEMM kernel: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM (optional)
|
||||
CLBlastStatusCode PUBLIC_API CLBlastSGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
|
||||
CLBlastStatusCode PUBLIC_API CLBlastDGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
|
||||
CLBlastStatusCode PUBLIC_API CLBlastCGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
|
||||
CLBlastStatusCode PUBLIC_API CLBlastZGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
|
||||
CLBlastStatusCode PUBLIC_API CLBlastHGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const size_t a_offset, const size_t a_ld,
|
||||
const size_t b_offset, const size_t b_ld,
|
||||
const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue,
|
||||
size_t* temp_buffer_size);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ FILES = [
|
|||
HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 288]
|
||||
FOOTER_LINES = [41, 56, 112, 275, 6, 6, 6, 9, 2, 41, 55, 1]
|
||||
HEADER_LINES_DOC = 0
|
||||
FOOTER_LINES_DOC = 123
|
||||
FOOTER_LINES_DOC = 158
|
||||
|
||||
# Different possibilities for requirements
|
||||
ald_m = "The value of `a_ld` must be at least `m`."
|
||||
|
|
|
@ -4074,14 +4074,14 @@ CLBlastStatusCode CLBlastHgemmStridedBatched(const CLBlastLayout layout, const C
|
|||
|
||||
// GEMM with temporary buffer (optional, for advanced users)
|
||||
CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const float alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const float beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const float alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const float beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
try {
|
||||
return static_cast<CLBlastStatusCode>(
|
||||
clblast::Gemm(static_cast<clblast::Layout>(layout),
|
||||
|
@ -4098,14 +4098,14 @@ CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const C
|
|||
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
|
||||
}
|
||||
CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const double alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const double beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const double alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const double beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
try {
|
||||
return static_cast<CLBlastStatusCode>(
|
||||
clblast::Gemm(static_cast<clblast::Layout>(layout),
|
||||
|
@ -4122,14 +4122,14 @@ CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const C
|
|||
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
|
||||
}
|
||||
CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_float2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_float2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_float2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_float2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
try {
|
||||
return static_cast<CLBlastStatusCode>(
|
||||
clblast::Gemm(static_cast<clblast::Layout>(layout),
|
||||
|
@ -4146,14 +4146,14 @@ CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const C
|
|||
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
|
||||
}
|
||||
CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_double2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_double2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_double2 alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_double2 beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
try {
|
||||
return static_cast<CLBlastStatusCode>(
|
||||
clblast::Gemm(static_cast<clblast::Layout>(layout),
|
||||
|
@ -4170,14 +4170,14 @@ CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const C
|
|||
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
|
||||
}
|
||||
CLBlastStatusCode CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_half alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_half beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const cl_half alpha,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const cl_half beta,
|
||||
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
|
||||
cl_command_queue* queue, cl_event* event,
|
||||
cl_mem temp_buffer) {
|
||||
try {
|
||||
return static_cast<CLBlastStatusCode>(
|
||||
clblast::Gemm(static_cast<clblast::Layout>(layout),
|
||||
|
@ -4343,4 +4343,4 @@ CLBlastStatusCode PUBLIC_API CLBlastOverrideParameters(const cl_device_id device
|
|||
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
// =================================================================================================
|
||||
|
|
Loading…
Reference in New Issue