Added possibility to run the performance client with half-precision

This commit is contained in:
Cedric Nugteren 2016-05-25 14:37:26 +02:00
parent 9f87455070
commit 4612ff3552
31 changed files with 524 additions and 34 deletions

View file

@ -235,9 +235,9 @@ def wrapper_clblas(routines):
if routine.NoScalars():
result += routine.RoutineHeaderWrapperCL(routine.template, True, 21)+";\n"
for flavour in routine.flavours:
indent = " "*(17 + routine.Length())
result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n"
if flavour.precision_name in ["S","D","C","Z"]:
indent = " "*(17 + routine.Length())
result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n"
arguments = routine.ArgumentsWrapperCL(flavour)
if routine.scratch:
result += " auto queue = Queue(queues[0]);\n"
@ -247,7 +247,9 @@ def wrapper_clblas(routines):
result += " return clblas"+flavour.name+routine.name+"("
result += (",\n"+indent).join([a for a in arguments])
result += ",\n"+indent+"num_queues, queues, num_wait_events, wait_events, events);"
result += "\n}\n"
else:
result += " return clblasNotImplemented;"
result += "\n}\n"
return result
# The wrapper to the reference CBLAS routines (for performance/correctness testing)
@ -257,9 +259,9 @@ def wrapper_cblas(routines):
if routine.has_tests:
result += "\n// Forwards the Netlib BLAS calls for %s\n" % (routine.ShortNamesTested())
for flavour in routine.flavours:
indent = " "*(10 + routine.Length())
result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n"
if flavour.precision_name in ["S","D","C","Z"]:
indent = " "*(10 + routine.Length())
result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n"
arguments = routine.ArgumentsWrapperC(flavour)
# Double-precision scalars
@ -293,7 +295,9 @@ def wrapper_cblas(routines):
result += " "+assignment+"cblas_"+flavour.name.lower()+routine.name+postfix+"("
result += (",\n"+indent).join([a for a in arguments])
result += extra_argument+endofline+");"
result += "\n}\n"
else:
result += " return;"
result += "\n}\n"
return result
# ==================================================================================================
@ -402,7 +406,7 @@ for level in [1,2,3]:
body += " case clblast::Precision::k"+PrecisionToFullName(precision)+":"
found = False
for flavour in routine.flavours:
if flavour.precision_name == precision and flavour.precision_name in ["S","D","C","Z"]:
if flavour.precision_name == precision:
body += "\n clblast::RunClient<clblast::TestX"+routine.name+flavour.TestTemplate()
body += ">(argc, argv); break;\n"
found = True

View file

@ -116,6 +116,17 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const GetMetric
// which is thus always displayed (unless silence is specified).
if (!args.silent) { fprintf(stdout, "%s\n", help.c_str()); }
// Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision
if (args.precision == Precision::kHalf) {
if (args.compare_clblas != 0 || args.compare_cblas != 0) {
if (!args.silent) {
fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n");
}
}
args.compare_clblas = 0;
args.compare_cblas = 0;
}
// Returns the arguments
return args;
}
@ -339,6 +350,7 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,
// =================================================================================================
// Compiles the templated class
template class Client<half,half>;
template class Client<float,float>;
template class Client<double,double>;
template class Client<float2,float2>;

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXamax<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXamax<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXasum<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXasum<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXaxpy<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXaxpy<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXcopy<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXcopy<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXdot<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXdot<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXnrm2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXnrm2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXscal<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXscal<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXswap<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXswap<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXgbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXgemv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgemv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXger<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXger<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXspmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXspr<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspr<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXspr2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXspr2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsymv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsymv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsyr<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsyr2<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr2<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXtbmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtbmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXtpmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtpmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXtrmv<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrmv<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXgemm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXgemm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsymm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsymm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsyr2k<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyr2k<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXsyrk<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXsyrk<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXtrmm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrmm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -19,7 +19,8 @@ using double2 = clblast::double2;
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
case clblast::Precision::kHalf:
clblast::RunClient<clblast::TestXtrsm<half>, half, half>(argc, argv); break;
case clblast::Precision::kSingle:
clblast::RunClient<clblast::TestXtrsm<float>, float, float>(argc, argv); break;
case clblast::Precision::kDouble:

View file

@ -161,6 +161,11 @@ void cblasXswap(const size_t n,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
// Half-precision overload of cblasXswap: a deliberate no-op. No CBLAS routine is invoked and
// neither x_buffer nor y_buffer is read or modified.
void cblasXswap(const size_t n,
                std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
void cblasXscal(const size_t n,
@ -193,6 +198,11 @@ void cblasXscal(const size_t n,
alpha_array.data(),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
// Half-precision overload of cblasXscal: a deliberate no-op. The scalar alpha is ignored and
// x_buffer is left exactly as it was passed in.
void cblasXscal(const size_t n,
                const half alpha,
                std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
void cblasXcopy(const size_t n,
@ -223,6 +233,11 @@ void cblasXcopy(const size_t n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
// Half-precision overload of cblasXcopy: a deliberate no-op. Nothing is copied; y_buffer keeps
// its original contents.
void cblasXcopy(const size_t n,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
void cblasXaxpy(const size_t n,
@ -263,6 +278,12 @@ void cblasXaxpy(const size_t n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
// Half-precision overload of cblasXaxpy: a deliberate no-op. alpha and x_buffer are ignored and
// y_buffer is not updated.
void cblasXaxpy(const size_t n,
                const half alpha,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SDOT/DDOT
void cblasXdot(const size_t n,
@ -281,6 +302,12 @@ void cblasXdot(const size_t n,
&x_buffer[x_offset], static_cast<int>(x_inc),
&y_buffer[y_offset], static_cast<int>(y_inc));
}
// Half-precision overload of cblasXdot: a deliberate no-op. No dot product is computed and
// dot_buffer is not written.
void cblasXdot(const size_t n,
               std::vector<half>& dot_buffer, const size_t dot_offset,
               const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
               const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for CDOTU/ZDOTU
void cblasXdotu(const size_t n,
@ -347,6 +374,11 @@ void cblasXnrm2(const size_t n,
nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
// Half-precision overload of cblasXnrm2: a deliberate no-op. No norm is computed and
// nrm2_buffer is not written.
void cblasXnrm2(const size_t n,
                std::vector<half>& nrm2_buffer, const size_t nrm2_offset,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SASUM/DASUM/ScASUM/DzASUM
void cblasXasum(const size_t n,
@ -373,6 +405,11 @@ void cblasXasum(const size_t n,
asum_buffer[asum_offset].real(cblas_dzasum(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
}
// Half-precision overload of cblasXasum: a deliberate no-op. No sum is computed and
// asum_buffer is not written.
void cblasXasum(const size_t n,
                std::vector<half>& asum_buffer, const size_t asum_offset,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
void cblasXamax(const size_t n,
@ -399,6 +436,11 @@ void cblasXamax(const size_t n,
((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n,
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
// Half-precision overload of cblasXamax: a deliberate no-op. No index is computed and
// imax_buffer is not written.
void cblasXamax(const size_t n,
                std::vector<half>& imax_buffer, const size_t imax_offset,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@ -469,6 +511,15 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
beta_array.data(),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
// Half-precision overload of cblasXgemv: a deliberate no-op. All inputs are ignored and
// y_buffer is left unmodified.
void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
                const size_t m, const size_t n,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const half beta,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
@ -535,6 +586,15 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
beta_array.data(),
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
}
// Half-precision overload of cblasXgbmv: a deliberate no-op. All inputs are ignored and
// y_buffer is left unmodified.
void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
                const size_t m, const size_t n, const size_t kl, const size_t ku,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const half beta,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for CHEMV/ZHEMV
void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@ -675,6 +735,15 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
// Half-precision overload of cblasXsymv: a deliberate no-op. All inputs are ignored and
// y_buffer is left unmodified.
void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                const size_t n,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const half beta,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSBMV/DSBMV
void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@ -707,6 +776,15 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
// Half-precision overload of cblasXsbmv: a deliberate no-op. All inputs are ignored and
// y_buffer is left unmodified.
void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                const size_t n, const size_t k,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const half beta,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSPMV/DSPMV
void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@ -739,6 +817,15 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
beta,
&y_buffer[y_offset], static_cast<int>(y_inc));
}
// Half-precision overload of cblasXspmv: a deliberate no-op. All inputs are ignored and
// y_buffer is left unmodified.
void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                const size_t n,
                const half alpha,
                const std::vector<half>& ap_buffer, const size_t ap_offset,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const half beta,
                std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@ -777,6 +864,12 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
// Half-precision overload of cblasXtrmv: a deliberate no-op. a_buffer is ignored and x_buffer
// is left unmodified.
void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
                const size_t n,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@ -815,6 +908,12 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
// Half-precision overload of cblasXtbmv: a deliberate no-op. a_buffer is ignored and x_buffer
// is left unmodified.
void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
                const size_t n, const size_t k,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@ -853,6 +952,12 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
}
// Half-precision overload of cblasXtpmv: a deliberate no-op. ap_buffer is ignored and x_buffer
// is left unmodified.
void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
                const size_t n,
                const std::vector<half>& ap_buffer, const size_t ap_offset,
                std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@ -995,6 +1100,14 @@ void cblasXger(const CBLAS_ORDER layout,
&y_buffer[y_offset], static_cast<int>(y_inc),
&a_buffer[a_offset], a_ld);
}
// Half-precision overload of cblasXger: a deliberate no-op. alpha, x_buffer and y_buffer are
// ignored and a_buffer is left unmodified.
void cblasXger(const CBLAS_ORDER layout,
               const size_t m, const size_t n,
               const half alpha,
               const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
               const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
               std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for CGERU/ZGERU
void cblasXgeru(const CBLAS_ORDER layout,
@ -1187,6 +1300,13 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&x_buffer[x_offset], static_cast<int>(x_inc),
&a_buffer[a_offset], a_ld);
}
// Half-precision overload of cblasXsyr: a deliberate no-op. alpha and x_buffer are ignored and
// a_buffer is left unmodified.
void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
               const size_t n,
               const half alpha,
               const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
               std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSPR/DSPR
void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@ -1211,6 +1331,13 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&x_buffer[x_offset], static_cast<int>(x_inc),
&ap_buffer[ap_offset]);
}
// Half-precision overload of cblasXspr: a deliberate no-op. alpha and x_buffer are ignored and
// ap_buffer is left unmodified.
void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
               const size_t n,
               const half alpha,
               const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
               std::vector<half>& ap_buffer, const size_t ap_offset) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSYR2/DSYR2
void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@ -1239,6 +1366,14 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&y_buffer[y_offset], static_cast<int>(y_inc),
&a_buffer[a_offset], a_ld);
}
// Half-precision overload of cblasXsyr2: a deliberate no-op. alpha, x_buffer and y_buffer are
// ignored and a_buffer is left unmodified.
void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                const size_t n,
                const half alpha,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
                std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSPR2/DSPR2
void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
@ -1267,6 +1402,14 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
&y_buffer[y_offset], static_cast<int>(y_inc),
&ap_buffer[ap_offset]);
}
// Half-precision overload of cblasXspr2: a deliberate no-op. alpha, x_buffer and y_buffer are
// ignored and ap_buffer is left unmodified.
void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                const size_t n,
                const half alpha,
                const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
                const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
                std::vector<half>& ap_buffer, const size_t ap_offset) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@ -1337,6 +1480,15 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
// Half-precision overload of cblasXgemm: a deliberate no-op. All inputs are ignored and
// c_buffer is left unmodified.
void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
                const size_t m, const size_t n, const size_t k,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
                const half beta,
                std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
  // Intentionally empty: there is nothing to forward to for this precision.
}
// Forwards the Netlib BLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
@ -1403,6 +1555,15 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
// Half-precision overload of cblasXsymm: the body is deliberately empty, so no Netlib BLAS routine
// is invoked and 'c_buffer' is left exactly as the caller passed it.
void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
                const size_t m, const size_t n,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
                const half beta,
                std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
  // Intentionally empty: nothing is computed for the half-precision type
}
// Forwards the Netlib BLAS calls for CHEMM/ZHEMM
void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
@ -1497,6 +1658,14 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
// Half-precision overload of cblasXsyrk: the body is deliberately empty, so no Netlib BLAS routine
// is invoked and 'c_buffer' is left exactly as the caller passed it.
void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                const CBLAS_TRANSPOSE a_transpose,
                const size_t n, const size_t k,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                const half beta,
                std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
  // Intentionally empty: nothing is computed for the half-precision type
}
// Forwards the Netlib BLAS calls for CHERK/ZHERK
void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
@ -1591,6 +1760,15 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
beta_array.data(),
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
}
// Half-precision overload of cblasXsyr2k: the body is deliberately empty, so no Netlib BLAS routine
// is invoked and 'c_buffer' is left exactly as the caller passed it.
void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
                 const CBLAS_TRANSPOSE ab_transpose,
                 const size_t n, const size_t k,
                 const half alpha,
                 const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                 const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
                 const half beta,
                 std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
  // Intentionally empty: nothing is computed for the half-precision type
}
// Forwards the Netlib BLAS calls for CHER2K/ZHER2K
void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
@ -1673,6 +1851,13 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
}
// Half-precision overload of cblasXtrmm: the body is deliberately empty, so no Netlib BLAS routine
// is invoked and 'b_buffer' is left exactly as the caller passed it.
void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
                const size_t m, const size_t n,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
  // Intentionally empty: nothing is computed for the half-precision type
}
// Forwards the Netlib BLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
@ -1721,6 +1906,13 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
}
// Half-precision overload of cblasXtrsm: the body is deliberately empty, so no Netlib BLAS routine
// is invoked and 'b_buffer' is left exactly as the caller passed it.
void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
                const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
                const size_t m, const size_t n,
                const half alpha,
                const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
                std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
  // Intentionally empty: nothing is computed for the half-precision type
}
// =================================================================================================
} // namespace clblast

View file

@ -223,6 +223,14 @@ clblasStatus clblasXswap<double2>(const size_t n,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXswap: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXswap<half>(const size_t n,
                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
clblasStatus clblasXscal(const size_t n,
@ -265,6 +273,13 @@ clblasStatus clblasXscal(const size_t n,
x_buffer, x_offset, static_cast<int>(x_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXscal: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXscal(const size_t n, const half alpha,
                         cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
template <typename T>
@ -317,6 +332,14 @@ clblasStatus clblasXcopy<double2>(const size_t n,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXcopy: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXcopy<half>(const size_t n,
                               const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
clblasStatus clblasXaxpy(const size_t n,
@ -367,6 +390,14 @@ clblasStatus clblasXaxpy(const size_t n,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXaxpy: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXaxpy(const size_t n, const half alpha,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SDOT/DDOT
template <typename T>
@ -410,6 +441,15 @@ clblasStatus clblasXdot<double>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXdot: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXdot<half>(const size_t n,
                              cl_mem dot_buffer, const size_t dot_offset,
                              const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                              const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                              cl_uint num_queues, cl_command_queue *queues,
                              cl_uint num_wait_events, const cl_event *wait_events,
                              cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for CDOTU/ZDOTU
template <typename T>
@ -564,6 +604,14 @@ clblasStatus clblasXnrm2<double2>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXnrm2: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXnrm2<half>(const size_t n,
                               cl_mem nrm2_buffer, const size_t nrm2_offset,
                               const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM
template <typename T>
@ -632,6 +680,14 @@ clblasStatus clblasXasum<double2>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXasum: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXasum<half>(const size_t n,
                               cl_mem asum_buffer, const size_t asum_offset,
                               const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
template <typename T>
@ -700,6 +756,14 @@ clblasStatus clblasXamax<double2>(const size_t n,
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXamax: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXamax<half>(const size_t n,
                               cl_mem imax_buffer, const size_t imax_offset,
                               const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
@ -778,6 +842,17 @@ clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_trans
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXgemv: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
                         const size_t m, const size_t n,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const half beta,
                         cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
@ -852,6 +927,17 @@ clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_trans
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXgbmv: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
                         const size_t m, const size_t n,
                         const size_t kl, const size_t ku,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const half beta,
                         cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for CHEMV/ZHEMV
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
@ -1004,6 +1090,17 @@ clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsymv: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
                         const size_t n, const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const half beta,
                         cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSBMV/DSBMV
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
@ -1042,6 +1139,17 @@ clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsbmv: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
                         const size_t n, const size_t k,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const half beta,
                         cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSPMV/DSPMV
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
@ -1080,6 +1188,17 @@ clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
y_buffer, y_offset, static_cast<int>(y_inc),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXspmv: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
                         const size_t n, const half alpha,
                         const cl_mem ap_buffer, const size_t ap_offset,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const half beta,
                         cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
template <typename T>
@ -1157,6 +1276,15 @@ clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo tri
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXtrmv: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXtrmv<half>(const clblasOrder layout, const clblasUplo triangle,
                               const clblasTranspose a_transpose, const clblasDiag diagonal,
                               const size_t n,
                               const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
template <typename T>
@ -1234,6 +1362,15 @@ clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo tri
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXtbmv: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXtbmv<half>(const clblasOrder layout, const clblasUplo triangle,
                               const clblasTranspose a_transpose, const clblasDiag diagonal,
                               const size_t n, const size_t k,
                               const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
template <typename T>
@ -1311,6 +1448,15 @@ clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo tri
scratch_buffer(),
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision specialization of clblasXtpmv: forwards nothing to clBLAS and unconditionally
// reports clblasNotImplemented; none of the arguments are used.
template <>
clblasStatus clblasXtpmv<half>(const clblasOrder layout, const clblasUplo triangle,
                               const clblasTranspose a_transpose, const clblasDiag diagonal,
                               const size_t n,
                               const cl_mem ap_buffer, const size_t ap_offset,
                               cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                               cl_uint num_queues, cl_command_queue *queues,
                               cl_uint num_wait_events, const cl_event *wait_events,
                               cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
template <typename T>
@ -1528,6 +1674,16 @@ clblasStatus clblasXger(const clblasOrder layout,
a_buffer, a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXger: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXger(const clblasOrder layout,
                        const size_t m, const size_t n,
                        const half alpha,
                        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                        const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                        cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                        cl_uint num_queues, cl_command_queue *queues,
                        cl_uint num_wait_events, const cl_event *wait_events,
                        cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for CGERU/ZGERU
clblasStatus clblasXgeru(const clblasOrder layout,
@ -1754,6 +1910,15 @@ clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
a_buffer, a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsyr: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
                        const size_t n, const half alpha,
                        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                        cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                        cl_uint num_queues, cl_command_queue *queues,
                        cl_uint num_wait_events, const cl_event *wait_events,
                        cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSPR/DSPR
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
@ -1784,6 +1949,15 @@ clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
ap_buffer, ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXspr: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
                        const size_t n, const half alpha,
                        const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                        cl_mem ap_buffer, const size_t ap_offset,
                        cl_uint num_queues, cl_command_queue *queues,
                        cl_uint num_wait_events, const cl_event *wait_events,
                        cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSYR2/DSYR2
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
@ -1818,6 +1992,16 @@ clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
a_buffer, a_offset, a_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsyr2: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
                         const size_t n, const half alpha,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSPR2/DSPR2
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
@ -1852,6 +2036,16 @@ clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
ap_buffer, ap_offset,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXspr2: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
                         const size_t n, const half alpha,
                         const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
                         const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
                         cl_mem ap_buffer, const size_t ap_offset,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
@ -1930,6 +2124,17 @@ clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_trans
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXgemm: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXgemm(const clblasOrder layout,
                         const clblasTranspose a_transpose, const clblasTranspose b_transpose,
                         const size_t m, const size_t n, const size_t k,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                         const half beta,
                         cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
@ -2004,6 +2209,17 @@ clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsymm: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side,
                         const clblasUplo triangle,
                         const size_t m, const size_t n,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                         const half beta,
                         cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for CHEMM/ZHEMM
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
@ -2108,6 +2324,16 @@ clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, co
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsyrk: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle,
                         const clblasTranspose a_transpose,
                         const size_t n, const size_t k,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         const half beta,
                         cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for CHERK/ZHERK
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
@ -2216,6 +2442,17 @@ clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, c
c_buffer, c_offset, c_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXsyr2k: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle,
                          const clblasTranspose ab_transpose,
                          const size_t n, const size_t k,
                          const half alpha,
                          const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                          const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                          const half beta,
                          cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
                          cl_uint num_queues, cl_command_queue *queues,
                          cl_uint num_wait_events, const cl_event *wait_events,
                          cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for CHER2K/ZHER2K
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
@ -2312,6 +2549,15 @@ clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const
b_buffer, b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXtrmm: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side,
                         const clblasUplo triangle, const clblasTranspose a_transpose,
                         const clblasDiag diagonal,
                         const size_t m, const size_t n,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
@ -2370,6 +2616,15 @@ clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const
b_buffer, b_offset, b_ld,
num_queues, queues, num_wait_events, wait_events, events);
}
// Half-precision overload of clblasXtrsm: forwards nothing to clBLAS and unconditionally reports
// clblasNotImplemented; none of the arguments are used.
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side,
                         const clblasUplo triangle, const clblasTranspose a_transpose,
                         const clblasDiag diagonal,
                         const size_t m, const size_t n,
                         const half alpha,
                         const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
                         cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
                         cl_uint num_queues, cl_command_queue *queues,
                         cl_uint num_wait_events, const cl_event *wait_events,
                         cl_event *events) {
  return clblasNotImplemented;
}
// =================================================================================================
} // namespace clblast