mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-07 12:23:46 +02:00
Added possibility to run the performance client with half-precision
This commit is contained in:
parent
9f87455070
commit
4612ff3552
|
@ -235,9 +235,9 @@ def wrapper_clblas(routines):
|
|||
if routine.NoScalars():
|
||||
result += routine.RoutineHeaderWrapperCL(routine.template, True, 21)+";\n"
|
||||
for flavour in routine.flavours:
|
||||
indent = " "*(17 + routine.Length())
|
||||
result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n"
|
||||
if flavour.precision_name in ["S","D","C","Z"]:
|
||||
indent = " "*(17 + routine.Length())
|
||||
result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n"
|
||||
arguments = routine.ArgumentsWrapperCL(flavour)
|
||||
if routine.scratch:
|
||||
result += " auto queue = Queue(queues[0]);\n"
|
||||
|
@ -247,7 +247,9 @@ def wrapper_clblas(routines):
|
|||
result += " return clblas"+flavour.name+routine.name+"("
|
||||
result += (",\n"+indent).join([a for a in arguments])
|
||||
result += ",\n"+indent+"num_queues, queues, num_wait_events, wait_events, events);"
|
||||
result += "\n}\n"
|
||||
else:
|
||||
result += " return clblasNotImplemented;"
|
||||
result += "\n}\n"
|
||||
return result
|
||||
|
||||
# The wrapper to the reference CBLAS routines (for performance/correctness testing)
|
||||
|
@ -257,9 +259,9 @@ def wrapper_cblas(routines):
|
|||
if routine.has_tests:
|
||||
result += "\n// Forwards the Netlib BLAS calls for %s\n" % (routine.ShortNamesTested())
|
||||
for flavour in routine.flavours:
|
||||
indent = " "*(10 + routine.Length())
|
||||
result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n"
|
||||
if flavour.precision_name in ["S","D","C","Z"]:
|
||||
indent = " "*(10 + routine.Length())
|
||||
result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n"
|
||||
arguments = routine.ArgumentsWrapperC(flavour)
|
||||
|
||||
# Double-precision scalars
|
||||
|
@ -293,7 +295,9 @@ def wrapper_cblas(routines):
|
|||
result += " "+assignment+"cblas_"+flavour.name.lower()+routine.name+postfix+"("
|
||||
result += (",\n"+indent).join([a for a in arguments])
|
||||
result += extra_argument+endofline+");"
|
||||
result += "\n}\n"
|
||||
else:
|
||||
result += " return;"
|
||||
result += "\n}\n"
|
||||
return result
|
||||
|
||||
# ==================================================================================================
|
||||
|
@ -402,7 +406,7 @@ for level in [1,2,3]:
|
|||
body += " case clblast::Precision::k"+PrecisionToFullName(precision)+":"
|
||||
found = False
|
||||
for flavour in routine.flavours:
|
||||
if flavour.precision_name == precision and flavour.precision_name in ["S","D","C","Z"]:
|
||||
if flavour.precision_name == precision:
|
||||
body += "\n clblast::RunClient<clblast::TestX"+routine.name+flavour.TestTemplate()
|
||||
body += ">(argc, argv); break;\n"
|
||||
found = True
|
||||
|
|
|
@ -116,6 +116,17 @@ Arguments<U> Client<T,U>::ParseArguments(int argc, char *argv[], const GetMetric
|
|||
// which is thus always displayed (unless silence is specified).
|
||||
if (!args.silent) { fprintf(stdout, "%s\n", help.c_str()); }
|
||||
|
||||
// Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision
|
||||
if (args.precision == Precision::kHalf) {
|
||||
if (args.compare_clblas != 0 || args.compare_cblas != 0) {
|
||||
if (!args.silent) {
|
||||
fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n");
|
||||
}
|
||||
}
|
||||
args.compare_clblas = 0;
|
||||
args.compare_cblas = 0;
|
||||
}
|
||||
|
||||
// Returns the arguments
|
||||
return args;
|
||||
}
|
||||
|
@ -339,6 +350,7 @@ void Client<T,U>::PrintTableRow(const Arguments<U>& args,
|
|||
// =================================================================================================
|
||||
|
||||
// Compiles the templated class
|
||||
template class Client<half,half>;
|
||||
template class Client<float,float>;
|
||||
template class Client<double,double>;
|
||||
template class Client<float2,float2>;
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXamax<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXamax<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXasum<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXasum<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXaxpy<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXaxpy<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXcopy<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXcopy<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXdot<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXdot<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXnrm2<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXnrm2<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXscal<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXscal<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXswap<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXswap<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXgbmv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXgbmv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXgemv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXgemv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXger<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXger<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsbmv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsbmv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXspmv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXspmv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXspr<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXspr<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXspr2<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXspr2<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsymv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsymv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsyr<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsyr<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsyr2<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsyr2<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXtbmv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXtbmv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXtpmv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXtpmv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXtrmv<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXtrmv<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXgemm<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXgemm<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsymm<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsymm<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsyr2k<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsyr2k<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXsyrk<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXsyrk<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXtrmm<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXtrmm<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -19,7 +19,8 @@ using double2 = clblast::double2;
|
|||
// Main function (not within the clblast namespace)
|
||||
int main(int argc, char *argv[]) {
|
||||
switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) {
|
||||
case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
|
||||
case clblast::Precision::kHalf:
|
||||
clblast::RunClient<clblast::TestXtrsm<half>, half, half>(argc, argv); break;
|
||||
case clblast::Precision::kSingle:
|
||||
clblast::RunClient<clblast::TestXtrsm<float>, float, float>(argc, argv); break;
|
||||
case clblast::Precision::kDouble:
|
||||
|
|
|
@ -161,6 +161,11 @@ void cblasXswap(const size_t n,
|
|||
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
|
||||
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXswap: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and both buffers are left untouched.
void cblasXswap(const size_t n,
|
||||
std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
|
||||
void cblasXscal(const size_t n,
|
||||
|
@ -193,6 +198,11 @@ void cblasXscal(const size_t n,
|
|||
alpha_array.data(),
|
||||
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXscal: a placeholder that makes no CPU BLAS call.
// The scalar 'alpha' is ignored and 'x_buffer' is left untouched.
void cblasXscal(const size_t n,
|
||||
const half alpha,
|
||||
std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
|
||||
void cblasXcopy(const size_t n,
|
||||
|
@ -223,6 +233,11 @@ void cblasXcopy(const size_t n,
|
|||
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
|
||||
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXcopy: a placeholder that makes no CPU BLAS call.
// Nothing is copied; 'y_buffer' is left untouched.
void cblasXcopy(const size_t n,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
|
||||
void cblasXaxpy(const size_t n,
|
||||
|
@ -263,6 +278,12 @@ void cblasXaxpy(const size_t n,
|
|||
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc),
|
||||
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXaxpy: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'y_buffer' is left untouched.
void cblasXaxpy(const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SDOT/DDOT
|
||||
void cblasXdot(const size_t n,
|
||||
|
@ -281,6 +302,12 @@ void cblasXdot(const size_t n,
|
|||
&x_buffer[x_offset], static_cast<int>(x_inc),
|
||||
&y_buffer[y_offset], static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXdot: a placeholder that makes no CPU BLAS call.
// No result is written; 'dot_buffer' is left untouched.
void cblasXdot(const size_t n,
|
||||
std::vector<half>& dot_buffer, const size_t dot_offset,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for CDOTU/ZDOTU
|
||||
void cblasXdotu(const size_t n,
|
||||
|
@ -347,6 +374,11 @@ void cblasXnrm2(const size_t n,
|
|||
nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n,
|
||||
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
|
||||
}
|
||||
// Half-precision overload of cblasXnrm2: a placeholder that makes no CPU BLAS call.
// No result is written; 'nrm2_buffer' is left untouched.
void cblasXnrm2(const size_t n,
|
||||
std::vector<half>& nrm2_buffer, const size_t nrm2_offset,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SASUM/DASUM/ScASUM/DzASUM
|
||||
void cblasXasum(const size_t n,
|
||||
|
@ -373,6 +405,11 @@ void cblasXasum(const size_t n,
|
|||
asum_buffer[asum_offset].real(cblas_dzasum(n,
|
||||
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)));
|
||||
}
|
||||
// Half-precision overload of cblasXasum: a placeholder that makes no CPU BLAS call.
// No result is written; 'asum_buffer' is left untouched.
void cblasXasum(const size_t n,
|
||||
std::vector<half>& asum_buffer, const size_t asum_offset,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
|
||||
void cblasXamax(const size_t n,
|
||||
|
@ -399,6 +436,11 @@ void cblasXamax(const size_t n,
|
|||
((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n,
|
||||
reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXamax: a placeholder that makes no CPU BLAS call.
// No index is written; 'imax_buffer' is left untouched.
void cblasXamax(const size_t n,
|
||||
std::vector<half>& imax_buffer, const size_t imax_offset,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-2 (matrix-vector) routines
|
||||
|
@ -469,6 +511,15 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
|
|||
beta_array.data(),
|
||||
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXgemv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'y_buffer' is left untouched.
void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
|
||||
const size_t m, const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const half beta,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
|
||||
void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
|
||||
|
@ -535,6 +586,15 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
|
|||
beta_array.data(),
|
||||
reinterpret_cast<double*>(&y_buffer[y_offset]), static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXgbmv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'y_buffer' is left untouched.
void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose,
|
||||
const size_t m, const size_t n, const size_t kl, const size_t ku,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const half beta,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for CHEMV/ZHEMV
|
||||
void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
|
@ -675,6 +735,15 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
beta,
|
||||
&y_buffer[y_offset], static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXsymv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'y_buffer' is left untouched.
void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const half beta,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSBMV/DSBMV
|
||||
void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
|
@ -707,6 +776,15 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
beta,
|
||||
&y_buffer[y_offset], static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXsbmv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'y_buffer' is left untouched.
void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n, const size_t k,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const half beta,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSPMV/DSPMV
|
||||
void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
|
@ -739,6 +817,15 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
beta,
|
||||
&y_buffer[y_offset], static_cast<int>(y_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXspmv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'y_buffer' is left untouched.
void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& ap_buffer, const size_t ap_offset,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const half beta,
|
||||
std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
|
||||
void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
|
@ -777,6 +864,12 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
|
|||
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
|
||||
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXtrmv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'x_buffer' is left untouched.
void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
const size_t n,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
|
||||
void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
|
@ -815,6 +908,12 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
|
|||
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
|
||||
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXtbmv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'x_buffer' is left untouched.
void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
const size_t n, const size_t k,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
|
||||
void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
|
@ -853,6 +952,12 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
|
|||
reinterpret_cast<const double*>(&ap_buffer[ap_offset]),
|
||||
reinterpret_cast<double*>(&x_buffer[x_offset]), static_cast<int>(x_inc));
|
||||
}
|
||||
// Half-precision overload of cblasXtpmv: a placeholder that makes no CPU BLAS call.
// All arguments are ignored and 'x_buffer' is left untouched.
void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
const size_t n,
|
||||
const std::vector<half>& ap_buffer, const size_t ap_offset,
|
||||
std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
|
||||
void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
|
@ -995,6 +1100,14 @@ void cblasXger(const CBLAS_ORDER layout,
|
|||
&y_buffer[y_offset], static_cast<int>(y_inc),
|
||||
&a_buffer[a_offset], a_ld);
|
||||
}
|
||||
void cblasXger(const CBLAS_ORDER layout,
|
||||
const size_t m, const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for CGERU/ZGERU
|
||||
void cblasXgeru(const CBLAS_ORDER layout,
|
||||
|
@ -1187,6 +1300,13 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
&x_buffer[x_offset], static_cast<int>(x_inc),
|
||||
&a_buffer[a_offset], a_ld);
|
||||
}
|
||||
void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSPR/DSPR
|
||||
void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
|
@ -1211,6 +1331,13 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
&x_buffer[x_offset], static_cast<int>(x_inc),
|
||||
&ap_buffer[ap_offset]);
|
||||
}
|
||||
void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
std::vector<half>& ap_buffer, const size_t ap_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSYR2/DSYR2
|
||||
void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
|
@ -1239,6 +1366,14 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
&y_buffer[y_offset], static_cast<int>(y_inc),
|
||||
&a_buffer[a_offset], a_ld);
|
||||
}
|
||||
void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSPR2/DSPR2
|
||||
void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
|
@ -1267,6 +1402,14 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
|||
&y_buffer[y_offset], static_cast<int>(y_inc),
|
||||
&ap_buffer[ap_offset]);
|
||||
}
|
||||
void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle,
|
||||
const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const std::vector<half>& y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
std::vector<half>& ap_buffer, const size_t ap_offset) {
|
||||
return;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-3 (matrix-matrix) routines
|
||||
|
@ -1337,6 +1480,15 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con
|
|||
beta_array.data(),
|
||||
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
|
||||
}
|
||||
void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose,
|
||||
const size_t m, const size_t n, const size_t k,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const half beta,
|
||||
std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
|
||||
void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
|
||||
|
@ -1403,6 +1555,15 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
|
|||
beta_array.data(),
|
||||
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
|
||||
}
|
||||
void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
|
||||
const size_t m, const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const half beta,
|
||||
std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for CHEMM/ZHEMM
|
||||
void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle,
|
||||
|
@ -1497,6 +1658,14 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS
|
|||
beta_array.data(),
|
||||
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
|
||||
}
|
||||
void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
|
||||
const size_t n, const size_t k,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const half beta,
|
||||
std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for CHERK/ZHERK
|
||||
void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose,
|
||||
|
@ -1591,6 +1760,15 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA
|
|||
beta_array.data(),
|
||||
reinterpret_cast<double*>(&c_buffer[c_offset]), c_ld);
|
||||
}
|
||||
void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
|
||||
const size_t n, const size_t k,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
const std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld,
|
||||
const half beta,
|
||||
std::vector<half>& c_buffer, const size_t c_offset, const size_t c_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for CHER2K/ZHER2K
|
||||
void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose,
|
||||
|
@ -1673,6 +1851,13 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
|
|||
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
|
||||
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
|
||||
}
|
||||
void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
const size_t m, const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Forwards the Netlib BLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
|
||||
void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
|
@ -1721,6 +1906,13 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL
|
|||
reinterpret_cast<const double*>(&a_buffer[a_offset]), a_ld,
|
||||
reinterpret_cast<double*>(&b_buffer[b_offset]), b_ld);
|
||||
}
|
||||
void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal,
|
||||
const size_t m, const size_t n,
|
||||
const half alpha,
|
||||
const std::vector<half>& a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
std::vector<half>& b_buffer, const size_t b_offset, const size_t b_ld) {
|
||||
return;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace clblast
|
||||
|
|
|
@ -223,6 +223,14 @@ clblasStatus clblasXswap<double2>(const size_t n,
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXswap<half>(const size_t n,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL
|
||||
clblasStatus clblasXscal(const size_t n,
|
||||
|
@ -265,6 +273,13 @@ clblasStatus clblasXscal(const size_t n,
|
|||
x_buffer, x_offset, static_cast<int>(x_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXscal. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXscal(const size_t n,
    const half alpha,
    cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY
|
||||
template <typename T>
|
||||
|
@ -317,6 +332,14 @@ clblasStatus clblasXcopy<double2>(const size_t n,
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXcopy<half>(const size_t n,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY
|
||||
clblasStatus clblasXaxpy(const size_t n,
|
||||
|
@ -367,6 +390,14 @@ clblasStatus clblasXaxpy(const size_t n,
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXaxpy. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXaxpy(const size_t n,
    const half alpha,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SDOT/DDOT
|
||||
template <typename T>
|
||||
|
@ -410,6 +441,15 @@ clblasStatus clblasXdot<double>(const size_t n,
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXdot<half>(const size_t n,
|
||||
cl_mem dot_buffer, const size_t dot_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for CDOTU/ZDOTU
|
||||
template <typename T>
|
||||
|
@ -564,6 +604,14 @@ clblasStatus clblasXnrm2<double2>(const size_t n,
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXnrm2<half>(const size_t n,
|
||||
cl_mem nrm2_buffer, const size_t nrm2_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM
|
||||
template <typename T>
|
||||
|
@ -632,6 +680,14 @@ clblasStatus clblasXasum<double2>(const size_t n,
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXasum<half>(const size_t n,
|
||||
cl_mem asum_buffer, const size_t asum_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
|
||||
template <typename T>
|
||||
|
@ -700,6 +756,14 @@ clblasStatus clblasXamax<double2>(const size_t n,
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXamax<half>(const size_t n,
|
||||
cl_mem imax_buffer, const size_t imax_offset,
|
||||
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-2 (matrix-vector) routines
|
||||
|
@ -778,6 +842,17 @@ clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_trans
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXgemv. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose,
    const size_t m, const size_t n,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV
|
||||
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
|
||||
|
@ -852,6 +927,17 @@ clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_trans
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXgbmv. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose,
    const size_t m, const size_t n, const size_t kl, const size_t ku,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for CHEMV/ZHEMV
|
||||
clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle,
|
||||
|
@ -1004,6 +1090,17 @@ clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsymv. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SSBMV/DSBMV
|
||||
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
|
||||
|
@ -1042,6 +1139,17 @@ clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsbmv. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n, const size_t k,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SSPMV/DSPMV
|
||||
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
|
||||
|
@ -1080,6 +1188,17 @@ clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
|
|||
y_buffer, y_offset, static_cast<int>(y_inc),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXspmv. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const cl_mem ap_buffer, const size_t ap_offset,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const half beta,
    cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV
|
||||
template <typename T>
|
||||
|
@ -1157,6 +1276,15 @@ clblasStatus clblasXtrmv<double2>(const clblasOrder layout, const clblasUplo tri
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXtrmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
|
||||
const size_t n,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV
|
||||
template <typename T>
|
||||
|
@ -1234,6 +1362,15 @@ clblasStatus clblasXtbmv<double2>(const clblasOrder layout, const clblasUplo tri
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXtbmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
|
||||
const size_t n, const size_t k,
|
||||
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV
|
||||
template <typename T>
|
||||
|
@ -1311,6 +1448,15 @@ clblasStatus clblasXtpmv<double2>(const clblasOrder layout, const clblasUplo tri
|
|||
scratch_buffer(),
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
template <>
|
||||
clblasStatus clblasXtpmv<half>(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
|
||||
const size_t n,
|
||||
const cl_mem ap_buffer, const size_t ap_offset,
|
||||
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
|
||||
cl_uint num_queues, cl_command_queue *queues,
|
||||
cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
|
||||
return clblasNotImplemented;
|
||||
}
|
||||
|
||||
// Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV
|
||||
template <typename T>
|
||||
|
@ -1528,6 +1674,16 @@ clblasStatus clblasXger(const clblasOrder layout,
|
|||
a_buffer, a_offset, a_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXger. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXger(const clblasOrder layout,
    const size_t m, const size_t n,
    const half alpha,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for CGERU/ZGERU
|
||||
clblasStatus clblasXgeru(const clblasOrder layout,
|
||||
|
@ -1754,6 +1910,15 @@ clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
|
|||
a_buffer, a_offset, a_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsyr. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SSPR/DSPR
|
||||
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
|
||||
|
@ -1784,6 +1949,15 @@ clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
|
|||
ap_buffer, ap_offset,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXspr. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    cl_mem ap_buffer, const size_t ap_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SSYR2/DSYR2
|
||||
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
|
||||
|
@ -1818,6 +1992,16 @@ clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
|
|||
a_buffer, a_offset, a_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsyr2. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SSPR2/DSPR2
|
||||
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
|
||||
|
@ -1852,6 +2036,16 @@ clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
|
|||
ap_buffer, ap_offset,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXspr2. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle,
    const size_t n,
    const half alpha,
    const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
    const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
    cl_mem ap_buffer, const size_t ap_offset,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-3 (matrix-matrix) routines
|
||||
|
@ -1930,6 +2124,17 @@ clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_trans
|
|||
c_buffer, c_offset, c_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXgemm. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose,
    const size_t m, const size_t n, const size_t k,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
    const half beta,
    cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM
|
||||
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
|
||||
|
@ -2004,6 +2209,17 @@ clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const
|
|||
c_buffer, c_offset, c_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsymm. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
    const size_t m, const size_t n,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
    const half beta,
    cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for CHEMM/ZHEMM
|
||||
clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle,
|
||||
|
@ -2108,6 +2324,16 @@ clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, co
|
|||
c_buffer, c_offset, c_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsyrk. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
    const size_t n, const size_t k,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const half beta,
    cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for CHERK/ZHERK
|
||||
clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose,
|
||||
|
@ -2216,6 +2442,17 @@ clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, c
|
|||
c_buffer, c_offset, c_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXsyr2k. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
    const size_t n, const size_t k,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
    const half beta,
    cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for CHER2K/ZHER2K
|
||||
clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose,
|
||||
|
@ -2312,6 +2549,15 @@ clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const
|
|||
b_buffer, b_offset, b_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXtrmm. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t m, const size_t n,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM
|
||||
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
|
||||
|
@ -2370,6 +2616,15 @@ clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const
|
|||
b_buffer, b_offset, b_ld,
|
||||
num_queues, queues, num_wait_events, wait_events, events);
|
||||
}
|
||||
// Half-precision overload of clblasXtrsm. No clBLAS routine is called: the function ignores
// every argument and unconditionally reports clblasNotImplemented.
clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal,
    const size_t m, const size_t n,
    const half alpha,
    const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
    cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
    cl_uint num_queues, cl_command_queue *queues,
    cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) {
  // Nothing is enqueued and `events` is not written.
  return clblasNotImplemented;
}
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace clblast
|
||||
|
|
Loading…
Reference in a new issue