diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 7a8ff9f8..d78c3201 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -235,9 +235,9 @@ def wrapper_clblas(routines): if routine.NoScalars(): result += routine.RoutineHeaderWrapperCL(routine.template, True, 21)+";\n" for flavour in routine.flavours: + indent = " "*(17 + routine.Length()) + result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n" if flavour.precision_name in ["S","D","C","Z"]: - indent = " "*(17 + routine.Length()) - result += routine.RoutineHeaderWrapperCL(flavour, False, 21)+" {\n" arguments = routine.ArgumentsWrapperCL(flavour) if routine.scratch: result += " auto queue = Queue(queues[0]);\n" @@ -247,7 +247,9 @@ def wrapper_clblas(routines): result += " return clblas"+flavour.name+routine.name+"(" result += (",\n"+indent).join([a for a in arguments]) result += ",\n"+indent+"num_queues, queues, num_wait_events, wait_events, events);" - result += "\n}\n" + else: + result += " return clblasNotImplemented;" + result += "\n}\n" return result # The wrapper to the reference CBLAS routines (for performance/correctness testing) @@ -257,9 +259,9 @@ def wrapper_cblas(routines): if routine.has_tests: result += "\n// Forwards the Netlib BLAS calls for %s\n" % (routine.ShortNamesTested()) for flavour in routine.flavours: + indent = " "*(10 + routine.Length()) + result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n" if flavour.precision_name in ["S","D","C","Z"]: - indent = " "*(10 + routine.Length()) - result += routine.RoutineHeaderWrapperC(flavour, False, 12)+" {\n" arguments = routine.ArgumentsWrapperC(flavour) # Double-precision scalars @@ -293,7 +295,9 @@ def wrapper_cblas(routines): result += " "+assignment+"cblas_"+flavour.name.lower()+routine.name+postfix+"(" result += (",\n"+indent).join([a for a in arguments]) result += extra_argument+endofline+");" - result += "\n}\n" + else: + result += " return;" + result += "\n}\n" return result # ================================================================================================== @@ -402,7 +406,7 @@ for level in [1,2,3]: body += " case clblast::Precision::k"+PrecisionToFullName(precision)+":" found = False for flavour in routine.flavours: - if flavour.precision_name == precision and flavour.precision_name in ["S","D","C","Z"]: + if flavour.precision_name == precision: body += "\n clblast::RunClient Client::ParseArguments(int argc, char *argv[], const GetMetric // which is thus always displayed (unless silence is specified). if (!args.silent) { fprintf(stdout, "%s\n", help.c_str()); } + // Comparison against clBLAS or a CPU BLAS library is not supported in case of half-precision + if (args.precision == Precision::kHalf) { + if (args.compare_clblas != 0 || args.compare_cblas != 0) { + if (!args.silent) { + fprintf(stdout, "* Disabling clBLAS and CPU BLAS comparisons for half-precision\n\n"); + } + } + args.compare_clblas = 0; + args.compare_cblas = 0; + } + // Returns the arguments return args; } @@ -339,6 +350,7 @@ void Client::PrintTableRow(const Arguments& args, // ================================================================================================= // Compiles the templated class +template class Client; template class Client; template class Client; template class Client; diff --git a/test/performance/routines/level1/xamax.cc b/test/performance/routines/level1/xamax.cc index 85caa483..4af1f1c0 100644 --- a/test/performance/routines/level1/xamax.cc +++ b/test/performance/routines/level1/xamax.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xasum.cc b/test/performance/routines/level1/xasum.cc index 2680966e..8e098890 100644 --- a/test/performance/routines/level1/xasum.cc +++ b/test/performance/routines/level1/xasum.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xaxpy.cc b/test/performance/routines/level1/xaxpy.cc index b423bc3a..b48c290d 100644 --- a/test/performance/routines/level1/xaxpy.cc +++ b/test/performance/routines/level1/xaxpy.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xcopy.cc b/test/performance/routines/level1/xcopy.cc index c04c6c1c..b7c60f0f 100644 --- a/test/performance/routines/level1/xcopy.cc +++ b/test/performance/routines/level1/xcopy.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xdot.cc b/test/performance/routines/level1/xdot.cc index f4616464..3edf2590 100644 --- a/test/performance/routines/level1/xdot.cc +++ b/test/performance/routines/level1/xdot.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xnrm2.cc b/test/performance/routines/level1/xnrm2.cc index db6ec9ad..f167df95 100644 --- a/test/performance/routines/level1/xnrm2.cc +++ b/test/performance/routines/level1/xnrm2.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xscal.cc b/test/performance/routines/level1/xscal.cc index bd38f43e..35e21ba8 100644 --- a/test/performance/routines/level1/xscal.cc +++ b/test/performance/routines/level1/xscal.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level1/xswap.cc b/test/performance/routines/level1/xswap.cc index 112641d3..4791d4c3 100644 --- a/test/performance/routines/level1/xswap.cc +++ b/test/performance/routines/level1/xswap.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xgbmv.cc b/test/performance/routines/level2/xgbmv.cc index b050184d..be4056de 100644 --- a/test/performance/routines/level2/xgbmv.cc +++ b/test/performance/routines/level2/xgbmv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xgemv.cc b/test/performance/routines/level2/xgemv.cc index 51ab9a10..50e6225a 100644 --- a/test/performance/routines/level2/xgemv.cc +++ b/test/performance/routines/level2/xgemv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xger.cc b/test/performance/routines/level2/xger.cc index 2d956346..b1b5a268 100644 --- a/test/performance/routines/level2/xger.cc +++ b/test/performance/routines/level2/xger.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xsbmv.cc b/test/performance/routines/level2/xsbmv.cc index eabab3b7..5fb6e8c0 100644 --- a/test/performance/routines/level2/xsbmv.cc +++ b/test/performance/routines/level2/xsbmv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xspmv.cc b/test/performance/routines/level2/xspmv.cc index 2a9ef925..e0ee2075 100644 --- a/test/performance/routines/level2/xspmv.cc +++ b/test/performance/routines/level2/xspmv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xspr.cc b/test/performance/routines/level2/xspr.cc index 84331d74..19651679 100644 --- a/test/performance/routines/level2/xspr.cc +++ b/test/performance/routines/level2/xspr.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xspr2.cc b/test/performance/routines/level2/xspr2.cc index c42009a1..8745c004 100644 --- a/test/performance/routines/level2/xspr2.cc +++ b/test/performance/routines/level2/xspr2.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xsymv.cc b/test/performance/routines/level2/xsymv.cc index 3f72fe77..42de1ed5 100644 --- a/test/performance/routines/level2/xsymv.cc +++ b/test/performance/routines/level2/xsymv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xsyr.cc b/test/performance/routines/level2/xsyr.cc index 6b31d3a9..310bfb5e 100644 --- a/test/performance/routines/level2/xsyr.cc +++ b/test/performance/routines/level2/xsyr.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xsyr2.cc b/test/performance/routines/level2/xsyr2.cc index 0ad59d2d..bbeed3db 100644 --- a/test/performance/routines/level2/xsyr2.cc +++ b/test/performance/routines/level2/xsyr2.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xtbmv.cc b/test/performance/routines/level2/xtbmv.cc index a3297f34..24eec61f 100644 --- a/test/performance/routines/level2/xtbmv.cc +++ b/test/performance/routines/level2/xtbmv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xtpmv.cc b/test/performance/routines/level2/xtpmv.cc index 72477f2d..2f2487f8 100644 --- a/test/performance/routines/level2/xtpmv.cc +++ b/test/performance/routines/level2/xtpmv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level2/xtrmv.cc b/test/performance/routines/level2/xtrmv.cc index 894a7952..3f23afd1 100644 --- a/test/performance/routines/level2/xtrmv.cc +++ b/test/performance/routines/level2/xtrmv.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level3/xgemm.cc b/test/performance/routines/level3/xgemm.cc index 91897ee1..8e48dc3a 100644 --- a/test/performance/routines/level3/xgemm.cc +++ b/test/performance/routines/level3/xgemm.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level3/xsymm.cc b/test/performance/routines/level3/xsymm.cc index e0feadd1..7eac5537 100644 --- a/test/performance/routines/level3/xsymm.cc +++ b/test/performance/routines/level3/xsymm.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level3/xsyr2k.cc b/test/performance/routines/level3/xsyr2k.cc index 4a82ddc4..49d00f34 100644 --- a/test/performance/routines/level3/xsyr2k.cc +++ b/test/performance/routines/level3/xsyr2k.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level3/xsyrk.cc b/test/performance/routines/level3/xsyrk.cc index 70f61322..ad0a06b4 100644 --- a/test/performance/routines/level3/xsyrk.cc +++ b/test/performance/routines/level3/xsyrk.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level3/xtrmm.cc b/test/performance/routines/level3/xtrmm.cc index 6f6041e4..92526844 100644 --- a/test/performance/routines/level3/xtrmm.cc +++ b/test/performance/routines/level3/xtrmm.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/performance/routines/level3/xtrsm.cc b/test/performance/routines/level3/xtrsm.cc index 76ef255a..08e4b4a9 100644 --- a/test/performance/routines/level3/xtrsm.cc +++ b/test/performance/routines/level3/xtrsm.cc @@ -19,7 +19,8 @@ using double2 = clblast::double2; // Main function (not within the clblast namespace) int main(int argc, char *argv[]) { switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { - case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kHalf: + clblast::RunClient, half, half>(argc, argv); break; case clblast::Precision::kSingle: clblast::RunClient, float, float>(argc, argv); break; case clblast::Precision::kDouble: diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h index 3182fdfc..2fcab4d0 100644 --- a/test/wrapper_cblas.h +++ b/test/wrapper_cblas.h @@ -161,6 +161,11 @@ void cblasXswap(const size_t n, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } +void cblasXswap(const size_t n, + std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL void cblasXscal(const size_t n, @@ -193,6 +198,11 @@ void cblasXscal(const size_t n, alpha_array.data(), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } +void cblasXscal(const size_t n, + const half alpha, + std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // Forwards the Netlib BLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY void cblasXcopy(const size_t n, @@ -223,6 +233,11 @@ void cblasXcopy(const size_t n, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } +void cblasXcopy(const size_t n, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY void cblasXaxpy(const size_t n, @@ -263,6 +278,12 @@ void cblasXaxpy(const size_t n, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } +void cblasXaxpy(const size_t n, + const half alpha, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for SDOT/DDOT void cblasXdot(const size_t n, @@ -281,6 +302,12 @@ void cblasXdot(const size_t n, &x_buffer[x_offset], static_cast(x_inc), &y_buffer[y_offset], static_cast(y_inc)); } +void cblasXdot(const size_t n, + std::vector& dot_buffer, const size_t dot_offset, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for CDOTU/ZDOTU void cblasXdotu(const size_t n, @@ -347,6 +374,11 @@ void cblasXnrm2(const size_t n, nrm2_buffer[nrm2_offset].real(cblas_dznrm2(n, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc))); } +void cblasXnrm2(const size_t n, + std::vector& nrm2_buffer, const size_t nrm2_offset, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // Forwards the Netlib BLAS calls for SASUM/DASUM/ScASUM/DzASUM void cblasXasum(const size_t n, @@ -373,6 +405,11 @@ void cblasXasum(const size_t n, asum_buffer[asum_offset].real(cblas_dzasum(n, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc))); } +void cblasXasum(const size_t n, + std::vector& asum_buffer, const size_t asum_offset, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX void cblasXamax(const size_t n, @@ -399,6 +436,11 @@ void cblasXamax(const size_t n, ((int*)&imax_buffer[0])[imax_offset] = cblas_izamax(n, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } +void cblasXamax(const size_t n, + std::vector& imax_buffer, const size_t imax_offset, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // ================================================================================================= // BLAS level-2 (matrix-vector) routines @@ -469,6 +511,15 @@ void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, beta_array.data(), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } +void cblasXgemv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, + const size_t m, const size_t n, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, @@ -535,6 +586,15 @@ void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, beta_array.data(), reinterpret_cast(&y_buffer[y_offset]), static_cast(y_inc)); } +void cblasXgbmv(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for CHEMV/ZHEMV void cblasXhemv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, @@ -675,6 +735,15 @@ void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, beta, &y_buffer[y_offset], static_cast(y_inc)); } +void cblasXsymv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for SSBMV/DSBMV void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, @@ -707,6 +776,15 @@ void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, beta, &y_buffer[y_offset], static_cast(y_inc)); } +void cblasXsbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, const size_t k, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for SSPMV/DSPMV void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, @@ -739,6 +817,15 @@ void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, beta, &y_buffer[y_offset], static_cast(y_inc)); } +void cblasXspmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, + const half alpha, + const std::vector& ap_buffer, const size_t ap_offset, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + std::vector& y_buffer, const size_t y_offset, const size_t y_inc) { + return; +} // Forwards the Netlib BLAS calls for STRMV/DTRMV/CTRMV/ZTRMV void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, @@ -777,6 +864,12 @@ void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } +void cblasXtrmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, + const size_t n, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // Forwards the Netlib BLAS calls for STBMV/DTBMV/CTBMV/ZTBMV void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, @@ -815,6 +908,12 @@ void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } +void cblasXtbmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, + const size_t n, const size_t k, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // Forwards the Netlib BLAS calls for STPMV/DTPMV/CTPMV/ZTPMV void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, @@ -853,6 +952,12 @@ void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS reinterpret_cast(&ap_buffer[ap_offset]), reinterpret_cast(&x_buffer[x_offset]), static_cast(x_inc)); } +void cblasXtpmv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, + const size_t n, + const std::vector& ap_buffer, const size_t ap_offset, + std::vector& x_buffer, const size_t x_offset, const size_t x_inc) { + return; +} // Forwards the Netlib BLAS calls for STRSV/DTRSV/CTRSV/ZTRSV void cblasXtrsv(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, @@ -995,6 +1100,14 @@ void cblasXger(const CBLAS_ORDER layout, &y_buffer[y_offset], static_cast(y_inc), &a_buffer[a_offset], a_ld); } +void cblasXger(const CBLAS_ORDER layout, + const size_t m, const size_t n, + const half alpha, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, + std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { + return; +} // Forwards the Netlib BLAS calls for CGERU/ZGERU void cblasXgeru(const CBLAS_ORDER layout, @@ -1187,6 +1300,13 @@ void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, &x_buffer[x_offset], static_cast(x_inc), &a_buffer[a_offset], a_ld); } +void cblasXsyr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, + const half alpha, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { + return; +} // Forwards the Netlib BLAS calls for SSPR/DSPR void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, @@ -1211,6 +1331,13 @@ void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, &x_buffer[x_offset], static_cast(x_inc), &ap_buffer[ap_offset]); } +void cblasXspr(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, + const half alpha, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + std::vector& ap_buffer, const size_t ap_offset) { + return; +} // Forwards the Netlib BLAS calls for SSYR2/DSYR2 void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, @@ -1239,6 +1366,14 @@ void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, &y_buffer[y_offset], static_cast(y_inc), &a_buffer[a_offset], a_ld); } +void cblasXsyr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, + const half alpha, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, + std::vector& a_buffer, const size_t a_offset, const size_t a_ld) { + return; +} // Forwards the Netlib BLAS calls for SSPR2/DSPR2 void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, @@ -1267,6 +1402,14 @@ void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, &y_buffer[y_offset], static_cast(y_inc), &ap_buffer[ap_offset]); } +void cblasXspr2(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, + const size_t n, + const half alpha, + const std::vector& x_buffer, const size_t x_offset, const size_t x_inc, + const std::vector& y_buffer, const size_t y_offset, const size_t y_inc, + std::vector& ap_buffer, const size_t ap_offset) { + return; +} // ================================================================================================= // BLAS level-3 (matrix-matrix) routines @@ -1337,6 +1480,15 @@ void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, con beta_array.data(), reinterpret_cast(&c_buffer[c_offset]), c_ld); } +void cblasXgemm(const CBLAS_ORDER layout, const CBLAS_TRANSPOSE a_transpose, const CBLAS_TRANSPOSE b_transpose, + const size_t m, const size_t n, const size_t k, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { + return; +} // Forwards the Netlib BLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, @@ -1403,6 +1555,15 @@ void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL beta_array.data(), reinterpret_cast(&c_buffer[c_offset]), c_ld); } +void cblasXsymm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, + const size_t m, const size_t n, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { + return; +} // Forwards the Netlib BLAS calls for CHEMM/ZHEMM void cblasXhemm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, @@ -1497,6 +1658,14 @@ void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS beta_array.data(), reinterpret_cast(&c_buffer[c_offset]), c_ld); } +void cblasXsyrk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, + const size_t n, const size_t k, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const half beta, + std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { + return; +} // Forwards the Netlib BLAS calls for CHERK/ZHERK void cblasXherk(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, @@ -1591,6 +1760,15 @@ void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLA beta_array.data(), reinterpret_cast(&c_buffer[c_offset]), c_ld); } +void cblasXsyr2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose, + const size_t n, const size_t k, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + const std::vector& b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + std::vector& c_buffer, const size_t c_offset, const size_t c_ld) { + return; +} // Forwards the Netlib BLAS calls for CHER2K/ZHER2K void cblasXher2k(const CBLAS_ORDER layout, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE ab_transpose, @@ -1673,6 +1851,13 @@ void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); } +void cblasXtrmm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, + const size_t m, const size_t n, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { + return; +} // Forwards the Netlib BLAS calls for STRSM/DTRSM/CTRSM/ZTRSM void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, @@ -1721,6 +1906,13 @@ void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPL reinterpret_cast(&a_buffer[a_offset]), a_ld, reinterpret_cast(&b_buffer[b_offset]), b_ld); } +void cblasXtrsm(const CBLAS_ORDER layout, const CBLAS_SIDE side, const CBLAS_UPLO triangle, const CBLAS_TRANSPOSE a_transpose, const CBLAS_DIAG diagonal, + const size_t m, const size_t n, + const half alpha, + const std::vector& a_buffer, const size_t a_offset, const size_t a_ld, + std::vector& b_buffer, const size_t b_offset, const size_t b_ld) { + return; +} // ================================================================================================= } // namespace clblast diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index b9410cae..6e44d780 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -223,6 +223,14 @@ clblasStatus clblasXswap(const size_t n, y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSCAL/DSCAL/CSCAL/ZSCAL clblasStatus clblasXscal(const size_t n, @@ -265,6 +273,13 @@ clblasStatus clblasXscal(const size_t n, x_buffer, x_offset, static_cast(x_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXscal(const size_t n, + const half alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SCOPY/DCOPY/CCOPY/ZCOPY template @@ -317,6 +332,14 @@ clblasStatus clblasXcopy(const size_t n, y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SAXPY/DAXPY/CAXPY/ZAXPY clblasStatus clblasXaxpy(const size_t n, @@ -367,6 +390,14 @@ clblasStatus clblasXaxpy(const size_t n, y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXaxpy(const size_t n, + const half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SDOT/DDOT template @@ -410,6 +441,15 @@ clblasStatus clblasXdot(const size_t n, scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for CDOTU/ZDOTU template @@ -564,6 +604,14 @@ clblasStatus clblasXnrm2(const size_t n, scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SASUM/DASUM/ScASUM/DzASUM template @@ -632,6 +680,14 @@ clblasStatus clblasXasum(const size_t n, scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX template @@ -700,6 +756,14 @@ clblasStatus clblasXamax(const size_t n, scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // ================================================================================================= // BLAS level-2 (matrix-vector) routines @@ -778,6 +842,17 @@ clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_trans y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXgemv(const clblasOrder layout, const clblasTranspose a_transpose, + const size_t m, const size_t n, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SGBMV/DGBMV/CGBMV/ZGBMV clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, @@ -852,6 +927,17 @@ clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_trans y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXgbmv(const clblasOrder layout, const clblasTranspose a_transpose, + const size_t m, const size_t n, const size_t kl, const size_t ku, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for CHEMV/ZHEMV clblasStatus clblasXhemv(const clblasOrder layout, const clblasUplo triangle, @@ -1004,6 +1090,17 @@ clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle, y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsymv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSBMV/DSBMV clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle, @@ -1042,6 +1139,17 @@ clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle, y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsbmv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, const size_t k, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSPMV/DSPMV clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle, @@ -1080,6 +1188,17 @@ clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle, y_buffer, y_offset, static_cast(y_inc), num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXspmv(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const half alpha, + const cl_mem ap_buffer, const size_t ap_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const half beta, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for STRMV/DTRMV/CTRMV/ZTRMV template @@ -1157,6 +1276,15 @@ clblasStatus clblasXtrmv(const clblasOrder layout, const clblasUplo tri scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXtrmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t n, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for STBMV/DTBMV/CTBMV/ZTBMV template @@ -1234,6 +1362,15 @@ clblasStatus clblasXtbmv(const clblasOrder layout, const clblasUplo tri scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXtbmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t n, const size_t k, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for STPMV/DTPMV/CTPMV/ZTPMV template @@ -1311,6 +1448,15 @@ clblasStatus clblasXtpmv(const clblasOrder layout, const clblasUplo tri scratch_buffer(), num_queues, queues, num_wait_events, wait_events, events); } +template <> +clblasStatus clblasXtpmv(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t n, + const cl_mem ap_buffer, const size_t ap_offset, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for STRSV/DTRSV/CTRSV/ZTRSV template @@ -1528,6 +1674,16 @@ clblasStatus clblasXger(const clblasOrder layout, a_buffer, a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXger(const clblasOrder layout, + const size_t m, const size_t n, + const half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for CGERU/ZGERU clblasStatus clblasXgeru(const clblasOrder layout, @@ -1754,6 +1910,15 @@ clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle, a_buffer, a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsyr(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSPR/DSPR clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle, @@ -1784,6 +1949,15 @@ clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle, ap_buffer, ap_offset, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXspr(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSYR2/DSYR2 clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, @@ -1818,6 +1992,16 @@ clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, a_buffer, a_offset, a_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsyr2(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSPR2/DSPR2 clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, @@ -1852,6 +2036,16 @@ clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, ap_buffer, ap_offset, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXspr2(const clblasOrder layout, const clblasUplo triangle, + const size_t n, + const half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_mem ap_buffer, const size_t ap_offset, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // ================================================================================================= // BLAS level-3 (matrix-matrix) routines @@ -1930,6 +2124,17 @@ clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_trans c_buffer, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXgemm(const clblasOrder layout, const clblasTranspose a_transpose, const clblasTranspose b_transpose, + const size_t m, const size_t n, const size_t k, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for SSYMM/DSYMM/CSYMM/ZSYMM clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, @@ -2004,6 +2209,17 @@ clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const c_buffer, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsymm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, + const size_t m, const size_t n, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for CHEMM/ZHEMM clblasStatus clblasXhemm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, @@ -2108,6 +2324,16 @@ clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, co c_buffer, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsyrk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, + const size_t n, const size_t k, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for CHERK/ZHERK clblasStatus clblasXherk(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose a_transpose, @@ -2216,6 +2442,17 @@ clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, c c_buffer, c_offset, c_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXsyr2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, + const size_t n, const size_t k, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for CHER2K/ZHER2K clblasStatus clblasXher2k(const clblasOrder layout, const clblasUplo triangle, const clblasTranspose ab_transpose, @@ -2312,6 +2549,15 @@ clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const b_buffer, b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXtrmm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t m, const size_t n, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // Forwards the clBLAS calls for STRSM/DTRSM/CTRSM/ZTRSM clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, @@ -2370,6 +2616,15 @@ clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const b_buffer, b_offset, b_ld, num_queues, queues, num_wait_events, wait_events, events); } +clblasStatus clblasXtrsm(const clblasOrder layout, const clblasSide side, const clblasUplo triangle, const clblasTranspose a_transpose, const clblasDiag diagonal, + const size_t m, const size_t n, + const half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + return clblasNotImplemented; +} // ================================================================================================= } // namespace clblast