From b1f52f130c4f9e4346579003b2786aa2e082f234 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 23 Dec 2017 13:55:22 +0100 Subject: [PATCH] Updated the database to use the new TRSV and Invert tuners --- CHANGELOG | 1 + CMakeLists.txt | 2 +- src/database/database.cpp | 8 +- src/database/kernels/invert.hpp | 78 ------------------- src/database/kernels/invert/invert.hpp | 14 ++++ src/database/kernels/invert/invert_16.hpp | 34 ++++++++ src/database/kernels/invert/invert_32.hpp | 34 ++++++++ src/database/kernels/invert/invert_3232.hpp | 34 ++++++++ src/database/kernels/invert/invert_64.hpp | 26 +++++++ src/database/kernels/invert/invert_6464.hpp | 26 +++++++ .../kernels/trsv_routine/trsv_routine.hpp | 14 ++++ .../kernels/trsv_routine/trsv_routine_16.hpp | 26 +++++++ .../kernels/trsv_routine/trsv_routine_32.hpp | 34 ++++++++ .../trsv_routine/trsv_routine_3232.hpp | 34 ++++++++ .../kernels/trsv_routine/trsv_routine_64.hpp | 26 +++++++ .../trsv_routine/trsv_routine_6464.hpp | 26 +++++++ src/database/kernels/xtrsv.hpp | 78 ------------------- src/routines/level2/xgemv.cpp | 2 +- src/tuning/routines/xtrsv.cpp | 2 +- 19 files changed, 336 insertions(+), 163 deletions(-) delete mode 100644 src/database/kernels/invert.hpp create mode 100644 src/database/kernels/invert/invert.hpp create mode 100644 src/database/kernels/invert/invert_16.hpp create mode 100644 src/database/kernels/invert/invert_32.hpp create mode 100644 src/database/kernels/invert/invert_3232.hpp create mode 100644 src/database/kernels/invert/invert_64.hpp create mode 100644 src/database/kernels/invert/invert_6464.hpp create mode 100644 src/database/kernels/trsv_routine/trsv_routine.hpp create mode 100644 src/database/kernels/trsv_routine/trsv_routine_16.hpp create mode 100644 src/database/kernels/trsv_routine/trsv_routine_32.hpp create mode 100644 src/database/kernels/trsv_routine/trsv_routine_3232.hpp create mode 100644 src/database/kernels/trsv_routine/trsv_routine_64.hpp create mode 100644 src/database/kernels/trsv_routine/trsv_routine_6464.hpp delete mode 100644 src/database/kernels/xtrsv.hpp diff --git a/CHANGELOG b/CHANGELOG index 175524aa..83d5178b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,7 @@ Development (next version) - Made it possible to override the tuning parameters in the clients straight from JSON tuning files - Added OpenCL pre-processor to unroll loops and perform array-to-register promotions for compilers which don't do this themselves (ARM Mali) - greatly improves performance on these platforms +- Added first tuners for the TRSV (block size) and TRSM (invert kernel) routines - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) diff --git a/CMakeLists.txt b/CMakeLists.txt index f83ba33c..63ab8e79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,7 +195,7 @@ endif() set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger xgemm xgemm_direct xgemv invert) set(DATABASES copy pad padtranspose transpose xaxpy xdot - xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger + xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert gemm_routine trsv_routine) set(ROUTINE_TUNERS xgemm xtrsv) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax) diff --git a/src/database/database.cpp b/src/database/database.cpp index 2fa86151..56c93f18 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -29,11 +29,11 @@ #include "database/kernels/pad/pad.hpp" #include "database/kernels/transpose/transpose.hpp" #include "database/kernels/padtranspose/padtranspose.hpp" +#include "database/kernels/invert/invert.hpp" #include "database/kernels/gemm_routine/gemm_routine.hpp" +#include "database/kernels/trsv_routine/trsv_routine.hpp" -#include "database/kernels/xtrsv.hpp" -#include "database/kernels/invert.hpp" #include "database/apple_cpu_fallback.hpp" namespace clblast { @@ -47,7 +47,6 @@ const std::vector Database::database = std::vector Database::database = std::vector Database::apple_cpu_fallback = std::vector{ database::XaxpyApple, database::XdotApple, diff --git a/src/database/kernels/invert.hpp b/src/database/kernels/invert.hpp deleted file mode 100644 index b7464382..00000000 --- a/src/database/kernels/invert.hpp +++ /dev/null @@ -1,78 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// Tuning parameters for the diagonal matrix inversion kernels -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const DatabaseEntry InvertHalf = { - "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertSingle = { - "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertComplexSingle = { - "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertDouble = { - "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry InvertComplexDouble = { - "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/invert/invert.hpp b/src/database/kernels/invert/invert.hpp new file mode 100644 index 00000000..9b7c2d30 --- /dev/null +++ b/src/database/kernels/invert/invert.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert' kernels. +// +// ================================================================================================= + +#include "database/kernels/invert/invert_16.hpp" +#include "database/kernels/invert/invert_32.hpp" +#include "database/kernels/invert/invert_3232.hpp" +#include "database/kernels/invert/invert_64.hpp" +#include "database/kernels/invert/invert_6464.hpp" diff --git a/src/database/kernels/invert/invert_16.hpp b/src/database/kernels/invert/invert_16.hpp new file mode 100644 index 00000000..e3941370 --- /dev/null +++ b/src/database/kernels/invert/invert_16.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertHalf = { + "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_32.hpp b/src/database/kernels/invert/invert_32.hpp new file mode 100644 index 00000000..ca07e947 --- /dev/null +++ b/src/database/kernels/invert/invert_32.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertSingle = { + "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_3232.hpp b/src/database/kernels/invert/invert_3232.hpp new file mode 100644 index 00000000..f01b3c7f --- /dev/null +++ b/src/database/kernels/invert/invert_3232.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertComplexSingle = { + "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_64.hpp b/src/database/kernels/invert/invert_64.hpp new file mode 100644 index 00000000..e73120ca --- /dev/null +++ b/src/database/kernels/invert/invert_64.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertDouble = { + "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert/invert_6464.hpp b/src/database/kernels/invert/invert_6464.hpp new file mode 100644 index 00000000..184b956a --- /dev/null +++ b/src/database/kernels/invert/invert_6464.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Invert6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry InvertComplexDouble = { + "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine.hpp b/src/database/kernels/trsv_routine/trsv_routine.hpp new file mode 100644 index 00000000..c4659ad1 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine' kernels. +// +// ================================================================================================= + +#include "database/kernels/trsv_routine/trsv_routine_16.hpp" +#include "database/kernels/trsv_routine/trsv_routine_32.hpp" +#include "database/kernels/trsv_routine/trsv_routine_3232.hpp" +#include "database/kernels/trsv_routine/trsv_routine_64.hpp" +#include "database/kernels/trsv_routine/trsv_routine_6464.hpp" diff --git a/src/database/kernels/trsv_routine/trsv_routine_16.hpp b/src/database/kernels/trsv_routine/trsv_routine_16.hpp new file mode 100644 index 00000000..c6d5d876 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_16.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineHalf = { + "TrsvRoutine", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_32.hpp b/src/database/kernels/trsv_routine/trsv_routine_32.hpp new file mode 100644 index 00000000..7912faf4 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_32.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineSingle = { + "TrsvRoutine", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_3232.hpp b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp new file mode 100644 index 00000000..4c7f4c88 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_3232.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineComplexSingle = { + "TrsvRoutine", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_64.hpp b/src/database/kernels/trsv_routine/trsv_routine_64.hpp new file mode 100644 index 00000000..e1897b79 --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_64.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineDouble = { + "TrsvRoutine", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/trsv_routine/trsv_routine_6464.hpp b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp new file mode 100644 index 00000000..082d3a8e --- /dev/null +++ b/src/database/kernels/trsv_routine/trsv_routine_6464.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Trsv_Routine6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TrsvRoutineComplexDouble = { + "TrsvRoutine", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xtrsv.hpp b/src/database/kernels/xtrsv.hpp deleted file mode 100644 index 2d6afbea..00000000 --- a/src/database/kernels/xtrsv.hpp +++ /dev/null @@ -1,78 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Cedric Nugteren -// -// This file populates the database with best-found tuning parameters for the 'Xtrsv' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const DatabaseEntry XtrsvHalf = { - "Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvSingle = { - "Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvComplexSingle = { - "Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvDouble = { - "Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= - -const DatabaseEntry XtrsvComplexDouble = { - "Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { - { // Default - kDeviceTypeAll, "default", { - { "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index b7e8081b..63dab9f7 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -22,7 +22,7 @@ namespace clblast { // Constructor: forwards to base class constructor template Xgemv::Xgemv(Queue &queue, EventPointer event, const std::string &name): - Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "Xtrsv"}, PrecisionValue(), {}, { + Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "TrsvRoutine"}, PrecisionValue(), {}, { #include "../../kernels/level2/xgemv.opencl" #include "../../kernels/level2/xgemv_fast.opencl" #include "../../kernels/level2/xtrsv.opencl" diff --git a/src/tuning/routines/xtrsv.cpp b/src/tuning/routines/xtrsv.cpp index 9e8f26fa..29db0cd0 100644 --- a/src/tuning/routines/xtrsv.cpp +++ b/src/tuning/routines/xtrsv.cpp @@ -59,7 +59,7 @@ void TuneXtrsv(int argc, char* argv[]) { // Values for the block size const auto from = size_t{8}; - const auto to = size_t{64 + 1}; + const auto to = size_t{32 + 1}; const auto step = size_t{8}; // OpenCL initialisation