diff --git a/CMakeLists.txt b/CMakeLists.txt index 1356a509..1217497f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -212,11 +212,10 @@ endif() # Sets the supported routines and the used kernels. New routines and kernels should be added here. set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger - xgemm xgemm_direct xgemv invert) -set(KERNELS_EXTRA xconvgemm) # kernels for which not to include a tuner in 'all tuners' target + xgemm xgemm_direct xgemv invert xconvgemm) set(DATABASES copy pad padtranspose transpose xaxpy xdot xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert - gemm_routine trsv_routine) + gemm_routine trsv_routine xconvgemm) set(ROUTINE_TUNERS xgemm xtrsv) set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax) set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xtrsv @@ -435,7 +434,7 @@ if(TUNERS) endif() # Adds tuning executables - set(ALLKERNELS ${KERNELS} ${KERNELS_EXTRA}) + set(ALLKERNELS ${KERNELS}) foreach(KERNEL ${ALLKERNELS}) add_executable(clblast_tuner_${KERNEL} ${TUNERS_COMMON} src/tuning/kernels/${KERNEL}.cpp) target_link_libraries(clblast_tuner_${KERNEL} ${API_LIBRARIES}) diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp index 55bcc220..98dd242a 100644 --- a/src/database/apple_cpu_fallback.hpp +++ b/src/database/apple_cpu_fallback.hpp @@ -49,6 +49,9 @@ const DatabaseEntry XgemmApple = { const DatabaseEntry XgemmDirectApple = { "XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0 } } } } } } } }; +const DatabaseEntry XconvgemmApple = { + "Xconvgemm", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0 } } } } } } } +}; const DatabaseEntry CopyApple = { "Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; diff --git a/src/database/database.cpp b/src/database/database.cpp index fca3102d..07d75ece 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -25,6 +25,7 @@ #include "database/kernels/xger/xger.hpp" #include "database/kernels/xgemm/xgemm.hpp" #include "database/kernels/xgemm_direct/xgemm_direct.hpp" +#include "database/kernels/xconvgemm/xconvgemm.hpp" #include "database/kernels/copy/copy.hpp" #include "database/kernels/pad/pad.hpp" #include "database/kernels/transpose/transpose.hpp" @@ -43,7 +44,7 @@ std::vector Database::database = std::vector Database::apple_cpu_fallback = std::vector{ database::XaxpyApple, database::XdotApple, database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple, - database::XgemmApple, database::XgemmDirectApple, + database::XgemmApple, database::XgemmDirectApple, database::XconvgemmApple, database::CopyApple, database::PadApple, database::TransposeApple, database::PadtransposeApple, database::InvertApple, database::TrsvRoutineApple @@ -71,6 +72,7 @@ Database::Database(const Device &device, const std::string &kernel_name, database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble, database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble, database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble, + database::XconvgemmHalf, database::XconvgemmSingle, database::XconvgemmDouble, database::XconvgemmComplexSingle, database::XconvgemmComplexDouble, database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble, database::PadHalf, database::PadSingle, database::PadDouble, database::PadComplexSingle, database::PadComplexDouble, database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble, diff --git a/src/database/kernels/xconvgemm/xconvgemm.cpp b/src/database/kernels/xconvgemm/xconvgemm.cpp new file mode 100644 index 00000000..1138f8b8 --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm.cpp @@ -0,0 +1,15 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm' kernels. +// +// ================================================================================================= + +#include "database/kernels/xconvgemm/xconvgemm.hpp" +#include "database/kernels/xconvgemm/xconvgemm_16.hpp" +#include "database/kernels/xconvgemm/xconvgemm_32.hpp" +#include "database/kernels/xconvgemm/xconvgemm_3232.hpp" +#include "database/kernels/xconvgemm/xconvgemm_64.hpp" +#include "database/kernels/xconvgemm/xconvgemm_6464.hpp" diff --git a/src/database/kernels/xconvgemm/xconvgemm.hpp b/src/database/kernels/xconvgemm/xconvgemm.hpp new file mode 100644 index 00000000..ac07dc42 --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm.hpp @@ -0,0 +1,22 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm' kernels. +// +// ================================================================================================= + +#include "database/database_structure.hpp" + +namespace clblast { +namespace database { + +extern const DatabaseEntry XconvgemmHalf; +extern const DatabaseEntry XconvgemmSingle; +extern const DatabaseEntry XconvgemmComplexSingle; +extern const DatabaseEntry XconvgemmDouble; +extern const DatabaseEntry XconvgemmComplexDouble; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xconvgemm/xconvgemm_16.hpp b/src/database/kernels/xconvgemm/xconvgemm_16.hpp new file mode 100644 index 00000000..97cb442f --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm_16.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XconvgemmHalf = { + "Xconvgemm", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xconvgemm/xconvgemm_32.hpp b/src/database/kernels/xconvgemm/xconvgemm_32.hpp new file mode 100644 index 00000000..c8d8dfa7 --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm_32.hpp @@ -0,0 +1,34 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XconvgemmSingle = { + "Xconvgemm", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "default", { + { Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xconvgemm/xconvgemm_3232.hpp b/src/database/kernels/xconvgemm/xconvgemm_3232.hpp new file mode 100644 index 00000000..bb9a5593 --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm_3232.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XconvgemmComplexSingle = { + "Xconvgemm", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xconvgemm/xconvgemm_64.hpp b/src/database/kernels/xconvgemm/xconvgemm_64.hpp new file mode 100644 index 00000000..dfc34160 --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm_64.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XconvgemmDouble = { + "Xconvgemm", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xconvgemm/xconvgemm_6464.hpp b/src/database/kernels/xconvgemm/xconvgemm_6464.hpp new file mode 100644 index 00000000..6aabe88c --- /dev/null +++ b/src/database/kernels/xconvgemm/xconvgemm_6464.hpp @@ -0,0 +1,26 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xconvgemm6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XconvgemmComplexDouble = { + "Xconvgemm", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // Default + kDeviceTypeAll, "default", { + { "default", { + { kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/routines/levelx/xconvgemm.cpp b/src/routines/levelx/xconvgemm.cpp index 8bd24f15..d137e6fe 100644 --- a/src/routines/levelx/xconvgemm.cpp +++ b/src/routines/levelx/xconvgemm.cpp @@ -25,7 +25,7 @@ namespace clblast { template Xconvgemm::Xconvgemm(Queue &queue, EventPointer event, const std::string &name, const ConvGemmMethod method): - Routine(queue, event, name, {"XgemmDirect"}, + Routine(queue, event, name, {"Xconvgemm"}, PrecisionValue(), {}, { (method == ConvGemmMethod::kWithIm2Col) ? "#define CONVGEMM_WITH_IM2COL\n" : "", #include "../../kernels/level3/level3.opencl"