Updated the database to use the new TRSV and Invert tuners

pull/232/head
Cedric Nugteren 2017-12-23 13:55:22 +01:00
parent aa7db4f987
commit b1f52f130c
19 changed files with 336 additions and 163 deletions

View File

@ -4,6 +4,7 @@ Development (next version)
- Made it possible to override the tuning parameters in the clients straight from JSON tuning files
- Added OpenCL pre-processor to unroll loops and perform array-to-register promotions for compilers
which don't do this themselves (ARM Mali) - greatly improves performance on these platforms
- Added first tuners for the TRSV (block size) and TRSM (invert kernel) routines
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see README)

View File

@ -195,7 +195,7 @@ endif()
set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger
xgemm xgemm_direct xgemv invert)
set(DATABASES copy pad padtranspose transpose xaxpy xdot
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert
gemm_routine trsv_routine)
set(ROUTINE_TUNERS xgemm xtrsv)
set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax)

View File

@ -29,11 +29,11 @@
#include "database/kernels/pad/pad.hpp"
#include "database/kernels/transpose/transpose.hpp"
#include "database/kernels/padtranspose/padtranspose.hpp"
#include "database/kernels/invert/invert.hpp"
#include "database/kernels/gemm_routine/gemm_routine.hpp"
#include "database/kernels/trsv_routine/trsv_routine.hpp"
#include "database/kernels/xtrsv.hpp"
#include "database/kernels/invert.hpp"
#include "database/apple_cpu_fallback.hpp"
namespace clblast {
@ -47,7 +47,6 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data
database::XgemvFastHalf, database::XgemvFastSingle, database::XgemvFastDouble, database::XgemvFastComplexSingle, database::XgemvFastComplexDouble,
database::XgemvFastRotHalf, database::XgemvFastRotSingle, database::XgemvFastRotDouble, database::XgemvFastRotComplexSingle, database::XgemvFastRotComplexDouble,
database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble,
database::XtrsvHalf, database::XtrsvSingle, database::XtrsvDouble, database::XtrsvComplexSingle, database::XtrsvComplexDouble,
database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble,
database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble,
database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble,
@ -55,7 +54,8 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data
database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble,
database::PadtransposeHalf, database::PadtransposeSingle, database::PadtransposeDouble, database::PadtransposeComplexSingle, database::PadtransposeComplexDouble,
database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble,
database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble
database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble,
database::TrsvRoutineHalf, database::TrsvRoutineSingle, database::TrsvRoutineDouble, database::TrsvRoutineComplexSingle, database::TrsvRoutineComplexDouble
};
const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{
database::XaxpyApple, database::XdotApple,

View File

@ -1,78 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// Tuning parameters for the diagonal matrix inversion kernels
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Hand-written default tables for the "Invert" kernel, one DatabaseEntry per precision. Every entry
// below has the same shape: kernel name, precision, the list of parameter names, and a single
// catch-all section (kDeviceTypeAll / "default") holding one kDeviceNameDefault row.
// NOTE(review): the leading Params value (16) presumably is INTERNAL_BLOCK_SIZE and the remaining
// zeros look like padding of a fixed-size Params container -- confirm against the Params definition.
// Half precision
const DatabaseEntry InvertHalf = {
"Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Single precision
const DatabaseEntry InvertSingle = {
"Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Complex single precision
const DatabaseEntry InvertComplexSingle = {
"Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Double precision
const DatabaseEntry InvertDouble = {
"Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Complex double precision
const DatabaseEntry InvertComplexDouble = {
"Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Invert' kernels.
//
// =================================================================================================
#include "database/kernels/invert/invert_16.hpp"
#include "database/kernels/invert/invert_32.hpp"
#include "database/kernels/invert/invert_3232.hpp"
#include "database/kernels/invert/invert_64.hpp"
#include "database/kernels/invert/invert_6464.hpp"

View File

@ -0,0 +1,34 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Invert16' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Invert" kernel at half precision (Precision::kHalf).
// Initializer layout: kernel name, precision, list of parameter names, then one section per
// device-type/vendor pair; each section maps a group key (only "default" here) to a list of
// { device name, Params } rows.
// NOTE(review): the Params values presumably map positionally onto the parameter names
// (INTERNAL_BLOCK_SIZE=16, LOCALPAD=0, TMMWGSX=4, TMMWGSY=4), with the trailing zeros acting as
// padding -- confirm against the Params definition, which is not visible from this file.
const DatabaseEntry InvertHalf = {
"Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
// One explicitly listed device; its values currently equal the vendor-level default row below.
// The device-name literal (including its trailing whitespace) is runtime data: leave it untouched.
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,34 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Invert32' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Invert" kernel at single precision (Precision::kSingle).
// Initializer layout: kernel name, precision, list of parameter names, then one section per
// device-type/vendor pair; each section maps a group key (only "default" here) to a list of
// { device name, Params } rows.
// NOTE(review): the Params values presumably map positionally onto the parameter names
// (INTERNAL_BLOCK_SIZE=16, LOCALPAD=0, TMMWGSX=4, TMMWGSY=4), with the trailing zeros acting as
// padding -- confirm against the Params definition, which is not visible from this file.
const DatabaseEntry InvertSingle = {
"Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
// One explicitly listed device; its values currently equal the vendor-level default row below.
// The device-name literal (including its trailing whitespace) is runtime data: leave it untouched.
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,34 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Invert3232' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Invert" kernel at complex single precision
// (Precision::kComplexSingle).
// Initializer layout: kernel name, precision, list of parameter names, then one section per
// device-type/vendor pair; each section maps a group key (only "default" here) to a list of
// { device name, Params } rows.
// NOTE(review): the Params values presumably map positionally onto the parameter names
// (INTERNAL_BLOCK_SIZE=16, LOCALPAD=0, TMMWGSX=4, TMMWGSY=4), with the trailing zeros acting as
// padding -- confirm against the Params definition, which is not visible from this file.
const DatabaseEntry InvertComplexSingle = {
"Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
// One explicitly listed device; its values currently equal the vendor-level default row below.
// The device-name literal (including its trailing whitespace) is runtime data: leave it untouched.
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,26 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Invert64' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Invert" kernel at double precision (Precision::kDouble).
// Only the catch-all section is present: no device-specific rows are listed for this precision.
// NOTE(review): the Params values presumably map positionally onto the parameter names
// (INTERNAL_BLOCK_SIZE=16, LOCALPAD=0, TMMWGSX=4, TMMWGSY=4), with the trailing zeros acting as
// padding -- confirm against the Params definition, which is not visible from this file.
const DatabaseEntry InvertDouble = {
"Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,26 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Invert6464' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Invert" kernel at complex double precision
// (Precision::kComplexDouble).
// Only the catch-all section is present: no device-specific rows are listed for this precision.
// NOTE(review): the Params values presumably map positionally onto the parameter names
// (INTERNAL_BLOCK_SIZE=16, LOCALPAD=0, TMMWGSX=4, TMMWGSY=4), with the trailing zeros acting as
// padding -- confirm against the Params definition, which is not visible from this file.
const DatabaseEntry InvertComplexDouble = {
"Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine' kernels.
//
// =================================================================================================
#include "database/kernels/trsv_routine/trsv_routine_16.hpp"
#include "database/kernels/trsv_routine/trsv_routine_32.hpp"
#include "database/kernels/trsv_routine/trsv_routine_3232.hpp"
#include "database/kernels/trsv_routine/trsv_routine_64.hpp"
#include "database/kernels/trsv_routine/trsv_routine_6464.hpp"

View File

@ -0,0 +1,26 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine16' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table registered under the name "TrsvRoutine" at half precision
// (Precision::kHalf). It exposes a single parameter, TRSV_BLOCK_SIZE.
// Only the catch-all section is present: no device-specific rows are listed for this precision.
// NOTE(review): the leading Params value (32) presumably is TRSV_BLOCK_SIZE and the trailing
// zeros look like padding -- confirm against the Params definition, which is not visible here.
const DatabaseEntry TrsvRoutineHalf = {
"TrsvRoutine", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, {
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,34 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine32' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table registered under the name "TrsvRoutine" at single precision
// (Precision::kSingle). It exposes a single parameter, TRSV_BLOCK_SIZE.
// Initializer layout: name, precision, list of parameter names, then one section per
// device-type/vendor pair; each section maps a group key (only "default" here) to a list of
// { device name, Params } rows.
// NOTE(review): the leading Params value (32) presumably is TRSV_BLOCK_SIZE and the trailing
// zeros look like padding -- confirm against the Params definition, which is not visible here.
const DatabaseEntry TrsvRoutineSingle = {
"TrsvRoutine", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
// One explicitly listed device; its values currently equal the vendor-level default row below.
// The device-name literal (including its trailing whitespace) is runtime data: leave it untouched.
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,34 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine3232' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table registered under the name "TrsvRoutine" at complex single precision
// (Precision::kComplexSingle). It exposes a single parameter, TRSV_BLOCK_SIZE.
// Initializer layout: name, precision, list of parameter names, then one section per
// device-type/vendor pair; each section maps a group key (only "default" here) to a list of
// { device name, Params } rows.
// NOTE(review): the leading Params value (32) presumably is TRSV_BLOCK_SIZE and the trailing
// zeros look like padding -- confirm against the Params definition, which is not visible here.
const DatabaseEntry TrsvRoutineComplexSingle = {
"TrsvRoutine", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
// One explicitly listed device; its values currently equal the vendor-level default row below.
// The device-name literal (including its trailing whitespace) is runtime data: leave it untouched.
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,26 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine64' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table registered under the name "TrsvRoutine" at double precision
// (Precision::kDouble). It exposes a single parameter, TRSV_BLOCK_SIZE.
// Only the catch-all section is present: no device-specific rows are listed for this precision.
// NOTE(review): the leading Params value (32) presumably is TRSV_BLOCK_SIZE and the trailing
// zeros look like padding -- confirm against the Params definition, which is not visible here.
const DatabaseEntry TrsvRoutineDouble = {
"TrsvRoutine", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,26 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine6464' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table registered under the name "TrsvRoutine" at complex double precision
// (Precision::kComplexDouble). It exposes a single parameter, TRSV_BLOCK_SIZE.
// Only the catch-all section is present: no device-specific rows are listed for this precision.
// NOTE(review): the leading Params value (32) presumably is TRSV_BLOCK_SIZE and the trailing
// zeros look like padding -- confirm against the Params definition, which is not visible here.
const DatabaseEntry TrsvRoutineComplexDouble = {
"TrsvRoutine", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
// Catch-all section: any device type, vendor key "default".
kDeviceTypeAll, "default", {
{ "default", {
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,78 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file populates the database with best-found tuning parameters for the 'Xtrsv' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Hand-written default tables for the "Xtrsv" kernel, one DatabaseEntry per precision. Every entry
// has the same shape: kernel name, precision, the list of parameter names (only TRSV_BLOCK_SIZE),
// and a single catch-all section (kDeviceTypeAll / "default") holding one kDeviceNameDefault row.
//
// Each Params row spells out 14 values, matching every other database table: the rows here used to
// list only 13, silently relying on the missing trailing element being zero-initialised.
// NOTE(review): the leading value (32) presumably is TRSV_BLOCK_SIZE and the zeros padding --
// confirm against the Params definition.
// Half precision
const DatabaseEntry XtrsvHalf = {
"Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Single precision
const DatabaseEntry XtrsvSingle = {
"Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Complex single precision
const DatabaseEntry XtrsvComplexSingle = {
"Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Double precision
const DatabaseEntry XtrsvDouble = {
"Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
// Complex double precision
const DatabaseEntry XtrsvComplexDouble = {
"Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -22,7 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "Xtrsv"}, PrecisionValue<T>(), {}, {
Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "TrsvRoutine"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/xgemv.opencl"
#include "../../kernels/level2/xgemv_fast.opencl"
#include "../../kernels/level2/xtrsv.opencl"

View File

@ -59,7 +59,7 @@ void TuneXtrsv(int argc, char* argv[]) {
// Values for the block size
const auto from = size_t{8};
const auto to = size_t{64 + 1};
const auto to = size_t{32 + 1};
const auto step = size_t{8};
// OpenCL initialisation