Added a kernel selection database to select between the direct and indirect GEMM kernels
parent
7052a00a3e
commit
a3e67f2be2
|
@ -26,6 +26,7 @@
|
|||
#include "database/kernels/pad.hpp"
|
||||
#include "database/kernels/transpose.hpp"
|
||||
#include "database/kernels/padtranspose.hpp"
|
||||
#include "database/kernel_selection.hpp"
|
||||
|
||||
namespace clblast {
|
||||
// =================================================================================================
|
||||
|
@ -43,7 +44,8 @@ const std::vector<Database::DatabaseEntry> Database::database = {
|
|||
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
|
||||
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
|
||||
TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,
|
||||
PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble
|
||||
PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble,
|
||||
KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -80,6 +80,7 @@ class Database {
|
|||
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
|
||||
static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
|
||||
static const DatabaseEntry PadtransposeHalf, PadtransposeSingle, PadtransposeDouble, PadtransposeComplexSingle, PadtransposeComplexDouble;
|
||||
static const DatabaseEntry KernelSelectionHalf, KernelSelectionSingle, KernelSelectionDouble, KernelSelectionComplexSingle, KernelSelectionComplexDouble;
|
||||
static const std::vector<DatabaseEntry> database;
|
||||
|
||||
// The constructor with a user-provided database overlay (potentially an empty vector)
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This determines when to switch between the direct (for small sizes) and indirect GEMM kernel
|
||||
// with pre/post-processing kernels (for larger sizes). These can be set in a similar way as for the
|
||||
// regular kernel tuning parameters: they can be specific for a certain vendor or device or can use
|
||||
// some common default values.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
// =================================================================================================
|
||||
|
||||
// Kernel-selection entry for half precision. It holds a single catch-all record (kDeviceTypeAll
// with "default" names) that sets XGEMM_MIN_INDIRECT_SIZE, the value the GEMM routine compares
// m * n * k against: products below it run the direct kernel, all others the indirect one.
const Database::DatabaseEntry Database::KernelSelectionHalf = {
  "KernelSelection",
  Precision::kHalf,
  {
    {
      kDeviceTypeAll,
      "default",
      {
        // 512 * 512 * 512 = 134217728
        {"default", {{"XGEMM_MIN_INDIRECT_SIZE", 512 * 512 * 512}}}
      }
    }
  }
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Kernel-selection entry for single precision. It holds a single catch-all record
// (kDeviceTypeAll with "default" names) that sets XGEMM_MIN_INDIRECT_SIZE, the value the GEMM
// routine compares m * n * k against: products below it run the direct kernel, all others the
// indirect one.
const Database::DatabaseEntry Database::KernelSelectionSingle = {
  "KernelSelection",
  Precision::kSingle,
  {
    {
      kDeviceTypeAll,
      "default",
      {
        // 512 * 512 * 512 = 134217728
        {"default", {{"XGEMM_MIN_INDIRECT_SIZE", 512 * 512 * 512}}}
      }
    }
  }
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Kernel-selection entry for single-precision complex. It holds a single catch-all record
// (kDeviceTypeAll with "default" names) that sets XGEMM_MIN_INDIRECT_SIZE, the value the GEMM
// routine compares m * n * k against: products below it run the direct kernel, all others the
// indirect one.
const Database::DatabaseEntry Database::KernelSelectionComplexSingle = {
  "KernelSelection",
  Precision::kComplexSingle,
  {
    {
      kDeviceTypeAll,
      "default",
      {
        // 512 * 512 * 512 = 134217728
        {"default", {{"XGEMM_MIN_INDIRECT_SIZE", 512 * 512 * 512}}}
      }
    }
  }
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Kernel-selection entry for double precision. It holds a single catch-all record
// (kDeviceTypeAll with "default" names) that sets XGEMM_MIN_INDIRECT_SIZE, the value the GEMM
// routine compares m * n * k against: products below it run the direct kernel, all others the
// indirect one.
const Database::DatabaseEntry Database::KernelSelectionDouble = {
  "KernelSelection",
  Precision::kDouble,
  {
    {
      kDeviceTypeAll,
      "default",
      {
        // 512 * 512 * 512 = 134217728
        {"default", {{"XGEMM_MIN_INDIRECT_SIZE", 512 * 512 * 512}}}
      }
    }
  }
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Kernel-selection entry for double-precision complex. It holds a single catch-all record
// (kDeviceTypeAll with "default" names) that sets XGEMM_MIN_INDIRECT_SIZE, the value the GEMM
// routine compares m * n * k against: products below it run the direct kernel, all others the
// indirect one.
const Database::DatabaseEntry Database::KernelSelectionComplexDouble = {
  "KernelSelection",
  Precision::kComplexDouble,
  {
    {
      kDeviceTypeAll,
      "default",
      {
        // 512 * 512 * 512 = 134217728
        {"default", {{"XGEMM_MIN_INDIRECT_SIZE", 512 * 512 * 512}}}
      }
    }
  }
};
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace clblast
|
|
@ -22,7 +22,8 @@ namespace clblast {
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
|
||||
Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm", "XgemmDirect"},
|
||||
Routine(queue, event, name,
|
||||
{"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"},
|
||||
PrecisionValue<T>()) {
|
||||
source_string_ =
|
||||
#include "../../kernels/level3/level3.opencl"
|
||||
|
@ -102,15 +103,15 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
|
|||
status = TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld);
|
||||
if (ErrorIn(status)) { return status; }
|
||||
|
||||
// Optionally runs the direct version of GEMM. TODO: Set this based on the arguments
|
||||
const auto do_gemm_direct = true; // for now, for testing
|
||||
if (do_gemm_direct) {
|
||||
// Selects which version of GEMM to run
|
||||
const auto do_gemm_direct = (m * n * k < db_["XGEMM_MIN_INDIRECT_SIZE"]);
|
||||
if (do_gemm_direct) { // for small sizes (single kernel)
|
||||
return GemmDirect(m, n, k, alpha,
|
||||
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
|
||||
c_buffer, c_offset, c_ld,
|
||||
a_do_transpose, b_do_transpose, c_do_transpose, a_conjugate, b_conjugate);
|
||||
}
|
||||
else {
|
||||
else { // for larger sizes (pre/post-processing plus a very fast kernel)
|
||||
return GemmIndirect(m, n, k, alpha,
|
||||
a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta,
|
||||
c_buffer, c_offset, c_ld,
|
||||
|
|
Loading…
Reference in New Issue