mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-04 21:36:57 +02:00
Added extra temporary buffer to tuners in preparation of Xdot routines
This commit is contained in:
parent
e0c5312abb
commit
2a383f3450
|
@ -64,11 +64,13 @@ void Tuner(int argc, char* argv[]) {
|
|||
auto a_mat = std::vector<T>(C::GetSizeA(args));
|
||||
auto b_mat = std::vector<T>(C::GetSizeB(args));
|
||||
auto c_mat = std::vector<T>(C::GetSizeC(args));
|
||||
auto temp = std::vector<T>(C::GetSizeTemp(args));
|
||||
PopulateVector(x_vec);
|
||||
PopulateVector(y_vec);
|
||||
PopulateVector(a_mat);
|
||||
PopulateVector(b_mat);
|
||||
PopulateVector(c_mat);
|
||||
PopulateVector(temp);
|
||||
|
||||
// Initializes the tuner for the chosen device
|
||||
cltune::Tuner tuner(args.platform_id, args.device_id);
|
||||
|
@ -85,7 +87,7 @@ void Tuner(int argc, char* argv[]) {
|
|||
// Loads the kernel sources and defines the kernel to tune
|
||||
auto sources = C::GetSources();
|
||||
auto id = tuner.AddKernelFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSize());
|
||||
tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSizeRef());
|
||||
tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSizeRef(args), C::LocalSizeRef());
|
||||
|
||||
// Sets the tunable parameters and their possible values
|
||||
C::SetParameters(tuner, id);
|
||||
|
@ -103,7 +105,7 @@ void Tuner(int argc, char* argv[]) {
|
|||
for (auto &parameters: C::DivGlobal()) { tuner.DivGlobalSize(id, parameters); }
|
||||
|
||||
// Sets the function's arguments
|
||||
C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, c_mat);
|
||||
C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, c_mat, temp);
|
||||
|
||||
// Starts the tuning process
|
||||
tuner.Tune();
|
||||
|
|
|
@ -53,6 +53,7 @@ class TuneCopy {
|
|||
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -68,6 +69,7 @@ class TuneCopy {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1, 1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
|
||||
|
||||
|
@ -81,7 +83,8 @@ class TuneCopy {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
|
||||
std::vector<T> &) {
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
tuner.AddArgumentInput(a_mat);
|
||||
tuner.AddArgumentOutput(b_mat);
|
||||
|
|
|
@ -53,6 +53,7 @@ class TunePad {
|
|||
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -68,6 +69,7 @@ class TunePad {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1, 1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
|
||||
|
||||
|
@ -81,7 +83,8 @@ class TunePad {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
|
||||
std::vector<T> &) {
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.n));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
|
|
|
@ -53,6 +53,7 @@ class TunePadTranspose {
|
|||
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -72,6 +73,7 @@ class TunePadTranspose {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1, 1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
|
||||
|
||||
|
@ -85,7 +87,8 @@ class TunePadTranspose {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
|
||||
std::vector<T> &) {
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.n));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
|
|
|
@ -53,6 +53,7 @@ class TuneTranspose {
|
|||
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeB(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -73,6 +74,7 @@ class TuneTranspose {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1, 1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
|
||||
|
||||
|
@ -86,7 +88,8 @@ class TuneTranspose {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &) {
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &,
|
||||
std::vector<T> &) {
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
tuner.AddArgumentInput(a_mat);
|
||||
tuner.AddArgumentOutput(b_mat);
|
||||
|
|
|
@ -53,11 +53,12 @@ class TuneXaxpy {
|
|||
static double DefaultFraction() { return 1.0; } // N/A for this kernel
|
||||
|
||||
// Describes how to obtain the sizes of the buffers
|
||||
static size_t GetSizeX(const Arguments<T> &args) { return args.n; } // N/A for this kernel
|
||||
static size_t GetSizeY(const Arguments<T> &args) { return args.n; } // N/A for this kernel
|
||||
static size_t GetSizeX(const Arguments<T> &args) { return args.n; }
|
||||
static size_t GetSizeY(const Arguments<T> &args) { return args.n; }
|
||||
static size_t GetSizeA(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeB(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -72,6 +73,7 @@ class TuneXaxpy {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.n}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {64}; }
|
||||
|
||||
|
@ -85,7 +87,8 @@ class TuneXaxpy {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &x_vec, std::vector<T> &y_vec,
|
||||
std::vector<T> &, std::vector<T> &, std::vector<T> &) {
|
||||
std::vector<T> &, std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &) {
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.n));
|
||||
tuner.AddArgumentScalar(args.alpha);
|
||||
tuner.AddArgumentInput(x_vec);
|
||||
|
|
|
@ -55,6 +55,7 @@ class TuneXgemm {
|
|||
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.k; }
|
||||
static size_t GetSizeB(const Arguments<T> &args) { return args.n * args.k; }
|
||||
static size_t GetSizeC(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -103,6 +104,7 @@ class TuneXgemm {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m, args.n}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1, 1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {8, 8}; }
|
||||
|
||||
|
@ -116,7 +118,8 @@ class TuneXgemm {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat) {
|
||||
std::vector<T> &a_mat, std::vector<T> &b_mat, std::vector<T> &c_mat,
|
||||
std::vector<T> &) {
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.n));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.k));
|
||||
|
|
|
@ -56,6 +56,7 @@ class TuneXgemv {
|
|||
static size_t GetSizeA(const Arguments<T> &args) { return args.m * args.n; }
|
||||
static size_t GetSizeB(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeC(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
static size_t GetSizeTemp(const Arguments<T> &) { return 1; } // N/A for this kernel
|
||||
|
||||
// Sets the tuning parameters and their possible values
|
||||
static void SetParameters(cltune::Tuner &tuner, const size_t id) {
|
||||
|
@ -75,6 +76,7 @@ class TuneXgemv {
|
|||
|
||||
// Sets the base thread configuration
|
||||
static std::vector<size_t> GlobalSize(const Arguments<T> &args) { return {args.m}; }
|
||||
static std::vector<size_t> GlobalSizeRef(const Arguments<T> &args) { return GlobalSize(args); }
|
||||
static std::vector<size_t> LocalSize() { return {1}; }
|
||||
static std::vector<size_t> LocalSizeRef() { return {64}; }
|
||||
|
||||
|
@ -88,7 +90,8 @@ class TuneXgemv {
|
|||
// Sets the kernel's arguments
|
||||
static void SetArguments(cltune::Tuner &tuner, const Arguments<T> &args,
|
||||
std::vector<T> &x_vec, std::vector<T> &y_vec,
|
||||
std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &) {
|
||||
std::vector<T> &a_mat, std::vector<T> &, std::vector<T> &,
|
||||
std::vector<T> &) {
|
||||
auto a_rotated = (V==3) ? 1 : 0;
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.m));
|
||||
tuner.AddArgumentScalar(static_cast<int>(args.n));
|
||||
|
|
Loading…
Reference in a new issue