From 2a383f34501b386b8e6c4beb56c6ac694622f060 Mon Sep 17 00:00:00 2001 From: CNugteren Date: Mon, 14 Sep 2015 15:53:34 +0200 Subject: [PATCH] Added extra temporary buffer to tuners in preparation of Xdot routines --- include/internal/tuning.h | 6 ++++-- src/tuning/copy.cc | 5 ++++- src/tuning/pad.cc | 5 ++++- src/tuning/padtranspose.cc | 5 ++++- src/tuning/transpose.cc | 5 ++++- src/tuning/xaxpy.cc | 9 ++++++--- src/tuning/xgemm.cc | 5 ++++- src/tuning/xgemv.cc | 5 ++++- 8 files changed, 34 insertions(+), 11 deletions(-) diff --git a/include/internal/tuning.h b/include/internal/tuning.h index f029c704..6ea530ba 100644 --- a/include/internal/tuning.h +++ b/include/internal/tuning.h @@ -64,11 +64,13 @@ void Tuner(int argc, char* argv[]) { auto a_mat = std::vector(C::GetSizeA(args)); auto b_mat = std::vector(C::GetSizeB(args)); auto c_mat = std::vector(C::GetSizeC(args)); + auto temp = std::vector(C::GetSizeTemp(args)); PopulateVector(x_vec); PopulateVector(y_vec); PopulateVector(a_mat); PopulateVector(b_mat); PopulateVector(c_mat); + PopulateVector(temp); // Initializes the tuner for the chosen device cltune::Tuner tuner(args.platform_id, args.device_id); @@ -85,7 +87,7 @@ void Tuner(int argc, char* argv[]) { // Loads the kernel sources and defines the kernel to tune auto sources = C::GetSources(); auto id = tuner.AddKernelFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSize()); - tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSizeRef()); + tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSizeRef(args), C::LocalSizeRef()); // Sets the tunable parameters and their possible values C::SetParameters(tuner, id); @@ -103,7 +105,7 @@ void Tuner(int argc, char* argv[]) { for (auto &parameters: C::DivGlobal()) { tuner.DivGlobalSize(id, parameters); } // Sets the function's arguments - C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, c_mat); + C::SetArguments(tuner, args, x_vec, y_vec, a_mat, b_mat, 
c_mat, temp); // Starts the tuning process tuner.Tune(); diff --git a/src/tuning/copy.cc b/src/tuning/copy.cc index f38a28f3..23828b25 100644 --- a/src/tuning/copy.cc +++ b/src/tuning/copy.cc @@ -53,6 +53,7 @@ class TuneCopy { static size_t GetSizeA(const Arguments &args) { return args.m * args.n; } static size_t GetSizeB(const Arguments &args) { return args.m * args.n; } static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { @@ -68,6 +69,7 @@ class TuneCopy { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1, 1}; } static std::vector LocalSizeRef() { return {8, 8}; } @@ -81,7 +83,8 @@ class TuneCopy { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &, std::vector &, - std::vector &a_mat, std::vector &b_mat, std::vector &) { + std::vector &a_mat, std::vector &b_mat, std::vector &, + std::vector &) { tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentOutput(b_mat); diff --git a/src/tuning/pad.cc b/src/tuning/pad.cc index 2ce566fb..6a826b6b 100644 --- a/src/tuning/pad.cc +++ b/src/tuning/pad.cc @@ -53,6 +53,7 @@ class TunePad { static size_t GetSizeA(const Arguments &args) { return args.m * args.n; } static size_t GetSizeB(const Arguments &args) { return args.m * args.n; } static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, 
const size_t id) { @@ -68,6 +69,7 @@ class TunePad { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1, 1}; } static std::vector LocalSizeRef() { return {8, 8}; } @@ -81,7 +83,8 @@ class TunePad { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &, std::vector &, - std::vector &a_mat, std::vector &b_mat, std::vector &) { + std::vector &a_mat, std::vector &b_mat, std::vector &, + std::vector &) { tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(static_cast(args.m)); diff --git a/src/tuning/padtranspose.cc b/src/tuning/padtranspose.cc index 8d494745..3f233809 100644 --- a/src/tuning/padtranspose.cc +++ b/src/tuning/padtranspose.cc @@ -53,6 +53,7 @@ class TunePadTranspose { static size_t GetSizeA(const Arguments &args) { return args.m * args.n; } static size_t GetSizeB(const Arguments &args) { return args.m * args.n; } static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { @@ -72,6 +73,7 @@ class TunePadTranspose { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1, 1}; } static std::vector LocalSizeRef() { return {8, 8}; } @@ -85,7 +87,8 @@ class TunePadTranspose { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &, std::vector &, - std::vector &a_mat, 
std::vector &b_mat, std::vector &) { + std::vector &a_mat, std::vector &b_mat, std::vector &, + std::vector &) { tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(static_cast(args.m)); diff --git a/src/tuning/transpose.cc b/src/tuning/transpose.cc index 2ffdb7aa..3998ba66 100644 --- a/src/tuning/transpose.cc +++ b/src/tuning/transpose.cc @@ -53,6 +53,7 @@ class TuneTranspose { static size_t GetSizeA(const Arguments &args) { return args.m * args.n; } static size_t GetSizeB(const Arguments &args) { return args.m * args.n; } static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { @@ -73,6 +74,7 @@ class TuneTranspose { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1, 1}; } static std::vector LocalSizeRef() { return {8, 8}; } @@ -86,7 +88,8 @@ class TuneTranspose { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &, std::vector &, - std::vector &a_mat, std::vector &b_mat, std::vector &) { + std::vector &a_mat, std::vector &b_mat, std::vector &, + std::vector &) { tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentOutput(b_mat); diff --git a/src/tuning/xaxpy.cc b/src/tuning/xaxpy.cc index 7715b128..31aa6a8e 100644 --- a/src/tuning/xaxpy.cc +++ b/src/tuning/xaxpy.cc @@ -53,11 +53,12 @@ class TuneXaxpy { static double DefaultFraction() { return 1.0; } // N/A for this kernel // Describes how to obtain the sizes of the buffers - static size_t GetSizeX(const Arguments &args) { 
return args.n; } // N/A for this kernel - static size_t GetSizeY(const Arguments &args) { return args.n; } // N/A for this kernel + static size_t GetSizeX(const Arguments &args) { return args.n; } + static size_t GetSizeY(const Arguments &args) { return args.n; } static size_t GetSizeA(const Arguments &) { return 1; } // N/A for this kernel static size_t GetSizeB(const Arguments &) { return 1; } // N/A for this kernel static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { @@ -72,6 +73,7 @@ class TuneXaxpy { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1}; } static std::vector LocalSizeRef() { return {64}; } @@ -85,7 +87,8 @@ class TuneXaxpy { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &x_vec, std::vector &y_vec, - std::vector &, std::vector &, std::vector &) { + std::vector &, std::vector &, std::vector &, + std::vector &) { tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(args.alpha); tuner.AddArgumentInput(x_vec); diff --git a/src/tuning/xgemm.cc b/src/tuning/xgemm.cc index 302f2bd5..e820cfb0 100644 --- a/src/tuning/xgemm.cc +++ b/src/tuning/xgemm.cc @@ -55,6 +55,7 @@ class TuneXgemm { static size_t GetSizeA(const Arguments &args) { return args.m * args.k; } static size_t GetSizeB(const Arguments &args) { return args.n * args.k; } static size_t GetSizeC(const Arguments &args) { return args.m * args.n; } + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void 
SetParameters(cltune::Tuner &tuner, const size_t id) { @@ -103,6 +104,7 @@ class TuneXgemm { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m, args.n}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1, 1}; } static std::vector LocalSizeRef() { return {8, 8}; } @@ -116,7 +118,8 @@ class TuneXgemm { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &, std::vector &, - std::vector &a_mat, std::vector &b_mat, std::vector &c_mat) { + std::vector &a_mat, std::vector &b_mat, std::vector &c_mat, + std::vector &) { tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(static_cast(args.k)); diff --git a/src/tuning/xgemv.cc b/src/tuning/xgemv.cc index e22b5103..3d6fe595 100644 --- a/src/tuning/xgemv.cc +++ b/src/tuning/xgemv.cc @@ -56,6 +56,7 @@ class TuneXgemv { static size_t GetSizeA(const Arguments &args) { return args.m * args.n; } static size_t GetSizeB(const Arguments &) { return 1; } // N/A for this kernel static size_t GetSizeC(const Arguments &) { return 1; } // N/A for this kernel + static size_t GetSizeTemp(const Arguments &) { return 1; } // N/A for this kernel // Sets the tuning parameters and their possible values static void SetParameters(cltune::Tuner &tuner, const size_t id) { @@ -75,6 +76,7 @@ class TuneXgemv { // Sets the base thread configuration static std::vector GlobalSize(const Arguments &args) { return {args.m}; } + static std::vector GlobalSizeRef(const Arguments &args) { return GlobalSize(args); } static std::vector LocalSize() { return {1}; } static std::vector LocalSizeRef() { return {64}; } @@ -88,7 +90,8 @@ class TuneXgemv { // Sets the kernel's arguments static void SetArguments(cltune::Tuner &tuner, const Arguments &args, std::vector &x_vec, std::vector &y_vec, - std::vector 
&a_mat, std::vector &, std::vector &) { + std::vector &a_mat, std::vector &, std::vector &, + std::vector &) { auto a_rotated = (V==3) ? 1 : 0; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n));