The kernel source string is now a routine's member variable

This commit is contained in:
CNugteren 2015-07-19 13:44:37 +02:00
parent 250f8ab295
commit 4e499a67c1
25 changed files with 118 additions and 181 deletions

View file

@ -58,11 +58,11 @@ class Routine {
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); } static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
// Base class constructor // Base class constructor
explicit Routine(CommandQueue &queue, Event &event, explicit Routine(CommandQueue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision); const std::vector<std::string> &routines, const Precision precision);
// Set-up phase of the kernel // Set-up phase of the kernel
StatusCode SetUp(const std::string &routine_source); StatusCode SetUp();
protected: protected:
@ -107,6 +107,10 @@ class Routine {
// a derived class. // a derived class.
const Precision precision_; const Precision precision_;
// The routine's name and its kernel-source in string form
const std::string routine_name_;
std::string source_string_;
// The OpenCL objects, accessible only from derived classes // The OpenCL objects, accessible only from derived classes
CommandQueue queue_; CommandQueue queue_;
Event event_; Event event_;

View file

@ -47,10 +47,8 @@ StatusCode Axpy(const size_t n, const T alpha,
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xaxpy<T>(queue_cpp, event_cpp); auto routine = Xaxpy<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string kernel_source = auto status = routine.SetUp();
#include "kernels/xaxpy.opencl"
auto status = routine.SetUp(kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -91,10 +89,8 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xgemv<T>(queue_cpp, event_cpp); auto routine = Xgemv<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string kernel_source = auto status = routine.SetUp();
#include "kernels/xgemv.opencl"
auto status = routine.SetUp(kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -143,19 +139,8 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xgemm<T>(queue_cpp, event_cpp); auto routine = Xgemm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -203,19 +188,8 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xsymm<T>(queue_cpp, event_cpp); auto routine = Xsymm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -263,19 +237,8 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xhemm<T>(queue_cpp, event_cpp); auto routine = Xhemm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -310,19 +273,8 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xsyrk<T>(queue_cpp, event_cpp); auto routine = Xsyrk<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -364,19 +316,8 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp); auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -409,19 +350,8 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xsyr2k<T>(queue_cpp, event_cpp); auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -469,19 +399,8 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xher2k<T,U>(queue_cpp, event_cpp); auto routine = Xher2k<T,U>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -518,19 +437,8 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xtrmm<T>(queue_cpp, event_cpp); auto routine = Xtrmm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine
@ -579,19 +487,8 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event); auto event_cpp = Event(*event);
auto routine = Xtrsm<T>(queue_cpp, event_cpp); auto routine = Xtrsm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal) // Compiles the routine's device kernels
std::string common_source1 = auto status = routine.SetUp();
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
if (status != StatusCode::kSuccess) { return status; } if (status != StatusCode::kSuccess) { return status; }
// Runs the routine // Runs the routine

View file

@ -143,6 +143,6 @@ R"(
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -68,6 +68,6 @@ __kernel void CopyMatrix(const int ld,
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -344,6 +344,6 @@ __kernel void TrmmUpperToSquared(const int src_dim,
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -159,6 +159,6 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -163,6 +163,6 @@ __kernel void TransposeMatrix(const int ld,
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -123,6 +123,6 @@ __kernel void XaxpyFast(const int n, const real alpha,
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -675,6 +675,6 @@ __kernel void XgemmLower(const int kSizeN, const int kSizeK,
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -368,6 +368,6 @@ __kernel void XgemvFastRot(const int m, const int n, const real alpha, const rea
// ================================================================================================= // =================================================================================================
// End of the C++11 raw string literal // End of the C++11 raw string literal
)"; )"
// ================================================================================================= // =================================================================================================

View file

@ -22,9 +22,10 @@ namespace clblast {
std::vector<Routine::ProgramCache> Routine::program_cache_; std::vector<Routine::ProgramCache> Routine::program_cache_;
// Constructor: not much here, because no status codes can be returned // Constructor: not much here, because no status codes can be returned
Routine::Routine(CommandQueue &queue, Event &event, Routine::Routine(CommandQueue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision): const std::vector<std::string> &routines, const Precision precision):
precision_(precision), precision_(precision),
routine_name_(name),
queue_(queue), queue_(queue),
event_(event), event_(event),
context_(queue_.GetContext()), context_(queue_.GetContext()),
@ -40,7 +41,7 @@ Routine::Routine(CommandQueue &queue, Event &event,
// ================================================================================================= // =================================================================================================
// Separate set-up function to allow for status codes to be returned // Separate set-up function to allow for status codes to be returned
StatusCode Routine::SetUp(const std::string &routine_source) { StatusCode Routine::SetUp() {
// Queries the cache to see whether or not the compiled kernel is already there. If not, it will // Queries the cache to see whether or not the compiled kernel is already there. If not, it will
// be built and added to the cache. // be built and added to the cache.
@ -63,7 +64,8 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
// Loads the common header (typedefs and defines and such) // Loads the common header (typedefs and defines and such)
std::string common_header = std::string common_header =
#include "kernels/common.opencl" #include "kernels/common.opencl"
;
// Collects the parameters for this device in the form of defines, and adds the precision // Collects the parameters for this device in the form of defines, and adds the precision
auto defines = db_.GetDefines(); auto defines = db_.GetDefines();
@ -76,7 +78,7 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
} }
// Combines everything together into a single source string // Combines everything together into a single source string
auto source_string = defines + common_header + routine_source; auto source_string = defines + common_header + source_string_;
// Compiles the kernel // Compiles the kernel
try { try {

View file

@ -30,7 +30,10 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T> template <typename T>
Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event): Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event):
Routine(queue, event, {"Xaxpy"}, precision_) { Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/xaxpy.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -30,7 +30,10 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T> template <typename T>
Xgemv<T>::Xgemv(CommandQueue &queue, Event &event): Xgemv<T>::Xgemv(CommandQueue &queue, Event &event):
Routine(queue, event, {"Xgemv"}, precision_) { Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) {
source_string_ =
#include "../../kernels/xgemv.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -30,7 +30,14 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T> template <typename T>
Xgemm<T>::Xgemm(CommandQueue &queue, Event &event): Xgemm<T>::Xgemm(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -28,7 +28,14 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T, typename U> template <typename T, typename U>
Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event): Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -28,7 +28,14 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T, typename U> template <typename T, typename U>
Xherk<T,U>::Xherk(CommandQueue &queue, Event &event): Xherk<T,U>::Xherk(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -30,7 +30,14 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T> template <typename T>
Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event): Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -30,7 +30,14 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor // Constructor: forwards to base class constructor
template <typename T> template <typename T>
Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event): Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) { Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
} }
// ================================================================================================= // =================================================================================================

View file

@ -30,11 +30,10 @@ void CopyTune(const Arguments<T> &args,
// This points to the CopyMatrix kernel as found in the CLBlast library. This is just one example // This points to the CopyMatrix kernel as found in the CLBlast library. This is just one example
// of a copy kernel. However, all copy-kernels use the same tuning parameters, so one has to be // of a copy kernel. However, all copy-kernels use the same tuning parameters, so one has to be
// chosen as a representative. // chosen as a representative.
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/copy.opencl"
#include "../src/kernels/copy.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, "CopyMatrix", {args.m, args.n}, {1, 1}); auto id = tuner.AddKernelFromString(sources, "CopyMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "CopyMatrix", {args.m, args.n}, {8, 8}); tuner.SetReferenceFromString(sources, "CopyMatrix", {args.m, args.n}, {8, 8});

View file

@ -30,11 +30,10 @@ void PadTune(const Arguments<T> &args,
// This points to the PadMatrix kernel as found in the CLBlast library. This is just one // This points to the PadMatrix kernel as found in the CLBlast library. This is just one
// example of a pad kernel. However, all pad-kernels use the same tuning parameters, so one has // example of a pad kernel. However, all pad-kernels use the same tuning parameters, so one has
// to be chosen as a representative. // to be chosen as a representative.
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/pad.opencl"
#include "../src/kernels/pad.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, "PadMatrix", {args.m, args.n}, {1, 1}); auto id = tuner.AddKernelFromString(sources, "PadMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "PadMatrix", {args.m, args.n}, {8, 8}); tuner.SetReferenceFromString(sources, "PadMatrix", {args.m, args.n}, {8, 8});

View file

@ -30,11 +30,10 @@ void PadTransposeTune(const Arguments<T> &args,
// This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one // This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has // example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
// to be chosen as a representative. // to be chosen as a representative.
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/padtranspose.opencl"
#include "../src/kernels/padtranspose.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {1, 1}); auto id = tuner.AddKernelFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {8, 8}); tuner.SetReferenceFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {8, 8});

View file

@ -30,11 +30,10 @@ void TransposeTune(const Arguments<T> &args,
// This points to the TransposeMatrix kernel as found in the CLBlast library. This is just one // This points to the TransposeMatrix kernel as found in the CLBlast library. This is just one
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has // example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
// to be chosen as a representative. // to be chosen as a representative.
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/transpose.opencl"
#include "../src/kernels/transpose.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, "TransposeMatrix", {args.m, args.n}, {1, 1}); auto id = tuner.AddKernelFromString(sources, "TransposeMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "TransposeMatrix", {args.m, args.n}, {8, 8}); tuner.SetReferenceFromString(sources, "TransposeMatrix", {args.m, args.n}, {8, 8});

View file

@ -34,11 +34,10 @@ void XaxpyTune(const Arguments<T> &args,
} }
// This points to the XaxpyFast kernel as found in the CLBlast library // This points to the XaxpyFast kernel as found in the CLBlast library
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/xaxpy.opencl"
#include "../src/kernels/xaxpy.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, "XaxpyFast", {args.n}, {1}); auto id = tuner.AddKernelFromString(sources, "XaxpyFast", {args.n}, {1});
tuner.SetReferenceFromString(sources, "XaxpyFast", {args.n}, {64}); tuner.SetReferenceFromString(sources, "XaxpyFast", {args.n}, {64});

View file

@ -30,11 +30,10 @@ void XgemmTune(const Arguments<T> &args,
cltune::Tuner &tuner) { cltune::Tuner &tuner) {
// This points to the Xgemm kernel as found in the CLBlast library and its golden reference // This points to the Xgemm kernel as found in the CLBlast library and its golden reference
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/xgemm.opencl"
#include "../src/kernels/xgemm.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, "Xgemm", {args.m, args.n}, {1, 1}); auto id = tuner.AddKernelFromString(sources, "Xgemm", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "Xgemm", {args.m, args.n}, {8, 8}); tuner.SetReferenceFromString(sources, "Xgemm", {args.m, args.n}, {8, 8});

View file

@ -36,11 +36,10 @@ void XgemvTune(const Arguments<T> &args, const size_t variation,
auto a_rotated = (variation == 3) ? 1 : 0; auto a_rotated = (variation == 3) ? 1 : 0;
// This points to the Xgemv kernel as found in the CLBlast library // This points to the Xgemv kernel as found in the CLBlast library
std::string common_source = std::string sources =
#include "../src/kernels/common.opencl" #include "../src/kernels/common.opencl"
std::string kernel_source = #include "../src/kernels/xgemv.opencl"
#include "../src/kernels/xgemv.opencl" ;
auto sources = common_source + kernel_source;
auto id = tuner.AddKernelFromString(sources, kernel_name, {args.m}, {1}); auto id = tuner.AddKernelFromString(sources, kernel_name, {args.m}, {1});
tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64}); tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64});