The kernel source string is now a routine's member variable

pull/14/head
CNugteren 2015-07-19 13:44:37 +02:00
parent 250f8ab295
commit 4e499a67c1
25 changed files with 118 additions and 181 deletions

View File

@ -58,11 +58,11 @@ class Routine {
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
// Base class constructor
explicit Routine(CommandQueue &queue, Event &event,
explicit Routine(CommandQueue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision);
// Set-up phase of the kernel
StatusCode SetUp(const std::string &routine_source);
StatusCode SetUp();
protected:
@ -107,6 +107,10 @@ class Routine {
// a derived class.
const Precision precision_;
// The routine's name and its kernel-source in string form
const std::string routine_name_;
std::string source_string_;
// The OpenCL objects, accessible only from derived classes
CommandQueue queue_;
Event event_;

View File

@ -47,10 +47,8 @@ StatusCode Axpy(const size_t n, const T alpha,
auto event_cpp = Event(*event);
auto routine = Xaxpy<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string kernel_source =
#include "kernels/xaxpy.opencl"
auto status = routine.SetUp(kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -91,10 +89,8 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
auto event_cpp = Event(*event);
auto routine = Xgemv<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string kernel_source =
#include "kernels/xgemv.opencl"
auto status = routine.SetUp(kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -143,19 +139,8 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
auto event_cpp = Event(*event);
auto routine = Xgemm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -203,19 +188,8 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xsymm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -263,19 +237,8 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xhemm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -310,19 +273,8 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
auto event_cpp = Event(*event);
auto routine = Xsyrk<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -364,19 +316,8 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
auto event_cpp = Event(*event);
auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -409,19 +350,8 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
auto event_cpp = Event(*event);
auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -469,19 +399,8 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
auto event_cpp = Event(*event);
auto routine = Xher2k<T,U>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -518,19 +437,8 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xtrmm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine
@ -579,19 +487,8 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle,
auto event_cpp = Event(*event);
auto routine = Xtrsm<T>(queue_cpp, event_cpp);
// Loads the kernel source-code as an include (C++11 raw string literal)
std::string common_source1 =
#include "kernels/copy.opencl"
std::string common_source2 =
#include "kernels/pad.opencl"
std::string common_source3 =
#include "kernels/transpose.opencl"
std::string common_source4 =
#include "kernels/padtranspose.opencl"
std::string kernel_source =
#include "kernels/xgemm.opencl"
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
kernel_source);
// Compiles the routine's device kernels
auto status = routine.SetUp();
if (status != StatusCode::kSuccess) { return status; }
// Runs the routine

View File

@ -143,6 +143,6 @@ R"(
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -68,6 +68,6 @@ __kernel void CopyMatrix(const int ld,
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -344,6 +344,6 @@ __kernel void TrmmUpperToSquared(const int src_dim,
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -159,6 +159,6 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -163,6 +163,6 @@ __kernel void TransposeMatrix(const int ld,
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -123,6 +123,6 @@ __kernel void XaxpyFast(const int n, const real alpha,
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -675,6 +675,6 @@ __kernel void XgemmLower(const int kSizeN, const int kSizeK,
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -368,6 +368,6 @@ __kernel void XgemvFastRot(const int m, const int n, const real alpha, const rea
// =================================================================================================
// End of the C++11 raw string literal
)";
)"
// =================================================================================================

View File

@ -22,9 +22,10 @@ namespace clblast {
std::vector<Routine::ProgramCache> Routine::program_cache_;
// Constructor: not much here, because no status codes can be returned
Routine::Routine(CommandQueue &queue, Event &event,
Routine::Routine(CommandQueue &queue, Event &event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision):
precision_(precision),
routine_name_(name),
queue_(queue),
event_(event),
context_(queue_.GetContext()),
@ -40,7 +41,7 @@ Routine::Routine(CommandQueue &queue, Event &event,
// =================================================================================================
// Separate set-up function to allow for status codes to be returned
StatusCode Routine::SetUp(const std::string &routine_source) {
StatusCode Routine::SetUp() {
// Queries the cache to see whether or not the compiled kernel is already there. If not, it will
// be built and added to the cache.
@ -63,7 +64,8 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
// Loads the common header (typedefs and defines and such)
std::string common_header =
#include "kernels/common.opencl"
#include "kernels/common.opencl"
;
// Collects the parameters for this device in the form of defines, and adds the precision
auto defines = db_.GetDefines();
@ -76,7 +78,7 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
}
// Combines everything together into a single source string
auto source_string = defines + common_header + routine_source;
auto source_string = defines + common_header + source_string_;
// Compiles the kernel
try {

View File

@ -30,7 +30,10 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event):
Routine(queue, event, {"Xaxpy"}, precision_) {
Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) {
source_string_ =
#include "../../kernels/xaxpy.opencl"
;
}
// =================================================================================================

View File

@ -30,7 +30,10 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(CommandQueue &queue, Event &event):
Routine(queue, event, {"Xgemv"}, precision_) {
Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) {
source_string_ =
#include "../../kernels/xgemv.opencl"
;
}
// =================================================================================================

View File

@ -30,7 +30,14 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xgemm<T>::Xgemm(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
}
// =================================================================================================

View File

@ -28,7 +28,14 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
}
// =================================================================================================

View File

@ -28,7 +28,14 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
}
// =================================================================================================

View File

@ -30,7 +30,14 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
}
// =================================================================================================

View File

@ -30,7 +30,14 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event):
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
source_string_ =
#include "../../kernels/copy.opencl"
#include "../../kernels/pad.opencl"
#include "../../kernels/transpose.opencl"
#include "../../kernels/padtranspose.opencl"
#include "../../kernels/xgemm.opencl"
;
}
// =================================================================================================

View File

@ -30,11 +30,10 @@ void CopyTune(const Arguments<T> &args,
// This points to the CopyMatrix kernel as found in the CLBlast library. This is just one example
// of a copy kernel. However, all copy-kernels use the same tuning parameters, so one has to be
// chosen as a representative.
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/copy.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/copy.opencl"
;
auto id = tuner.AddKernelFromString(sources, "CopyMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "CopyMatrix", {args.m, args.n}, {8, 8});

View File

@ -30,11 +30,10 @@ void PadTune(const Arguments<T> &args,
// This points to the PadMatrix kernel as found in the CLBlast library. This is just one
// example of a pad kernel. However, all pad-kernels use the same tuning parameters, so one has
// to be chosen as a representative.
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/pad.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/pad.opencl"
;
auto id = tuner.AddKernelFromString(sources, "PadMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "PadMatrix", {args.m, args.n}, {8, 8});

View File

@ -30,11 +30,10 @@ void PadTransposeTune(const Arguments<T> &args,
// This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
// to be chosen as a representative.
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/padtranspose.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/padtranspose.opencl"
;
auto id = tuner.AddKernelFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {8, 8});

View File

@ -30,11 +30,10 @@ void TransposeTune(const Arguments<T> &args,
// This points to the TransposeMatrix kernel as found in the CLBlast library. This is just one
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
// to be chosen as a representative.
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/transpose.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/transpose.opencl"
;
auto id = tuner.AddKernelFromString(sources, "TransposeMatrix", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "TransposeMatrix", {args.m, args.n}, {8, 8});

View File

@ -34,11 +34,10 @@ void XaxpyTune(const Arguments<T> &args,
}
// This points to the XaxpyFast kernel as found in the CLBlast library
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/xaxpy.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/xaxpy.opencl"
;
auto id = tuner.AddKernelFromString(sources, "XaxpyFast", {args.n}, {1});
tuner.SetReferenceFromString(sources, "XaxpyFast", {args.n}, {64});

View File

@ -30,11 +30,10 @@ void XgemmTune(const Arguments<T> &args,
cltune::Tuner &tuner) {
// This points to the Xgemm kernel as found in the CLBlast library and its golden reference
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/xgemm.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/xgemm.opencl"
;
auto id = tuner.AddKernelFromString(sources, "Xgemm", {args.m, args.n}, {1, 1});
tuner.SetReferenceFromString(sources, "Xgemm", {args.m, args.n}, {8, 8});

View File

@ -36,11 +36,10 @@ void XgemvTune(const Arguments<T> &args, const size_t variation,
auto a_rotated = (variation == 3) ? 1 : 0;
// This points to the Xgemv kernel as found in the CLBlast library
std::string common_source =
#include "../src/kernels/common.opencl"
std::string kernel_source =
#include "../src/kernels/xgemv.opencl"
auto sources = common_source + kernel_source;
std::string sources =
#include "../src/kernels/common.opencl"
#include "../src/kernels/xgemv.opencl"
;
auto id = tuner.AddKernelFromString(sources, kernel_name, {args.m}, {1});
tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64});