The kernel source string is now a routine's member variable
parent
250f8ab295
commit
4e499a67c1
|
@ -58,11 +58,11 @@ class Routine {
|
|||
static constexpr bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
|
||||
|
||||
// Base class constructor
|
||||
explicit Routine(CommandQueue &queue, Event &event,
|
||||
explicit Routine(CommandQueue &queue, Event &event, const std::string &name,
|
||||
const std::vector<std::string> &routines, const Precision precision);
|
||||
|
||||
// Set-up phase of the kernel
|
||||
StatusCode SetUp(const std::string &routine_source);
|
||||
StatusCode SetUp();
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -107,6 +107,10 @@ class Routine {
|
|||
// a derived class.
|
||||
const Precision precision_;
|
||||
|
||||
// The routine's name and its kernel-source in string form
|
||||
const std::string routine_name_;
|
||||
std::string source_string_;
|
||||
|
||||
// The OpenCL objects, accessible only from derived classes
|
||||
CommandQueue queue_;
|
||||
Event event_;
|
||||
|
|
147
src/clblast.cc
147
src/clblast.cc
|
@ -47,10 +47,8 @@ StatusCode Axpy(const size_t n, const T alpha,
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xaxpy<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string kernel_source =
|
||||
#include "kernels/xaxpy.opencl"
|
||||
auto status = routine.SetUp(kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -91,10 +89,8 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xgemv<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemv.opencl"
|
||||
auto status = routine.SetUp(kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -143,19 +139,8 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xgemm<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -203,19 +188,8 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xsymm<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -263,19 +237,8 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xhemm<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -310,19 +273,8 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xsyrk<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -364,19 +316,8 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xherk<std::complex<T>,T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -409,19 +350,8 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xsyr2k<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -469,19 +399,8 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xher2k<T,U>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -518,19 +437,8 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle,
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xtrmm<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
@ -579,19 +487,8 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle,
|
|||
auto event_cpp = Event(*event);
|
||||
auto routine = Xtrsm<T>(queue_cpp, event_cpp);
|
||||
|
||||
// Loads the kernel source-code as an include (C++11 raw string literal)
|
||||
std::string common_source1 =
|
||||
#include "kernels/copy.opencl"
|
||||
std::string common_source2 =
|
||||
#include "kernels/pad.opencl"
|
||||
std::string common_source3 =
|
||||
#include "kernels/transpose.opencl"
|
||||
std::string common_source4 =
|
||||
#include "kernels/padtranspose.opencl"
|
||||
std::string kernel_source =
|
||||
#include "kernels/xgemm.opencl"
|
||||
auto status = routine.SetUp(common_source1 + common_source2 + common_source3 + common_source4 +
|
||||
kernel_source);
|
||||
// Compiles the routine's device kernels
|
||||
auto status = routine.SetUp();
|
||||
if (status != StatusCode::kSuccess) { return status; }
|
||||
|
||||
// Runs the routine
|
||||
|
|
|
@ -143,6 +143,6 @@ R"(
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -68,6 +68,6 @@ __kernel void CopyMatrix(const int ld,
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -344,6 +344,6 @@ __kernel void TrmmUpperToSquared(const int src_dim,
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -159,6 +159,6 @@ __kernel void UnPadTransposeMatrix(const int src_one, const int src_two,
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -163,6 +163,6 @@ __kernel void TransposeMatrix(const int ld,
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -123,6 +123,6 @@ __kernel void XaxpyFast(const int n, const real alpha,
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -675,6 +675,6 @@ __kernel void XgemmLower(const int kSizeN, const int kSizeK,
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -368,6 +368,6 @@ __kernel void XgemvFastRot(const int m, const int n, const real alpha, const rea
|
|||
// =================================================================================================
|
||||
|
||||
// End of the C++11 raw string literal
|
||||
)";
|
||||
)"
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -22,9 +22,10 @@ namespace clblast {
|
|||
std::vector<Routine::ProgramCache> Routine::program_cache_;
|
||||
|
||||
// Constructor: not much here, because no status codes can be returned
|
||||
Routine::Routine(CommandQueue &queue, Event &event,
|
||||
Routine::Routine(CommandQueue &queue, Event &event, const std::string &name,
|
||||
const std::vector<std::string> &routines, const Precision precision):
|
||||
precision_(precision),
|
||||
routine_name_(name),
|
||||
queue_(queue),
|
||||
event_(event),
|
||||
context_(queue_.GetContext()),
|
||||
|
@ -40,7 +41,7 @@ Routine::Routine(CommandQueue &queue, Event &event,
|
|||
// =================================================================================================
|
||||
|
||||
// Separate set-up function to allow for status codes to be returned
|
||||
StatusCode Routine::SetUp(const std::string &routine_source) {
|
||||
StatusCode Routine::SetUp() {
|
||||
|
||||
// Queries the cache to see whether or not the compiled kernel is already there. If not, it will
|
||||
// be built and added to the cache.
|
||||
|
@ -63,7 +64,8 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
|
|||
|
||||
// Loads the common header (typedefs and defines and such)
|
||||
std::string common_header =
|
||||
#include "kernels/common.opencl"
|
||||
#include "kernels/common.opencl"
|
||||
;
|
||||
|
||||
// Collects the parameters for this device in the form of defines, and adds the precision
|
||||
auto defines = db_.GetDefines();
|
||||
|
@ -76,7 +78,7 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
|
|||
}
|
||||
|
||||
// Combines everything together into a single source string
|
||||
auto source_string = defines + common_header + routine_source;
|
||||
auto source_string = defines + common_header + source_string_;
|
||||
|
||||
// Compiles the kernel
|
||||
try {
|
||||
|
|
|
@ -30,7 +30,10 @@ template <> const Precision Xaxpy<double2>::precision_ = Precision::kComplexDoub
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xaxpy<T>::Xaxpy(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Xaxpy"}, precision_) {
|
||||
Routine(queue, event, "Xaxpy", {"Xaxpy"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/xaxpy.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -30,7 +30,10 @@ template <> const Precision Xgemv<double2>::precision_ = Precision::kComplexDoub
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xgemv<T>::Xgemv(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Xgemv"}, precision_) {
|
||||
Routine(queue, event, "Xgemv", {"Xgemv"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/xgemv.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -30,7 +30,14 @@ template <> const Precision Xgemm<double2>::precision_ = Precision::kComplexDoub
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xgemm<T>::Xgemm(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
|
||||
Routine(queue, event, "Xgemm", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/copy.opencl"
|
||||
#include "../../kernels/pad.opencl"
|
||||
#include "../../kernels/transpose.opencl"
|
||||
#include "../../kernels/padtranspose.opencl"
|
||||
#include "../../kernels/xgemm.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -28,7 +28,14 @@ template <> const Precision Xher2k<double2,double>::precision_ = Precision::kCom
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T, typename U>
|
||||
Xher2k<T,U>::Xher2k(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
|
||||
Routine(queue, event, "Xher2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/copy.opencl"
|
||||
#include "../../kernels/pad.opencl"
|
||||
#include "../../kernels/transpose.opencl"
|
||||
#include "../../kernels/padtranspose.opencl"
|
||||
#include "../../kernels/xgemm.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -28,7 +28,14 @@ template <> const Precision Xherk<double2,double>::precision_ = Precision::kComp
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T, typename U>
|
||||
Xherk<T,U>::Xherk(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
|
||||
Routine(queue, event, "Xherk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/copy.opencl"
|
||||
#include "../../kernels/pad.opencl"
|
||||
#include "../../kernels/transpose.opencl"
|
||||
#include "../../kernels/padtranspose.opencl"
|
||||
#include "../../kernels/xgemm.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -30,7 +30,14 @@ template <> const Precision Xsyr2k<double2>::precision_ = Precision::kComplexDou
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xsyr2k<T>::Xsyr2k(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
|
||||
Routine(queue, event, "Xsyr2k", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/copy.opencl"
|
||||
#include "../../kernels/pad.opencl"
|
||||
#include "../../kernels/transpose.opencl"
|
||||
#include "../../kernels/padtranspose.opencl"
|
||||
#include "../../kernels/xgemm.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -30,7 +30,14 @@ template <> const Precision Xsyrk<double2>::precision_ = Precision::kComplexDoub
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xsyrk<T>::Xsyrk(CommandQueue &queue, Event &event):
|
||||
Routine(queue, event, {"Copy", "Pad", "Transpose", "PadTranspose", "Xgemm"}, precision_) {
|
||||
Routine(queue, event, "Xsyrk", {"Copy","Pad","Transpose","PadTranspose","Xgemm"}, precision_) {
|
||||
source_string_ =
|
||||
#include "../../kernels/copy.opencl"
|
||||
#include "../../kernels/pad.opencl"
|
||||
#include "../../kernels/transpose.opencl"
|
||||
#include "../../kernels/padtranspose.opencl"
|
||||
#include "../../kernels/xgemm.opencl"
|
||||
;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -30,11 +30,10 @@ void CopyTune(const Arguments<T> &args,
|
|||
// This points to the CopyMatrix kernel as found in the CLBlast library. This is just one example
|
||||
// of a copy kernel. However, all copy-kernels use the same tuning parameters, so one has to be
|
||||
// chosen as a representative.
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/copy.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/copy.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, "CopyMatrix", {args.m, args.n}, {1, 1});
|
||||
tuner.SetReferenceFromString(sources, "CopyMatrix", {args.m, args.n}, {8, 8});
|
||||
|
||||
|
|
|
@ -30,11 +30,10 @@ void PadTune(const Arguments<T> &args,
|
|||
// This points to the PadMatrix kernel as found in the CLBlast library. This is just one
|
||||
// example of a pad kernel. However, all pad-kernels use the same tuning parameters, so one has
|
||||
// to be chosen as a representative.
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/pad.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/pad.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, "PadMatrix", {args.m, args.n}, {1, 1});
|
||||
tuner.SetReferenceFromString(sources, "PadMatrix", {args.m, args.n}, {8, 8});
|
||||
|
||||
|
|
|
@ -30,11 +30,10 @@ void PadTransposeTune(const Arguments<T> &args,
|
|||
// This points to the PadTransposeMatrix kernel as found in the CLBlast library. This is just one
|
||||
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
|
||||
// to be chosen as a representative.
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/padtranspose.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/padtranspose.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {1, 1});
|
||||
tuner.SetReferenceFromString(sources, "PadTransposeMatrix", {args.m, args.n}, {8, 8});
|
||||
|
||||
|
|
|
@ -30,11 +30,10 @@ void TransposeTune(const Arguments<T> &args,
|
|||
// This points to the TransposeMatrix kernel as found in the CLBlast library. This is just one
|
||||
// example of a transpose kernel. However, all kernels use the same tuning parameters, so one has
|
||||
// to be chosen as a representative.
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/transpose.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/transpose.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, "TransposeMatrix", {args.m, args.n}, {1, 1});
|
||||
tuner.SetReferenceFromString(sources, "TransposeMatrix", {args.m, args.n}, {8, 8});
|
||||
|
||||
|
|
|
@ -34,11 +34,10 @@ void XaxpyTune(const Arguments<T> &args,
|
|||
}
|
||||
|
||||
// This points to the XaxpyFast kernel as found in the CLBlast library
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/xaxpy.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/xaxpy.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, "XaxpyFast", {args.n}, {1});
|
||||
tuner.SetReferenceFromString(sources, "XaxpyFast", {args.n}, {64});
|
||||
|
||||
|
|
|
@ -30,11 +30,10 @@ void XgemmTune(const Arguments<T> &args,
|
|||
cltune::Tuner &tuner) {
|
||||
|
||||
// This points to the Xgemm kernel as found in the CLBlast library and its golden reference
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/xgemm.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/xgemm.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, "Xgemm", {args.m, args.n}, {1, 1});
|
||||
tuner.SetReferenceFromString(sources, "Xgemm", {args.m, args.n}, {8, 8});
|
||||
|
||||
|
|
|
@ -36,11 +36,10 @@ void XgemvTune(const Arguments<T> &args, const size_t variation,
|
|||
auto a_rotated = (variation == 3) ? 1 : 0;
|
||||
|
||||
// This points to the Xgemv kernel as found in the CLBlast library
|
||||
std::string common_source =
|
||||
#include "../src/kernels/common.opencl"
|
||||
std::string kernel_source =
|
||||
#include "../src/kernels/xgemv.opencl"
|
||||
auto sources = common_source + kernel_source;
|
||||
std::string sources =
|
||||
#include "../src/kernels/common.opencl"
|
||||
#include "../src/kernels/xgemv.opencl"
|
||||
;
|
||||
auto id = tuner.AddKernelFromString(sources, kernel_name, {args.m}, {1});
|
||||
tuner.SetReferenceFromString(sources, "Xgemv", {args.m}, {64});
|
||||
|
||||
|
|
Loading…
Reference in New Issue