The program cache now stores a std::shared_ptr<Program> instead of a Program object
parent
b2248a17ae
commit
8258321a74
|
@ -7,7 +7,7 @@ Development (next version)
|
||||||
- Added support for Intel specific subgroup shuffling extensions for faster GEMM on Intel GPUs
|
- Added support for Intel specific subgroup shuffling extensions for faster GEMM on Intel GPUs
|
||||||
- Re-added a local memory size constraint to the tuners
|
- Re-added a local memory size constraint to the tuners
|
||||||
- Updated and reorganised the CLBlast documentation
|
- Updated and reorganised the CLBlast documentation
|
||||||
- Fixed an access violation when compiled with Visual Studio upon releasing the OpenCL program
|
- Fixed incorrect releasing of the OpenCL program resulting in segfaults / access violations
|
||||||
- Various minor fixes and enhancements
|
- Various minor fixes and enhancements
|
||||||
- Added tuned parameters for various devices (see doc/tuning.md)
|
- Added tuned parameters for various devices (see doc/tuning.md)
|
||||||
- Added non-BLAS level-1 routines:
|
- Added non-BLAS level-1 routines:
|
||||||
|
|
|
@ -117,8 +117,8 @@ template std::string BinaryCache::Get(const BinaryKeyRef &, bool *) const;
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
template class Cache<ProgramKey, Program>;
|
template class Cache<ProgramKey, std::shared_ptr<Program>>;
|
||||||
template Program ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
template std::shared_ptr<Program> ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
||||||
template void ProgramCache::RemoveBySubset<1, 2>(const ProgramKey &); // precision and routine name
|
template void ProgramCache::RemoveBySubset<1, 2>(const ProgramKey &); // precision and routine name
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
|
@ -83,10 +83,10 @@ extern template std::string BinaryCache::Get(const BinaryKeyRef &, bool *) const
|
||||||
typedef std::tuple<RawContext, RawDeviceID, Precision, std::string> ProgramKey;
|
typedef std::tuple<RawContext, RawDeviceID, Precision, std::string> ProgramKey;
|
||||||
typedef std::tuple<const RawContext &, const RawDeviceID &, const Precision &, const std::string &> ProgramKeyRef;
|
typedef std::tuple<const RawContext &, const RawDeviceID &, const Precision &, const std::string &> ProgramKeyRef;
|
||||||
|
|
||||||
typedef Cache<ProgramKey, Program> ProgramCache;
|
typedef Cache<ProgramKey, std::shared_ptr<Program>> ProgramCache;
|
||||||
|
|
||||||
extern template class Cache<ProgramKey, Program>;
|
extern template class Cache<ProgramKey, std::shared_ptr<Program>>;
|
||||||
extern template Program ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
extern template std::shared_ptr<Program> ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
|
|
@ -437,47 +437,41 @@ using ContextPointer = cl_context*;
|
||||||
// C++11 version of 'cl_program'.
|
// C++11 version of 'cl_program'.
|
||||||
class Program {
|
class Program {
|
||||||
public:
|
public:
|
||||||
Program() = default;
|
|
||||||
|
|
||||||
// Source-based constructor with memory management
|
// Source-based constructor with memory management
|
||||||
explicit Program(const Context &context, const std::string &source):
|
explicit Program(const Context &context, const std::string &source) {
|
||||||
program_(new cl_program, [](cl_program* p) {
|
|
||||||
#ifndef _MSC_VER // 'clReleaseProgram' caused an access violation with Visual Studio
|
|
||||||
if (*p) { CheckErrorDtor(clReleaseProgram(*p)); }
|
|
||||||
#endif
|
|
||||||
delete p;
|
|
||||||
}) {
|
|
||||||
const char *source_ptr = &source[0];
|
const char *source_ptr = &source[0];
|
||||||
const auto length = source.length();
|
const auto length = source.length();
|
||||||
auto status = CL_SUCCESS;
|
auto status = CL_SUCCESS;
|
||||||
*program_ = clCreateProgramWithSource(context(), 1, &source_ptr, &length, &status);
|
program_ = clCreateProgramWithSource(context(), 1, &source_ptr, &length, &status);
|
||||||
CLCudaAPIError::Check(status, "clCreateProgramWithSource");
|
CLCudaAPIError::Check(status, "clCreateProgramWithSource");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Binary-based constructor with memory management
|
// Binary-based constructor with memory management
|
||||||
explicit Program(const Device &device, const Context &context, const std::string &binary):
|
explicit Program(const Device &device, const Context &context, const std::string &binary) {
|
||||||
program_(new cl_program, [](cl_program* p) {
|
|
||||||
if (*p) { CheckErrorDtor(clReleaseProgram(*p)); }
|
|
||||||
delete p;
|
|
||||||
}) {
|
|
||||||
const char *binary_ptr = &binary[0];
|
const char *binary_ptr = &binary[0];
|
||||||
const auto length = binary.length();
|
const auto length = binary.length();
|
||||||
auto status1 = CL_SUCCESS;
|
auto status1 = CL_SUCCESS;
|
||||||
auto status2 = CL_SUCCESS;
|
auto status2 = CL_SUCCESS;
|
||||||
const auto dev = device();
|
const auto dev = device();
|
||||||
*program_ = clCreateProgramWithBinary(context(), 1, &dev, &length,
|
program_ = clCreateProgramWithBinary(context(), 1, &dev, &length,
|
||||||
reinterpret_cast<const unsigned char**>(&binary_ptr),
|
reinterpret_cast<const unsigned char**>(&binary_ptr),
|
||||||
&status1, &status2);
|
&status1, &status2);
|
||||||
CLCudaAPIError::Check(status1, "clCreateProgramWithBinary (binary status)");
|
CLCudaAPIError::Check(status1, "clCreateProgramWithBinary (binary status)");
|
||||||
CLCudaAPIError::Check(status2, "clCreateProgramWithBinary");
|
CLCudaAPIError::Check(status2, "clCreateProgramWithBinary");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean-up
|
||||||
|
~Program() {
|
||||||
|
if (program_) { CheckErrorDtor(clReleaseProgram(program_)); }
|
||||||
|
}
|
||||||
|
|
||||||
// Compiles the device program and checks whether or not there are any warnings/errors
|
// Compiles the device program and checks whether or not there are any warnings/errors
|
||||||
void Build(const Device &device, std::vector<std::string> &options) {
|
void Build(const Device &device, std::vector<std::string> &options) {
|
||||||
options.push_back("-cl-std=CL1.1");
|
options.push_back("-cl-std=CL1.1");
|
||||||
auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
|
auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
|
||||||
const cl_device_id dev = device();
|
const cl_device_id dev = device();
|
||||||
CheckError(clBuildProgram(*program_, 1, &dev, options_string.c_str(), nullptr, nullptr));
|
CheckError(clBuildProgram(program_, 1, &dev, options_string.c_str(), nullptr, nullptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Confirms whether a certain status code is an actual compilation error or warning
|
// Confirms whether a certain status code is an actual compilation error or warning
|
||||||
|
@ -489,28 +483,28 @@ class Program {
|
||||||
std::string GetBuildInfo(const Device &device) const {
|
std::string GetBuildInfo(const Device &device) const {
|
||||||
auto bytes = size_t{0};
|
auto bytes = size_t{0};
|
||||||
auto query = cl_program_build_info{CL_PROGRAM_BUILD_LOG};
|
auto query = cl_program_build_info{CL_PROGRAM_BUILD_LOG};
|
||||||
CheckError(clGetProgramBuildInfo(*program_, device(), query, 0, nullptr, &bytes));
|
CheckError(clGetProgramBuildInfo(program_, device(), query, 0, nullptr, &bytes));
|
||||||
auto result = std::string{};
|
auto result = std::string{};
|
||||||
result.resize(bytes);
|
result.resize(bytes);
|
||||||
CheckError(clGetProgramBuildInfo(*program_, device(), query, bytes, &result[0], nullptr));
|
CheckError(clGetProgramBuildInfo(program_, device(), query, bytes, &result[0], nullptr));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieves a binary or an intermediate representation of the compiled program
|
// Retrieves a binary or an intermediate representation of the compiled program
|
||||||
std::string GetIR() const {
|
std::string GetIR() const {
|
||||||
auto bytes = size_t{0};
|
auto bytes = size_t{0};
|
||||||
CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
|
CheckError(clGetProgramInfo(program_, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &bytes, nullptr));
|
||||||
auto result = std::string{};
|
auto result = std::string{};
|
||||||
result.resize(bytes);
|
result.resize(bytes);
|
||||||
auto result_ptr = result.data();
|
auto result_ptr = result.data();
|
||||||
CheckError(clGetProgramInfo(*program_, CL_PROGRAM_BINARIES, sizeof(char*), &result_ptr, nullptr));
|
CheckError(clGetProgramInfo(program_, CL_PROGRAM_BINARIES, sizeof(char*), &result_ptr, nullptr));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Accessor to the private data-member
|
// Accessor to the private data-member
|
||||||
const cl_program& operator()() const { return *program_; }
|
const cl_program& operator()() const { return program_; }
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<cl_program> program_;
|
cl_program program_ = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
@ -757,13 +751,13 @@ class Kernel {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Regular constructor with memory management
|
// Regular constructor with memory management
|
||||||
explicit Kernel(const Program &program, const std::string &name):
|
explicit Kernel(const std::shared_ptr<Program> program, const std::string &name):
|
||||||
kernel_(new cl_kernel, [](cl_kernel* k) {
|
kernel_(new cl_kernel, [](cl_kernel* k) {
|
||||||
if (*k) { CheckErrorDtor(clReleaseKernel(*k)); }
|
if (*k) { CheckErrorDtor(clReleaseKernel(*k)); }
|
||||||
delete k;
|
delete k;
|
||||||
}) {
|
}) {
|
||||||
auto status = CL_SUCCESS;
|
auto status = CL_SUCCESS;
|
||||||
*kernel_ = clCreateKernel(program(), name.c_str(), &status);
|
*kernel_ = clCreateKernel(program->operator()(), name.c_str(), &status);
|
||||||
CLCudaAPIError::Check(status, "clCreateKernel");
|
CLCudaAPIError::Check(status, "clCreateKernel");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -96,10 +96,10 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
||||||
auto binary = BinaryCache::Instance().Get(BinaryKeyRef{platform_id, precision_, routine_info, device_name },
|
auto binary = BinaryCache::Instance().Get(BinaryKeyRef{platform_id, precision_, routine_info, device_name },
|
||||||
&has_binary);
|
&has_binary);
|
||||||
if (has_binary) {
|
if (has_binary) {
|
||||||
program_ = Program(device_, context_, binary);
|
program_ = std::make_shared<Program>(Program(device_, context_, binary));
|
||||||
program_.Build(device_, options);
|
program_->Build(device_, options);
|
||||||
ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_info },
|
ProgramCache::Instance().Store(ProgramKey{ context_(), device_(), precision_, routine_info },
|
||||||
Program{ program_ });
|
std::shared_ptr<Program>{program_});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,10 +135,10 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
||||||
|
|
||||||
// Store the compiled binary and program in the cache
|
// Store the compiled binary and program in the cache
|
||||||
BinaryCache::Instance().Store(BinaryKey{platform_id, precision_, routine_info, device_name},
|
BinaryCache::Instance().Store(BinaryKey{platform_id, precision_, routine_info, device_name},
|
||||||
program_.GetIR());
|
program_->GetIR());
|
||||||
|
|
||||||
ProgramCache::Instance().Store(ProgramKey{context_(), device_(), precision_, routine_info},
|
ProgramCache::Instance().Store(ProgramKey{context_(), device_(), precision_, routine_info},
|
||||||
Program{ program_ });
|
std::shared_ptr<Program>{program_});
|
||||||
}
|
}
|
||||||
|
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
|
@ -33,6 +33,7 @@ namespace clblast {
|
||||||
class Routine {
|
class Routine {
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
// Initializes db_, fetching cached database or building one
|
||||||
static void InitDatabase(const Device &device, const std::vector<std::string> &kernel_names,
|
static void InitDatabase(const Device &device, const std::vector<std::string> &kernel_names,
|
||||||
const Precision precision, const std::vector<database::DatabaseEntry> &userDatabase,
|
const Precision precision, const std::vector<database::DatabaseEntry> &userDatabase,
|
||||||
Databases &db) {
|
Databases &db) {
|
||||||
|
@ -78,9 +79,6 @@ class Routine {
|
||||||
// Initializes program_, fetching cached program or building one
|
// Initializes program_, fetching cached program or building one
|
||||||
void InitProgram(std::initializer_list<const char *> source);
|
void InitProgram(std::initializer_list<const char *> source);
|
||||||
|
|
||||||
// Initializes db_, fetching cached database or building one
|
|
||||||
void InitDatabase(const std::vector<database::DatabaseEntry> &userDatabase);
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
||||||
// Non-static variable for the precision
|
// Non-static variable for the precision
|
||||||
|
@ -97,7 +95,7 @@ class Routine {
|
||||||
const Device device_;
|
const Device device_;
|
||||||
|
|
||||||
// Compiled program (either retrieved from cache or compiled in slow path)
|
// Compiled program (either retrieved from cache or compiled in slow path)
|
||||||
Program program_;
|
std::shared_ptr<Program> program_;
|
||||||
|
|
||||||
// Connection to the database for all the device-specific parameters
|
// Connection to the database for all the device-specific parameters
|
||||||
Databases db_;
|
Databases db_;
|
||||||
|
|
|
@ -77,7 +77,7 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
|
||||||
// Sets all elements of a matrix to a constant value
|
// Sets all elements of a matrix to a constant value
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void FillMatrix(Queue &queue, const Device &device,
|
void FillMatrix(Queue &queue, const Device &device,
|
||||||
const Program &program, const Databases &,
|
const std::shared_ptr<Program> program, const Databases &,
|
||||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||||
const size_t m, const size_t n, const size_t ld, const size_t offset,
|
const size_t m, const size_t n, const size_t ld, const size_t offset,
|
||||||
const Buffer<T> &dest,
|
const Buffer<T> &dest,
|
||||||
|
@ -95,26 +95,26 @@ void FillMatrix(Queue &queue, const Device &device,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compiles the above function
|
// Compiles the above function
|
||||||
template void FillMatrix<half>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillMatrix<half>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const size_t, const Buffer<half>&, const half);
|
const size_t, const size_t, const Buffer<half>&, const half);
|
||||||
template void FillMatrix<float>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillMatrix<float>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const size_t, const Buffer<float>&, const float);
|
const size_t, const size_t, const Buffer<float>&, const float);
|
||||||
template void FillMatrix<double>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillMatrix<double>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const size_t, const Buffer<double>&, const double);
|
const size_t, const size_t, const Buffer<double>&, const double);
|
||||||
template void FillMatrix<float2>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillMatrix<float2>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const size_t, const Buffer<float2>&, const float2);
|
const size_t, const size_t, const Buffer<float2>&, const float2);
|
||||||
template void FillMatrix<double2>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillMatrix<double2>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const size_t, const Buffer<double2>&, const double2);
|
const size_t, const size_t, const Buffer<double2>&, const double2);
|
||||||
|
|
||||||
// Sets all elements of a vector to a constant value
|
// Sets all elements of a vector to a constant value
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void FillVector(Queue &queue, const Device &device,
|
void FillVector(Queue &queue, const Device &device,
|
||||||
const Program &program, const Databases &,
|
const std::shared_ptr<Program> program, const Databases &,
|
||||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||||
const size_t n, const size_t inc, const size_t offset,
|
const size_t n, const size_t inc, const size_t offset,
|
||||||
const Buffer<T> &dest,
|
const Buffer<T> &dest,
|
||||||
|
@ -131,19 +131,19 @@ void FillVector(Queue &queue, const Device &device,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compiles the above function
|
// Compiles the above function
|
||||||
template void FillVector<half>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillVector<half>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const Buffer<half>&, const half);
|
const size_t, const Buffer<half>&, const half);
|
||||||
template void FillVector<float>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillVector<float>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const Buffer<float>&, const float);
|
const size_t, const Buffer<float>&, const float);
|
||||||
template void FillVector<double>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillVector<double>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const Buffer<double>&, const double);
|
const size_t, const Buffer<double>&, const double);
|
||||||
template void FillVector<float2>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillVector<float2>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const Buffer<float2>&, const float2);
|
const size_t, const Buffer<float2>&, const float2);
|
||||||
template void FillVector<double2>(Queue&, const Device&, const Program&, const Databases&,
|
template void FillVector<double2>(Queue&, const Device&, const std::shared_ptr<Program>, const Databases&,
|
||||||
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
EventPointer, const std::vector<Event>&, const size_t, const size_t,
|
||||||
const size_t, const Buffer<double2>&, const double2);
|
const size_t, const Buffer<double2>&, const double2);
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
|
||||||
// Sets all elements of a matrix to a constant value
|
// Sets all elements of a matrix to a constant value
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void FillMatrix(Queue &queue, const Device &device,
|
void FillMatrix(Queue &queue, const Device &device,
|
||||||
const Program &program, const Databases &,
|
const std::shared_ptr<Program> program, const Databases &,
|
||||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||||
const size_t m, const size_t n, const size_t ld, const size_t offset,
|
const size_t m, const size_t n, const size_t ld, const size_t offset,
|
||||||
const Buffer<T> &dest,
|
const Buffer<T> &dest,
|
||||||
|
@ -45,7 +45,7 @@ void FillMatrix(Queue &queue, const Device &device,
|
||||||
// Sets all elements of a vector to a constant value
|
// Sets all elements of a vector to a constant value
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void FillVector(Queue &queue, const Device &device,
|
void FillVector(Queue &queue, const Device &device,
|
||||||
const Program &program, const Databases &,
|
const std::shared_ptr<Program> program, const Databases &,
|
||||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||||
const size_t n, const size_t inc, const size_t offset,
|
const size_t n, const size_t inc, const size_t offset,
|
||||||
const Buffer<T> &dest,
|
const Buffer<T> &dest,
|
||||||
|
@ -66,7 +66,7 @@ void PadCopyTransposeMatrix(Queue &queue, const Device &device,
|
||||||
const size_t dest_ld, const size_t dest_offset,
|
const size_t dest_ld, const size_t dest_offset,
|
||||||
const Buffer<T> &dest,
|
const Buffer<T> &dest,
|
||||||
const T alpha,
|
const T alpha,
|
||||||
const Program &program, const bool do_pad,
|
const std::shared_ptr<Program> program, const bool do_pad,
|
||||||
const bool do_transpose, const bool do_conjugate,
|
const bool do_transpose, const bool do_conjugate,
|
||||||
const bool upper = false, const bool lower = false,
|
const bool upper = false, const bool lower = false,
|
||||||
const bool diagonal_imag_zero = false) {
|
const bool diagonal_imag_zero = false) {
|
||||||
|
@ -186,7 +186,7 @@ void PadCopyTransposeMatrixBatched(Queue &queue, const Device &device,
|
||||||
const size_t dest_one, const size_t dest_two,
|
const size_t dest_one, const size_t dest_two,
|
||||||
const size_t dest_ld, const Buffer<int> &dest_offsets,
|
const size_t dest_ld, const Buffer<int> &dest_offsets,
|
||||||
const Buffer<T> &dest,
|
const Buffer<T> &dest,
|
||||||
const Program &program, const bool do_pad,
|
const std::shared_ptr<Program> program, const bool do_pad,
|
||||||
const bool do_transpose, const bool do_conjugate,
|
const bool do_transpose, const bool do_conjugate,
|
||||||
const size_t batch_count) {
|
const size_t batch_count) {
|
||||||
|
|
||||||
|
@ -250,7 +250,7 @@ void PadCopyTransposeMatrixStridedBatched(Queue &queue, const Device &device,
|
||||||
const size_t dest_one, const size_t dest_two,
|
const size_t dest_one, const size_t dest_two,
|
||||||
const size_t dest_ld, const size_t dest_offset,
|
const size_t dest_ld, const size_t dest_offset,
|
||||||
const size_t dest_stride, const Buffer<T> &dest,
|
const size_t dest_stride, const Buffer<T> &dest,
|
||||||
const Program &program, const bool do_pad,
|
const std::shared_ptr<Program> program, const bool do_pad,
|
||||||
const bool do_transpose, const bool do_conjugate,
|
const bool do_transpose, const bool do_conjugate,
|
||||||
const size_t batch_count) {
|
const size_t batch_count) {
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,8 @@ namespace clblast {
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
// Compiles a program from source code
|
// Compiles a program from source code
|
||||||
Program CompileFromSource(const std::string &source_string, const Precision precision,
|
std::shared_ptr<Program> CompileFromSource(
|
||||||
|
const std::string &source_string, const Precision precision,
|
||||||
const std::string &routine_name,
|
const std::string &routine_name,
|
||||||
const Device& device, const Context& context,
|
const Device& device, const Context& context,
|
||||||
std::vector<std::string>& options,
|
std::vector<std::string>& options,
|
||||||
|
@ -93,13 +94,13 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compiles the kernel
|
// Compiles the kernel
|
||||||
auto program = Program(context, kernel_string);
|
auto program = std::make_shared<Program>(context, kernel_string);
|
||||||
try {
|
try {
|
||||||
program.Build(device, options);
|
program->Build(device, options);
|
||||||
} catch (const CLCudaAPIBuildError &e) {
|
} catch (const CLCudaAPIBuildError &e) {
|
||||||
if (program.StatusIsCompilationWarningOrError(e.status()) && !silent) {
|
if (program->StatusIsCompilationWarningOrError(e.status()) && !silent) {
|
||||||
fprintf(stdout, "OpenCL compiler error/warning:\n%s\n",
|
fprintf(stdout, "OpenCL compiler error/warning:\n%s\n",
|
||||||
program.GetBuildInfo(device).c_str());
|
program->GetBuildInfo(device).c_str());
|
||||||
}
|
}
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,8 @@ namespace clblast {
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
// Compiles a program from source code
|
// Compiles a program from source code
|
||||||
Program CompileFromSource(const std::string &source_string, const Precision precision,
|
std::shared_ptr<Program> CompileFromSource(
|
||||||
|
const std::string &source_string, const Precision precision,
|
||||||
const std::string &routine_name,
|
const std::string &routine_name,
|
||||||
const Device& device, const Context& context,
|
const Device& device, const Context& context,
|
||||||
std::vector<std::string>& options,
|
std::vector<std::string>& options,
|
||||||
|
|
Loading…
Reference in New Issue