// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file implements a C++11 wrapper around some OpenCL C data-types, similar to Khronos' cl.hpp. // The main differences are modern C++11 support and a straightforward implemenation of the basic // needs (as required for this project). It also includes some extra functionality not available // in cl.hpp, such as including the sources with a Program object and querying a Kernel's validity // in terms of local memory usage. // // This file is adapted from the C++ bindings from the CLTune project and therefore contains the // following copyright notice: // // ================================================================================================= // // Copyright 2014 SURFsara // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // ================================================================================================= #ifndef CLBLAST_CLPP11_H_ #define CLBLAST_CLPP11_H_ #include // std::swap #include // std::copy #include // std::string #include // std::vector #include // std::runtime_error // Includes the normal OpenCL C header #if defined(__APPLE__) || defined(__MACOSX) #include #else #include #endif namespace clblast { // ================================================================================================= // Base class for any object class Object { protected: // Error handling (NOTE: these functions are [[noreturn]]) void Error(const std::string &message) const { throw std::runtime_error("Internal OpenCL error: "+message); } void Error(const cl_int status) const { throw std::runtime_error("Internal OpenCL error with status: "+std::to_string(status)); } }; // ================================================================================================= // Base class for objects which require memory management class ObjectWithState: public Object { }; // ================================================================================================= // C++11 version of cl_event class Event: public Object { public: // Constructor based on the plain C data-type explicit Event(const cl_event event): event_(event) { } // New event Event(): event_() {} // Public functions size_t GetProfilingStart() const { auto bytes = size_t{0}; clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes); auto result = size_t{0}; clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &result, nullptr); return result; } size_t GetProfilingEnd() const { auto bytes = size_t{0}; clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes); auto result = size_t{0}; clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &result, nullptr); return result; } cl_int Wait() const { return clWaitForEvents(1, &event_); } // Accessors to the private data-member cl_event operator()() const { return event_; } cl_event& operator()() { return event_; } private: cl_event event_; }; // ================================================================================================= // C++11 version of cl_platform_id class Platform: public Object { public: // Constructor based on the plain C data-type explicit Platform(const cl_platform_id platform): platform_(platform) { } // Initialize the platform. Note that this constructor can throw exceptions! explicit Platform(const size_t platform_id) { auto num_platforms = cl_uint{0}; auto status = clGetPlatformIDs(0, nullptr, &num_platforms); if (status != CL_SUCCESS) { Error(status); } if (num_platforms == 0) { Error("no platforms found"); } auto platforms = std::vector(num_platforms); status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr); if (status != CL_SUCCESS) { Error(status); } if (platform_id >= num_platforms) { Error("invalid platform ID "+std::to_string(platform_id)); } platform_ = platforms[platform_id]; } // Accessors to the private data-member const cl_platform_id& operator()() const { return platform_; } private: cl_platform_id platform_; }; // ================================================================================================= // C++11 version of cl_device_id class Device: public Object { public: // Constructor based on the plain C data-type explicit Device(const cl_device_id device): device_(device) { } // Initialize the device. Note that this constructor can throw exceptions! explicit Device(const Platform &platform, const cl_device_type type, const size_t device_id) { auto num_devices = cl_uint{0}; auto status = clGetDeviceIDs(platform(), type, 0, nullptr, &num_devices); if (status != CL_SUCCESS) { Error(status); } if (num_devices == 0) { Error("no devices found"); } auto devices = std::vector(num_devices); status = clGetDeviceIDs(platform(), type, num_devices, devices.data(), nullptr); if (status != CL_SUCCESS) { Error(status); } if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); } device_ = devices[device_id]; } // Public functions std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); } cl_device_type Type() const { return GetInfo(CL_DEVICE_TYPE); } std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); } std::string Name() const { return GetInfoString(CL_DEVICE_NAME); } std::string Extensions() const { return GetInfoString(CL_DEVICE_EXTENSIONS); } size_t MaxWorkGroupSize() const { return GetInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE); } cl_ulong LocalMemSize() const { return GetInfo(CL_DEVICE_LOCAL_MEM_SIZE); } cl_uint MaxWorkItemDimensions() const { return GetInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS); } std::vector MaxWorkItemSizes() const { return GetInfoVector(CL_DEVICE_MAX_WORK_ITEM_SIZES); } // Configuration-validity checks bool IsLocalMemoryValid(const size_t local_mem_usage) const { return (local_mem_usage <= LocalMemSize()); } bool IsThreadConfigValid(const std::vector &local) const { auto local_size = size_t{1}; for (auto &item: local) { local_size *= item; } for (auto i=size_t{0}; i MaxWorkItemSizes()[i]) { return false; } } if (local_size > MaxWorkGroupSize()) { return false; } if (local.size() > MaxWorkItemDimensions()) { return false; } return true; } // Accessors to the private data-member const cl_device_id& operator()() const { return device_; } private: // Helper functions template T GetInfo(const cl_device_info info) const { auto bytes = size_t{0}; clGetDeviceInfo(device_, info, 0, nullptr, &bytes); auto result = T(0); clGetDeviceInfo(device_, info, bytes, &result, nullptr); return result; } template std::vector GetInfoVector(const cl_device_info info) const { auto bytes = size_t{0}; clGetDeviceInfo(device_, info, 0, nullptr, &bytes); auto result = std::vector(bytes/sizeof(T)); clGetDeviceInfo(device_, info, bytes, result.data(), nullptr); return result; } std::string GetInfoString(const cl_device_info info) const { auto bytes = size_t{0}; clGetDeviceInfo(device_, info, 0, nullptr, &bytes); auto result = std::vector(bytes); clGetDeviceInfo(device_, info, bytes, result.data(), nullptr); return std::string(result.data()); } cl_device_id device_; }; // ================================================================================================= // C++11 version of cl_context class Context: public ObjectWithState { public: // Constructor based on the plain C data-type explicit Context(const cl_context context): context_(context) { clRetainContext(context_); } // Memory management explicit Context(const Device &device) { auto status = CL_SUCCESS; const cl_device_id dev = device(); context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status); if (status != CL_SUCCESS) { Error(status); } } ~Context() { clReleaseContext(context_); } Context(const Context &other): context_(other.context_) { clRetainContext(context_); } Context& operator=(Context other) { swap(*this, other); return *this; } friend void swap(Context &first, Context &second) { std::swap(first.context_, second.context_); } // Accessors to the private data-member const cl_context& operator()() const { return context_; } private: cl_context context_; }; // ================================================================================================= // C++11 version of cl_program. Additionally holds the program's source code. class Program: public ObjectWithState { public: // Note that there is no constructor based on the plain C data-type because of extra state // Memory management explicit Program(const Context &context, const std::string &source): length_(source.length()) { std::copy(source.begin(), source.end(), back_inserter(source_)); source_ptr_ = source_.data(); auto status = CL_SUCCESS; program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status); if (status != CL_SUCCESS) { Error(status); } } ~Program() { clReleaseProgram(program_); } Program(const Program &other): length_(other.length_), source_(other.source_), source_ptr_(other.source_ptr_), program_(other.program_) { clRetainProgram(program_); } Program& operator=(Program other) { swap(*this, other); return *this; } friend void swap(Program &first, Program &second) { std::swap(first.length_, second.length_); std::swap(first.source_, second.source_); std::swap(first.source_ptr_, second.source_ptr_); std::swap(first.program_, second.program_); } // Public functions cl_int Build(const Device &device, const std::string &options) { const cl_device_id dev = device(); return clBuildProgram(program_, 1, &dev, options.c_str(), nullptr, nullptr); } std::string GetBuildInfo(const Device &device) const { auto bytes = size_t{0}; clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, 0, nullptr, &bytes); auto result = std::vector(bytes); clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, bytes, result.data(), nullptr); return std::string(result.data()); } // Accessors to the private data-member const cl_program& operator()() const { return program_; } private: size_t length_; std::vector source_; const char* source_ptr_; cl_program program_; }; // ================================================================================================= // C++11 version of cl_kernel class Kernel: public ObjectWithState { public: // Constructor based on the plain C data-type explicit Kernel(const cl_kernel kernel): kernel_(kernel) { clRetainKernel(kernel_); } // Memory management explicit Kernel(const Program &program, const std::string &name) { auto status = CL_SUCCESS; kernel_ = clCreateKernel(program(), name.c_str(), &status); if (status != CL_SUCCESS) { Error(status); } } ~Kernel() { clReleaseKernel(kernel_); } Kernel(const Kernel &other): kernel_(other.kernel_) { clRetainKernel(kernel_); } Kernel& operator=(Kernel other) { swap(*this, other); return *this; } friend void swap(Kernel &first, Kernel &second) { std::swap(first.kernel_, second.kernel_); } // Public functions template // Note: doesn't work with T=Buffer cl_int SetArgument(const cl_uint index, const T &value) { return clSetKernelArg(kernel_, index, sizeof(T), &value); } size_t LocalMemUsage(const Device &device) const { auto bytes = size_t{0}; clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, 0, nullptr, &bytes); auto result = size_t{0}; clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, bytes, &result, nullptr); return result; } // Accessors to the private data-member const cl_kernel& operator()() const { return kernel_; } private: cl_kernel kernel_; }; // ================================================================================================= // C++11 version of cl_command_queue class CommandQueue: public ObjectWithState { public: // Constructor based on the plain C data-type explicit CommandQueue(const cl_command_queue queue): queue_(queue) { clRetainCommandQueue(queue_); } // Memory management explicit CommandQueue(const Context &context, const Device &device) { auto status = CL_SUCCESS; queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status); if (status != CL_SUCCESS) { Error(status); } } ~CommandQueue() { clReleaseCommandQueue(queue_); } CommandQueue(const CommandQueue &other): queue_(other.queue_) { clRetainCommandQueue(queue_); } CommandQueue& operator=(CommandQueue other) { swap(*this, other); return *this; } friend void swap(CommandQueue &first, CommandQueue &second) { std::swap(first.queue_, second.queue_); } // Public functions cl_int EnqueueKernel(const Kernel &kernel, const std::vector &global, const std::vector &local, Event &event) { return clEnqueueNDRangeKernel(queue_, kernel(), static_cast(global.size()), nullptr, global.data(), local.data(), 0, nullptr, &(event())); } Context GetContext() const { auto bytes = size_t{0}; clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, 0, nullptr, &bytes); cl_context result; clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr); return Context(result); } Device GetDevice() const { auto bytes = size_t{0}; clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes); cl_device_id result; clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr); return Device(result); } cl_int Finish() { return clFinish(queue_); } // Accessors to the private data-member const cl_command_queue& operator()() const { return queue_; } private: cl_command_queue queue_; }; // ================================================================================================= // C++11 version of cl_mem class Buffer: public ObjectWithState { public: // Constructor based on the plain C data-type explicit Buffer(const cl_mem buffer): buffer_(buffer) { clRetainMemObject(buffer_); } // Memory management explicit Buffer(const Context &context, const cl_mem_flags flags, const size_t bytes) { auto status = CL_SUCCESS; buffer_ = clCreateBuffer(context(), flags, bytes, nullptr, &status); if (status != CL_SUCCESS) { Error(status); } } ~Buffer() { clReleaseMemObject(buffer_); } Buffer(const Buffer &other): buffer_(other.buffer_) { clRetainMemObject(buffer_); } Buffer& operator=(Buffer other) { swap(*this, other); return *this; } friend void swap(Buffer &first, Buffer &second) { std::swap(first.buffer_, second.buffer_); } // Public functions template cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, T* host) { return clEnqueueReadBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr); } template cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, std::vector &host) { return ReadBuffer(queue, bytes, host.data()); } template cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const T* host) { return clEnqueueWriteBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr); } template cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const std::vector &host) { return WriteBuffer(queue, bytes, &host[0]); } size_t GetSize() const { auto bytes = size_t{0}; auto status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, 0, nullptr, &bytes); if (status != CL_SUCCESS) { Error(status); } auto result = size_t{0}; status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, bytes, &result, nullptr); if (status != CL_SUCCESS) { Error(status); } return result; } // Accessors to the private data-member const cl_mem& operator()() const { return buffer_; } private: cl_mem buffer_; }; // ================================================================================================= } // namespace clblast // CLBLAST_CLPP11_H_ #endif