mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-08-22 13:02:27 +02:00
508 lines
17 KiB
C++
508 lines
17 KiB
C++
|
|
// =================================================================================================
|
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
|
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
|
// width of 100 characters per line.
|
|
//
|
|
// Author(s):
|
|
// Cedric Nugteren <www.cedricnugteren.nl>
|
|
//
|
|
// This file implements a C++11 wrapper around some OpenCL C data-types, similar to Khronos' cl.hpp.
|
|
// The main differences are modern C++11 support and a straightforward implemenation of the basic
|
|
// needs (as required for this project). It also includes some extra functionality not available
|
|
// in cl.hpp, such as including the sources with a Program object and querying a Kernel's validity
|
|
// in terms of local memory usage.
|
|
//
|
|
// This file is adapted from the C++ bindings from the CLTune project and therefore contains the
|
|
// following copyright notice:
|
|
//
|
|
// =================================================================================================
|
|
//
|
|
// Copyright 2014 SURFsara
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
// =================================================================================================
|
|
|
|
#ifndef CLBLAST_CLPP11_H_
|
|
#define CLBLAST_CLPP11_H_
|
|
|
|
#include <utility> // std::swap
|
|
#include <algorithm> // std::copy
|
|
#include <string> // std::string
|
|
#include <vector> // std::vector
|
|
#include <stdexcept> // std::runtime_error
|
|
|
|
// Includes the normal OpenCL C header
|
|
#if defined(__APPLE__) || defined(__MACOSX)
|
|
#include <OpenCL/opencl.h>
|
|
#else
|
|
#include <CL/opencl.h>
|
|
#endif
|
|
|
|
namespace clblast {
|
|
// =================================================================================================
|
|
|
|
// Base class for any object
|
|
class Object {
|
|
protected:
|
|
|
|
// Error handling (NOTE: these functions are [[noreturn]])
|
|
void Error(const std::string &message) const {
|
|
throw std::runtime_error("Internal OpenCL error: "+message);
|
|
}
|
|
void Error(const cl_int status) const {
|
|
throw std::runtime_error("Internal OpenCL error with status: "+std::to_string(status));
|
|
}
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// Base class for objects which require memory management
|
|
class ObjectWithState: public Object {
|
|
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_event
|
|
class Event: public Object {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit Event(const cl_event event): event_(event) { }
|
|
|
|
// New event
|
|
Event(): event_() {}
|
|
|
|
// Public functions
|
|
size_t GetProfilingStart() const {
|
|
auto bytes = size_t{0};
|
|
clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, 0, nullptr, &bytes);
|
|
auto result = size_t{0};
|
|
clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_START, bytes, &result, nullptr);
|
|
return result;
|
|
}
|
|
size_t GetProfilingEnd() const {
|
|
auto bytes = size_t{0};
|
|
clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, 0, nullptr, &bytes);
|
|
auto result = size_t{0};
|
|
clGetEventProfilingInfo(event_, CL_PROFILING_COMMAND_END, bytes, &result, nullptr);
|
|
return result;
|
|
}
|
|
cl_int Wait() const {
|
|
return clWaitForEvents(1, &event_);
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
cl_event operator()() const { return event_; }
|
|
cl_event& operator()() { return event_; }
|
|
private:
|
|
cl_event event_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_platform_id
|
|
class Platform: public Object {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit Platform(const cl_platform_id platform): platform_(platform) { }
|
|
|
|
// Initialize the platform. Note that this constructor can throw exceptions!
|
|
explicit Platform(const size_t platform_id) {
|
|
auto num_platforms = cl_uint{0};
|
|
auto status = clGetPlatformIDs(0, nullptr, &num_platforms);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
if (num_platforms == 0) { Error("no platforms found"); }
|
|
auto platforms = std::vector<cl_platform_id>(num_platforms);
|
|
status = clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
if (platform_id >= num_platforms) { Error("invalid platform ID "+std::to_string(platform_id)); }
|
|
platform_ = platforms[platform_id];
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_platform_id& operator()() const { return platform_; }
|
|
private:
|
|
cl_platform_id platform_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_device_id
|
|
class Device: public Object {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit Device(const cl_device_id device): device_(device) { }
|
|
|
|
// Initialize the device. Note that this constructor can throw exceptions!
|
|
explicit Device(const Platform &platform, const cl_device_type type, const size_t device_id) {
|
|
auto num_devices = cl_uint{0};
|
|
auto status = clGetDeviceIDs(platform(), type, 0, nullptr, &num_devices);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
if (num_devices == 0) { Error("no devices found"); }
|
|
auto devices = std::vector<cl_device_id>(num_devices);
|
|
status = clGetDeviceIDs(platform(), type, num_devices, devices.data(), nullptr);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); }
|
|
device_ = devices[device_id];
|
|
}
|
|
|
|
// Public functions
|
|
std::string Version() const { return GetInfoString(CL_DEVICE_VERSION); }
|
|
cl_device_type Type() const { return GetInfo<cl_device_type>(CL_DEVICE_TYPE); }
|
|
std::string Vendor() const { return GetInfoString(CL_DEVICE_VENDOR); }
|
|
std::string Name() const { return GetInfoString(CL_DEVICE_NAME); }
|
|
std::string Extensions() const { return GetInfoString(CL_DEVICE_EXTENSIONS); }
|
|
size_t MaxWorkGroupSize() const { return GetInfo<size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); }
|
|
cl_ulong LocalMemSize() const { return GetInfo<cl_ulong>(CL_DEVICE_LOCAL_MEM_SIZE); }
|
|
cl_uint MaxWorkItemDimensions() const {
|
|
return GetInfo<cl_uint>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
|
|
}
|
|
std::vector<size_t> MaxWorkItemSizes() const {
|
|
return GetInfoVector<size_t>(CL_DEVICE_MAX_WORK_ITEM_SIZES);
|
|
}
|
|
|
|
// Configuration-validity checks
|
|
bool IsLocalMemoryValid(const size_t local_mem_usage) const {
|
|
return (local_mem_usage <= LocalMemSize());
|
|
}
|
|
bool IsThreadConfigValid(const std::vector<size_t> &local) const {
|
|
auto local_size = size_t{1};
|
|
for (auto &item: local) { local_size *= item; }
|
|
for (auto i=size_t{0}; i<local.size(); ++i) {
|
|
if (local[i] > MaxWorkItemSizes()[i]) { return false; }
|
|
}
|
|
if (local_size > MaxWorkGroupSize()) { return false; }
|
|
if (local.size() > MaxWorkItemDimensions()) { return false; }
|
|
return true;
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_device_id& operator()() const { return device_; }
|
|
private:
|
|
|
|
// Helper functions
|
|
template <typename T>
|
|
T GetInfo(const cl_device_info info) const {
|
|
auto bytes = size_t{0};
|
|
clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
|
|
auto result = T(0);
|
|
clGetDeviceInfo(device_, info, bytes, &result, nullptr);
|
|
return result;
|
|
}
|
|
template <typename T>
|
|
std::vector<T> GetInfoVector(const cl_device_info info) const {
|
|
auto bytes = size_t{0};
|
|
clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
|
|
auto result = std::vector<T>(bytes/sizeof(T));
|
|
clGetDeviceInfo(device_, info, bytes, result.data(), nullptr);
|
|
return result;
|
|
}
|
|
std::string GetInfoString(const cl_device_info info) const {
|
|
auto bytes = size_t{0};
|
|
clGetDeviceInfo(device_, info, 0, nullptr, &bytes);
|
|
auto result = std::vector<char>(bytes);
|
|
clGetDeviceInfo(device_, info, bytes, result.data(), nullptr);
|
|
return std::string(result.data());
|
|
}
|
|
|
|
cl_device_id device_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_context
|
|
class Context: public ObjectWithState {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit Context(const cl_context context): context_(context) {
|
|
clRetainContext(context_);
|
|
}
|
|
|
|
// Memory management
|
|
explicit Context(const Device &device) {
|
|
auto status = CL_SUCCESS;
|
|
const cl_device_id dev = device();
|
|
context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
}
|
|
~Context() {
|
|
clReleaseContext(context_);
|
|
}
|
|
Context(const Context &other):
|
|
context_(other.context_) {
|
|
clRetainContext(context_);
|
|
}
|
|
Context& operator=(Context other) {
|
|
swap(*this, other);
|
|
return *this;
|
|
}
|
|
friend void swap(Context &first, Context &second) {
|
|
std::swap(first.context_, second.context_);
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_context& operator()() const { return context_; }
|
|
private:
|
|
cl_context context_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_program. Additionally holds the program's source code.
|
|
class Program: public ObjectWithState {
|
|
public:
|
|
|
|
// Note that there is no constructor based on the plain C data-type because of extra state
|
|
|
|
// Memory management
|
|
explicit Program(const Context &context, const std::string &source):
|
|
length_(source.length()) {
|
|
std::copy(source.begin(), source.end(), back_inserter(source_));
|
|
source_ptr_ = source_.data();
|
|
auto status = CL_SUCCESS;
|
|
program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
}
|
|
~Program() {
|
|
clReleaseProgram(program_);
|
|
}
|
|
Program(const Program &other):
|
|
length_(other.length_),
|
|
source_(other.source_),
|
|
source_ptr_(other.source_ptr_),
|
|
program_(other.program_) {
|
|
clRetainProgram(program_);
|
|
}
|
|
Program& operator=(Program other) {
|
|
swap(*this, other);
|
|
return *this;
|
|
}
|
|
friend void swap(Program &first, Program &second) {
|
|
std::swap(first.length_, second.length_);
|
|
std::swap(first.source_, second.source_);
|
|
std::swap(first.source_ptr_, second.source_ptr_);
|
|
std::swap(first.program_, second.program_);
|
|
}
|
|
|
|
// Public functions
|
|
cl_int Build(const Device &device, const std::string &options) {
|
|
const cl_device_id dev = device();
|
|
return clBuildProgram(program_, 1, &dev, options.c_str(), nullptr, nullptr);
|
|
}
|
|
std::string GetBuildInfo(const Device &device) const {
|
|
auto bytes = size_t{0};
|
|
clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, 0, nullptr, &bytes);
|
|
auto result = std::vector<char>(bytes);
|
|
clGetProgramBuildInfo(program_, device(), CL_PROGRAM_BUILD_LOG, bytes, result.data(), nullptr);
|
|
return std::string(result.data());
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_program& operator()() const { return program_; }
|
|
private:
|
|
size_t length_;
|
|
std::vector<char> source_;
|
|
const char* source_ptr_;
|
|
cl_program program_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_kernel
|
|
class Kernel: public ObjectWithState {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit Kernel(const cl_kernel kernel): kernel_(kernel) {
|
|
clRetainKernel(kernel_);
|
|
}
|
|
|
|
// Memory management
|
|
explicit Kernel(const Program &program, const std::string &name) {
|
|
auto status = CL_SUCCESS;
|
|
kernel_ = clCreateKernel(program(), name.c_str(), &status);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
}
|
|
~Kernel() {
|
|
clReleaseKernel(kernel_);
|
|
}
|
|
Kernel(const Kernel &other):
|
|
kernel_(other.kernel_) {
|
|
clRetainKernel(kernel_);
|
|
}
|
|
Kernel& operator=(Kernel other) {
|
|
swap(*this, other);
|
|
return *this;
|
|
}
|
|
friend void swap(Kernel &first, Kernel &second) {
|
|
std::swap(first.kernel_, second.kernel_);
|
|
}
|
|
|
|
// Public functions
|
|
template <typename T> // Note: doesn't work with T=Buffer
|
|
cl_int SetArgument(const cl_uint index, const T &value) {
|
|
return clSetKernelArg(kernel_, index, sizeof(T), &value);
|
|
}
|
|
size_t LocalMemUsage(const Device &device) const {
|
|
auto bytes = size_t{0};
|
|
clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, 0, nullptr, &bytes);
|
|
auto result = size_t{0};
|
|
clGetKernelWorkGroupInfo(kernel_, device(), CL_KERNEL_LOCAL_MEM_SIZE, bytes, &result, nullptr);
|
|
return result;
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_kernel& operator()() const { return kernel_; }
|
|
private:
|
|
cl_kernel kernel_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_command_queue
|
|
class CommandQueue: public ObjectWithState {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit CommandQueue(const cl_command_queue queue): queue_(queue) {
|
|
clRetainCommandQueue(queue_);
|
|
}
|
|
|
|
// Memory management
|
|
explicit CommandQueue(const Context &context, const Device &device) {
|
|
auto status = CL_SUCCESS;
|
|
queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
}
|
|
~CommandQueue() {
|
|
clReleaseCommandQueue(queue_);
|
|
}
|
|
CommandQueue(const CommandQueue &other):
|
|
queue_(other.queue_) {
|
|
clRetainCommandQueue(queue_);
|
|
}
|
|
CommandQueue& operator=(CommandQueue other) {
|
|
swap(*this, other);
|
|
return *this;
|
|
}
|
|
friend void swap(CommandQueue &first, CommandQueue &second) {
|
|
std::swap(first.queue_, second.queue_);
|
|
}
|
|
|
|
// Public functions
|
|
cl_int EnqueueKernel(const Kernel &kernel, const std::vector<size_t> &global,
|
|
const std::vector<size_t> &local, Event &event) {
|
|
return clEnqueueNDRangeKernel(queue_, kernel(), static_cast<cl_uint>(global.size()), nullptr,
|
|
global.data(), local.data(), 0, nullptr, &(event()));
|
|
}
|
|
Context GetContext() const {
|
|
auto bytes = size_t{0};
|
|
clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, 0, nullptr, &bytes);
|
|
cl_context result;
|
|
clGetCommandQueueInfo(queue_, CL_QUEUE_CONTEXT, bytes, &result, nullptr);
|
|
return Context(result);
|
|
}
|
|
Device GetDevice() const {
|
|
auto bytes = size_t{0};
|
|
clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, 0, nullptr, &bytes);
|
|
cl_device_id result;
|
|
clGetCommandQueueInfo(queue_, CL_QUEUE_DEVICE, bytes, &result, nullptr);
|
|
return Device(result);
|
|
}
|
|
cl_int Finish() {
|
|
return clFinish(queue_);
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_command_queue& operator()() const { return queue_; }
|
|
private:
|
|
cl_command_queue queue_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
|
|
// C++11 version of cl_mem
|
|
class Buffer: public ObjectWithState {
|
|
public:
|
|
|
|
// Constructor based on the plain C data-type
|
|
explicit Buffer(const cl_mem buffer): buffer_(buffer) {
|
|
clRetainMemObject(buffer_);
|
|
}
|
|
|
|
// Memory management
|
|
explicit Buffer(const Context &context, const cl_mem_flags flags, const size_t bytes) {
|
|
auto status = CL_SUCCESS;
|
|
buffer_ = clCreateBuffer(context(), flags, bytes, nullptr, &status);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
}
|
|
~Buffer() {
|
|
clReleaseMemObject(buffer_);
|
|
}
|
|
Buffer(const Buffer &other):
|
|
buffer_(other.buffer_) {
|
|
clRetainMemObject(buffer_);
|
|
}
|
|
Buffer& operator=(Buffer other) {
|
|
swap(*this, other);
|
|
return *this;
|
|
}
|
|
friend void swap(Buffer &first, Buffer &second) {
|
|
std::swap(first.buffer_, second.buffer_);
|
|
}
|
|
|
|
// Public functions
|
|
template <typename T>
|
|
cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, T* host) {
|
|
return clEnqueueReadBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
|
|
}
|
|
template <typename T>
|
|
cl_int ReadBuffer(const CommandQueue &queue, const size_t bytes, std::vector<T> &host) {
|
|
return ReadBuffer(queue, bytes, host.data());
|
|
}
|
|
template <typename T>
|
|
cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const T* host) {
|
|
return clEnqueueWriteBuffer(queue(), buffer_, CL_TRUE, 0, bytes, host, 0, nullptr, nullptr);
|
|
}
|
|
template <typename T>
|
|
cl_int WriteBuffer(const CommandQueue &queue, const size_t bytes, const std::vector<T> &host) {
|
|
return WriteBuffer(queue, bytes, &host[0]);
|
|
}
|
|
size_t GetSize() const {
|
|
auto bytes = size_t{0};
|
|
auto status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, 0, nullptr, &bytes);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
auto result = size_t{0};
|
|
status = clGetMemObjectInfo(buffer_, CL_MEM_SIZE, bytes, &result, nullptr);
|
|
if (status != CL_SUCCESS) { Error(status); }
|
|
return result;
|
|
}
|
|
|
|
// Accessors to the private data-member
|
|
const cl_mem& operator()() const { return buffer_; }
|
|
private:
|
|
cl_mem buffer_;
|
|
};
|
|
|
|
// =================================================================================================
|
|
} // namespace clblast
|
|
|
|
// CLBLAST_CLPP11_H_
|
|
#endif
|