Added a special override database for the Apple CPU implementation on OS X: this makes the test work; it does not focus on good performance

pull/145/head
Cedric Nugteren 2017-04-07 07:37:30 +02:00
parent d28ee082b0
commit fb6c78ea07
6 changed files with 120 additions and 5 deletions

View File

@ -119,7 +119,7 @@ enum class Side { kLeft = 141, kRight = 142 };
// Precision scoped enum (values in bits)
enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64,
kComplexSingle = 3232, kComplexDouble = 6464 };
kComplexSingle = 3232, kComplexDouble = 6464, kAny = -1 };
// =================================================================================================
// BLAS level-1 (vector-vector) routines

View File

@ -164,6 +164,10 @@ class Platform {
platform_ = platforms[platform_id];
}
// Methods to retrieve platform information
// Returns the string this platform reports for CL_PLATFORM_NAME
std::string Name() const {
  return GetInfoString(CL_PLATFORM_NAME);
}
// Returns the string this platform reports for CL_PLATFORM_VENDOR
std::string Vendor() const {
  return GetInfoString(CL_PLATFORM_VENDOR);
}
// Returns the number of devices on this platform
size_t NumDevices() const {
auto result = cl_uint{0};
@ -175,6 +179,17 @@ class Platform {
const cl_platform_id& operator()() const { return platform_; }
private:
cl_platform_id platform_;
// Private helper functions
std::string GetInfoString(const cl_device_info info) const {
auto bytes = size_t{0};
CheckError(clGetPlatformInfo(platform_, info, 0, nullptr, &bytes));
auto result = std::string{};
result.resize(bytes);
CheckError(clGetPlatformInfo(platform_, info, bytes, &result[0], nullptr));
result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters
return result;
}
};
// Retrieves a vector with all platforms

View File

@ -0,0 +1,70 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file provides overrides for Apple's OpenCL CPU implementation. It is a special case compared
// to all other implementations, as it only supports a 1-dimensional work-group size. In addition,
// that work-group size is limited to 1024 (in theory) or much lower (kernel resource dependent).
// Thus, instead of supporting this corner-case in the whole regular flow (starting from the tuner),
// we provide this file with some manual overrides.
//
// Note: These overrides only make the Apple CPU work without crashing; they are not optimized in
// any way. For decent speed, don't use Apple's OpenCL CPU implementation.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================

// Every entry below is registered for Precision::kAny and holds a single parameter set, stored
// under kDeviceTypeAll with the "default" vendor and the "default" device name.

// Level-1 kernels
const Database::DatabaseEntry XaxpyApple = {
  "Xaxpy", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"VW",8}, {"WGS",1}, {"WPT",4} } }
    } }
  }
};
const Database::DatabaseEntry XdotApple = {
  "Xdot", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"WGS1",1}, {"WGS2",1} } }
    } }
  }
};

// Level-2 kernels
const Database::DatabaseEntry XgemvApple = {
  "Xgemv", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"WGS1",1}, {"WPT1",4}, {"UNROLL1", 1} } }
    } }
  }
};
const Database::DatabaseEntry XgemvFastApple = {
  "XgemvFast", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"VW2",1}, {"WGS2",1}, {"WPT2",1} } }
    } }
  }
};
const Database::DatabaseEntry XgemvFastRotApple = {
  "XgemvFastRot", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"VW3",1}, {"WGS3",1}, {"WPT3",1} } }
    } }
  }
};
const Database::DatabaseEntry XgerApple = {
  "Xger", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } }
    } }
  }
};
const Database::DatabaseEntry XtrsvApple = {
  "Xtrsv", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"TRSV_BLOCK_SIZE",32} } }
    } }
  }
};

// Level-3 kernels
const Database::DatabaseEntry XgemmApple = {
  "Xgemm", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"KWG",1}, {"KWI",1}, {"MDIMA",1}, {"MDIMC",1}, {"MWG",1},
                     {"NDIMB",1}, {"NDIMC",1}, {"NWG",1}, {"SA",1}, {"SB",1},
                     {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }
    } }
  }
};
const Database::DatabaseEntry XgemmDirectApple = {
  "XgemmDirect", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"KWID",1}, {"MDIMAD",1}, {"MDIMCD",1}, {"NDIMBD",1}, {"NDIMCD",1},
                     {"PADA",0}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",1} } }
    } }
  }
};

// Copy, pad, and transpose kernels
const Database::DatabaseEntry CopyApple = {
  "Copy", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"COPY_DIMX",1}, {"COPY_DIMY",1}, {"COPY_VW",1}, {"COPY_WPT",1} } }
    } }
  }
};
const Database::DatabaseEntry PadApple = {
  "Pad", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"PAD_DIMX",1}, {"PAD_DIMY",1}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } }
    } }
  }
};
const Database::DatabaseEntry TransposeApple = {
  "Transpose", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"TRA_DIM",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } }
    } }
  }
};
const Database::DatabaseEntry PadtransposeApple = {
  "Padtranspose", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",1}, {"PADTRA_WPT",1} } }
    } }
  }
};

// Inversion kernel
const Database::DatabaseEntry InvertApple = {
  "Invert", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", { {"INTERNAL_BLOCK_SIZE",16} } }
    } }
  }
};

// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -11,6 +11,8 @@
//
// =================================================================================================
#include <list>
#include "utilities/utilities.hpp"
#include "database/database.hpp"
@ -28,12 +30,13 @@
#include "database/kernels/transpose.hpp"
#include "database/kernels/padtranspose.hpp"
#include "database/kernels/invert.hpp"
#include "database/apple_cpu_fallback.hpp"
#include "database/kernel_selection.hpp"
namespace clblast {
// =================================================================================================
// Initializes the database
// Initializes the databases
const std::vector<const Database::DatabaseEntry*> Database::database = {
&database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble,
&database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble,
@ -51,8 +54,15 @@ const std::vector<const Database::DatabaseEntry*> Database::database = {
&database::InvertHalf, &database::InvertSingle, &database::InvertDouble, &database::InvertComplexSingle, &database::InvertComplexDouble,
&database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble
};
// Collects the Apple-specific override entries, one per kernel
const std::vector<const Database::DatabaseEntry*> Database::apple_cpu_fallback = {
  &database::XaxpyApple,
  &database::XdotApple,
  &database::XgemvApple,
  &database::XgemvFastApple,
  &database::XgemvFastRotApple,
  &database::XgerApple,
  &database::XtrsvApple,
  &database::XgemmApple,
  &database::XgemmDirectApple,
  &database::CopyApple,
  &database::PadApple,
  &database::TransposeApple,
  &database::PadtransposeApple,
  &database::InvertApple
};
// The OpenCL device vendors
// The default values
const std::string Database::kDeviceVendorAll = "default";
// Alternative names for some OpenCL vendors
@ -83,9 +93,23 @@ Database::Database(const Device &device, const std::string &kernel_name,
}
}
// Sets the databases to search through
auto databases = std::list<const std::vector<const DatabaseEntry*>>{overlay, database};
// Special case: modifies the database if the device is a CPU with Apple OpenCL
#if defined(__APPLE__) || defined(__MACOSX)
if (device.Type() == "CPU") {
auto extensions = device.Capabilities();
const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true;
if (is_apple) {
databases.push_front(apple_cpu_fallback);
}
}
#endif
// Searches potentially multiple databases
auto search_result = ParametersPtr{};
for (auto &db: { overlay, database}) {
for (auto &db: databases) {
search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db);
if (search_result) {
parameters_->insert(search_result->begin(), search_result->end());
@ -128,7 +152,8 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel,
// Selects the right kernel
for (auto &db: this_database) {
if (db->kernel == this_kernel && db->precision == this_precision) {
if ((db->kernel == this_kernel) &&
(db->precision == this_precision || db->precision == Precision::kAny)) {
// Searches for the right vendor and device type, or selects the default if unavailable. This
// assumes that the default vendor / device type is last in the database.

View File

@ -72,6 +72,9 @@ class Database {
// The database consists of separate database entries, stored together in a vector
static const std::vector<const DatabaseEntry*> database;
// Database for a special case: Apple CPUs support only a limited number of threads
static const std::vector<const DatabaseEntry*> apple_cpu_fallback;
Database() = default;
// The constructor with a user-provided database overlay (potentially an empty vector)

View File

@ -176,6 +176,7 @@ std::string ToString(Precision value) {
case Precision::kDouble: return ToString(static_cast<int>(value))+" (double)";
case Precision::kComplexSingle: return ToString(static_cast<int>(value))+" (complex-single)";
case Precision::kComplexDouble: return ToString(static_cast<int>(value))+" (complex-double)";
case Precision::kAny: return ToString(static_cast<int>(value))+" (any)";
}
}
template <>
@ -467,6 +468,7 @@ size_t GetBytes(const Precision precision) {
case Precision::kDouble: return 8;
case Precision::kComplexSingle: return 8;
case Precision::kComplexDouble: return 16;
case Precision::kAny: return -1;
}
}