Added a special override database for Apple's OpenCL CPU implementation on OS X. This makes the test work; it is not aimed at good performance.
parent
d28ee082b0
commit
fb6c78ea07
|
@ -119,7 +119,7 @@ enum class Side { kLeft = 141, kRight = 142 };
|
|||
|
||||
// Precision scoped enum (values in bits)
|
||||
enum class Precision { kHalf = 16, kSingle = 32, kDouble = 64,
|
||||
kComplexSingle = 3232, kComplexDouble = 6464 };
|
||||
kComplexSingle = 3232, kComplexDouble = 6464, kAny = -1 };
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-1 (vector-vector) routines
|
||||
|
|
|
@ -164,6 +164,10 @@ class Platform {
|
|||
platform_ = platforms[platform_id];
|
||||
}
|
||||
|
||||
// Methods to retrieve platform information
|
||||
// Retrieves the platform's name (the CL_PLATFORM_NAME info string)
std::string Name() const {
  return GetInfoString(CL_PLATFORM_NAME);
}
|
||||
// Retrieves the platform's vendor (the CL_PLATFORM_VENDOR info string)
std::string Vendor() const {
  return GetInfoString(CL_PLATFORM_VENDOR);
}
|
||||
|
||||
// Returns the number of devices on this platform
|
||||
size_t NumDevices() const {
|
||||
auto result = cl_uint{0};
|
||||
|
@ -175,6 +179,17 @@ class Platform {
|
|||
// Accessor to the wrapped raw OpenCL platform ID
const cl_platform_id& operator()() const {
  return platform_;
}
|
||||
private:
|
||||
cl_platform_id platform_;
|
||||
|
||||
// Private helper functions
|
||||
std::string GetInfoString(const cl_device_info info) const {
|
||||
auto bytes = size_t{0};
|
||||
CheckError(clGetPlatformInfo(platform_, info, 0, nullptr, &bytes));
|
||||
auto result = std::string{};
|
||||
result.resize(bytes);
|
||||
CheckError(clGetPlatformInfo(platform_, info, bytes, &result[0], nullptr));
|
||||
result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// Retrieves a vector with all platforms
|
||||
|
|
|
@ -0,0 +1,70 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This file provides overrides for Apple's OpenCL CPU implementation. It is a special case compared
|
||||
// to all other implementations, as it only supports a 1-dimensional work-group size. In addition,
|
||||
// that work-group size is limited to 1024 (in theory) or much lower (kernel resource dependent).
|
||||
// Thus, instead of supporting this corner-case in the whole regular flow (starting from the tuner),
|
||||
// we provide this file with some manual overrides.
|
||||
//
|
||||
// Note: These overrides are to make the Apple CPU work and not crash, they are not in any way
|
||||
// optimized parameters. For decent speed don't use Apple's OpenCL CPU implementation.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
// =================================================================================================
|
||||
|
||||
// Override for the "Xaxpy" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XaxpyApple = {
  "Xaxpy", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"VW",8}, {"WGS",1}, {"WPT",4}
      } }
    } }
  }
};
|
||||
// Override for the "Xdot" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XdotApple = {
  "Xdot", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"WGS1",1}, {"WGS2",1}
      } }
    } }
  }
};
|
||||
// Override for the "Xgemv" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XgemvApple = {
  "Xgemv", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"WGS1",1}, {"WPT1",4}, {"UNROLL1", 1}
      } }
    } }
  }
};
|
||||
// Override for the "XgemvFast" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XgemvFastApple = {
  "XgemvFast", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"VW2",1}, {"WGS2",1}, {"WPT2",1}
      } }
    } }
  }
};
|
||||
// Override for the "XgemvFastRot" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XgemvFastRotApple = {
  "XgemvFastRot", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"VW3",1}, {"WGS3",1}, {"WPT3",1}
      } }
    } }
  }
};
|
||||
// Override for the "Xger" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XgerApple = {
  "Xger", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"WGS1",64}, {"WGS2",1}, {"WPT",2}
      } }
    } }
  }
};
|
||||
// Override for the "Xtrsv" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XtrsvApple = {
  "Xtrsv", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"TRSV_BLOCK_SIZE",32}
      } }
    } }
  }
};
|
||||
// Override for the "Xgemm" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XgemmApple = {
  "Xgemm", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"KWG",1}, {"KWI",1},
        {"MDIMA",1}, {"MDIMC",1}, {"MWG",1},
        {"NDIMB",1}, {"NDIMC",1}, {"NWG",1},
        {"SA",1}, {"SB",1},
        {"STRM",0}, {"STRN",0},
        {"VWM",1}, {"VWN",1}
      } }
    } }
  }
};
|
||||
// Override for the "XgemmDirect" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry XgemmDirectApple = {
  "XgemmDirect", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"KWID",1},
        {"MDIMAD",1}, {"MDIMCD",1},
        {"NDIMBD",1}, {"NDIMCD",1},
        {"PADA",0}, {"PADB",0},
        {"VWMD",1}, {"VWND",1},
        {"WGD",1}
      } }
    } }
  }
};
|
||||
// Override for the "Copy" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry CopyApple = {
  "Copy", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"COPY_DIMX",1}, {"COPY_DIMY",1}, {"COPY_VW",1}, {"COPY_WPT",1}
      } }
    } }
  }
};
|
||||
// Override for the "Pad" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry PadApple = {
  "Pad", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"PAD_DIMX",1}, {"PAD_DIMY",1}, {"PAD_WPTX",1}, {"PAD_WPTY",1}
      } }
    } }
  }
};
|
||||
// Override for the "Transpose" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry TransposeApple = {
  "Transpose", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"TRA_DIM",1}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1}
      } }
    } }
  }
};
|
||||
// Override for the "Padtranspose" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry PadtransposeApple = {
  "Padtranspose", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"PADTRA_PAD",0}, {"PADTRA_TILE",1}, {"PADTRA_WPT",1}
      } }
    } }
  }
};
|
||||
// Override for the "Invert" kernel on Apple's OpenCL CPU implementation (not tuned for speed)
const Database::DatabaseEntry InvertApple = {
  "Invert", Precision::kAny, {
    { kDeviceTypeAll, "default", {
      { "default", {
        {"INTERNAL_BLOCK_SIZE",16}
      } }
    } }
  }
};
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -11,6 +11,8 @@
|
|||
//
|
||||
// =================================================================================================
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "utilities/utilities.hpp"
|
||||
|
||||
#include "database/database.hpp"
|
||||
|
@ -28,12 +30,13 @@
|
|||
#include "database/kernels/transpose.hpp"
|
||||
#include "database/kernels/padtranspose.hpp"
|
||||
#include "database/kernels/invert.hpp"
|
||||
#include "database/apple_cpu_fallback.hpp"
|
||||
#include "database/kernel_selection.hpp"
|
||||
|
||||
namespace clblast {
|
||||
// =================================================================================================
|
||||
|
||||
// Initializes the database
|
||||
// Initializes the databases
|
||||
const std::vector<const Database::DatabaseEntry*> Database::database = {
|
||||
&database::XaxpyHalf, &database::XaxpySingle, &database::XaxpyDouble, &database::XaxpyComplexSingle, &database::XaxpyComplexDouble,
|
||||
&database::XdotHalf, &database::XdotSingle, &database::XdotDouble, &database::XdotComplexSingle, &database::XdotComplexDouble,
|
||||
|
@ -51,8 +54,15 @@ const std::vector<const Database::DatabaseEntry*> Database::database = {
|
|||
&database::InvertHalf, &database::InvertSingle, &database::InvertDouble, &database::InvertComplexSingle, &database::InvertComplexDouble,
|
||||
&database::KernelSelectionHalf, &database::KernelSelectionSingle, &database::KernelSelectionDouble, &database::KernelSelectionComplexSingle, &database::KernelSelectionComplexDouble
|
||||
};
|
||||
// The override database for Apple's OpenCL CPU implementation: pointers to the '*Apple' entries,
// one for each overridden kernel
const std::vector<const Database::DatabaseEntry*> Database::apple_cpu_fallback = {
  &database::XaxpyApple,
  &database::XdotApple,
  &database::XgemvApple,
  &database::XgemvFastApple,
  &database::XgemvFastRotApple,
  &database::XgerApple,
  &database::XtrsvApple,
  &database::XgemmApple,
  &database::XgemmDirectApple,
  &database::CopyApple,
  &database::PadApple,
  &database::TransposeApple,
  &database::PadtransposeApple,
  &database::InvertApple
};
|
||||
|
||||
// The OpenCL device vendors
|
||||
// The default values
|
||||
const std::string Database::kDeviceVendorAll = "default";
|
||||
|
||||
// Alternative names for some OpenCL vendors
|
||||
|
@ -83,9 +93,23 @@ Database::Database(const Device &device, const std::string &kernel_name,
|
|||
}
|
||||
}
|
||||
|
||||
// Sets the databases to search through
|
||||
auto databases = std::list<const std::vector<const DatabaseEntry*>>{overlay, database};
|
||||
|
||||
// Special case: modifies the database if the device is a CPU with Apple OpenCL
|
||||
#if defined(__APPLE__) || defined(__MACOSX)
|
||||
if (device.Type() == "CPU") {
|
||||
auto extensions = device.Capabilities();
|
||||
const auto is_apple = (extensions.find("cl_APPLE_SetMemObjectDestructor") == std::string::npos) ? false : true;
|
||||
if (is_apple) {
|
||||
databases.push_front(apple_cpu_fallback);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Searches potentially multiple databases
|
||||
auto search_result = ParametersPtr{};
|
||||
for (auto &db: { overlay, database}) {
|
||||
for (auto &db: databases) {
|
||||
search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db);
|
||||
if (search_result) {
|
||||
parameters_->insert(search_result->begin(), search_result->end());
|
||||
|
@ -128,7 +152,8 @@ Database::ParametersPtr Database::Search(const std::string &this_kernel,
|
|||
|
||||
// Selects the right kernel
|
||||
for (auto &db: this_database) {
|
||||
if (db->kernel == this_kernel && db->precision == this_precision) {
|
||||
if ((db->kernel == this_kernel) &&
|
||||
(db->precision == this_precision || db->precision == Precision::kAny)) {
|
||||
|
||||
// Searches for the right vendor and device type, or selects the default if unavailable. This
|
||||
// assumes that the default vendor / device type is last in the database.
|
||||
|
|
|
@ -72,6 +72,9 @@ class Database {
|
|||
// The database consists of separate database entries, stored together in a vector
|
||||
static const std::vector<const DatabaseEntry*> database;
|
||||
|
||||
// Database for a special case: Apple CPUs support only a limited number of threads
|
||||
static const std::vector<const DatabaseEntry*> apple_cpu_fallback;
|
||||
|
||||
Database() = default;
|
||||
|
||||
// The constructor with a user-provided database overlay (potentially an empty vector)
|
||||
|
|
|
@ -176,6 +176,7 @@ std::string ToString(Precision value) {
|
|||
case Precision::kDouble: return ToString(static_cast<int>(value))+" (double)";
|
||||
case Precision::kComplexSingle: return ToString(static_cast<int>(value))+" (complex-single)";
|
||||
case Precision::kComplexDouble: return ToString(static_cast<int>(value))+" (complex-double)";
|
||||
case Precision::kAny: return ToString(static_cast<int>(value))+" (any)";
|
||||
}
|
||||
}
|
||||
template <>
|
||||
|
@ -467,6 +468,7 @@ size_t GetBytes(const Precision precision) {
|
|||
case Precision::kDouble: return 8;
|
||||
case Precision::kComplexSingle: return 8;
|
||||
case Precision::kComplexDouble: return 16;
|
||||
case Precision::kAny: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue