diff --git a/CMakeLists.txt b/CMakeLists.txt index f82af47a..33b9adb7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -159,8 +159,8 @@ endif() # Sets the supported routines and the used kernels. New routines and kernels should be added here. set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger xgemm xgemm_direct xgemv) -set(DATABASES copy invert pad padtranspose transpose xaxpy xdot - xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger xtrsv) +set(DATABASES copy pad padtranspose transpose xaxpy xdot + xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger) set(SAMPLE_PROGRAMS_CPP sgemm sgemm_batched) set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache) if(NETLIB) @@ -230,7 +230,12 @@ foreach(ROUTINE ${LEVELX_ROUTINES}) set(HEADERS ${HEADERS} src/routines/levelx/${ROUTINE}.hpp) endforeach() foreach(DATABASE ${DATABASES}) - set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}.hpp) + set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}.hpp) + set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_16.hpp) + set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_32.hpp) + set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_64.hpp) + set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_3232.hpp) + set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_6464.hpp) endforeach() # Creates and links the library diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py index 803d1d2a..779dd76c 100644 --- a/scripts/database/database/clblast.py +++ b/scripts/database/database/clblast.py @@ -42,20 +42,19 @@ def get_cpp_separator(): return "// =================================================================================================" -def get_cpp_header(family): +def get_cpp_header(family, precision): """Retrieves the C++ header""" return ("\n" + get_cpp_separator() + """ -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. // -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the '%s' kernels. +// This file populates the database with best-found tuning parameters for the '%s%s' kernels. //\n""" - % family.title() + get_cpp_separator() + \ - "\n\nnamespace clblast {\n" + "namespace database {\n" + get_cpp_separator()) + % (family.title(), precision)) + get_cpp_separator() + "\n" + + +def get_cpp_header_namespace(): + return "\nnamespace clblast {\n" + "namespace database {\n" def get_cpp_footer(): @@ -67,7 +66,7 @@ def get_cpp_precision(family, precision): """Retrieves the C++ code for the start of a new precision""" precision_string = precision_to_string(precision) camelcase_name = family.title().replace("_", "") - return("\n\nconst Database::DatabaseEntry %s%s = {\n \"%s\", Precision::k%s" + return("\nconst DatabaseEntry %s%s = {\n \"%s\", Precision::k%s" % (camelcase_name, precision_string, camelcase_name, precision_string)) @@ -79,6 +78,15 @@ def get_cpp_device_vendor(vendor, device_type): return " { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, device_type, device_type_caps, vendor) +def get_cpp_family_includes(family, precisions): + result = "\n" + # result += "#include \"clblast.h\"\n" + # result += "#include \"database/database_structure.hpp\"\n" + for precision in precisions: + result += "#include \"database/kernels/%s/%s_%s.hpp\"\n" % (family, family, precision) + return result + + def print_cpp_database(database, output_dir): """Outputs the database as C++ code""" @@ -87,19 +95,23 @@ def print_cpp_database(database, output_dir): for family_name in kernel_families: family_database = [s for s in database["sections"] if s["kernel_family"] == family_name] - # Opens a new file for each kernel family - full_path = os.path.join(output_dir, family_name + ".hpp") - with open(full_path, 'w+') as f: - f.write(get_cpp_header(family_name)) + # Goes into a new path for each kernel family + family_path = os.path.join(output_dir, family_name) - # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464) - precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database - for precision in precisions: - precision_database = [s for s in family_database if s["precision"] == precision] + # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464) + precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database + for precision in precisions: + precision_database = [s for s in family_database if s["precision"] == precision] + + # Opens a new file for each precision + full_path = os.path.join(family_path, family_name + "_" + precision + ".hpp") + with open(full_path, 'w+') as f: + f.write(get_cpp_header(family_name, precision)) + f.write(get_cpp_header_namespace()) f.write(get_cpp_precision(family_name, precision)) - # In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but - # with the defaults only + # In case there is nothing found at all (e.g. 16-bit): continue as if this was a + # precision of 32 but with the defaults only if len(precision_database) == 0: print("[database] No results found for %s:%s, retrieving defaults from %s:32" % (family_name, precision, family_name)) @@ -138,7 +150,7 @@ def print_cpp_database(database, output_dir): # Collects the parameters for this entry parameters = [] - parmameter_index = 0 + parameter_index = 0 kernels = sorted(set([s["kernel"] for s in device_database])) for kernel in kernels: kernel_database = [s for s in device_database if s["kernel"] == kernel] @@ -149,10 +161,10 @@ def print_cpp_database(database, output_dir): assert len(results) == 1 new_parameters = results[0]["parameters"] for parameter_name in sorted(new_parameters): - assert parameter_name == parameter_names[parmameter_index] + assert parameter_name == parameter_names[parameter_index] parameter_value = new_parameters[parameter_name] parameters.append(str(parameter_value)) - parmameter_index += 1 + parameter_index += 1 # Prints the entry f.write(", ".join(parameters)) @@ -162,7 +174,13 @@ def print_cpp_database(database, output_dir): f.write(" }\n },\n") # Prints the precision footer - f.write(" }\n};\n\n" + get_cpp_separator()) + f.write(" }\n};\n") - # Prints the file footer - f.write(get_cpp_footer()) + # Prints the file footer + f.write(get_cpp_footer()) + + # Creates the combined family includes header + full_path = os.path.join(family_path, family_name + ".hpp") + with open(full_path, 'w+') as f: + f.write(get_cpp_header(family_name, "")) + f.write(get_cpp_family_includes(family_name, precisions)) diff --git a/src/clblast.cpp b/src/clblast.cpp index 9089b17c..ba2feb05 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -2520,10 +2520,10 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern } // Creates a small custom database based on the provided parameters - const auto database_device = Database::DatabaseDevice{"default", parameter_values}; - const auto database_vendor = Database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}}; - const auto database_entry = Database::DatabaseEntry{kernel_name, precision, parameter_names, {database_vendor}}; - const auto database_entries = std::vector{database_entry}; + const auto database_device = database::DatabaseDevice{"default", parameter_values}; + const auto database_vendor = database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}}; + const auto database_entry = database::DatabaseEntry{kernel_name, precision, parameter_names, {database_vendor}}; + const auto database_entries = std::vector{database_entry}; const auto database = Database(device_cpp, kernel_name, precision, database_entries); // Removes the old database entry and stores the new one in the cache diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp index ae42749d..c5434dd5 100644 --- a/src/database/apple_cpu_fallback.hpp +++ b/src/database/apple_cpu_fallback.hpp @@ -22,46 +22,46 @@ namespace clblast { namespace database { // ================================================================================================= -const Database::DatabaseEntry XaxpyApple = { +const DatabaseEntry XaxpyApple = { "Xaxpy", Precision::kAny, {"VW", "WGS", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { 8, 1, 4 } } } } } }; -const Database::DatabaseEntry XdotApple = { +const DatabaseEntry XdotApple = { "Xdot", Precision::kAny, {"WGS1", "WGS2"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1 } } } } } }; -const Database::DatabaseEntry XgemvApple = { +const DatabaseEntry XgemvApple = { "Xgemv", Precision::kAny, {"WGS1", "WPT1", "UNROLL1"}, { { kDeviceTypeAll, "default", { { "default", { 1, 4, 1 } } } } } }; -const Database::DatabaseEntry XgemvFastApple = { +const DatabaseEntry XgemvFastApple = { "XgemvFast", Precision::kAny, {"VW2", "WGS2", "WPT2"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1 } } } } } }; -const Database::DatabaseEntry XgemvFastRotApple = { +const DatabaseEntry XgemvFastRotApple = { "XgemvFastRot", Precision::kAny, {"VW3", "WGS3", "WPT3"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1 } } } } } }; -const Database::DatabaseEntry XgerApple = { +const DatabaseEntry XgerApple = { "Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { 64, 1, 2 } } } } } }; -const Database::DatabaseEntry XtrsvApple = { +const DatabaseEntry XtrsvApple = { "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { 32 } } } } } }; -const Database::DatabaseEntry XgemmApple = { +const DatabaseEntry XgemmApple = { "Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } }; -const Database::DatabaseEntry XgemmDirectApple = { +const DatabaseEntry XgemmDirectApple = { "XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1 } } } } } }; -const Database::DatabaseEntry CopyApple = { +const DatabaseEntry CopyApple = { "Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1 } } } } } }; -const Database::DatabaseEntry PadApple = { +const DatabaseEntry PadApple = { "Pad", Precision::kAny, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1 } } } } } }; -const Database::DatabaseEntry TransposeApple = { +const DatabaseEntry TransposeApple = { "Transpose", Precision::kAny, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 1, 0, 0, 1 } } } } } }; -const Database::DatabaseEntry PadtransposeApple = { +const DatabaseEntry PadtransposeApple = { "Padtranspose", Precision::kAny, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 0, 1, 1 } } } } } }; -const Database::DatabaseEntry InvertApple = { +const DatabaseEntry InvertApple = { "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { 16 } } } } } }; diff --git a/src/database/database.cpp b/src/database/database.cpp index 79c2ea03..c0eeaa2f 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -16,19 +16,19 @@ #include "utilities/utilities.hpp" #include "database/database.hpp" -#include "database/kernels/xaxpy.hpp" -#include "database/kernels/xdot.hpp" -#include "database/kernels/xgemv.hpp" -#include "database/kernels/xgemv_fast.hpp" -#include "database/kernels/xgemv_fast_rot.hpp" -#include "database/kernels/xger.hpp" +#include "database/kernels/xaxpy/xaxpy.hpp" +#include "database/kernels/xdot/xdot.hpp" +#include "database/kernels/xgemv/xgemv.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp" +#include "database/kernels/xger/xger.hpp" +#include "database/kernels/xgemm/xgemm.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct.hpp" +#include "database/kernels/copy/copy.hpp" +#include "database/kernels/pad/pad.hpp" +#include "database/kernels/transpose/transpose.hpp" +#include "database/kernels/padtranspose/padtranspose.hpp" #include "database/kernels/xtrsv.hpp" -#include "database/kernels/xgemm.hpp" -#include "database/kernels/xgemm_direct.hpp" -#include "database/kernels/copy.hpp" -#include "database/kernels/pad.hpp" -#include "database/kernels/transpose.hpp" -#include "database/kernels/padtranspose.hpp" #include "database/kernels/invert.hpp" #include "database/apple_cpu_fallback.hpp" #include "database/kernel_selection.hpp" @@ -36,8 +36,12 @@ namespace clblast { // ================================================================================================= +namespace database { +extern const DatabaseEntry CopyHalf; +} + // Initializes the databases -const std::vector Database::database = std::vector{ +const std::vector Database::database = std::vector{ database::XaxpyHalf, database::XaxpySingle, database::XaxpyDouble, database::XaxpyComplexSingle, database::XaxpyComplexDouble, database::XdotHalf, database::XdotSingle, database::XdotDouble, database::XdotComplexSingle, database::XdotComplexDouble, database::XgemvHalf, database::XgemvSingle, database::XgemvDouble, database::XgemvComplexSingle, database::XgemvComplexDouble, @@ -54,7 +58,7 @@ const std::vector Database::database = std::vector Database::apple_cpu_fallback = std::vector{ +const std::vector Database::apple_cpu_fallback = std::vector{ database::XaxpyApple, database::XdotApple, database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple, database::XgemmApple, database::XgemmDirectApple, @@ -78,8 +82,8 @@ const std::unordered_map Database::kVendorNames{ // Constructor, computing device properties and populating the parameter-vector from the database. // This takes an optional overlay database in case of custom tuning or custom kernels. Database::Database(const Device &device, const std::string &kernel_name, - const Precision precision, const std::vector &overlay): - parameters_(std::make_shared()) { + const Precision precision, const std::vector &overlay): + parameters_(std::make_shared()) { // Finds information of the current device auto device_type = device.Type(); @@ -94,7 +98,7 @@ Database::Database(const Device &device, const std::string &kernel_name, } // Sets the databases to search through - auto databases = std::list>{overlay, database}; + auto databases = std::list>{overlay, database}; // Special case: modifies the database if the device is a CPU with Apple OpenCL #if defined(__APPLE__) || defined(__MACOSX) @@ -108,7 +112,7 @@ Database::Database(const Device &device, const std::string &kernel_name, #endif // Searches potentially multiple databases - auto search_result = Parameters(); + auto search_result = database::Parameters(); for (auto &db: databases) { search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db); if (search_result.size() != 0) { @@ -143,12 +147,12 @@ std::vector Database::GetParameterNames() const { // ================================================================================================= // Searches a particular database for the right kernel and precision -Database::Parameters Database::Search(const std::string &this_kernel, +database::Parameters Database::Search(const std::string &this_kernel, const std::string &this_type, const std::string &this_vendor, const std::string &this_device, const Precision this_precision, - const std::vector &this_database) const { + const std::vector &this_database) const { // Selects the right kernel for (auto &db: this_database) { @@ -165,13 +169,13 @@ Database::Parameters Database::Search(const std::string &this_kernel, } // If we reached this point, the entry was not found in this database - return Parameters(); + return database::Parameters(); } -Database::Parameters Database::SearchVendorAndType(const std::string &target_vendor, +database::Parameters Database::SearchVendorAndType(const std::string &target_vendor, const std::string &target_type, const std::string &this_device, - const std::vector &vendors, + const std::vector &vendors, const std::vector ¶meter_names) const { for (auto &vendor: vendors) { if ((vendor.name == target_vendor) && (vendor.type == target_type)) { @@ -182,25 +186,25 @@ Database::Parameters Database::SearchVendorAndType(const std::string &target_ven return SearchDevice("default", vendor.devices, parameter_names); } } - return Parameters(); + return database::Parameters(); } -Database::Parameters Database::SearchDevice(const std::string &target_device, - const std::vector &devices, +database::Parameters Database::SearchDevice(const std::string &target_device, + const std::vector &devices, const std::vector ¶meter_names) const { for (auto &device: devices) { if (device.name == target_device) { // Sets the parameters accordingly - auto parameters = Parameters(); - if (parameter_names.size() != device.parameters.size()) { return Parameters(); } // ERROR + auto parameters = database::Parameters(); + if (parameter_names.size() != device.parameters.size()) { return database::Parameters(); } // ERROR for (auto i = size_t{0}; i < parameter_names.size(); ++i) { parameters[parameter_names[i]] = device.parameters[i]; } return parameters; } } - return Parameters(); + return database::Parameters(); } // ================================================================================================= diff --git a/src/database/database.hpp b/src/database/database.hpp index b652164c..66cf93d5 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -22,47 +22,15 @@ #include #include "utilities/utilities.hpp" +#include "database/database_structure.hpp" namespace clblast { // ================================================================================================= -// A special namespace to hold all the global constant variables (including the database entries) -namespace database { - - // The OpenCL device types - const std::string kDeviceTypeCPU = "CPU"; - const std::string kDeviceTypeGPU = "GPU"; - const std::string kDeviceTypeAccelerator = "accelerator"; - const std::string kDeviceTypeAll = "default"; - -} // namespace database - -// ================================================================================================= - // See comment at top of file for a description of the class class Database { public: - // Type alias for the database parameters - using Parameters = std::unordered_map; - - // Structures for content inside the database - struct DatabaseDevice { - std::string name; - std::vector parameters; // parameter values - }; - struct DatabaseVendor { - std::string type; - std::string name; - std::vector devices; - }; - struct DatabaseEntry { - std::string kernel; - Precision precision; - std::vector parameter_names; - std::vector vendors; - }; - // The OpenCL device vendors static const std::string kDeviceVendorAll; @@ -70,16 +38,16 @@ class Database { static const std::unordered_map kVendorNames; // The database consists of separate database entries, stored together in a vector - static const std::vector database; + static const std::vector database; // Database for a special case: Apple CPUs support limited number of threads - static const std::vector apple_cpu_fallback; + static const std::vector apple_cpu_fallback; Database() = default; // The constructor with a user-provided database overlay (potentially an empty vector) explicit Database(const Device &device, const std::string &kernel_name, - const Precision precision, const std::vector &overlay); + const Precision precision, const std::vector &overlay); // Accessor of values by key size_t operator[](const std::string &key) const { return parameters_->find(key)->second; } @@ -93,21 +61,21 @@ class Database { private: // Search method functions, returning a set of parameters (possibly empty) - Parameters Search(const std::string &this_kernel, const std::string &this_type, + database::Parameters Search(const std::string &this_kernel, const std::string &this_type, const std::string &this_vendor, const std::string &this_device, const Precision this_precision, - const std::vector &db) const; - Parameters SearchDevice(const std::string &target_device, - const std::vector &devices, + const std::vector &db) const; + database::Parameters SearchDevice(const std::string &target_device, + const std::vector &devices, const std::vector ¶meter_names) const; - Parameters SearchVendorAndType(const std::string &target_vendor, + database::Parameters SearchVendorAndType(const std::string &target_vendor, const std::string &target_type, const std::string &this_device, - const std::vector &vendors, + const std::vector &vendors, const std::vector ¶meter_names) const; // Found parameters suitable for this device/kernel - std::shared_ptr parameters_; + std::shared_ptr parameters_; }; // ================================================================================================= diff --git a/src/database/database_structure.hpp b/src/database/database_structure.hpp new file mode 100644 index 00000000..961ab239 --- /dev/null +++ b/src/database/database_structure.hpp @@ -0,0 +1,58 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren +// +// This file describes the database storage structures. +// +// ================================================================================================= + +#ifndef CLBLAST_DATABASE_STRUCTURE_H_ +#define CLBLAST_DATABASE_STRUCTURE_H_ + +#include +#include +#include + +namespace clblast { +// A special namespace to hold all the global constant variables (including the database entries) +namespace database { + +// ================================================================================================= + +// The OpenCL device types +const std::string kDeviceTypeCPU = "CPU"; +const std::string kDeviceTypeGPU = "GPU"; +const std::string kDeviceTypeAccelerator = "accelerator"; +const std::string kDeviceTypeAll = "default"; + +// Type alias for the database parameters +using Parameters = std::unordered_map; + +// Structures for content inside the database +struct DatabaseDevice { + std::string name; + std::vector parameters; // parameter values +}; +struct DatabaseVendor { + std::string type; + std::string name; + std::vector devices; +}; +struct DatabaseEntry { + std::string kernel; + Precision precision; + std::vector parameter_names; + std::vector vendors; +}; + +// ================================================================================================= +} // namespace database +} // namespace clblast + +// CLBLAST_DATABASE_STRUCTURE_H_ +#endif diff --git a/src/database/kernel_selection.hpp b/src/database/kernel_selection.hpp index 82c7d59d..b492bd82 100644 --- a/src/database/kernel_selection.hpp +++ b/src/database/kernel_selection.hpp @@ -18,7 +18,7 @@ namespace clblast { namespace database { // ================================================================================================= -const Database::DatabaseEntry KernelSelectionHalf = { +const DatabaseEntry KernelSelectionHalf = { "KernelSelection", Precision::kHalf, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -40,7 +40,7 @@ const Database::DatabaseEntry KernelSelectionHalf = { // ================================================================================================= -const Database::DatabaseEntry KernelSelectionSingle = { +const DatabaseEntry KernelSelectionSingle = { "KernelSelection", Precision::kSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -67,7 +67,7 @@ const Database::DatabaseEntry KernelSelectionSingle = { // ================================================================================================= -const Database::DatabaseEntry KernelSelectionComplexSingle = { +const DatabaseEntry KernelSelectionComplexSingle = { "KernelSelection", Precision::kComplexSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -89,7 +89,7 @@ const Database::DatabaseEntry KernelSelectionComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry KernelSelectionDouble = { +const DatabaseEntry KernelSelectionDouble = { "KernelSelection", Precision::kDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { @@ -111,7 +111,7 @@ const Database::DatabaseEntry KernelSelectionDouble = { // ================================================================================================= -const Database::DatabaseEntry KernelSelectionComplexDouble = { +const DatabaseEntry KernelSelectionComplexDouble = { "KernelSelection", Precision::kComplexDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { diff --git a/src/database/kernels/copy.hpp b/src/database/kernels/copy.hpp deleted file mode 100644 index e5defb32..00000000 --- a/src/database/kernels/copy.hpp +++ /dev/null @@ -1,354 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Copy' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry CopyHalf = { - "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 16, 8, 4, 4 } }, - { "default", { 16, 8, 4, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } }, - { "default", { 8, 32, 4, 8 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopySingle = { - "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } }, - { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, - { "Ellesmere", { 8, 8, 4, 8 } }, - { "Fiji", { 16, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 2, 2 } }, - { "Oland", { 32, 8, 4, 2 } }, - { "Pitcairn", { 8, 16, 4, 1 } }, - { "Tahiti", { 32, 8, 2, 2 } }, - { "Tonga", { 32, 8, 4, 4 } }, - { "Turks", { 8, 8, 4, 2 } }, - { "default", { 8, 16, 4, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 2, 4 } }, - { "default", { 32, 8, 2, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } }, - { "default", { 32, 16, 8, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } }, - { "Iris", { 16, 8, 1, 2 } }, - { "Iris Pro", { 32, 8, 4, 4 } }, - { "default", { 8, 8, 2, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 4, 1 } }, - { "GeForce GT 650M", { 16, 16, 4, 2 } }, - { "GeForce GTX 1070", { 8, 16, 4, 1 } }, - { "GeForce GTX 1080", { 8, 32, 4, 1 } }, - { "GeForce GTX 480", { 8, 8, 4, 1 } }, - { "GeForce GTX 670", { 16, 32, 4, 1 } }, - { "GeForce GTX 680", { 32, 16, 4, 1 } }, - { "GeForce GTX 750", { 32, 8, 2, 2 } }, - { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } }, - { "GeForce GTX 980", { 32, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 2, 4 } }, - { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } }, - { "GeForce GTX TITAN X", { 32, 8, 1, 2 } }, - { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, - { "Tesla K20m", { 8, 8, 4, 4 } }, - { "Tesla K40m", { 8, 8, 4, 2 } }, - { "default", { 8, 32, 4, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopyComplexSingle = { - "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } }, - { "Ellesmere", { 16, 16, 1, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 16, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 8, 8, 2, 2 } }, - { "Tonga", { 8, 32, 1, 2 } }, - { "Turks", { 32, 8, 4, 1 } }, - { "default", { 16, 8, 1, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } }, - { "Iris", { 16, 8, 1, 2 } }, - { "Iris Pro", { 32, 16, 1, 4 } }, - { "default", { 16, 8, 1, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 1, 1 } }, - { "GeForce GTX 1070", { 16, 8, 1, 1 } }, - { "GeForce GTX 1080", { 32, 8, 1, 2 } }, - { "GeForce GTX 480", { 16, 16, 1, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 1 } }, - { "GeForce GTX 750", { 16, 8, 1, 2 } }, - { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, - { "GeForce GTX 980", { 8, 8, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 16, 2, 1 } }, - { "Tesla K20m", { 8, 8, 1, 4 } }, - { "Tesla K40m", { 16, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopyDouble = { - "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "Ellesmere", { 32, 8, 1, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 32, 8, 2, 8 } }, - { "Pitcairn", { 32, 8, 1, 1 } }, - { "Tahiti", { 8, 32, 2, 1 } }, - { "Tonga", { 8, 32, 2, 4 } }, - { "default", { 16, 8, 2, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 8, 8, 2 } }, - { "default", { 16, 8, 8, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } }, - { "default", { 16, 8, 8, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } }, - { "default", { 8, 8, 8, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 16, 2, 1 } }, - { "GeForce GTX 1070", { 8, 8, 4, 1 } }, - { "GeForce GTX 1080", { 8, 8, 4, 1 } }, - { "GeForce GTX 480", { 8, 8, 2, 1 } }, - { "GeForce GTX 670", { 8, 8, 2, 1 } }, - { "GeForce GTX 680", { 16, 32, 2, 1 } }, - { "GeForce GTX 750", { 8, 16, 2, 1 } }, - { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } }, - { "GeForce GTX 980", { 32, 8, 2, 1 } }, - { "GeForce GTX TITAN", { 16, 32, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } }, - { "GeForce GTX TITAN X", { 32, 16, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 8, 2, 2 } }, - { "Tesla K20m", { 8, 8, 2, 1 } }, - { "Tesla K40m", { 8, 8, 2, 2 } }, - { "default", { 32, 32, 2, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 2, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry CopyComplexDouble = { - "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } }, - { "Ellesmere", { 8, 32, 1, 2 } }, - { "Fiji", { 8, 16, 1, 1 } }, - { "Hawaii", { 32, 8, 2, 8 } }, - { "Oland", { 8, 16, 1, 1 } }, - { "Pitcairn", { 16, 8, 1, 1 } }, - { "Tahiti", { 8, 16, 1, 1 } }, - { "Tonga", { 16, 8, 2, 1 } }, - { "default", { 8, 16, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 2 } }, - { "default", { 32, 8, 1, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } }, - { "default", { 16, 8, 8, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 8, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 32, 1, 4 } }, - { "GeForce GTX 1080", { 8, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 1 } }, - { "GeForce GTX 680", { 8, 8, 1, 1 } }, - { "GeForce GTX 750", { 32, 8, 1, 1 } }, - { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } }, - { "GeForce GTX 980", { 8, 8, 1, 1 } }, - { "GeForce GTX TITAN", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 8, 1, 2 } }, - { "Tesla K20m", { 8, 8, 1, 2 } }, - { "Tesla K40m", { 8, 8, 1, 1 } }, - { "default", { 8, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/copy/copy.hpp b/src/database/kernels/copy/copy.hpp new file mode 100644 index 00000000..8c6e7e03 --- /dev/null +++ b/src/database/kernels/copy/copy.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy' kernels. +// +// ================================================================================================= + +#include "database/kernels/copy/copy_16.hpp" +#include "database/kernels/copy/copy_32.hpp" +#include "database/kernels/copy/copy_3232.hpp" +#include "database/kernels/copy/copy_64.hpp" +#include "database/kernels/copy/copy_6464.hpp" diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp new file mode 100644 index 00000000..dea61ca6 --- /dev/null +++ b/src/database/kernels/copy/copy_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyHalf = { + "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 16, 8, 4, 4 } }, + { "default", { 16, 8, 4, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } }, + { "default", { 8, 32, 4, 8 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 16, 8, 4, 4 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp new file mode 100644 index 00000000..254c2b38 --- /dev/null +++ b/src/database/kernels/copy/copy_32.hpp @@ -0,0 +1,101 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopySingle = { + "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } }, + { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, + { "Ellesmere", { 8, 8, 4, 8 } }, + { "Fiji", { 16, 16, 1, 2 } }, + { "Hawaii", { 32, 8, 2, 2 } }, + { "Oland", { 32, 8, 4, 2 } }, + { "Pitcairn", { 8, 16, 4, 1 } }, + { "Tahiti", { 32, 8, 2, 2 } }, + { "Tonga", { 32, 8, 4, 4 } }, + { "Turks", { 8, 8, 4, 2 } }, + { "default", { 8, 16, 4, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 32, 8, 2, 4 } }, + { "default", { 32, 8, 2, 4 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } }, + { "default", { 32, 16, 8, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } }, + { "Iris", { 16, 8, 1, 2 } }, + { "Iris Pro", { 32, 8, 4, 4 } }, + { "default", { 8, 8, 2, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 8, 4, 1 } }, + { "GeForce GT 650M", { 16, 16, 4, 2 } }, + { "GeForce GTX 1070", { 8, 16, 4, 1 } }, + { "GeForce GTX 1080", { 8, 32, 4, 1 } }, + { "GeForce GTX 480", { 8, 8, 4, 1 } }, + { "GeForce GTX 670", { 16, 32, 4, 1 } }, + { "GeForce GTX 680", { 32, 16, 4, 1 } }, + { "GeForce GTX 750", { 32, 8, 2, 2 } }, + { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } }, + { "GeForce GTX 980", { 32, 16, 1, 1 } }, + { "GeForce GTX TITAN", { 32, 8, 2, 4 } }, + { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } }, + { "GeForce GTX TITAN X", { 32, 8, 1, 2 } }, + { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, + { "Tesla K20m", { 8, 8, 4, 4 } }, + { "Tesla K40m", { 8, 8, 4, 2 } }, + { "default", { 8, 32, 4, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 8, 4, 4 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp new file mode 100644 index 00000000..7af25017 --- /dev/null +++ b/src/database/kernels/copy/copy_3232.hpp @@ -0,0 +1,92 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyComplexSingle = { + "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } }, + { "Ellesmere", { 16, 16, 1, 4 } }, + { "Fiji", { 16, 8, 1, 2 } }, + { "Hawaii", { 32, 8, 1, 2 } }, + { "Oland", { 8, 16, 1, 1 } }, + { "Pitcairn", { 8, 8, 1, 2 } }, + { "Tahiti", { 8, 8, 2, 2 } }, + { "Tonga", { 8, 32, 1, 2 } }, + { "Turks", { 32, 8, 4, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } }, + { "Iris", { 16, 8, 1, 2 } }, + { "Iris Pro", { 32, 16, 1, 4 } }, + { "default", { 16, 8, 1, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 8, 1, 1 } }, + { "GeForce GTX 1070", { 16, 8, 1, 1 } }, + { "GeForce GTX 1080", { 32, 8, 1, 2 } }, + { "GeForce GTX 480", { 16, 16, 1, 1 } }, + { "GeForce GTX 670", { 16, 8, 1, 1 } }, + { "GeForce GTX 750", { 16, 8, 1, 2 } }, + { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, + { "GeForce GTX 980", { 8, 8, 1, 1 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 16, 2, 1 } }, + { "Tesla K20m", { 8, 8, 1, 4 } }, + { "Tesla K40m", { 16, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 16, 8, 1, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp new file mode 100644 index 00000000..5c00407b --- /dev/null +++ b/src/database/kernels/copy/copy_64.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyDouble = { + "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "Ellesmere", { 32, 8, 1, 4 } }, + { "Fiji", { 16, 8, 1, 2 } }, + { "Hawaii", { 32, 8, 1, 2 } }, + { "Oland", { 32, 8, 2, 8 } }, + { "Pitcairn", { 32, 8, 1, 1 } }, + { "Tahiti", { 8, 32, 2, 1 } }, + { "Tonga", { 8, 32, 2, 4 } }, + { "default", { 16, 8, 2, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 16, 8, 8, 2 } }, + { "default", { 16, 8, 8, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } }, + { "default", { 16, 8, 8, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } }, + { "default", { 8, 8, 8, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 32, 16, 2, 1 } }, + { "GeForce GTX 1070", { 8, 8, 4, 1 } }, + { "GeForce GTX 1080", { 8, 8, 4, 1 } }, + { "GeForce GTX 480", { 8, 8, 2, 1 } }, + { "GeForce GTX 670", { 8, 8, 2, 1 } }, + { "GeForce GTX 680", { 16, 32, 2, 1 } }, + { "GeForce GTX 750", { 8, 16, 2, 1 } }, + { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } }, + { "GeForce GTX 980", { 32, 8, 2, 1 } }, + { "GeForce GTX TITAN", { 16, 32, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } }, + { "GeForce GTX TITAN X", { 32, 16, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 8, 2, 2 } }, + { "Tesla K20m", { 8, 8, 2, 1 } }, + { "Tesla K40m", { 8, 8, 2, 2 } }, + { "default", { 32, 32, 2, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 16, 8, 2, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp new file mode 100644 index 00000000..c7f74855 --- /dev/null +++ b/src/database/kernels/copy/copy_6464.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Copy6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry CopyComplexDouble = { + "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } }, + { "Ellesmere", { 8, 32, 1, 2 } }, + { "Fiji", { 8, 16, 1, 1 } }, + { "Hawaii", { 32, 8, 2, 8 } }, + { "Oland", { 8, 16, 1, 1 } }, + { "Pitcairn", { 16, 8, 1, 1 } }, + { "Tahiti", { 8, 16, 1, 1 } }, + { "Tonga", { 16, 8, 2, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } }, + { "default", { 16, 8, 8, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 8, 8, 1, 1 } }, + { "GeForce GTX 1070", { 8, 32, 1, 4 } }, + { "GeForce GTX 1080", { 8, 8, 1, 1 } }, + { "GeForce GTX 480", { 16, 8, 1, 1 } }, + { "GeForce GTX 670", { 16, 8, 1, 1 } }, + { "GeForce GTX 680", { 8, 8, 1, 1 } }, + { "GeForce GTX 750", { 32, 8, 1, 1 } }, + { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } }, + { "GeForce GTX 980", { 8, 8, 1, 1 } }, + { "GeForce GTX TITAN", { 16, 16, 1, 1 } }, + { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 8, 1, 2 } }, + { "Tesla K20m", { 8, 8, 1, 2 } }, + { "Tesla K40m", { 8, 8, 1, 1 } }, + { "default", { 8, 8, 1, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 16, 8, 1, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/invert.hpp b/src/database/kernels/invert.hpp index 193d1ab4..e736c864 100644 --- a/src/database/kernels/invert.hpp +++ b/src/database/kernels/invert.hpp @@ -15,7 +15,7 @@ namespace clblast { namespace database { // ================================================================================================= -const Database::DatabaseEntry InvertHalf = { +const DatabaseEntry InvertHalf = { "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -27,7 +27,7 @@ const Database::DatabaseEntry InvertHalf = { // ================================================================================================= -const Database::DatabaseEntry InvertSingle = { +const DatabaseEntry InvertSingle = { "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -39,7 +39,7 @@ const Database::DatabaseEntry InvertSingle = { // ================================================================================================= -const Database::DatabaseEntry InvertComplexSingle = { +const DatabaseEntry InvertComplexSingle = { "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -51,7 +51,7 @@ const Database::DatabaseEntry InvertComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry InvertDouble = { +const DatabaseEntry InvertDouble = { "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -63,7 +63,7 @@ const Database::DatabaseEntry InvertDouble = { // ================================================================================================= -const Database::DatabaseEntry InvertComplexDouble = { +const DatabaseEntry InvertComplexDouble = { "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { diff --git a/src/database/kernels/pad.hpp b/src/database/kernels/pad.hpp deleted file mode 100644 index b6ebde43..00000000 --- a/src/database/kernels/pad.hpp +++ /dev/null @@ -1,362 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Pad' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry PadHalf = { - "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 16, 8, 1, 2 } }, - { "default", { 16, 8, 1, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } }, - { "default", { 8, 8, 2, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } }, - { "default", { 16, 8, 4, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 8, 4, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadSingle = { - "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } }, - { "Ellesmere", { 32, 8, 2, 2 } }, - { "Fiji", { 16, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 4 } }, - { "Oland", { 8, 8, 1, 2 } }, - { "Pitcairn", { 32, 8, 1, 2 } }, - { "Tahiti", { 32, 8, 1, 2 } }, - { "Tonga", { 16, 16, 2, 2 } }, - { "Turks", { 32, 8, 2, 1 } }, - { "default", { 8, 16, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 4 } }, - { "default", { 32, 8, 1, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } }, - { "Iris", { 32, 16, 2, 1 } }, - { "Iris Pro", { 16, 8, 2, 1 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } }, - { "default", { 32, 16, 2, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 2, 1 } }, - { "GeForce GT 650M", { 32, 16, 2, 2 } }, - { "GeForce GTX 1070", { 16, 8, 1, 1 } }, - { "GeForce GTX 1080", { 16, 8, 1, 1 } }, - { "GeForce GTX 480", { 32, 8, 1, 4 } }, - { "GeForce GTX 670", { 32, 8, 2, 2 } }, - { "GeForce GTX 680", { 16, 8, 4, 1 } }, - { "GeForce GTX 750", { 32, 16, 4, 2 } }, - { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } }, - { "GeForce GTX 980", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 2, 1 } }, - { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 16, 1, 1 } }, - { "TITAN X (Pascal)", { 16, 8, 1, 2 } }, - { "Tesla K20m", { 32, 8, 2, 1 } }, - { "Tesla K40m", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 2, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 2, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadComplexSingle = { - "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, - { "Ellesmere", { 16, 16, 2, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 32, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 16, 16, 1, 1 } }, - { "Tonga", { 16, 8, 1, 2 } }, - { "Turks", { 16, 8, 4, 4 } }, - { "default", { 16, 8, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 4 } }, - { "default", { 32, 8, 1, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } }, - { "Iris", { 32, 16, 2, 4 } }, - { "Iris Pro", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 1, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 16, 1, 1 } }, - { "GeForce GTX 1070", { 8, 32, 1, 1 } }, - { "GeForce GTX 1080", { 32, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 2, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 2 } }, - { "GeForce GTX 680", { 16, 32, 1, 2 } }, - { "GeForce GTX 750", { 32, 8, 2, 1 } }, - { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } }, - { "GeForce GTX 980", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 2, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 32, 1, 2 } }, - { "Tesla K20m", { 32, 8, 1, 2 } }, - { "Tesla K40m", { 16, 8, 1, 1 } }, - { "default", { 32, 8, 1, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadDouble = { - "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "Ellesmere", { 8, 32, 2, 1 } }, - { "Fiji", { 8, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 32, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 32, 8, 1, 1 } }, - { "Tonga", { 32, 8, 4, 1 } }, - { "default", { 16, 16, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 4, 2 } }, - { "default", { 32, 8, 4, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, - { "default", { 32, 16, 4, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 8, 1, 1 } }, - { "GeForce GTX 1080", { 32, 32, 2, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 16, 16, 2, 1 } }, - { "GeForce GTX 680", { 32, 32, 1, 2 } }, - { "GeForce GTX 750", { 32, 16, 1, 1 } }, - { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } }, - { "GeForce GTX 980", { 8, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, - { "Tesla K20m", { 32, 8, 1, 1 } }, - { "Tesla K40m", { 16, 8, 1, 2 } }, - { "default", { 32, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadComplexDouble = { - "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } }, - { "Ellesmere", { 8, 16, 1, 2 } }, - { "Fiji", { 32, 8, 2, 1 } }, - { "Hawaii", { 32, 8, 1, 1 } }, - { "Oland", { 8, 16, 2, 1 } }, - { "Pitcairn", { 16, 8, 1, 1 } }, - { "Tahiti", { 8, 16, 1, 1 } }, - { "Tonga", { 8, 16, 1, 1 } }, - { "default", { 8, 16, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 8, 4, 1 } }, - { "default", { 16, 8, 4, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 2, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 8, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 8, 2, 2 } }, - { "GeForce GTX 1080", { 8, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 32, 8, 1, 1 } }, - { "GeForce GTX 680", { 8, 8, 1, 1 } }, - { "GeForce GTX 750", { 8, 8, 1, 1 } }, - { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, - { "GeForce GTX 980", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 8, 32, 1, 2 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 16, 1, 1 } }, - { "Tesla K20m", { 8, 8, 1, 2 } }, - { "Tesla K40m", { 8, 8, 1, 1 } }, - { "default", { 16, 8, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/pad/pad.hpp b/src/database/kernels/pad/pad.hpp new file mode 100644 index 00000000..bc91c09f --- /dev/null +++ b/src/database/kernels/pad/pad.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad' kernels. +// +// ================================================================================================= + +#include "database/kernels/pad/pad_16.hpp" +#include "database/kernels/pad/pad_32.hpp" +#include "database/kernels/pad/pad_3232.hpp" +#include "database/kernels/pad/pad_64.hpp" +#include "database/kernels/pad/pad_6464.hpp" diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp new file mode 100644 index 00000000..8f31c31e --- /dev/null +++ b/src/database/kernels/pad/pad_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadHalf = { + "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 16, 8, 1, 2 } }, + { "default", { 16, 8, 1, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } }, + { "default", { 8, 8, 2, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } }, + { "default", { 16, 8, 4, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 8, 4, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp new file mode 100644 index 00000000..eda85e8b --- /dev/null +++ b/src/database/kernels/pad/pad_32.hpp @@ -0,0 +1,101 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadSingle = { + "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } }, + { "Ellesmere", { 32, 8, 2, 2 } }, + { "Fiji", { 16, 16, 1, 2 } }, + { "Hawaii", { 32, 8, 1, 4 } }, + { "Oland", { 8, 8, 1, 2 } }, + { "Pitcairn", { 32, 8, 1, 2 } }, + { "Tahiti", { 32, 8, 1, 2 } }, + { "Tonga", { 16, 16, 2, 2 } }, + { "Turks", { 32, 8, 2, 1 } }, + { "default", { 8, 16, 1, 2 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } }, + { "Iris", { 32, 16, 2, 1 } }, + { "Iris Pro", { 16, 8, 2, 1 } }, + { "default", { 32, 8, 4, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } }, + { "default", { 32, 16, 2, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 32, 8, 2, 1 } }, + { "GeForce GT 650M", { 32, 16, 2, 2 } }, + { "GeForce GTX 1070", { 16, 8, 1, 1 } }, + { "GeForce GTX 1080", { 16, 8, 1, 1 } }, + { "GeForce GTX 480", { 32, 8, 1, 4 } }, + { "GeForce GTX 670", { 32, 8, 2, 2 } }, + { "GeForce GTX 680", { 16, 8, 4, 1 } }, + { "GeForce GTX 750", { 32, 16, 4, 2 } }, + { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } }, + { "GeForce GTX 980", { 16, 8, 1, 1 } }, + { "GeForce GTX TITAN", { 32, 8, 2, 1 } }, + { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } }, + { "GeForce GTX TITAN X", { 16, 16, 1, 1 } }, + { "TITAN X (Pascal)", { 16, 8, 1, 2 } }, + { "Tesla K20m", { 32, 8, 2, 1 } }, + { "Tesla K40m", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 2, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 8, 2, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp new file mode 100644 index 00000000..bc6ee662 --- /dev/null +++ b/src/database/kernels/pad/pad_3232.hpp @@ -0,0 +1,100 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadComplexSingle = { + "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, + { "Ellesmere", { 16, 16, 2, 4 } }, + { "Fiji", { 16, 8, 1, 2 } }, + { "Hawaii", { 32, 8, 1, 2 } }, + { "Oland", { 8, 32, 1, 1 } }, + { "Pitcairn", { 8, 8, 1, 2 } }, + { "Tahiti", { 16, 16, 1, 1 } }, + { "Tonga", { 16, 8, 1, 2 } }, + { "Turks", { 16, 8, 4, 4 } }, + { "default", { 16, 8, 1, 2 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } }, + { "default", { 32, 8, 4, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } }, + { "Iris", { 32, 16, 2, 4 } }, + { "Iris Pro", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 1, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 16, 1, 1 } }, + { "GeForce GTX 1070", { 8, 32, 1, 1 } }, + { "GeForce GTX 1080", { 32, 8, 1, 1 } }, + { "GeForce GTX 480", { 16, 8, 2, 1 } }, + { "GeForce GTX 670", { 16, 8, 1, 2 } }, + { "GeForce GTX 680", { 16, 32, 1, 2 } }, + { "GeForce GTX 750", { 32, 8, 2, 1 } }, + { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } }, + { "GeForce GTX 980", { 16, 16, 1, 1 } }, + { "GeForce GTX TITAN", { 16, 8, 2, 1 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 32, 32, 1, 2 } }, + { "Tesla K20m", { 32, 8, 1, 2 } }, + { "Tesla K40m", { 16, 8, 1, 1 } }, + { "default", { 32, 8, 1, 2 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 8, 1, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp new file mode 100644 index 00000000..94008efe --- /dev/null +++ b/src/database/kernels/pad/pad_64.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadDouble = { + "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "Ellesmere", { 8, 32, 2, 1 } }, + { "Fiji", { 8, 16, 1, 2 } }, + { "Hawaii", { 32, 8, 1, 2 } }, + { "Oland", { 8, 32, 1, 1 } }, + { "Pitcairn", { 8, 8, 1, 2 } }, + { "Tahiti", { 32, 8, 1, 1 } }, + { "Tonga", { 32, 8, 4, 1 } }, + { "default", { 16, 16, 1, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 32, 8, 4, 2 } }, + { "default", { 32, 8, 4, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, + { "default", { 32, 16, 4, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 32, 8, 1, 1 } }, + { "GeForce GTX 1070", { 8, 8, 1, 1 } }, + { "GeForce GTX 1080", { 32, 32, 2, 1 } }, + { "GeForce GTX 480", { 16, 8, 1, 1 } }, + { "GeForce GTX 670", { 16, 16, 2, 1 } }, + { "GeForce GTX 680", { 32, 32, 1, 2 } }, + { "GeForce GTX 750", { 32, 16, 1, 1 } }, + { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } }, + { "GeForce GTX 980", { 8, 16, 1, 1 } }, + { "GeForce GTX TITAN", { 32, 8, 1, 1 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, + { "Tesla K20m", { 32, 8, 1, 1 } }, + { "Tesla K40m", { 16, 8, 1, 2 } }, + { "default", { 32, 8, 1, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 8, 1, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp new file mode 100644 index 00000000..43c5a8e0 --- /dev/null +++ b/src/database/kernels/pad/pad_6464.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Pad6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadComplexDouble = { + "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } }, + { "Ellesmere", { 8, 16, 1, 2 } }, + { "Fiji", { 32, 8, 2, 1 } }, + { "Hawaii", { 32, 8, 1, 1 } }, + { "Oland", { 8, 16, 2, 1 } }, + { "Pitcairn", { 16, 8, 1, 1 } }, + { "Tahiti", { 8, 16, 1, 1 } }, + { "Tonga", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 16, 8, 4, 1 } }, + { "default", { 16, 8, 4, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 2, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 8, 8, 1, 1 } }, + { "GeForce GTX 1070", { 8, 8, 2, 2 } }, + { "GeForce GTX 1080", { 8, 8, 1, 1 } }, + { "GeForce GTX 480", { 16, 8, 1, 1 } }, + { "GeForce GTX 670", { 32, 8, 1, 1 } }, + { "GeForce GTX 680", { 8, 8, 1, 1 } }, + { "GeForce GTX 750", { 8, 8, 1, 1 } }, + { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, + { "GeForce GTX 980", { 16, 16, 1, 1 } }, + { "GeForce GTX TITAN", { 8, 32, 1, 2 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 16, 1, 1 } }, + { "Tesla K20m", { 8, 8, 1, 2 } }, + { "Tesla K40m", { 8, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 8, 1, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose.hpp b/src/database/kernels/padtranspose.hpp deleted file mode 100644 index bbda5c65..00000000 --- a/src/database/kernels/padtranspose.hpp +++ /dev/null @@ -1,361 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeHalf = { - "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 0, 16, 4 } }, - { "default", { 0, 16, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } }, - { "default", { 0, 8, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 8 } }, - { "default", { 0, 8, 8 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 0, 8, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeSingle = { - "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "ATI Radeon HD 6750M", { 1, 16, 1 } }, - { "Ellesmere", { 1, 8, 4 } }, - { "Fiji", { 0, 16, 2 } }, - { "Hawaii", { 1, 16, 4 } }, - { "Oland", { 0, 16, 4 } }, - { "Pitcairn", { 0, 16, 4 } }, - { "Tahiti", { 0, 16, 4 } }, - { "Tonga", { 0, 16, 2 } }, - { "Turks", { 1, 16, 1 } }, - { "default", { 0, 16, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 8, 2 } }, - { "default", { 0, 8, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } }, - { "default", { 0, 8, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } }, - { "Iris", { 1, 16, 2 } }, - { "Iris Pro", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } }, - { "default", { 0, 16, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 32, 2 } }, - { "GeForce GTX 1070", { 0, 16, 1 } }, - { "GeForce GTX 1080", { 1, 16, 2 } }, - { "GeForce GTX 480", { 1, 16, 2 } }, - { "GeForce GTX 670", { 1, 32, 2 } }, - { "GeForce GTX 680", { 1, 16, 2 } }, - { "GeForce GTX 750", { 1, 32, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 2 } }, - { "GeForce GTX TITAN Black", { 1, 32, 2 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 16, 2 } }, - { "Tesla K20m", { 1, 16, 2 } }, - { "Tesla K40m", { 1, 32, 2 } }, - { "default", { 1, 32, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 2 } }, - { "default", { 0, 8, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 16, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeComplexSingle = { - "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "ATI Radeon HD 6750M", { 1, 16, 1 } }, - { "Ellesmere", { 0, 8, 4 } }, - { "Fiji", { 1, 16, 2 } }, - { "Hawaii", { 0, 16, 2 } }, - { "Oland", { 0, 8, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 0, 16, 2 } }, - { "Tonga", { 0, 16, 2 } }, - { "Turks", { 0, 16, 4 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } }, - { "default", { 0, 8, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } }, - { "Iris", { 0, 16, 2 } }, - { "Iris Pro", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 0, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 16, 1 } }, - { "GeForce GTX 750", { 1, 16, 2 } }, - { "GeForce GTX 750 Ti", { 1, 16, 1 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 8, 1 } }, - { "Tesla K20m", { 0, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 4 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 8, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeDouble = { - "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "Ellesmere", { 0, 16, 4 } }, - { "Fiji", { 0, 16, 2 } }, - { "Hawaii", { 0, 16, 2 } }, - { "Oland", { 0, 16, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 1, 16, 2 } }, - { "Tonga", { 0, 8, 2 } }, - { "default", { 0, 16, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 16, 2 } }, - { "default", { 0, 16, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } }, - { "default", { 1, 8, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, - { "default", { 0, 16, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 0, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 16, 1 } }, - { "GeForce GTX 750", { 1, 16, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 1, 32, 1 } }, - { "GeForce GTX TITAN", { 0, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 0, 8, 1 } }, - { "Tesla K20m", { 0, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 16, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry PadtransposeComplexDouble = { - "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } }, - { "Ellesmere", { 0, 8, 4 } }, - { "Fiji", { 0, 8, 2 } }, - { "Hawaii", { 0, 8, 4 } }, - { "Oland", { 0, 8, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 0, 8, 2 } }, - { "Tonga", { 0, 8, 2 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 8, 1 } }, - { "default", { 0, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } }, - { "default", { 0, 8, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, - { "default", { 0, 16, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 1, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 32, 1 } }, - { "GeForce GTX 750", { 1, 16, 1 } }, - { "GeForce GTX 750 Ti", { 1, 8, 2 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 8, 1 } }, - { "Tesla K20m", { 1, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 0, 8, 2 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose.hpp b/src/database/kernels/padtranspose/padtranspose.hpp new file mode 100644 index 00000000..c395653a --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels. +// +// ================================================================================================= + +#include "database/kernels/padtranspose/padtranspose_16.hpp" +#include "database/kernels/padtranspose/padtranspose_32.hpp" +#include "database/kernels/padtranspose/padtranspose_3232.hpp" +#include "database/kernels/padtranspose/padtranspose_64.hpp" +#include "database/kernels/padtranspose/padtranspose_6464.hpp" diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp new file mode 100644 index 00000000..e815ced0 --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeHalf = { + "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } }, + { "default", { 0, 8, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 0, 8, 8 } }, + { "default", { 0, 8, 8 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 0, 8, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp new file mode 100644 index 00000000..ca04b01e --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -0,0 +1,100 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeSingle = { + "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, + { "ATI Radeon HD 6750M", { 1, 16, 1 } }, + { "Ellesmere", { 1, 8, 4 } }, + { "Fiji", { 0, 16, 2 } }, + { "Hawaii", { 1, 16, 4 } }, + { "Oland", { 0, 16, 4 } }, + { "Pitcairn", { 0, 16, 4 } }, + { "Tahiti", { 0, 16, 4 } }, + { "Tonga", { 0, 16, 2 } }, + { "Turks", { 1, 16, 1 } }, + { "default", { 0, 16, 4 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } }, + { "default", { 0, 8, 8 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } }, + { "Iris", { 1, 16, 2 } }, + { "Iris Pro", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 32, 2 } }, + { "GeForce GTX 1070", { 0, 16, 1 } }, + { "GeForce GTX 1080", { 1, 16, 2 } }, + { "GeForce GTX 480", { 1, 16, 2 } }, + { "GeForce GTX 670", { 1, 32, 2 } }, + { "GeForce GTX 680", { 1, 16, 2 } }, + { "GeForce GTX 750", { 1, 32, 2 } }, + { "GeForce GTX 750 Ti", { 1, 32, 2 } }, + { "GeForce GTX 980", { 0, 16, 1 } }, + { "GeForce GTX TITAN", { 1, 16, 2 } }, + { "GeForce GTX TITAN Black", { 1, 32, 2 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "TITAN X (Pascal)", { 1, 16, 2 } }, + { "Tesla K20m", { 1, 16, 2 } }, + { "Tesla K40m", { 1, 32, 2 } }, + { "default", { 1, 32, 2 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 16, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp new file mode 100644 index 00000000..bc9425da --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -0,0 +1,100 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeComplexSingle = { + "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, + { "ATI Radeon HD 6750M", { 1, 16, 1 } }, + { "Ellesmere", { 0, 8, 4 } }, + { "Fiji", { 1, 16, 2 } }, + { "Hawaii", { 0, 16, 2 } }, + { "Oland", { 0, 8, 4 } }, + { "Pitcairn", { 0, 8, 4 } }, + { "Tahiti", { 0, 16, 2 } }, + { "Tonga", { 0, 16, 2 } }, + { "Turks", { 0, 16, 4 } }, + { "default", { 0, 8, 4 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } }, + { "default", { 0, 8, 8 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } }, + { "Iris", { 0, 16, 2 } }, + { "Iris Pro", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 16, 1 } }, + { "GeForce GTX 1070", { 1, 16, 1 } }, + { "GeForce GTX 1080", { 0, 8, 1 } }, + { "GeForce GTX 480", { 1, 16, 1 } }, + { "GeForce GTX 670", { 1, 16, 1 } }, + { "GeForce GTX 680", { 1, 16, 1 } }, + { "GeForce GTX 750", { 1, 16, 2 } }, + { "GeForce GTX 750 Ti", { 1, 16, 1 } }, + { "GeForce GTX 980", { 0, 16, 1 } }, + { "GeForce GTX TITAN", { 1, 16, 1 } }, + { "GeForce GTX TITAN Black", { 0, 16, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "TITAN X (Pascal)", { 1, 8, 1 } }, + { "Tesla K20m", { 0, 16, 1 } }, + { "Tesla K40m", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 8, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp new file mode 100644 index 00000000..bdfe9788 --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeDouble = { + "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, + { "Ellesmere", { 0, 16, 4 } }, + { "Fiji", { 0, 16, 2 } }, + { "Hawaii", { 0, 16, 2 } }, + { "Oland", { 0, 16, 4 } }, + { "Pitcairn", { 0, 8, 4 } }, + { "Tahiti", { 1, 16, 2 } }, + { "Tonga", { 0, 8, 2 } }, + { "default", { 0, 16, 4 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } }, + { "default", { 1, 8, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, + { "default", { 0, 16, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 16, 1 } }, + { "GeForce GTX 1070", { 1, 16, 1 } }, + { "GeForce GTX 1080", { 0, 8, 1 } }, + { "GeForce GTX 480", { 1, 16, 1 } }, + { "GeForce GTX 670", { 1, 16, 1 } }, + { "GeForce GTX 680", { 1, 16, 1 } }, + { "GeForce GTX 750", { 1, 16, 2 } }, + { "GeForce GTX 750 Ti", { 1, 32, 2 } }, + { "GeForce GTX 980", { 1, 32, 1 } }, + { "GeForce GTX TITAN", { 0, 16, 1 } }, + { "GeForce GTX TITAN Black", { 0, 16, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "TITAN X (Pascal)", { 0, 8, 1 } }, + { "Tesla K20m", { 0, 16, 1 } }, + { "Tesla K40m", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 16, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp new file mode 100644 index 00000000..c839ab2c --- /dev/null +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Padtranspose6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry PadtransposeComplexDouble = { + "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } }, + { "Ellesmere", { 0, 8, 4 } }, + { "Fiji", { 0, 8, 2 } }, + { "Hawaii", { 0, 8, 4 } }, + { "Oland", { 0, 8, 4 } }, + { "Pitcairn", { 0, 8, 4 } }, + { "Tahiti", { 0, 8, 2 } }, + { "Tonga", { 0, 8, 2 } }, + { "default", { 0, 8, 4 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 0, 8, 1 } }, + { "default", { 0, 8, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, + { "default", { 0, 16, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 16, 1 } }, + { "GeForce GTX 1070", { 1, 16, 1 } }, + { "GeForce GTX 1080", { 1, 8, 1 } }, + { "GeForce GTX 480", { 1, 16, 1 } }, + { "GeForce GTX 670", { 1, 16, 1 } }, + { "GeForce GTX 680", { 1, 32, 1 } }, + { "GeForce GTX 750", { 1, 16, 1 } }, + { "GeForce GTX 750 Ti", { 1, 8, 2 } }, + { "GeForce GTX 980", { 0, 16, 1 } }, + { "GeForce GTX TITAN", { 1, 16, 1 } }, + { "GeForce GTX TITAN Black", { 0, 16, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "TITAN X (Pascal)", { 1, 8, 1 } }, + { "Tesla K20m", { 1, 16, 1 } }, + { "Tesla K40m", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 0, 8, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose.hpp b/src/database/kernels/transpose.hpp deleted file mode 100644 index b00a23dc..00000000 --- a/src/database/kernels/transpose.hpp +++ /dev/null @@ -1,350 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry TransposeHalf = { - "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 4, 0, 1, 8 } }, - { "default", { 4, 0, 1, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } }, - { "default", { 8, 1, 0, 8 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } }, - { "default", { 8, 0, 0, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 0, 1, 8 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeSingle = { - "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } }, - { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } }, - { "Ellesmere", { 16, 0, 1, 4 } }, - { "Fiji", { 16, 0, 1, 2 } }, - { "Hawaii", { 4, 0, 1, 8 } }, - { "Oland", { 8, 0, 1, 4 } }, - { "Pitcairn", { 16, 0, 1, 1 } }, - { "Tahiti", { 4, 0, 1, 4 } }, - { "Tonga", { 8, 1, 1, 2 } }, - { "Turks", { 8, 0, 1, 2 } }, - { "default", { 8, 0, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 1, 4 } }, - { "default", { 8, 0, 1, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, - { "default", { 4, 0, 0, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } }, - { "Iris", { 8, 1, 0, 4 } }, - { "Iris Pro", { 16, 1, 0, 4 } }, - { "default", { 16, 0, 0, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } }, - { "default", { 16, 1, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 1, 1, 2 } }, - { "GeForce GT 650M", { 8, 1, 0, 4 } }, - { "GeForce GTX 1070", { 8, 0, 1, 4 } }, - { "GeForce GTX 1080", { 4, 0, 0, 4 } }, - { "GeForce GTX 480", { 16, 1, 0, 2 } }, - { "GeForce GTX 670", { 16, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 1, 1, 2 } }, - { "GeForce GTX 750", { 4, 0, 0, 8 } }, - { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, - { "GeForce GTX 980", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN", { 8, 1, 0, 4 } }, - { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } }, - { "GeForce GTX TITAN X", { 16, 0, 0, 4 } }, - { "TITAN X (Pascal)", { 8, 0, 0, 4 } }, - { "Tesla K20m", { 8, 0, 0, 4 } }, - { "Tesla K40m", { 8, 1, 0, 4 } }, - { "default", { 8, 1, 0, 4 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } }, - { "default", { 8, 1, 1, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 0, 1, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeComplexSingle = { - "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } }, - { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } }, - { "Ellesmere", { 4, 0, 1, 4 } }, - { "Fiji", { 8, 1, 1, 2 } }, - { "Hawaii", { 16, 0, 1, 1 } }, - { "Oland", { 4, 0, 1, 2 } }, - { "Pitcairn", { 8, 0, 1, 1 } }, - { "Tahiti", { 16, 0, 1, 1 } }, - { "Tonga", { 16, 0, 1, 1 } }, - { "Turks", { 8, 1, 1, 4 } }, - { "default", { 8, 0, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 0, 0, 2 } }, - { "default", { 16, 0, 0, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } }, - { "default", { 4, 1, 0, 8 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } }, - { "Iris", { 8, 0, 0, 2 } }, - { "Iris Pro", { 16, 1, 0, 2 } }, - { "default", { 16, 1, 0, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 1 } }, - { "GeForce GTX 1070", { 16, 1, 1, 1 } }, - { "GeForce GTX 1080", { 16, 1, 0, 1 } }, - { "GeForce GTX 480", { 16, 1, 0, 1 } }, - { "GeForce GTX 670", { 16, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 1, 1, 1 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, - { "GeForce GTX 980", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 8, 1, 0, 2 } }, - { "Tesla K20m", { 16, 0, 0, 1 } }, - { "Tesla K40m", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 1, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeDouble = { - "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } }, - { "Ellesmere", { 4, 0, 1, 4 } }, - { "Fiji", { 8, 1, 1, 2 } }, - { "Hawaii", { 16, 0, 1, 1 } }, - { "Oland", { 8, 1, 1, 2 } }, - { "Pitcairn", { 4, 0, 1, 2 } }, - { "Tahiti", { 4, 1, 1, 4 } }, - { "Tonga", { 4, 0, 1, 4 } }, - { "default", { 4, 0, 1, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 0, 1 } }, - { "default", { 8, 0, 0, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, - { "default", { 4, 1, 0, 8 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } }, - { "default", { 32, 1, 0, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 2 } }, - { "GeForce GTX 1070", { 8, 0, 1, 2 } }, - { "GeForce GTX 1080", { 8, 0, 0, 2 } }, - { "GeForce GTX 480", { 8, 1, 0, 2 } }, - { "GeForce GTX 670", { 16, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 1, 1, 2 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, - { "GeForce GTX 980", { 16, 0, 0, 2 } }, - { "GeForce GTX TITAN", { 8, 0, 0, 2 } }, - { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 16, 1, 0, 2 } }, - { "Tesla K20m", { 16, 1, 0, 2 } }, - { "Tesla K40m", { 16, 1, 1, 2 } }, - { "default", { 16, 1, 1, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 1, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry TransposeComplexDouble = { - "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } }, - { "Ellesmere", { 16, 0, 1, 1 } }, - { "Fiji", { 16, 0, 1, 1 } }, - { "Hawaii", { 4, 0, 1, 2 } }, - { "Oland", { 16, 0, 1, 1 } }, - { "Pitcairn", { 4, 0, 1, 1 } }, - { "Tahiti", { 16, 0, 1, 1 } }, - { "Tonga", { 8, 1, 1, 2 } }, - { "default", { 16, 0, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 0, 1 } }, - { "default", { 8, 0, 0, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } }, - { "default", { 4, 0, 0, 8 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 1 } }, - { "GeForce GTX 1070", { 8, 0, 0, 1 } }, - { "GeForce GTX 1080", { 8, 0, 0, 1 } }, - { "GeForce GTX 480", { 8, 1, 0, 1 } }, - { "GeForce GTX 670", { 16, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 1, 1, 1 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, - { "GeForce GTX 980", { 32, 1, 0, 1 } }, - { "GeForce GTX TITAN", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 8, 0, 0, 1 } }, - { "Tesla K20m", { 16, 1, 0, 1 } }, - { "Tesla K40m", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 16, 1, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/transpose/transpose.hpp b/src/database/kernels/transpose/transpose.hpp new file mode 100644 index 00000000..fa262c50 --- /dev/null +++ b/src/database/kernels/transpose/transpose.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose' kernels. +// +// ================================================================================================= + +#include "database/kernels/transpose/transpose_16.hpp" +#include "database/kernels/transpose/transpose_32.hpp" +#include "database/kernels/transpose/transpose_3232.hpp" +#include "database/kernels/transpose/transpose_64.hpp" +#include "database/kernels/transpose/transpose_6464.hpp" diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp new file mode 100644 index 00000000..016788dc --- /dev/null +++ b/src/database/kernels/transpose/transpose_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeHalf = { + "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 4, 0, 1, 8 } }, + { "default", { 4, 0, 1, 8 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } }, + { "default", { 8, 1, 0, 8 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } }, + { "default", { 8, 0, 0, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 0, 1, 8 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp new file mode 100644 index 00000000..abbe8e19 --- /dev/null +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -0,0 +1,101 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeSingle = { + "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } }, + { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } }, + { "Ellesmere", { 16, 0, 1, 4 } }, + { "Fiji", { 16, 0, 1, 2 } }, + { "Hawaii", { 4, 0, 1, 8 } }, + { "Oland", { 8, 0, 1, 4 } }, + { "Pitcairn", { 16, 0, 1, 1 } }, + { "Tahiti", { 4, 0, 1, 4 } }, + { "Tonga", { 8, 1, 1, 2 } }, + { "Turks", { 8, 0, 1, 2 } }, + { "default", { 8, 0, 1, 2 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 8, 0, 1, 4 } }, + { "default", { 8, 0, 1, 4 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, + { "default", { 4, 0, 0, 8 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } }, + { "Iris", { 8, 1, 0, 4 } }, + { "Iris Pro", { 16, 1, 0, 4 } }, + { "default", { 16, 0, 0, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } }, + { "default", { 16, 1, 1, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 32, 1, 1, 2 } }, + { "GeForce GT 650M", { 8, 1, 0, 4 } }, + { "GeForce GTX 1070", { 8, 0, 1, 4 } }, + { "GeForce GTX 1080", { 4, 0, 0, 4 } }, + { "GeForce GTX 480", { 16, 1, 0, 2 } }, + { "GeForce GTX 670", { 16, 1, 1, 2 } }, + { "GeForce GTX 680", { 16, 1, 1, 2 } }, + { "GeForce GTX 750", { 4, 0, 0, 8 } }, + { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, + { "GeForce GTX 980", { 16, 0, 0, 1 } }, + { "GeForce GTX TITAN", { 8, 1, 0, 4 } }, + { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } }, + { "GeForce GTX TITAN X", { 16, 0, 0, 4 } }, + { "TITAN X (Pascal)", { 8, 0, 0, 4 } }, + { "Tesla K20m", { 8, 0, 0, 4 } }, + { "Tesla K40m", { 8, 1, 0, 4 } }, + { "default", { 8, 1, 0, 4 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } }, + { "default", { 8, 1, 1, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 0, 1, 4 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp new file mode 100644 index 00000000..f8560206 --- /dev/null +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -0,0 +1,94 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeComplexSingle = { + "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } }, + { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } }, + { "Ellesmere", { 4, 0, 1, 4 } }, + { "Fiji", { 8, 1, 1, 2 } }, + { "Hawaii", { 16, 0, 1, 1 } }, + { "Oland", { 4, 0, 1, 2 } }, + { "Pitcairn", { 8, 0, 1, 1 } }, + { "Tahiti", { 16, 0, 1, 1 } }, + { "Tonga", { 16, 0, 1, 1 } }, + { "Turks", { 8, 1, 1, 4 } }, + { "default", { 8, 0, 1, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 16, 0, 0, 2 } }, + { "default", { 16, 0, 0, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } }, + { "default", { 4, 1, 0, 8 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } }, + { "Iris", { 8, 0, 0, 2 } }, + { "Iris Pro", { 16, 1, 0, 2 } }, + { "default", { 16, 1, 0, 2 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 1, 1, 1 } }, + { "GeForce GTX 1070", { 16, 1, 1, 1 } }, + { "GeForce GTX 1080", { 16, 1, 0, 1 } }, + { "GeForce GTX 480", { 16, 1, 0, 1 } }, + { "GeForce GTX 670", { 16, 1, 1, 1 } }, + { "GeForce GTX 680", { 16, 1, 1, 1 } }, + { "GeForce GTX 750", { 16, 1, 0, 1 } }, + { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, + { "GeForce GTX 980", { 16, 1, 0, 1 } }, + { "GeForce GTX TITAN", { 16, 0, 0, 1 } }, + { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } }, + { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, + { "TITAN X (Pascal)", { 8, 1, 0, 2 } }, + { "Tesla K20m", { 16, 0, 0, 1 } }, + { "Tesla K40m", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 1, 1, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp new file mode 100644 index 00000000..ddad3bac --- /dev/null +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeDouble = { + "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } }, + { "Ellesmere", { 4, 0, 1, 4 } }, + { "Fiji", { 8, 1, 1, 2 } }, + { "Hawaii", { 16, 0, 1, 1 } }, + { "Oland", { 8, 1, 1, 2 } }, + { "Pitcairn", { 4, 0, 1, 2 } }, + { "Tahiti", { 4, 1, 1, 4 } }, + { "Tonga", { 4, 0, 1, 4 } }, + { "default", { 4, 0, 1, 4 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 8, 0, 0, 1 } }, + { "default", { 8, 0, 0, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, + { "default", { 4, 1, 0, 8 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } }, + { "default", { 32, 1, 0, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 1, 1, 2 } }, + { "GeForce GTX 1070", { 8, 0, 1, 2 } }, + { "GeForce GTX 1080", { 8, 0, 0, 2 } }, + { "GeForce GTX 480", { 8, 1, 0, 2 } }, + { "GeForce GTX 670", { 16, 1, 1, 2 } }, + { "GeForce GTX 680", { 16, 1, 1, 2 } }, + { "GeForce GTX 750", { 16, 1, 0, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, + { "GeForce GTX 980", { 16, 0, 0, 2 } }, + { "GeForce GTX TITAN", { 8, 0, 0, 2 } }, + { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } }, + { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, + { "TITAN X (Pascal)", { 16, 1, 0, 2 } }, + { "Tesla K20m", { 16, 1, 0, 2 } }, + { "Tesla K40m", { 16, 1, 1, 2 } }, + { "default", { 16, 1, 1, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 16, 1, 1, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp new file mode 100644 index 00000000..95c7fbba --- /dev/null +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -0,0 +1,74 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Transpose6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry TransposeComplexDouble = { + "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } }, + { "Ellesmere", { 16, 0, 1, 1 } }, + { "Fiji", { 16, 0, 1, 1 } }, + { "Hawaii", { 4, 0, 1, 2 } }, + { "Oland", { 16, 0, 1, 1 } }, + { "Pitcairn", { 4, 0, 1, 1 } }, + { "Tahiti", { 16, 0, 1, 1 } }, + { "Tonga", { 8, 1, 1, 2 } }, + { "default", { 16, 0, 1, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 8, 0, 0, 1 } }, + { "default", { 8, 0, 0, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } }, + { "default", { 4, 0, 0, 8 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 1, 1, 1 } }, + { "GeForce GTX 1070", { 8, 0, 0, 1 } }, + { "GeForce GTX 1080", { 8, 0, 0, 1 } }, + { "GeForce GTX 480", { 8, 1, 0, 1 } }, + { "GeForce GTX 670", { 16, 1, 1, 1 } }, + { "GeForce GTX 680", { 16, 1, 1, 1 } }, + { "GeForce GTX 750", { 16, 1, 0, 1 } }, + { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, + { "GeForce GTX 980", { 32, 1, 0, 1 } }, + { "GeForce GTX TITAN", { 16, 1, 0, 1 } }, + { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } }, + { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, + { "TITAN X (Pascal)", { 8, 0, 0, 1 } }, + { "Tesla K20m", { 16, 1, 0, 1 } }, + { "Tesla K40m", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 16, 1, 1, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy.hpp b/src/database/kernels/xaxpy.hpp deleted file mode 100644 index 5cb225d1..00000000 --- a/src/database/kernels/xaxpy.hpp +++ /dev/null @@ -1,362 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XaxpyHalf = { - "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 4, 128, 4 } }, - { "default", { 4, 128, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } }, - { "default", { 8, 64, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 64, 1 } }, - { "default", { 8, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpySingle = { - "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "ATI Radeon HD 6750M", { 1, 256, 2 } }, - { "Ellesmere", { 1, 64, 4 } }, - { "Fiji", { 4, 64, 1 } }, - { "Hawaii", { 2, 64, 2 } }, - { "Oland", { 1, 128, 1 } }, - { "Pitcairn", { 2, 128, 1 } }, - { "Tahiti", { 2, 64, 1 } }, - { "Tonga", { 1, 256, 8 } }, - { "Turks", { 2, 256, 1 } }, - { "default", { 2, 256, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 4, 256, 1 } }, - { "default", { 4, 256, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } }, - { "default", { 8, 512, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 128, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } }, - { "Iris", { 1, 64, 1 } }, - { "Iris Pro", { 1, 128, 2 } }, - { "default", { 4, 256, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } }, - { "default", { 2, 1024, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 2, 64, 1 } }, - { "GeForce GT 650M", { 2, 1024, 1 } }, - { "GeForce GTX 1070", { 1, 64, 4 } }, - { "GeForce GTX 1080", { 1, 256, 1 } }, - { "GeForce GTX 480", { 2, 128, 1 } }, - { "GeForce GTX 670", { 2, 64, 1 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 1, 64, 1 } }, - { "GeForce GTX 750 Ti", { 2, 64, 1 } }, - { "GeForce GTX 980", { 1, 1024, 1 } }, - { "GeForce GTX TITAN", { 4, 256, 1 } }, - { "GeForce GTX TITAN Black", { 4, 128, 4 } }, - { "GeForce GTX TITAN X", { 1, 64, 1 } }, - { "TITAN X (Pascal)", { 4, 128, 1 } }, - { "Tesla K20m", { 4, 128, 1 } }, - { "Tesla K40m", { 4, 128, 1 } }, - { "default", { 4, 1024, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 4, 128, 2 } }, - { "default", { 4, 128, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpyComplexSingle = { - "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } }, - { "ATI Radeon HD 6750M", { 1, 64, 1 } }, - { "Ellesmere", { 2, 256, 1 } }, - { "Fiji", { 1, 128, 2 } }, - { "Hawaii", { 1, 128, 2 } }, - { "Oland", { 1, 128, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 256, 8 } }, - { "Turks", { 2, 256, 1 } }, - { "default", { 1, 128, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } }, - { "default", { 8, 1024, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 4, 64, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } }, - { "Iris", { 2, 128, 1 } }, - { "Iris Pro", { 1, 256, 8 } }, - { "default", { 4, 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, - { "default", { 1, 1024, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 512, 1 } }, - { "GeForce GTX 1070", { 1, 64, 2 } }, - { "GeForce GTX 1080", { 2, 64, 1 } }, - { "GeForce GTX 480", { 1, 256, 1 } }, - { "GeForce GTX 670", { 1, 256, 1 } }, - { "GeForce GTX 680", { 1, 256, 1 } }, - { "GeForce GTX 750", { 1, 512, 1 } }, - { "GeForce GTX 750 Ti", { 1, 512, 1 } }, - { "GeForce GTX 980", { 1, 64, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 128, 2 } }, - { "GeForce GTX TITAN X", { 1, 512, 1 } }, - { "TITAN X (Pascal)", { 2, 512, 1 } }, - { "Tesla K20m", { 1, 128, 1 } }, - { "Tesla K40m", { 1, 128, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 128, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpyDouble = { - "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 2, 64, 4 } }, - { "Fiji", { 2, 64, 4 } }, - { "Hawaii", { 1, 64, 2 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 128, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 128, 4 } }, - { "default", { 2, 64, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 2, 128, 2 } }, - { "default", { 2, 128, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } }, - { "default", { 8, 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } }, - { "default", { 2, 512, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 64, 1 } }, - { "GeForce GTX 1070", { 1, 64, 8 } }, - { "GeForce GTX 1080", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "GeForce GTX 750", { 1, 128, 1 } }, - { "GeForce GTX 750 Ti", { 1, 256, 2 } }, - { "GeForce GTX 980", { 1, 256, 1 } }, - { "GeForce GTX TITAN", { 2, 1024, 1 } }, - { "GeForce GTX TITAN Black", { 2, 128, 1 } }, - { "GeForce GTX TITAN X", { 1, 512, 1 } }, - { "TITAN X (Pascal)", { 2, 512, 1 } }, - { "Tesla K20m", { 2, 128, 1 } }, - { "Tesla K40m", { 2, 128, 1 } }, - { "default", { 1, 128, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 256, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XaxpyComplexDouble = { - "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "Ellesmere", { 1, 128, 1 } }, - { "Fiji", { 1, 64, 1 } }, - { "Hawaii", { 2, 64, 1 } }, - { "Oland", { 1, 256, 1 } }, - { "Pitcairn", { 1, 128, 1 } }, - { "Tahiti", { 1, 128, 1 } }, - { "Tonga", { 1, 64, 1 } }, - { "default", { 1, 128, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 64, 8 } }, - { "default", { 1, 64, 8 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } }, - { "default", { 8, 256, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, - { "default", { 1, 1024, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 64, 1 } }, - { "GeForce GTX 1070", { 1, 64, 2 } }, - { "GeForce GTX 1080", { 1, 256, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 1, 256, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "GeForce GTX 750", { 1, 1024, 1 } }, - { "GeForce GTX 750 Ti", { 1, 64, 2 } }, - { "GeForce GTX 980", { 1, 1024, 1 } }, - { "GeForce GTX TITAN", { 1, 64, 4 } }, - { "GeForce GTX TITAN Black", { 1, 128, 4 } }, - { "GeForce GTX TITAN X", { 1, 1024, 1 } }, - { "TITAN X (Pascal)", { 1, 256, 2 } }, - { "Tesla K20m", { 1, 64, 1 } }, - { "Tesla K40m", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 256, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy.hpp b/src/database/kernels/xaxpy/xaxpy.hpp new file mode 100644 index 00000000..aa920183 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels. +// +// ================================================================================================= + +#include "database/kernels/xaxpy/xaxpy_16.hpp" +#include "database/kernels/xaxpy/xaxpy_32.hpp" +#include "database/kernels/xaxpy/xaxpy_3232.hpp" +#include "database/kernels/xaxpy/xaxpy_64.hpp" +#include "database/kernels/xaxpy/xaxpy_6464.hpp" diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp new file mode 100644 index 00000000..08f635f8 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyHalf = { + "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 4, 128, 4 } }, + { "default", { 4, 128, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } }, + { "default", { 8, 64, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 8, 64, 1 } }, + { "default", { 8, 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp new file mode 100644 index 00000000..e5530a15 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -0,0 +1,101 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpySingle = { + "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, + { "ATI Radeon HD 6750M", { 1, 256, 2 } }, + { "Ellesmere", { 1, 64, 4 } }, + { "Fiji", { 4, 64, 1 } }, + { "Hawaii", { 2, 64, 2 } }, + { "Oland", { 1, 128, 1 } }, + { "Pitcairn", { 2, 128, 1 } }, + { "Tahiti", { 2, 64, 1 } }, + { "Tonga", { 1, 256, 8 } }, + { "Turks", { 2, 256, 1 } }, + { "default", { 2, 256, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 4, 256, 1 } }, + { "default", { 4, 256, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } }, + { "default", { 8, 512, 1 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 1, 128, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } }, + { "Iris", { 1, 64, 1 } }, + { "Iris Pro", { 1, 128, 2 } }, + { "default", { 4, 256, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } }, + { "default", { 2, 1024, 2 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 2, 64, 1 } }, + { "GeForce GT 650M", { 2, 1024, 1 } }, + { "GeForce GTX 1070", { 1, 64, 4 } }, + { "GeForce GTX 1080", { 1, 256, 1 } }, + { "GeForce GTX 480", { 2, 128, 1 } }, + { "GeForce GTX 670", { 2, 64, 1 } }, + { "GeForce GTX 680", { 1, 128, 1 } }, + { "GeForce GTX 750", { 1, 64, 1 } }, + { "GeForce GTX 750 Ti", { 2, 64, 1 } }, + { "GeForce GTX 980", { 1, 1024, 1 } }, + { "GeForce GTX TITAN", { 4, 256, 1 } }, + { "GeForce GTX TITAN Black", { 4, 128, 4 } }, + { "GeForce GTX TITAN X", { 1, 64, 1 } }, + { "TITAN X (Pascal)", { 4, 128, 1 } }, + { "Tesla K20m", { 4, 128, 1 } }, + { "Tesla K40m", { 4, 128, 1 } }, + { "default", { 4, 1024, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 4, 128, 2 } }, + { "default", { 4, 128, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 4, 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp new file mode 100644 index 00000000..f0a2d117 --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -0,0 +1,100 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyComplexSingle = { + "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } }, + { "ATI Radeon HD 6750M", { 1, 64, 1 } }, + { "Ellesmere", { 2, 256, 1 } }, + { "Fiji", { 1, 128, 2 } }, + { "Hawaii", { 1, 128, 2 } }, + { "Oland", { 1, 128, 1 } }, + { "Pitcairn", { 1, 64, 1 } }, + { "Tahiti", { 1, 64, 1 } }, + { "Tonga", { 1, 256, 8 } }, + { "Turks", { 2, 256, 1 } }, + { "default", { 1, 128, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } }, + { "default", { 8, 1024, 1 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 4, 64, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } }, + { "Iris", { 2, 128, 1 } }, + { "Iris Pro", { 1, 256, 8 } }, + { "default", { 4, 64, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, + { "default", { 1, 1024, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 512, 1 } }, + { "GeForce GTX 1070", { 1, 64, 2 } }, + { "GeForce GTX 1080", { 2, 64, 1 } }, + { "GeForce GTX 480", { 1, 256, 1 } }, + { "GeForce GTX 670", { 1, 256, 1 } }, + { "GeForce GTX 680", { 1, 256, 1 } }, + { "GeForce GTX 750", { 1, 512, 1 } }, + { "GeForce GTX 750 Ti", { 1, 512, 1 } }, + { "GeForce GTX 980", { 1, 64, 1 } }, + { "GeForce GTX TITAN", { 1, 256, 1 } }, + { "GeForce GTX TITAN Black", { 1, 128, 2 } }, + { "GeForce GTX TITAN X", { 1, 512, 1 } }, + { "TITAN X (Pascal)", { 2, 512, 1 } }, + { "Tesla K20m", { 1, 128, 1 } }, + { "Tesla K40m", { 1, 128, 1 } }, + { "default", { 1, 256, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 128, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp new file mode 100644 index 00000000..dc9ba62c --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyDouble = { + "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, + { "Ellesmere", { 2, 64, 4 } }, + { "Fiji", { 2, 64, 4 } }, + { "Hawaii", { 1, 64, 2 } }, + { "Oland", { 1, 64, 1 } }, + { "Pitcairn", { 1, 128, 1 } }, + { "Tahiti", { 1, 64, 1 } }, + { "Tonga", { 1, 128, 4 } }, + { "default", { 2, 64, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 2, 128, 2 } }, + { "default", { 2, 128, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } }, + { "default", { 8, 64, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } }, + { "default", { 2, 512, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 64, 1 } }, + { "GeForce GTX 1070", { 1, 64, 8 } }, + { "GeForce GTX 1080", { 1, 128, 1 } }, + { "GeForce GTX 480", { 1, 128, 1 } }, + { "GeForce GTX 670", { 1, 64, 1 } }, + { "GeForce GTX 680", { 1, 64, 1 } }, + { "GeForce GTX 750", { 1, 128, 1 } }, + { "GeForce GTX 750 Ti", { 1, 256, 2 } }, + { "GeForce GTX 980", { 1, 256, 1 } }, + { "GeForce GTX TITAN", { 2, 1024, 1 } }, + { "GeForce GTX TITAN Black", { 2, 128, 1 } }, + { "GeForce GTX TITAN X", { 1, 512, 1 } }, + { "TITAN X (Pascal)", { 2, 512, 1 } }, + { "Tesla K20m", { 2, 128, 1 } }, + { "Tesla K40m", { 2, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 2, 256, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp new file mode 100644 index 00000000..0b5f5bcf --- /dev/null +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xaxpy6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XaxpyComplexDouble = { + "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, + { "Ellesmere", { 1, 128, 1 } }, + { "Fiji", { 1, 64, 1 } }, + { "Hawaii", { 2, 64, 1 } }, + { "Oland", { 1, 256, 1 } }, + { "Pitcairn", { 1, 128, 1 } }, + { "Tahiti", { 1, 128, 1 } }, + { "Tonga", { 1, 64, 1 } }, + { "default", { 1, 128, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 1, 64, 8 } }, + { "default", { 1, 64, 8 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } }, + { "default", { 8, 256, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, + { "default", { 1, 1024, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 64, 1 } }, + { "GeForce GTX 1070", { 1, 64, 2 } }, + { "GeForce GTX 1080", { 1, 256, 1 } }, + { "GeForce GTX 480", { 1, 128, 1 } }, + { "GeForce GTX 670", { 1, 256, 1 } }, + { "GeForce GTX 680", { 1, 64, 1 } }, + { "GeForce GTX 750", { 1, 1024, 1 } }, + { "GeForce GTX 750 Ti", { 1, 64, 2 } }, + { "GeForce GTX 980", { 1, 1024, 1 } }, + { "GeForce GTX TITAN", { 1, 64, 4 } }, + { "GeForce GTX TITAN Black", { 1, 128, 4 } }, + { "GeForce GTX TITAN X", { 1, 1024, 1 } }, + { "TITAN X (Pascal)", { 1, 256, 2 } }, + { "Tesla K20m", { 1, 64, 1 } }, + { "Tesla K40m", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 256, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot.hpp b/src/database/kernels/xdot.hpp deleted file mode 100644 index 986c32b2..00000000 --- a/src/database/kernels/xdot.hpp +++ /dev/null @@ -1,292 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XdotHalf = { - "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 256, 64 } }, - { "default", { 256, 64 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } }, - { "default", { 128, 32 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 64 } }, - { "default", { 64, 64 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 64 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotSingle = { - "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } }, - { "ATI Radeon HD 6750M", { 256, 32 } }, - { "Ellesmere", { 128, 32 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 128, 32 } }, - { "Tahiti", { 128, 32 } }, - { "Tonga", { 64, 32 } }, - { "Turks", { 128, 64 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } }, - { "default", { 64, 64 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 64, 32 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } }, - { "Iris Pro", { 512, 64 } }, - { "default", { 64, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 32 } }, - { "GeForce GT 650M", { 128, 64 } }, - { "GeForce GTX 1070", { 128, 1024 } }, - { "GeForce GTX 1080", { 512, 64 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 512, 1024 } }, - { "GeForce GTX 680", { 128, 128 } }, - { "GeForce GTX 750", { 128, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 256, 32 } }, - { "GeForce GTX TITAN Black", { 512, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 1024, 32 } }, - { "Tesla K20m", { 1024, 32 } }, - { "default", { 256, 64 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 128, 64 } }, - { "default", { 128, 64 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotComplexSingle = { - "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, - { "ATI Radeon HD 6750M", { 256, 256 } }, - { "Ellesmere", { 256, 32 } }, - { "Fiji", { 256, 64 } }, - { "Oland", { 128, 32 } }, - { "Pitcairn", { 256, 32 } }, - { "Tahiti", { 64, 32 } }, - { "Tonga", { 256, 64 } }, - { "Turks", { 128, 32 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 256, 32 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } }, - { "Iris Pro", { 32, 32 } }, - { "default", { 32, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 32 } }, - { "GeForce GTX 1070", { 128, 32 } }, - { "GeForce GTX 1080", { 128, 64 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 256, 32 } }, - { "GeForce GTX 680", { 128, 64 } }, - { "GeForce GTX 750", { 64, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 256, 64 } }, - { "GeForce GTX TITAN Black", { 128, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 256, 32 } }, - { "Tesla K20m", { 512, 32 } }, - { "default", { 512, 64 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 256 } }, - { "default", { 64, 256 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 256, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotDouble = { - "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } }, - { "Ellesmere", { 128, 64 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 128, 32 } }, - { "Tahiti", { 256, 32 } }, - { "Tonga", { 128, 64 } }, - { "default", { 128, 64 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, - { "default", { 256, 64 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 32 } }, - { "GeForce GTX 1070", { 128, 512 } }, - { "GeForce GTX 1080", { 128, 128 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 256, 32 } }, - { "GeForce GTX 680", { 128, 64 } }, - { "GeForce GTX 750", { 64, 256 } }, - { "GeForce GTX 750 Ti", { 128, 64 } }, - { "GeForce GTX 980", { 128, 32 } }, - { "GeForce GTX TITAN Black", { 128, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 128, 32 } }, - { "Tesla K20m", { 512, 32 } }, - { "default", { 128, 128 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 64 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XdotComplexDouble = { - "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, - { "Ellesmere", { 256, 32 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 256, 32 } }, - { "Tahiti", { 256, 32 } }, - { "Tonga", { 128, 64 } }, - { "default", { 256, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } }, - { "default", { 128, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 32 } }, - { "GeForce GTX 1070", { 128, 64 } }, - { "GeForce GTX 1080", { 128, 32 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 512, 128 } }, - { "GeForce GTX 680", { 256, 64 } }, - { "GeForce GTX 750", { 256, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 64, 32 } }, - { "GeForce GTX TITAN Black", { 128, 32 } }, - { "GeForce GTX TITAN X", { 128, 32 } }, - { "TITAN X (Pascal)", { 128, 64 } }, - { "Tesla K20m", { 128, 32 } }, - { "default", { 128, 64 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 256, 32 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xdot/xdot.hpp b/src/database/kernels/xdot/xdot.hpp new file mode 100644 index 00000000..5d54cdc9 --- /dev/null +++ b/src/database/kernels/xdot/xdot.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot' kernels. +// +// ================================================================================================= + +#include "database/kernels/xdot/xdot_16.hpp" +#include "database/kernels/xdot/xdot_32.hpp" +#include "database/kernels/xdot/xdot_3232.hpp" +#include "database/kernels/xdot/xdot_64.hpp" +#include "database/kernels/xdot/xdot_6464.hpp" diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp new file mode 100644 index 00000000..570604ec --- /dev/null +++ b/src/database/kernels/xdot/xdot_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotHalf = { + "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 256, 64 } }, + { "default", { 256, 64 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } }, + { "default", { 128, 32 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 64, 64 } }, + { "default", { 64, 64 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 128, 64 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp new file mode 100644 index 00000000..8a7bd9d8 --- /dev/null +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -0,0 +1,83 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotSingle = { + "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } }, + { "ATI Radeon HD 6750M", { 256, 32 } }, + { "Ellesmere", { 128, 32 } }, + { "Fiji", { 256, 32 } }, + { "Oland", { 256, 32 } }, + { "Pitcairn", { 128, 32 } }, + { "Tahiti", { 128, 32 } }, + { "Tonga", { 64, 32 } }, + { "Turks", { 128, 64 } }, + { "default", { 256, 32 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } }, + { "default", { 64, 64 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 64, 32 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } }, + { "Iris Pro", { 512, 64 } }, + { "default", { 64, 32 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 128, 32 } }, + { "GeForce GT 650M", { 128, 64 } }, + { "GeForce GTX 1070", { 128, 1024 } }, + { "GeForce GTX 1080", { 512, 64 } }, + { "GeForce GTX 480", { 512, 32 } }, + { "GeForce GTX 670", { 512, 1024 } }, + { "GeForce GTX 680", { 128, 128 } }, + { "GeForce GTX 750", { 128, 32 } }, + { "GeForce GTX 750 Ti", { 64, 32 } }, + { "GeForce GTX 980", { 256, 32 } }, + { "GeForce GTX TITAN Black", { 512, 64 } }, + { "GeForce GTX TITAN X", { 256, 32 } }, + { "TITAN X (Pascal)", { 1024, 32 } }, + { "Tesla K20m", { 1024, 32 } }, + { "default", { 256, 64 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 128, 64 } }, + { "default", { 128, 64 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 128, 32 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp new file mode 100644 index 00000000..4950c1f2 --- /dev/null +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -0,0 +1,82 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotComplexSingle = { + "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, + { "ATI Radeon HD 6750M", { 256, 256 } }, + { "Ellesmere", { 256, 32 } }, + { "Fiji", { 256, 64 } }, + { "Oland", { 128, 32 } }, + { "Pitcairn", { 256, 32 } }, + { "Tahiti", { 64, 32 } }, + { "Tonga", { 256, 64 } }, + { "Turks", { 128, 32 } }, + { "default", { 256, 32 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, + { "default", { 256, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 256, 32 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } }, + { "Iris Pro", { 32, 32 } }, + { "default", { 32, 32 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 64, 32 } }, + { "GeForce GTX 1070", { 128, 32 } }, + { "GeForce GTX 1080", { 128, 64 } }, + { "GeForce GTX 480", { 512, 32 } }, + { "GeForce GTX 670", { 256, 32 } }, + { "GeForce GTX 680", { 128, 64 } }, + { "GeForce GTX 750", { 64, 32 } }, + { "GeForce GTX 750 Ti", { 64, 32 } }, + { "GeForce GTX 980", { 256, 64 } }, + { "GeForce GTX TITAN Black", { 128, 64 } }, + { "GeForce GTX TITAN X", { 256, 32 } }, + { "TITAN X (Pascal)", { 256, 32 } }, + { "Tesla K20m", { 512, 32 } }, + { "default", { 512, 64 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 64, 256 } }, + { "default", { 64, 256 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 256, 32 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp new file mode 100644 index 00000000..7fbcb474 --- /dev/null +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -0,0 +1,63 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotDouble = { + "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } }, + { "Ellesmere", { 128, 64 } }, + { "Fiji", { 256, 32 } }, + { "Oland", { 256, 32 } }, + { "Pitcairn", { 128, 32 } }, + { "Tahiti", { 256, 32 } }, + { "Tonga", { 128, 64 } }, + { "default", { 128, 64 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, + { "default", { 256, 64 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 128, 32 } }, + { "GeForce GTX 1070", { 128, 512 } }, + { "GeForce GTX 1080", { 128, 128 } }, + { "GeForce GTX 480", { 512, 32 } }, + { "GeForce GTX 670", { 256, 32 } }, + { "GeForce GTX 680", { 128, 64 } }, + { "GeForce GTX 750", { 64, 256 } }, + { "GeForce GTX 750 Ti", { 128, 64 } }, + { "GeForce GTX 980", { 128, 32 } }, + { "GeForce GTX TITAN Black", { 128, 64 } }, + { "GeForce GTX TITAN X", { 256, 32 } }, + { "TITAN X (Pascal)", { 128, 32 } }, + { "Tesla K20m", { 512, 32 } }, + { "default", { 128, 128 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 128, 64 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp new file mode 100644 index 00000000..ea23b329 --- /dev/null +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -0,0 +1,63 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xdot6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XdotComplexDouble = { + "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, + { "Ellesmere", { 256, 32 } }, + { "Fiji", { 256, 32 } }, + { "Oland", { 256, 32 } }, + { "Pitcairn", { 256, 32 } }, + { "Tahiti", { 256, 32 } }, + { "Tonga", { 128, 64 } }, + { "default", { 256, 32 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } }, + { "default", { 128, 32 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 64, 32 } }, + { "GeForce GTX 1070", { 128, 64 } }, + { "GeForce GTX 1080", { 128, 32 } }, + { "GeForce GTX 480", { 512, 32 } }, + { "GeForce GTX 670", { 512, 128 } }, + { "GeForce GTX 680", { 256, 64 } }, + { "GeForce GTX 750", { 256, 32 } }, + { "GeForce GTX 750 Ti", { 64, 32 } }, + { "GeForce GTX 980", { 64, 32 } }, + { "GeForce GTX TITAN Black", { 128, 32 } }, + { "GeForce GTX TITAN X", { 128, 32 } }, + { "TITAN X (Pascal)", { 128, 64 } }, + { "Tesla K20m", { 128, 32 } }, + { "default", { 128, 64 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 256, 32 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm.hpp b/src/database/kernels/xgemm.hpp deleted file mode 100644 index 43854afb..00000000 --- a/src/database/kernels/xgemm.hpp +++ /dev/null @@ -1,348 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemmHalf = { - "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmSingle = { - "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } }, - { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, - { "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, - { "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, - { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, - { "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, - { "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, - { "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, - { "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, - { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } }, - { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, - { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } }, - { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } }, - { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, - { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, - { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, - { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } }, - { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } }, - { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, - { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, - { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, - { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } }, - { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, - { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, - { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, - { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmComplexSingle = { - "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, - { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, - { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, - { "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, - { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, - { "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, - { "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, - { "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, - { "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, - { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, - { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, - { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, - { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } }, - { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, - { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, - { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, - { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } }, - { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, - { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, - { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, - { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, - { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } }, - { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } }, - { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDouble = { - "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } }, - { "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, - { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - { "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, - { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, - { "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, - { "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, - { "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, - { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, - { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } }, - { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, - { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, - { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, - { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } }, - { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } }, - { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, - { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, - { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, - { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, - { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } }, - { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmComplexDouble = { - "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } }, - { "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - { "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - { "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, - { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, - { "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, - { "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, - { "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, - { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, - { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } }, - { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, - { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } }, - { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, - { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, - { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, - { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } }, - { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, - { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, - { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } }, - { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } }, - { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm.hpp b/src/database/kernels/xgemm/xgemm.hpp new file mode 100644 index 00000000..d5d382f0 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemm/xgemm_16.hpp" +#include "database/kernels/xgemm/xgemm_32.hpp" +#include "database/kernels/xgemm/xgemm_3232.hpp" +#include "database/kernels/xgemm/xgemm_64.hpp" +#include "database/kernels/xgemm/xgemm_6464.hpp" diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp new file mode 100644 index 00000000..53a8ccd9 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_16.hpp @@ -0,0 +1,36 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmHalf = { + "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp new file mode 100644 index 00000000..2c0a63f2 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -0,0 +1,101 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmSingle = { + "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } }, + { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, + { "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, + { "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, + { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, + { "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + { "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, + { "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, + { "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } }, + { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } }, + { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, + { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, + { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, + { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } }, + { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } }, + { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, + { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, + { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, + { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } }, + { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, + { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, + { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, + { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp new file mode 100644 index 00000000..3ba015e2 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -0,0 +1,94 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmComplexSingle = { + "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, + { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + { "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, + { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, + { "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, + { "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, + { "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, + { "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, + { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, + { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } }, + { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, + { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, + { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, + { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } }, + { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, + { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, + { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, + { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } }, + { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } }, + { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, + { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, + { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } }, + { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } }, + { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp new file mode 100644 index 00000000..45d04e80 --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -0,0 +1,80 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDouble = { + "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } }, + { "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, + { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, + { "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, + { "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } }, + { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, + { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } }, + { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } }, + { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, + { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, + { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, + { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, + { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } }, + { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp new file mode 100644 index 00000000..14f47eff --- /dev/null +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -0,0 +1,79 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmComplexDouble = { + "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } }, + { "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, + { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + { "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, + { "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, + { "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, + { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } }, + { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } }, + { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, + { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } }, + { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } }, + { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, + { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, + { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } }, + { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } }, + { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct.hpp b/src/database/kernels/xgemm_direct.hpp deleted file mode 100644 index acace63f..00000000 --- a/src/database/kernels/xgemm_direct.hpp +++ /dev/null @@ -1,218 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectHalf = { - "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, - { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectSingle = { - "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } }, - { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } }, - { "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } }, - { "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } }, - { "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } }, - { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } }, - { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, - { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } }, - { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectComplexSingle = { - "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, - { "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } }, - { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } }, - { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectDouble = { - "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } }, - { "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, - { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } }, - { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } }, - { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemmDirectComplexDouble = { - "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } }, - { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, - { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct.hpp b/src/database/kernels/xgemm_direct/xgemm_direct.hpp new file mode 100644 index 00000000..9a26e7ce --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemm_direct/xgemm_direct_16.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_32.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_3232.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_64.hpp" +#include "database/kernels/xgemm_direct/xgemm_direct_6464.hpp" diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp new file mode 100644 index 00000000..ed71285f --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp @@ -0,0 +1,36 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectHalf = { + "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, + { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp new file mode 100644 index 00000000..e0a991c0 --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -0,0 +1,66 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectSingle = { + "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } }, + { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } }, + { "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } }, + { "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } }, + { "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } }, + { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } }, + { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, + { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } }, + { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp new file mode 100644 index 00000000..05e672ac --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -0,0 +1,58 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectComplexSingle = { + "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, + { "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } }, + { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } }, + { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } }, + { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp new file mode 100644 index 00000000..ac740dae --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -0,0 +1,50 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectDouble = { + "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } }, + { "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } }, + { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, + { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } }, + { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } }, + { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp new file mode 100644 index 00000000..1352ec66 --- /dev/null +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -0,0 +1,50 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemmDirectComplexDouble = { + "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } }, + { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, + { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv.hpp b/src/database/kernels/xgemv.hpp deleted file mode 100644 index c537294a..00000000 --- a/src/database/kernels/xgemv.hpp +++ /dev/null @@ -1,306 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemvHalf = { - "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 256, 1 } }, - { "default", { 256, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvSingle = { - "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } }, - { "ATI Radeon HD 6750M", { 32, 1 } }, - { "Ellesmere", { 256, 1 } }, - { "Fiji", { 128, 1 } }, - { "Hawaii", { 128, 1 } }, - { "Oland", { 128, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 128, 2 } }, - { "Turks", { 32, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 256, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } }, - { "Iris", { 64, 2 } }, - { "Iris Pro", { 128, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 256, 1 } }, - { "GeForce GT 650M", { 256, 1 } }, - { "GeForce GTX 1070", { 128, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 64, 1 } }, - { "GeForce GTX 680", { 256, 1 } }, - { "GeForce GTX 750", { 256, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX 980", { 128, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 256, 1 } }, - { "GeForce GTX TITAN X", { 256, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "Tesla K20m", { 128, 1 } }, - { "Tesla K40m", { 256, 1 } }, - { "default", { 256, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvComplexSingle = { - "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "ATI Radeon HD 6750M", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 32, 1 } }, - { "Hawaii", { 64, 1 } }, - { "Oland", { 64, 1 } }, - { "Pitcairn", { 64, 1 } }, - { "Tahiti", { 64, 1 } }, - { "Tonga", { 32, 1 } }, - { "Turks", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 64, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } }, - { "Iris", { 256, 1 } }, - { "Iris Pro", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 256, 1 } }, - { "GeForce GTX 1070", { 64, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 64, 1 } }, - { "GeForce GTX 680", { 64, 1 } }, - { "GeForce GTX 750", { 128, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 32, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvDouble = { - "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 32, 1 } }, - { "Hawaii", { 128, 1 } }, - { "Oland", { 256, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 32, 1 } }, - { "default", { 256, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1 } }, - { "GeForce GTX 1070", { 64, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 256, 1 } }, - { "GeForce GTX 670", { 128, 1 } }, - { "GeForce GTX 680", { 128, 1 } }, - { "GeForce GTX 750", { 64, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX 980", { 64, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 32, 1 } }, - { "GeForce GTX TITAN X", { 64, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "Tesla K20m", { 256, 1 } }, - { "Tesla K40m", { 256, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvComplexDouble = { - "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 64, 1 } }, - { "Hawaii", { 64, 1 } }, - { "Oland", { 256, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } }, - { "default", { 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 128, 1 } }, - { "default", { 128, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv.hpp b/src/database/kernels/xgemv/xgemv.hpp new file mode 100644 index 00000000..081c995f --- /dev/null +++ b/src/database/kernels/xgemv/xgemv.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemv/xgemv_16.hpp" +#include "database/kernels/xgemv/xgemv_32.hpp" +#include "database/kernels/xgemv/xgemv_3232.hpp" +#include "database/kernels/xgemv/xgemv_64.hpp" +#include "database/kernels/xgemv/xgemv_6464.hpp" diff --git a/src/database/kernels/xgemv/xgemv_16.hpp b/src/database/kernels/xgemv/xgemv_16.hpp new file mode 100644 index 00000000..1fc86276 --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_16.hpp @@ -0,0 +1,37 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvHalf = { + "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 256, 1 } }, + { "default", { 256, 1 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp new file mode 100644 index 00000000..fd1aa7aa --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -0,0 +1,94 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvSingle = { + "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } }, + { "ATI Radeon HD 6750M", { 32, 1 } }, + { "Ellesmere", { 256, 1 } }, + { "Fiji", { 128, 1 } }, + { "Hawaii", { 128, 1 } }, + { "Oland", { 128, 1 } }, + { "Pitcairn", { 256, 1 } }, + { "Tahiti", { 256, 1 } }, + { "Tonga", { 128, 2 } }, + { "Turks", { 32, 1 } }, + { "default", { 128, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, + { "default", { 64, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 256, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } }, + { "Iris", { 64, 2 } }, + { "Iris Pro", { 128, 1 } }, + { "default", { 128, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 256, 1 } }, + { "GeForce GT 650M", { 256, 1 } }, + { "GeForce GTX 1070", { 128, 1 } }, + { "GeForce GTX 1080", { 32, 1 } }, + { "GeForce GTX 480", { 64, 1 } }, + { "GeForce GTX 670", { 64, 1 } }, + { "GeForce GTX 680", { 256, 1 } }, + { "GeForce GTX 750", { 256, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1 } }, + { "GeForce GTX 980", { 128, 1 } }, + { "GeForce GTX TITAN", { 256, 1 } }, + { "GeForce GTX TITAN Black", { 256, 1 } }, + { "GeForce GTX TITAN X", { 256, 1 } }, + { "TITAN X (Pascal)", { 32, 1 } }, + { "Tesla K20m", { 128, 1 } }, + { "Tesla K40m", { 256, 1 } }, + { "default", { 256, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 128, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp new file mode 100644 index 00000000..442dd97f --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -0,0 +1,83 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvComplexSingle = { + "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, + { "ATI Radeon HD 6750M", { 64, 1 } }, + { "Ellesmere", { 32, 1 } }, + { "Fiji", { 32, 1 } }, + { "Hawaii", { 64, 1 } }, + { "Oland", { 64, 1 } }, + { "Pitcairn", { 64, 1 } }, + { "Tahiti", { 64, 1 } }, + { "Tonga", { 32, 1 } }, + { "Turks", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, + { "default", { 64, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 64, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } }, + { "Iris", { 256, 1 } }, + { "Iris Pro", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 256, 1 } }, + { "GeForce GTX 1070", { 64, 1 } }, + { "GeForce GTX 1080", { 32, 1 } }, + { "GeForce GTX 480", { 64, 1 } }, + { "GeForce GTX 670", { 64, 1 } }, + { "GeForce GTX 680", { 64, 1 } }, + { "GeForce GTX 750", { 128, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1 } }, + { "GeForce GTX TITAN", { 256, 1 } }, + { "GeForce GTX TITAN Black", { 32, 1 } }, + { "TITAN X (Pascal)", { 32, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp new file mode 100644 index 00000000..8dd899c3 --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -0,0 +1,73 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvDouble = { + "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, + { "Ellesmere", { 32, 1 } }, + { "Fiji", { 32, 1 } }, + { "Hawaii", { 128, 1 } }, + { "Oland", { 256, 1 } }, + { "Pitcairn", { 256, 1 } }, + { "Tahiti", { 256, 1 } }, + { "Tonga", { 32, 1 } }, + { "default", { 256, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, + { "default", { 64, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 128, 1 } }, + { "GeForce GTX 1070", { 64, 1 } }, + { "GeForce GTX 1080", { 32, 1 } }, + { "GeForce GTX 480", { 256, 1 } }, + { "GeForce GTX 670", { 128, 1 } }, + { "GeForce GTX 680", { 128, 1 } }, + { "GeForce GTX 750", { 64, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1 } }, + { "GeForce GTX 980", { 64, 1 } }, + { "GeForce GTX TITAN", { 256, 1 } }, + { "GeForce GTX TITAN Black", { 32, 1 } }, + { "GeForce GTX TITAN X", { 64, 1 } }, + { "TITAN X (Pascal)", { 32, 1 } }, + { "Tesla K20m", { 256, 1 } }, + { "Tesla K40m", { 256, 1 } }, + { "default", { 128, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 128, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp new file mode 100644 index 00000000..50dc8ea0 --- /dev/null +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -0,0 +1,61 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvComplexDouble = { + "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, + { "Ellesmere", { 32, 1 } }, + { "Fiji", { 64, 1 } }, + { "Hawaii", { 64, 1 } }, + { "Oland", { 256, 1 } }, + { "Pitcairn", { 256, 1 } }, + { "Tahiti", { 256, 1 } }, + { "Tonga", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } }, + { "default", { 64, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 128, 1 } }, + { "GeForce GTX 480", { 64, 1 } }, + { "GeForce GTX 670", { 128, 1 } }, + { "default", { 128, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast.hpp b/src/database/kernels/xgemv_fast.hpp deleted file mode 100644 index c3b9103a..00000000 --- a/src/database/kernels/xgemv_fast.hpp +++ /dev/null @@ -1,300 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastHalf = { - "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 1, 32, 1 } }, - { "default", { 1, 32, 1 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } }, - { "default", { 1, 16, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 16, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastSingle = { - "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "ATI Radeon HD 6750M", { 2, 64, 2 } }, - { "Ellesmere", { 1, 64, 1 } }, - { "Fiji", { 1, 64, 2 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 16, 4 } }, - { "Turks", { 1, 256, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } }, - { "default", { 4, 128, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 256, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } }, - { "Iris", { 1, 128, 2 } }, - { "Iris Pro", { 4, 64, 4 } }, - { "default", { 2, 256, 2 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 2, 256, 2 } }, - { "GeForce GT 650M", { 2, 32, 2 } }, - { "GeForce GTX 1070", { 1, 256, 1 } }, - { "GeForce GTX 1080", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 2, 256, 2 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 1, 256, 1 } }, - { "GeForce GTX 750 Ti", { 2, 32, 2 } }, - { "GeForce GTX 980", { 1, 256, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 256, 1 } }, - { "GeForce GTX TITAN X", { 1, 64, 1 } }, - { "TITAN X (Pascal)", { 1, 64, 1 } }, - { "Tesla K20m", { 1, 256, 1 } }, - { "Tesla K40m", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 1, 64, 4 } }, - { "default", { 1, 64, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastComplexSingle = { - "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } }, - { "ATI Radeon HD 6750M", { 1, 128, 1 } }, - { "Ellesmere", { 1, 64, 1 } }, - { "Fiji", { 1, 16, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 128, 1 } }, - { "Tonga", { 2, 32, 2 } }, - { "Turks", { 1, 16, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } }, - { "default", { 1, 64, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 2, 128, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } }, - { "Iris", { 1, 64, 1 } }, - { "Iris Pro", { 4, 128, 4 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 256, 1 } }, - { "GeForce GTX 1070", { 1, 64, 1 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastDouble = { - "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 1, 128, 1 } }, - { "Fiji", { 1, 32, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 2, 32, 2 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } }, - { "default", { 1, 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 256, 1 } }, - { "GeForce GTX 1070", { 1, 256, 1 } }, - { "GeForce GTX 1080", { 1, 32, 2 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 128, 1 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 2, 256, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 1, 64, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 256, 1 } }, - { "GeForce GTX TITAN X", { 1, 128, 1 } }, - { "TITAN X (Pascal)", { 1, 32, 1 } }, - { "Tesla K20m", { 1, 128, 1 } }, - { "Tesla K40m", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastComplexDouble = { - "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 1, 16, 1 } }, - { "Fiji", { 1, 16, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 256, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 32, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } }, - { "default", { 4, 64, 4 } }, - } - }, - { // Intel accelerators - kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast.hpp b/src/database/kernels/xgemv_fast/xgemv_fast.hpp new file mode 100644 index 00000000..5cc2ca6e --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemv_fast/xgemv_fast_16.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_32.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_3232.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_64.hpp" +#include "database/kernels/xgemv_fast/xgemv_fast_6464.hpp" diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp new file mode 100644 index 00000000..6728e713 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp @@ -0,0 +1,37 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastHalf = { + "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 1, 32, 1 } }, + { "default", { 1, 32, 1 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } }, + { "default", { 1, 16, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 16, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp new file mode 100644 index 00000000..eae35316 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -0,0 +1,94 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastSingle = { + "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, + { "ATI Radeon HD 6750M", { 2, 64, 2 } }, + { "Ellesmere", { 1, 64, 1 } }, + { "Fiji", { 1, 64, 2 } }, + { "Hawaii", { 1, 64, 1 } }, + { "Oland", { 1, 64, 1 } }, + { "Pitcairn", { 1, 64, 1 } }, + { "Tahiti", { 1, 64, 1 } }, + { "Tonga", { 1, 16, 4 } }, + { "Turks", { 1, 256, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } }, + { "default", { 4, 128, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 1, 256, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } }, + { "Iris", { 1, 128, 2 } }, + { "Iris Pro", { 4, 64, 4 } }, + { "default", { 2, 256, 2 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 2, 256, 2 } }, + { "GeForce GT 650M", { 2, 32, 2 } }, + { "GeForce GTX 1070", { 1, 256, 1 } }, + { "GeForce GTX 1080", { 1, 128, 1 } }, + { "GeForce GTX 480", { 1, 128, 1 } }, + { "GeForce GTX 670", { 2, 256, 2 } }, + { "GeForce GTX 680", { 1, 128, 1 } }, + { "GeForce GTX 750", { 1, 256, 1 } }, + { "GeForce GTX 750 Ti", { 2, 32, 2 } }, + { "GeForce GTX 980", { 1, 256, 1 } }, + { "GeForce GTX TITAN", { 1, 256, 1 } }, + { "GeForce GTX TITAN Black", { 1, 256, 1 } }, + { "GeForce GTX TITAN X", { 1, 64, 1 } }, + { "TITAN X (Pascal)", { 1, 64, 1 } }, + { "Tesla K20m", { 1, 256, 1 } }, + { "Tesla K40m", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 1, 64, 4 } }, + { "default", { 1, 64, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp new file mode 100644 index 00000000..c66cdc19 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp @@ -0,0 +1,77 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastComplexSingle = { + "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } }, + { "ATI Radeon HD 6750M", { 1, 128, 1 } }, + { "Ellesmere", { 1, 64, 1 } }, + { "Fiji", { 1, 16, 1 } }, + { "Hawaii", { 1, 64, 1 } }, + { "Oland", { 1, 64, 1 } }, + { "Pitcairn", { 1, 64, 1 } }, + { "Tahiti", { 1, 128, 1 } }, + { "Tonga", { 2, 32, 2 } }, + { "Turks", { 1, 16, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } }, + { "default", { 1, 64, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 2, 128, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } }, + { "Iris", { 1, 64, 1 } }, + { "Iris Pro", { 4, 128, 4 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 256, 1 } }, + { "GeForce GTX 1070", { 1, 64, 1 } }, + { "GeForce GTX 480", { 1, 64, 1 } }, + { "GeForce GTX 670", { 1, 64, 1 } }, + { "GeForce GTX 680", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp new file mode 100644 index 00000000..53692530 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -0,0 +1,73 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastDouble = { + "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, + { "Ellesmere", { 1, 128, 1 } }, + { "Fiji", { 1, 32, 1 } }, + { "Hawaii", { 1, 64, 1 } }, + { "Oland", { 1, 64, 1 } }, + { "Pitcairn", { 1, 64, 1 } }, + { "Tahiti", { 1, 64, 1 } }, + { "Tonga", { 2, 32, 2 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } }, + { "default", { 1, 64, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 256, 1 } }, + { "GeForce GTX 1070", { 1, 256, 1 } }, + { "GeForce GTX 1080", { 1, 32, 2 } }, + { "GeForce GTX 480", { 1, 64, 1 } }, + { "GeForce GTX 670", { 1, 128, 1 } }, + { "GeForce GTX 680", { 1, 128, 1 } }, + { "GeForce GTX 750", { 2, 256, 2 } }, + { "GeForce GTX 750 Ti", { 1, 32, 2 } }, + { "GeForce GTX 980", { 1, 64, 1 } }, + { "GeForce GTX TITAN", { 1, 256, 1 } }, + { "GeForce GTX TITAN Black", { 1, 256, 1 } }, + { "GeForce GTX TITAN X", { 1, 128, 1 } }, + { "TITAN X (Pascal)", { 1, 32, 1 } }, + { "Tesla K20m", { 1, 128, 1 } }, + { "Tesla K40m", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp new file mode 100644 index 00000000..fdf3d508 --- /dev/null +++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp @@ -0,0 +1,61 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastComplexDouble = { + "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, + { "Ellesmere", { 1, 16, 1 } }, + { "Fiji", { 1, 16, 1 } }, + { "Hawaii", { 1, 64, 1 } }, + { "Oland", { 1, 256, 1 } }, + { "Pitcairn", { 1, 64, 1 } }, + { "Tahiti", { 1, 64, 1 } }, + { "Tonga", { 1, 32, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } }, + { "default", { 4, 64, 4 } }, + } + }, + { // Intel accelerators + kDeviceTypeAccelerator, "Intel", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 1, 128, 1 } }, + { "GeForce GTX 480", { 1, 64, 1 } }, + { "GeForce GTX 670", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 1, 64, 1 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot.hpp deleted file mode 100644 index 7e5905e4..00000000 --- a/src/database/kernels/xgemv_fast_rot.hpp +++ /dev/null @@ -1,213 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotHalf = { - "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 8, 32, 32 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } }, - { "default", { 8, 128, 32 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 128, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotSingle = { - "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } }, - { "ATI Radeon HD 6750M", { 8, 128, 16 } }, - { "Ellesmere", { 8, 32, 32 } }, - { "Fiji", { 4, 32, 16 } }, - { "Tonga", { 8, 128, 32 } }, - { "Turks", { 8, 128, 16 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, - { "Iris Pro", { 4, 16, 16 } }, - { "default", { 4, 64, 16 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GT 650M", { 8, 32, 16 } }, - { "GeForce GTX 1080", { 8, 32, 32 } }, - { "GeForce GTX 750 Ti", { 8, 32, 32 } }, - { "GeForce GTX TITAN", { 1, 16, 16 } }, - { "GeForce GTX TITAN Black", { 4, 128, 16 } }, - { "TITAN X (Pascal)", { 8, 64, 32 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 4, 64, 16 } }, - { "default", { 4, 64, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 8, 32, 32 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotComplexSingle = { - "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } }, - { "ATI Radeon HD 6750M", { 8, 32, 8 } }, - { "Ellesmere", { 2, 32, 16 } }, - { "Fiji", { 4, 32, 32 } }, - { "Tonga", { 4, 32, 32 } }, - { "Turks", { 4, 32, 8 } }, - { "default", { 8, 16, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } }, - { "default", { 4, 32, 32 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, - { "Iris Pro", { 4, 16, 16 } }, - { "default", { 2, 32, 8 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotDouble = { - "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } }, - { "Ellesmere", { 4, 16, 16 } }, - { "Fiji", { 4, 32, 32 } }, - { "Tonga", { 4, 16, 16 } }, - { "default", { 4, 16, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, - { "default", { 8, 32, 32 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 8, 32, 32 } }, - { "GeForce GTX 750 Ti", { 4, 32, 16 } }, - { "GeForce GTX TITAN", { 1, 16, 16 } }, - { "GeForce GTX TITAN Black", { 1, 16, 16 } }, - { "TITAN X (Pascal)", { 8, 32, 32 } }, - { "default", { 4, 32, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgemvFastRotComplexDouble = { - "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } }, - { "Ellesmere", { 4, 16, 16 } }, - { "Fiji", { 4, 32, 8 } }, - { "Tonga", { 4, 16, 8 } }, - { "default", { 8, 32, 16 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } }, - { "default", { 8, 16, 16 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp new file mode 100644 index 00000000..7379eba9 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels. +// +// ================================================================================================= + +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp" +#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp" diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp new file mode 100644 index 00000000..8d516141 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp @@ -0,0 +1,36 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotHalf = { + "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 8, 32, 32 } }, + { "default", { 8, 32, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } }, + { "default", { 8, 128, 32 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 128, 32 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp new file mode 100644 index 00000000..71b8b355 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -0,0 +1,71 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotSingle = { + "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } }, + { "ATI Radeon HD 6750M", { 8, 128, 16 } }, + { "Ellesmere", { 8, 32, 32 } }, + { "Fiji", { 4, 32, 16 } }, + { "Tonga", { 8, 128, 32 } }, + { "Turks", { 8, 128, 16 } }, + { "default", { 8, 32, 32 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, + { "default", { 8, 32, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, + { "Iris Pro", { 4, 16, 16 } }, + { "default", { 4, 64, 16 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GT 650M", { 8, 32, 16 } }, + { "GeForce GTX 1080", { 8, 32, 32 } }, + { "GeForce GTX 750 Ti", { 8, 32, 32 } }, + { "GeForce GTX TITAN", { 1, 16, 16 } }, + { "GeForce GTX TITAN Black", { 4, 128, 16 } }, + { "TITAN X (Pascal)", { 8, 64, 32 } }, + { "default", { 8, 32, 32 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 4, 64, 16 } }, + { "default", { 4, 64, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 8, 32, 32 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp new file mode 100644 index 00000000..4fd88fc4 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp @@ -0,0 +1,54 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotComplexSingle = { + "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } }, + { "ATI Radeon HD 6750M", { 8, 32, 8 } }, + { "Ellesmere", { 2, 32, 16 } }, + { "Fiji", { 4, 32, 32 } }, + { "Tonga", { 4, 32, 32 } }, + { "Turks", { 4, 32, 8 } }, + { "default", { 8, 16, 16 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } }, + { "default", { 4, 32, 32 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, + { "Iris Pro", { 4, 16, 16 } }, + { "default", { 2, 32, 8 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 4, 16, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp new file mode 100644 index 00000000..66299b56 --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -0,0 +1,52 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotDouble = { + "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } }, + { "Ellesmere", { 4, 16, 16 } }, + { "Fiji", { 4, 32, 32 } }, + { "Tonga", { 4, 16, 16 } }, + { "default", { 4, 16, 16 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, + { "default", { 8, 32, 32 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GeForce GTX 1080", { 8, 32, 32 } }, + { "GeForce GTX 750 Ti", { 4, 32, 16 } }, + { "GeForce GTX TITAN", { 1, 16, 16 } }, + { "GeForce GTX TITAN Black", { 1, 16, 16 } }, + { "TITAN X (Pascal)", { 8, 32, 32 } }, + { "default", { 4, 32, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 4, 16, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp new file mode 100644 index 00000000..bc1964ff --- /dev/null +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp @@ -0,0 +1,42 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgemvFastRotComplexDouble = { + "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } }, + { "Ellesmere", { 4, 16, 16 } }, + { "Fiji", { 4, 32, 8 } }, + { "Tonga", { 4, 16, 8 } }, + { "default", { 8, 32, 16 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } }, + { "default", { 8, 16, 16 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 4, 16, 16 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger.hpp b/src/database/kernels/xger.hpp deleted file mode 100644 index e17396f6..00000000 --- a/src/database/kernels/xger.hpp +++ /dev/null @@ -1,316 +0,0 @@ - -// ================================================================================================= -// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This -// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- -// width of 100 characters per line. -// -// Author(s): -// Database generator -// -// This file populates the database with best-found tuning parameters for the 'Xger' kernels. -// -// ================================================================================================= - -namespace clblast { -namespace database { -// ================================================================================================= - -const Database::DatabaseEntry XgerHalf = { - "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 64, 1, 2 } }, - { "default", { 64, 1, 2 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } }, - { "default", { 4, 8, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 4, 2 } }, - { "default", { 64, 4, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerSingle = { - "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } }, - { "ATI Radeon HD 6750M", { 16, 16, 4 } }, - { "Ellesmere", { 64, 4, 2 } }, - { "Fiji", { 256, 1, 1 } }, - { "Hawaii", { 64, 2, 1 } }, - { "Oland", { 32, 4, 2 } }, - { "Pitcairn", { 64, 1, 1 } }, - { "Tahiti", { 256, 1, 1 } }, - { "Tonga", { 256, 1, 2 } }, - { "Turks", { 64, 4, 2 } }, - { "default", { 16, 16, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 4, 4 } }, - { "default", { 64, 4, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } }, - { "default", { 128, 8, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } }, - { "Iris Pro", { 64, 1, 4 } }, - { "default", { 32, 4, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1, 2 } }, - { "GeForce GT 650M", { 32, 16, 4 } }, - { "GeForce GTX 1070", { 512, 1, 1 } }, - { "GeForce GTX 1080", { 16, 4, 1 } }, - { "GeForce GTX 480", { 256, 1, 4 } }, - { "GeForce GTX 670", { 32, 8, 2 } }, - { "GeForce GTX 680", { 128, 1, 4 } }, - { "GeForce GTX 750", { 64, 16, 4 } }, - { "GeForce GTX 750 Ti", { 64, 1, 2 } }, - { "GeForce GTX TITAN", { 32, 4, 2 } }, - { "GeForce GTX TITAN Black", { 32, 4, 2 } }, - { "TITAN X (Pascal)", { 512, 2, 1 } }, - { "default", { 128, 1, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 128, 1, 2 } }, - { "default", { 128, 1, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 32, 4, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerComplexSingle = { - "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } }, - { "ATI Radeon HD 6750M", { 16, 16, 1 } }, - { "Ellesmere", { 16, 8, 2 } }, - { "Fiji", { 128, 2, 1 } }, - { "Hawaii", { 64, 1, 2 } }, - { "Oland", { 4, 8, 1 } }, - { "Pitcairn", { 128, 2, 1 } }, - { "Tahiti", { 64, 2, 1 } }, - { "Tonga", { 64, 1, 1 } }, - { "Turks", { 128, 2, 1 } }, - { "default", { 128, 2, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 128, 1, 1 } }, - { "default", { 128, 1, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } }, - { "default", { 256, 2, 4 } }, - } - }, - { // Intel GPUs - kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } }, - { "Iris Pro", { 16, 2, 4 } }, - { "default", { 128, 2, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 4, 2 } }, - { "GeForce GTX 1070", { 16, 64, 2 } }, - { "GeForce GTX 1080", { 32, 2, 1 } }, - { "GeForce GTX 480", { 128, 2, 2 } }, - { "GeForce GTX 670", { 16, 32, 2 } }, - { "GeForce GTX 680", { 32, 4, 2 } }, - { "GeForce GTX 750", { 32, 16, 4 } }, - { "GeForce GTX 750 Ti", { 32, 8, 2 } }, - { "GeForce GTX TITAN", { 16, 16, 2 } }, - { "GeForce GTX TITAN Black", { 16, 16, 2 } }, - { "TITAN X (Pascal)", { 32, 2, 1 } }, - { "default", { 128, 2, 2 } }, - } - }, - { // QUALCOMM GPUs - kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 1, 4 } }, - { "default", { 64, 1, 4 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 2, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerDouble = { - "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } }, - { "Ellesmere", { 64, 1, 4 } }, - { "Fiji", { 256, 1, 2 } }, - { "Hawaii", { 32, 4, 2 } }, - { "Oland", { 128, 1, 2 } }, - { "Pitcairn", { 64, 1, 1 } }, - { "Tahiti", { 64, 2, 1 } }, - { "Tonga", { 8, 16, 2 } }, - { "default", { 128, 2, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 4, 1 } }, - { "default", { 64, 4, 1 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } }, - { "default", { 256, 1, 4 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 8, 2 } }, - { "GeForce GTX 1070", { 32, 8, 1 } }, - { "GeForce GTX 1080", { 32, 2, 1 } }, - { "GeForce GTX 480", { 32, 4, 2 } }, - { "GeForce GTX 670", { 32, 32, 2 } }, - { "GeForce GTX 680", { 128, 4, 2 } }, - { "GeForce GTX 750", { 256, 2, 2 } }, - { "GeForce GTX 750 Ti", { 32, 16, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 2 } }, - { "GeForce GTX TITAN Black", { 32, 4, 2 } }, - { "TITAN X (Pascal)", { 32, 2, 1 } }, - { "default", { 128, 1, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 128, 1, 2 } }, - } - }, - } -}; - -// ================================================================================================= - -const Database::DatabaseEntry XgerComplexDouble = { - "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, { - { // AMD GPUs - kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } }, - { "Ellesmere", { 8, 16, 1 } }, - { "Fiji", { 64, 4, 2 } }, - { "Hawaii", { 128, 1, 1 } }, - { "Oland", { 16, 16, 2 } }, - { "Pitcairn", { 64, 4, 1 } }, - { "Tahiti", { 32, 4, 1 } }, - { "Tonga", { 16, 4, 1 } }, - { "default", { 32, 4, 1 } }, - } - }, - { // ARM GPUs - kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 2, 4 } }, - { "default", { 64, 2, 4 } }, - } - }, - { // Intel CPUs - kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } }, - { "default", { 256, 2, 2 } }, - } - }, - { // NVIDIA GPUs - kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 2 } }, - { "GeForce GTX 1070", { 8, 128, 1 } }, - { "GeForce GTX 1080", { 8, 4, 1 } }, - { "GeForce GTX 480", { 64, 2, 2 } }, - { "GeForce GTX 670", { 8, 16, 2 } }, - { "GeForce GTX 680", { 8, 16, 1 } }, - { "GeForce GTX 750", { 8, 32, 4 } }, - { "GeForce GTX 750 Ti", { 32, 8, 2 } }, - { "GeForce GTX TITAN", { 32, 4, 2 } }, - { "GeForce GTX TITAN Black", { 16, 16, 2 } }, - { "TITAN X (Pascal)", { 4, 8, 1 } }, - { "default", { 16, 8, 2 } }, - } - }, - { // Default - kDeviceTypeAll, "default", { - { "default", { 64, 2, 2 } }, - } - }, - } -}; - -// ================================================================================================= -} // namespace database -} // namespace clblast diff --git a/src/database/kernels/xger/xger.hpp b/src/database/kernels/xger/xger.hpp new file mode 100644 index 00000000..284d1fc6 --- /dev/null +++ b/src/database/kernels/xger/xger.hpp @@ -0,0 +1,14 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger' kernels. +// +// ================================================================================================= + +#include "database/kernels/xger/xger_16.hpp" +#include "database/kernels/xger/xger_32.hpp" +#include "database/kernels/xger/xger_3232.hpp" +#include "database/kernels/xger/xger_64.hpp" +#include "database/kernels/xger/xger_6464.hpp" diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp new file mode 100644 index 00000000..376716b7 --- /dev/null +++ b/src/database/kernels/xger/xger_16.hpp @@ -0,0 +1,43 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger16' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerHalf = { + "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "Ellesmere", { 64, 1, 2 } }, + { "default", { 64, 1, 2 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } }, + { "default", { 4, 8, 2 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 64, 4, 2 } }, + { "default", { 64, 4, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 64, 1, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp new file mode 100644 index 00000000..bc18f20e --- /dev/null +++ b/src/database/kernels/xger/xger_32.hpp @@ -0,0 +1,89 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger32' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerSingle = { + "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } }, + { "ATI Radeon HD 6750M", { 16, 16, 4 } }, + { "Ellesmere", { 64, 4, 2 } }, + { "Fiji", { 256, 1, 1 } }, + { "Hawaii", { 64, 2, 1 } }, + { "Oland", { 32, 4, 2 } }, + { "Pitcairn", { 64, 1, 1 } }, + { "Tahiti", { 256, 1, 1 } }, + { "Tonga", { 256, 1, 2 } }, + { "Turks", { 64, 4, 2 } }, + { "default", { 16, 16, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 64, 4, 4 } }, + { "default", { 64, 4, 4 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } }, + { "default", { 128, 8, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } }, + { "Iris Pro", { 64, 1, 4 } }, + { "default", { 32, 4, 2 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 128, 1, 2 } }, + { "GeForce GT 650M", { 32, 16, 4 } }, + { "GeForce GTX 1070", { 512, 1, 1 } }, + { "GeForce GTX 1080", { 16, 4, 1 } }, + { "GeForce GTX 480", { 256, 1, 4 } }, + { "GeForce GTX 670", { 32, 8, 2 } }, + { "GeForce GTX 680", { 128, 1, 4 } }, + { "GeForce GTX 750", { 64, 16, 4 } }, + { "GeForce GTX 750 Ti", { 64, 1, 2 } }, + { "GeForce GTX TITAN", { 32, 4, 2 } }, + { "GeForce GTX TITAN Black", { 32, 4, 2 } }, + { "TITAN X (Pascal)", { 512, 2, 1 } }, + { "default", { 128, 1, 2 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 128, 1, 2 } }, + { "default", { 128, 1, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 32, 4, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp new file mode 100644 index 00000000..d2d1587f --- /dev/null +++ b/src/database/kernels/xger/xger_3232.hpp @@ -0,0 +1,88 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger3232' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerComplexSingle = { + "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } }, + { "ATI Radeon HD 6750M", { 16, 16, 1 } }, + { "Ellesmere", { 16, 8, 2 } }, + { "Fiji", { 128, 2, 1 } }, + { "Hawaii", { 64, 1, 2 } }, + { "Oland", { 4, 8, 1 } }, + { "Pitcairn", { 128, 2, 1 } }, + { "Tahiti", { 64, 2, 1 } }, + { "Tonga", { 64, 1, 1 } }, + { "Turks", { 128, 2, 1 } }, + { "default", { 128, 2, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 128, 1, 1 } }, + { "default", { 128, 1, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } }, + { "default", { 256, 2, 4 } }, + } + }, + { // Intel GPUs + kDeviceTypeGPU, "Intel", { + { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } }, + { "Iris Pro", { 16, 2, 4 } }, + { "default", { 128, 2, 2 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 64, 4, 2 } }, + { "GeForce GTX 1070", { 16, 64, 2 } }, + { "GeForce GTX 1080", { 32, 2, 1 } }, + { "GeForce GTX 480", { 128, 2, 2 } }, + { "GeForce GTX 670", { 16, 32, 2 } }, + { "GeForce GTX 680", { 32, 4, 2 } }, + { "GeForce GTX 750", { 32, 16, 4 } }, + { "GeForce GTX 750 Ti", { 32, 8, 2 } }, + { "GeForce GTX TITAN", { 16, 16, 2 } }, + { "GeForce GTX TITAN Black", { 16, 16, 2 } }, + { "TITAN X (Pascal)", { 32, 2, 1 } }, + { "default", { 128, 2, 2 } }, + } + }, + { // QUALCOMM GPUs + kDeviceTypeGPU, "QUALCOMM", { + { "QUALCOMM Adreno(TM)", { 64, 1, 4 } }, + { "default", { 64, 1, 4 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 64, 2, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp new file mode 100644 index 00000000..304fcaf4 --- /dev/null +++ b/src/database/kernels/xger/xger_64.hpp @@ -0,0 +1,69 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger64' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerDouble = { + "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } }, + { "Ellesmere", { 64, 1, 4 } }, + { "Fiji", { 256, 1, 2 } }, + { "Hawaii", { 32, 4, 2 } }, + { "Oland", { 128, 1, 2 } }, + { "Pitcairn", { 64, 1, 1 } }, + { "Tahiti", { 64, 2, 1 } }, + { "Tonga", { 8, 16, 2 } }, + { "default", { 128, 2, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 64, 4, 1 } }, + { "default", { 64, 4, 1 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } }, + { "default", { 256, 1, 4 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 128, 8, 2 } }, + { "GeForce GTX 1070", { 32, 8, 1 } }, + { "GeForce GTX 1080", { 32, 2, 1 } }, + { "GeForce GTX 480", { 32, 4, 2 } }, + { "GeForce GTX 670", { 32, 32, 2 } }, + { "GeForce GTX 680", { 128, 4, 2 } }, + { "GeForce GTX 750", { 256, 2, 2 } }, + { "GeForce GTX 750 Ti", { 32, 16, 1 } }, + { "GeForce GTX TITAN", { 16, 8, 2 } }, + { "GeForce GTX TITAN Black", { 32, 4, 2 } }, + { "TITAN X (Pascal)", { 32, 2, 1 } }, + { "default", { 128, 1, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 128, 1, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp new file mode 100644 index 00000000..dd7e6572 --- /dev/null +++ b/src/database/kernels/xger/xger_6464.hpp @@ -0,0 +1,69 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It +// is auto-generated by the 'scripts/database/database.py' Python script. +// +// This file populates the database with best-found tuning parameters for the 'Xger6464' kernels. +// +// ================================================================================================= + +namespace clblast { +namespace database { + +const DatabaseEntry XgerComplexDouble = { + "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, { + { // AMD GPUs + kDeviceTypeGPU, "AMD", { + { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } }, + { "Ellesmere", { 8, 16, 1 } }, + { "Fiji", { 64, 4, 2 } }, + { "Hawaii", { 128, 1, 1 } }, + { "Oland", { 16, 16, 2 } }, + { "Pitcairn", { 64, 4, 1 } }, + { "Tahiti", { 32, 4, 1 } }, + { "Tonga", { 16, 4, 1 } }, + { "default", { 32, 4, 1 } }, + } + }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "Mali-T628", { 64, 2, 4 } }, + { "default", { 64, 2, 4 } }, + } + }, + { // Intel CPUs + kDeviceTypeCPU, "Intel", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } }, + { "default", { 256, 2, 2 } }, + } + }, + { // NVIDIA GPUs + kDeviceTypeGPU, "NVIDIA", { + { "GRID K520", { 16, 8, 2 } }, + { "GeForce GTX 1070", { 8, 128, 1 } }, + { "GeForce GTX 1080", { 8, 4, 1 } }, + { "GeForce GTX 480", { 64, 2, 2 } }, + { "GeForce GTX 670", { 8, 16, 2 } }, + { "GeForce GTX 680", { 8, 16, 1 } }, + { "GeForce GTX 750", { 8, 32, 4 } }, + { "GeForce GTX 750 Ti", { 32, 8, 2 } }, + { "GeForce GTX TITAN", { 32, 4, 2 } }, + { "GeForce GTX TITAN Black", { 16, 16, 2 } }, + { "TITAN X (Pascal)", { 4, 8, 1 } }, + { "default", { 16, 8, 2 } }, + } + }, + { // Default + kDeviceTypeAll, "default", { + { "default", { 64, 2, 2 } }, + } + }, + } +}; + +} // namespace database +} // namespace clblast diff --git a/src/database/kernels/xtrsv.hpp b/src/database/kernels/xtrsv.hpp index 6633b8b7..7c22aa57 100644 --- a/src/database/kernels/xtrsv.hpp +++ b/src/database/kernels/xtrsv.hpp @@ -15,7 +15,7 @@ namespace clblast { namespace database { // ================================================================================================= -const Database::DatabaseEntry XtrsvHalf = { +const DatabaseEntry XtrsvHalf = { "Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -27,7 +27,7 @@ const Database::DatabaseEntry XtrsvHalf = { // ================================================================================================= -const Database::DatabaseEntry XtrsvSingle = { +const DatabaseEntry XtrsvSingle = { "Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -39,7 +39,7 @@ const Database::DatabaseEntry XtrsvSingle = { // ================================================================================================= -const Database::DatabaseEntry XtrsvComplexSingle = { +const DatabaseEntry XtrsvComplexSingle = { "Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -51,7 +51,7 @@ const Database::DatabaseEntry XtrsvComplexSingle = { // ================================================================================================= -const Database::DatabaseEntry XtrsvDouble = { +const DatabaseEntry XtrsvDouble = { "Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { @@ -63,7 +63,7 @@ const Database::DatabaseEntry XtrsvDouble = { // ================================================================================================= -const Database::DatabaseEntry XtrsvComplexDouble = { +const DatabaseEntry XtrsvComplexDouble = { "Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { diff --git a/src/routine.cpp b/src/routine.cpp index 7d4ed76f..758ffa0c 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -51,7 +51,7 @@ const std::unordered_map> Routine::r // The constructor does all heavy work, errors are returned as exceptions Routine::Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &kernel_names, const Precision precision, - const std::vector &userDatabase, + const std::vector &userDatabase, std::initializer_list source): precision_(precision), routine_name_(name), @@ -67,7 +67,7 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name, InitProgram(source); } -void Routine::InitDatabase(const std::vector &userDatabase) { +void Routine::InitDatabase(const std::vector &userDatabase) { for (const auto &kernel_name : kernel_names_) { // Queries the cache to see whether or not the kernel parameter database is already there diff --git a/src/routine.hpp b/src/routine.hpp index 903ccdb1..5e2b4065 100644 --- a/src/routine.hpp +++ b/src/routine.hpp @@ -40,7 +40,7 @@ class Routine { // and routine list, otherwise the caching logic will break. explicit Routine(Queue &queue, EventPointer event, const std::string &name, const std::vector &routines, const Precision precision, - const std::vector &userDatabase, + const std::vector &userDatabase, std::initializer_list source); // List of kernel-routine look-ups @@ -59,7 +59,7 @@ class Routine { void InitProgram(std::initializer_list source); // Initializes db_, fetching cached database or building one - void InitDatabase(const std::vector &userDatabase); + void InitDatabase(const std::vector &userDatabase); protected: