Split the database files over multiple directories and files; first step towards separate compilation

pull/191/head
Cedric Nugteren 2017-09-06 21:50:42 +02:00
parent bb947890de
commit 20da5e33a8
96 changed files with 4646 additions and 3921 deletions

View File

@ -159,8 +159,8 @@ endif()
# Sets the supported routines and the used kernels. New routines and kernels should be added here.
set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger
xgemm xgemm_direct xgemv)
set(DATABASES copy invert pad padtranspose transpose xaxpy xdot
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger xtrsv)
set(DATABASES copy pad padtranspose transpose xaxpy xdot
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger)
set(SAMPLE_PROGRAMS_CPP sgemm sgemm_batched)
set(SAMPLE_PROGRAMS_C sasum dgemv sgemm haxpy cache)
if(NETLIB)
@ -230,7 +230,12 @@ foreach(ROUTINE ${LEVELX_ROUTINES})
set(HEADERS ${HEADERS} src/routines/levelx/${ROUTINE}.hpp)
endforeach()
foreach(DATABASE ${DATABASES})
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}.hpp)
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}.hpp)
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_16.hpp)
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_32.hpp)
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_64.hpp)
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_3232.hpp)
set(HEADERS ${HEADERS} src/database/kernels/${DATABASE}/${DATABASE}_6464.hpp)
endforeach()
# Creates and links the library

View File

@ -42,20 +42,19 @@ def get_cpp_separator():
return "// ================================================================================================="
def get_cpp_header(family):
def get_cpp_header(family, precision):
"""Retrieves the C++ header"""
return ("\n" + get_cpp_separator() + """
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the '%s' kernels.
// This file populates the database with best-found tuning parameters for the '%s%s' kernels.
//\n"""
% family.title() + get_cpp_separator() + \
"\n\nnamespace clblast {\n" + "namespace database {\n" + get_cpp_separator())
% (family.title(), precision)) + get_cpp_separator() + "\n"
def get_cpp_header_namespace():
    """Retrieves the C++ code which opens the 'clblast' and nested 'database' namespaces"""
    namespace_lines = ["", "namespace clblast {", "namespace database {", ""]
    return "\n".join(namespace_lines)
def get_cpp_footer():
@ -67,7 +66,7 @@ def get_cpp_precision(family, precision):
"""Retrieves the C++ code for the start of a new precision"""
precision_string = precision_to_string(precision)
camelcase_name = family.title().replace("_", "")
return("\n\nconst Database::DatabaseEntry %s%s = {\n \"%s\", Precision::k%s"
return("\nconst DatabaseEntry %s%s = {\n \"%s\", Precision::k%s"
% (camelcase_name, precision_string, camelcase_name, precision_string))
@ -79,6 +78,15 @@ def get_cpp_device_vendor(vendor, device_type):
return " { // %s %ss\n kDeviceType%s, \"%s\", {\n" % (vendor, device_type, device_type_caps, vendor)
def get_cpp_family_includes(family, precisions):
    """Retrieves the C++ '#include' lines for the per-precision headers of one kernel family"""
    # Disabled in the original and kept disabled here:
    # result += "#include \"clblast.h\"\n"
    # result += "#include \"database/database_structure.hpp\"\n"
    include_lines = ["#include \"database/kernels/%s/%s_%s.hpp\"\n" % (family, family, precision)
                     for precision in precisions]
    # The leading newline separates the includes from the file header written before them
    return "\n" + "".join(include_lines)
def print_cpp_database(database, output_dir):
"""Outputs the database as C++ code"""
@ -87,19 +95,23 @@ def print_cpp_database(database, output_dir):
for family_name in kernel_families:
family_database = [s for s in database["sections"] if s["kernel_family"] == family_name]
# Opens a new file for each kernel family
full_path = os.path.join(output_dir, family_name + ".hpp")
with open(full_path, 'w+') as f:
f.write(get_cpp_header(family_name))
# Goes into a new path for each kernel family
family_path = os.path.join(output_dir, family_name)
# Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
for precision in precisions:
precision_database = [s for s in family_database if s["precision"] == precision]
# Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
for precision in precisions:
precision_database = [s for s in family_database if s["precision"] == precision]
# Opens a new file for each precision
full_path = os.path.join(family_path, family_name + "_" + precision + ".hpp")
with open(full_path, 'w+') as f:
f.write(get_cpp_header(family_name, precision))
f.write(get_cpp_header_namespace())
f.write(get_cpp_precision(family_name, precision))
# In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but
# with the defaults only
# In case there is nothing found at all (e.g. 16-bit): continue as if this was a
# precision of 32 but with the defaults only
if len(precision_database) == 0:
print("[database] No results found for %s:%s, retrieving defaults from %s:32" %
(family_name, precision, family_name))
@ -138,7 +150,7 @@ def print_cpp_database(database, output_dir):
# Collects the parameters for this entry
parameters = []
parmameter_index = 0
parameter_index = 0
kernels = sorted(set([s["kernel"] for s in device_database]))
for kernel in kernels:
kernel_database = [s for s in device_database if s["kernel"] == kernel]
@ -149,10 +161,10 @@ def print_cpp_database(database, output_dir):
assert len(results) == 1
new_parameters = results[0]["parameters"]
for parameter_name in sorted(new_parameters):
assert parameter_name == parameter_names[parmameter_index]
assert parameter_name == parameter_names[parameter_index]
parameter_value = new_parameters[parameter_name]
parameters.append(str(parameter_value))
parmameter_index += 1
parameter_index += 1
# Prints the entry
f.write(", ".join(parameters))
@ -162,7 +174,13 @@ def print_cpp_database(database, output_dir):
f.write(" }\n },\n")
# Prints the precision footer
f.write(" }\n};\n\n" + get_cpp_separator())
f.write(" }\n};\n")
# Prints the file footer
f.write(get_cpp_footer())
# Prints the file footer
f.write(get_cpp_footer())
# Creates the combined family includes header
full_path = os.path.join(family_path, family_name + ".hpp")
with open(full_path, 'w+') as f:
f.write(get_cpp_header(family_name, ""))
f.write(get_cpp_family_includes(family_name, precisions))

View File

@ -2520,10 +2520,10 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern
}
// Creates a small custom database based on the provided parameters
const auto database_device = Database::DatabaseDevice{"default", parameter_values};
const auto database_vendor = Database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}};
const auto database_entry = Database::DatabaseEntry{kernel_name, precision, parameter_names, {database_vendor}};
const auto database_entries = std::vector<Database::DatabaseEntry>{database_entry};
const auto database_device = database::DatabaseDevice{"default", parameter_values};
const auto database_vendor = database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}};
const auto database_entry = database::DatabaseEntry{kernel_name, precision, parameter_names, {database_vendor}};
const auto database_entries = std::vector<database::DatabaseEntry>{database_entry};
const auto database = Database(device_cpp, kernel_name, precision, database_entries);
// Removes the old database entry and stores the new one in the cache

View File

@ -22,46 +22,46 @@ namespace clblast {
namespace database {
// =================================================================================================
const Database::DatabaseEntry XaxpyApple = {
const DatabaseEntry XaxpyApple = {
"Xaxpy", Precision::kAny, {"VW", "WGS", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { 8, 1, 4 } } } } }
};
const Database::DatabaseEntry XdotApple = {
const DatabaseEntry XdotApple = {
"Xdot", Precision::kAny, {"WGS1", "WGS2"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1 } } } } }
};
const Database::DatabaseEntry XgemvApple = {
const DatabaseEntry XgemvApple = {
"Xgemv", Precision::kAny, {"WGS1", "WPT1", "UNROLL1"}, { { kDeviceTypeAll, "default", { { "default", { 1, 4, 1 } } } } }
};
const Database::DatabaseEntry XgemvFastApple = {
const DatabaseEntry XgemvFastApple = {
"XgemvFast", Precision::kAny, {"VW2", "WGS2", "WPT2"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1 } } } } }
};
const Database::DatabaseEntry XgemvFastRotApple = {
const DatabaseEntry XgemvFastRotApple = {
"XgemvFastRot", Precision::kAny, {"VW3", "WGS3", "WPT3"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1 } } } } }
};
const Database::DatabaseEntry XgerApple = {
const DatabaseEntry XgerApple = {
"Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { 64, 1, 2 } } } } }
};
const Database::DatabaseEntry XtrsvApple = {
const DatabaseEntry XtrsvApple = {
"Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { 32 } } } } }
};
const Database::DatabaseEntry XgemmApple = {
const DatabaseEntry XgemmApple = {
"Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } }
};
const Database::DatabaseEntry XgemmDirectApple = {
const DatabaseEntry XgemmDirectApple = {
"XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1 } } } } }
};
const Database::DatabaseEntry CopyApple = {
const DatabaseEntry CopyApple = {
"Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1 } } } } }
};
const Database::DatabaseEntry PadApple = {
const DatabaseEntry PadApple = {
"Pad", Precision::kAny, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1 } } } } }
};
const Database::DatabaseEntry TransposeApple = {
const DatabaseEntry TransposeApple = {
"Transpose", Precision::kAny, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 1, 0, 0, 1 } } } } }
};
const Database::DatabaseEntry PadtransposeApple = {
const DatabaseEntry PadtransposeApple = {
"Padtranspose", Precision::kAny, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 0, 1, 1 } } } } }
};
const Database::DatabaseEntry InvertApple = {
const DatabaseEntry InvertApple = {
"Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { 16 } } } } }
};

View File

@ -16,19 +16,19 @@
#include "utilities/utilities.hpp"
#include "database/database.hpp"
#include "database/kernels/xaxpy.hpp"
#include "database/kernels/xdot.hpp"
#include "database/kernels/xgemv.hpp"
#include "database/kernels/xgemv_fast.hpp"
#include "database/kernels/xgemv_fast_rot.hpp"
#include "database/kernels/xger.hpp"
#include "database/kernels/xaxpy/xaxpy.hpp"
#include "database/kernels/xdot/xdot.hpp"
#include "database/kernels/xgemv/xgemv.hpp"
#include "database/kernels/xgemv_fast/xgemv_fast.hpp"
#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot.hpp"
#include "database/kernels/xger/xger.hpp"
#include "database/kernels/xgemm/xgemm.hpp"
#include "database/kernels/xgemm_direct/xgemm_direct.hpp"
#include "database/kernels/copy/copy.hpp"
#include "database/kernels/pad/pad.hpp"
#include "database/kernels/transpose/transpose.hpp"
#include "database/kernels/padtranspose/padtranspose.hpp"
#include "database/kernels/xtrsv.hpp"
#include "database/kernels/xgemm.hpp"
#include "database/kernels/xgemm_direct.hpp"
#include "database/kernels/copy.hpp"
#include "database/kernels/pad.hpp"
#include "database/kernels/transpose.hpp"
#include "database/kernels/padtranspose.hpp"
#include "database/kernels/invert.hpp"
#include "database/apple_cpu_fallback.hpp"
#include "database/kernel_selection.hpp"
@ -36,8 +36,12 @@
namespace clblast {
// =================================================================================================
// Declares (without defining) the half-precision 'Copy' database entry with external linkage, so
// that it can be referred to here even when its definition is provided by another translation unit.
// NOTE(review): only CopyHalf is declared this way; presumably this is a first experiment towards
// compiling the database entries separately - confirm before extending or removing.
namespace database {
extern const DatabaseEntry CopyHalf;
}
// Initializes the databases
const std::vector<Database::DatabaseEntry> Database::database = std::vector<Database::DatabaseEntry>{
const std::vector<database::DatabaseEntry> Database::database = std::vector<database::DatabaseEntry>{
database::XaxpyHalf, database::XaxpySingle, database::XaxpyDouble, database::XaxpyComplexSingle, database::XaxpyComplexDouble,
database::XdotHalf, database::XdotSingle, database::XdotDouble, database::XdotComplexSingle, database::XdotComplexDouble,
database::XgemvHalf, database::XgemvSingle, database::XgemvDouble, database::XgemvComplexSingle, database::XgemvComplexDouble,
@ -54,7 +58,7 @@ const std::vector<Database::DatabaseEntry> Database::database = std::vector<Data
database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble,
database::KernelSelectionHalf, database::KernelSelectionSingle, database::KernelSelectionDouble, database::KernelSelectionComplexSingle, database::KernelSelectionComplexDouble
};
const std::vector<Database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<Database::DatabaseEntry>{
const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{
database::XaxpyApple, database::XdotApple,
database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple,
database::XgemmApple, database::XgemmDirectApple,
@ -78,8 +82,8 @@ const std::unordered_map<std::string, std::string> Database::kVendorNames{
// Constructor, computing device properties and populating the parameter-vector from the database.
// This takes an optional overlay database in case of custom tuning or custom kernels.
Database::Database(const Device &device, const std::string &kernel_name,
const Precision precision, const std::vector<DatabaseEntry> &overlay):
parameters_(std::make_shared<Parameters>()) {
const Precision precision, const std::vector<database::DatabaseEntry> &overlay):
parameters_(std::make_shared<database::Parameters>()) {
// Finds information of the current device
auto device_type = device.Type();
@ -94,7 +98,7 @@ Database::Database(const Device &device, const std::string &kernel_name,
}
// Sets the databases to search through
auto databases = std::list<std::vector<DatabaseEntry>>{overlay, database};
auto databases = std::list<std::vector<database::DatabaseEntry>>{overlay, database};
// Special case: modifies the database if the device is a CPU with Apple OpenCL
#if defined(__APPLE__) || defined(__MACOSX)
@ -108,7 +112,7 @@ Database::Database(const Device &device, const std::string &kernel_name,
#endif
// Searches potentially multiple databases
auto search_result = Parameters();
auto search_result = database::Parameters();
for (auto &db: databases) {
search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db);
if (search_result.size() != 0) {
@ -143,12 +147,12 @@ std::vector<std::string> Database::GetParameterNames() const {
// =================================================================================================
// Searches a particular database for the right kernel and precision
Database::Parameters Database::Search(const std::string &this_kernel,
database::Parameters Database::Search(const std::string &this_kernel,
const std::string &this_type,
const std::string &this_vendor,
const std::string &this_device,
const Precision this_precision,
const std::vector<DatabaseEntry> &this_database) const {
const std::vector<database::DatabaseEntry> &this_database) const {
// Selects the right kernel
for (auto &db: this_database) {
@ -165,13 +169,13 @@ Database::Parameters Database::Search(const std::string &this_kernel,
}
// If we reached this point, the entry was not found in this database
return Parameters();
return database::Parameters();
}
Database::Parameters Database::SearchVendorAndType(const std::string &target_vendor,
database::Parameters Database::SearchVendorAndType(const std::string &target_vendor,
const std::string &target_type,
const std::string &this_device,
const std::vector<DatabaseVendor> &vendors,
const std::vector<database::DatabaseVendor> &vendors,
const std::vector<std::string> &parameter_names) const {
for (auto &vendor: vendors) {
if ((vendor.name == target_vendor) && (vendor.type == target_type)) {
@ -182,25 +186,25 @@ Database::Parameters Database::SearchVendorAndType(const std::string &target_ven
return SearchDevice("default", vendor.devices, parameter_names);
}
}
return Parameters();
return database::Parameters();
}
Database::Parameters Database::SearchDevice(const std::string &target_device,
const std::vector<DatabaseDevice> &devices,
database::Parameters Database::SearchDevice(const std::string &target_device,
const std::vector<database::DatabaseDevice> &devices,
const std::vector<std::string> &parameter_names) const {
for (auto &device: devices) {
if (device.name == target_device) {
// Sets the parameters accordingly
auto parameters = Parameters();
if (parameter_names.size() != device.parameters.size()) { return Parameters(); } // ERROR
auto parameters = database::Parameters();
if (parameter_names.size() != device.parameters.size()) { return database::Parameters(); } // ERROR
for (auto i = size_t{0}; i < parameter_names.size(); ++i) {
parameters[parameter_names[i]] = device.parameters[i];
}
return parameters;
}
}
return Parameters();
return database::Parameters();
}
// =================================================================================================

View File

@ -22,47 +22,15 @@
#include <unordered_map>
#include "utilities/utilities.hpp"
#include "database/database_structure.hpp"
namespace clblast {
// =================================================================================================
// A special namespace to hold all the global constant variables (including the database entries)
namespace database {
// The OpenCL device types
const std::string kDeviceTypeCPU = "CPU";
const std::string kDeviceTypeGPU = "GPU";
const std::string kDeviceTypeAccelerator = "accelerator";
const std::string kDeviceTypeAll = "default";
} // namespace database
// =================================================================================================
// See comment at top of file for a description of the class
class Database {
public:
// Type alias for the database parameters
using Parameters = std::unordered_map<std::string, size_t>;
// Structures for content inside the database
struct DatabaseDevice {
std::string name;
std::vector<size_t> parameters; // parameter values
};
struct DatabaseVendor {
std::string type;
std::string name;
std::vector<DatabaseDevice> devices;
};
struct DatabaseEntry {
std::string kernel;
Precision precision;
std::vector<std::string> parameter_names;
std::vector<DatabaseVendor> vendors;
};
// The OpenCL device vendors
static const std::string kDeviceVendorAll;
@ -70,16 +38,16 @@ class Database {
static const std::unordered_map<std::string, std::string> kVendorNames;
// The database consists of separate database entries, stored together in a vector
static const std::vector<DatabaseEntry> database;
static const std::vector<database::DatabaseEntry> database;
// Database for a special case: Apple CPUs support limited number of threads
static const std::vector<DatabaseEntry> apple_cpu_fallback;
static const std::vector<database::DatabaseEntry> apple_cpu_fallback;
Database() = default;
// The constructor with a user-provided database overlay (potentially an empty vector)
explicit Database(const Device &device, const std::string &kernel_name,
const Precision precision, const std::vector<DatabaseEntry> &overlay);
const Precision precision, const std::vector<database::DatabaseEntry> &overlay);
// Accessor of values by key
size_t operator[](const std::string &key) const { return parameters_->find(key)->second; }
@ -93,21 +61,21 @@ class Database {
private:
// Search method functions, returning a set of parameters (possibly empty)
Parameters Search(const std::string &this_kernel, const std::string &this_type,
database::Parameters Search(const std::string &this_kernel, const std::string &this_type,
const std::string &this_vendor, const std::string &this_device,
const Precision this_precision,
const std::vector<DatabaseEntry> &db) const;
Parameters SearchDevice(const std::string &target_device,
const std::vector<DatabaseDevice> &devices,
const std::vector<database::DatabaseEntry> &db) const;
database::Parameters SearchDevice(const std::string &target_device,
const std::vector<database::DatabaseDevice> &devices,
const std::vector<std::string> &parameter_names) const;
Parameters SearchVendorAndType(const std::string &target_vendor,
database::Parameters SearchVendorAndType(const std::string &target_vendor,
const std::string &target_type,
const std::string &this_device,
const std::vector<DatabaseVendor> &vendors,
const std::vector<database::DatabaseVendor> &vendors,
const std::vector<std::string> &parameter_names) const;
// Found parameters suitable for this device/kernel
std::shared_ptr<Parameters> parameters_;
std::shared_ptr<database::Parameters> parameters_;
};
// =================================================================================================

View File

@ -0,0 +1,58 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file describes the database storage structures.
//
// =================================================================================================
#ifndef CLBLAST_DATABASE_STRUCTURE_H_
#define CLBLAST_DATABASE_STRUCTURE_H_
#include <string>
#include <vector>
#include <unordered_map>
namespace clblast {
// A special namespace to hold all the global constant variables (including the database entries)
namespace database {
// =================================================================================================
// The OpenCL device types, as the strings stored in the 'type' field of a DatabaseVendor.
// kDeviceTypeAll holds the value "default" and labels vendor groups that are not tied to a
// specific device type.
// Note: these are namespace-scope 'const' objects defined in a header, so they have internal
// linkage and every translation unit including this header gets its own copy.
const std::string kDeviceTypeCPU = "CPU";
const std::string kDeviceTypeGPU = "GPU";
const std::string kDeviceTypeAccelerator = "accelerator";
const std::string kDeviceTypeAll = "default";
// Type alias for the database parameters: maps a parameter name (e.g. "COPY_DIMX") to its
// unsigned integer value
using Parameters = std::unordered_map<std::string, size_t>;
// Structures for content inside the database
// Tuning parameters for a single device. This is a plain aggregate: database entries are written
// with brace-initialisation, so the order of the members must not change.
struct DatabaseDevice {
// Device name as listed in the database (e.g. "Ellesmere"), or "default" for the fallback row
std::string name;
// Values only; the matching names are stored once per kernel in DatabaseEntry::parameter_names
std::vector<size_t> parameters; // parameter values
};
// Groups the devices of one vendor and one device type. This is a plain aggregate: database
// entries are written with brace-initialisation, so the order of the members must not change.
struct DatabaseVendor {
// Device type string, e.g. one of the kDeviceType* constants
std::string type;
// Vendor name as listed in the database (e.g. "AMD", "Intel"), or "default"
std::string name;
// The per-device parameter values belonging to this vendor/type combination
std::vector<DatabaseDevice> devices;
};
// One database entry: all tuning results for a single kernel family at a single precision. This
// is a plain aggregate: entries are written with brace-initialisation, so the order of the members
// must not change.
// NOTE(review): 'Precision' is neither declared nor included by this header; it has to be made
// visible by whatever includes this file first - confirm the intended include.
struct DatabaseEntry {
// Name of the kernel family, e.g. "Copy" or "Xgemm"
std::string kernel;
// The precision these tuning results apply to
Precision precision;
// Names of the tuning parameters; DatabaseDevice::parameters lists the values in the same order
std::vector<std::string> parameter_names;
// The tuning results, grouped per device type and vendor
std::vector<DatabaseVendor> vendors;
};
// =================================================================================================
} // namespace database
} // namespace clblast
// CLBLAST_DATABASE_STRUCTURE_H_
#endif

View File

@ -18,7 +18,7 @@ namespace clblast {
namespace database {
// =================================================================================================
const Database::DatabaseEntry KernelSelectionHalf = {
const DatabaseEntry KernelSelectionHalf = {
"KernelSelection", Precision::kHalf, {"XGEMM_MIN_INDIRECT_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
@ -40,7 +40,7 @@ const Database::DatabaseEntry KernelSelectionHalf = {
// =================================================================================================
const Database::DatabaseEntry KernelSelectionSingle = {
const DatabaseEntry KernelSelectionSingle = {
"KernelSelection", Precision::kSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
@ -67,7 +67,7 @@ const Database::DatabaseEntry KernelSelectionSingle = {
// =================================================================================================
const Database::DatabaseEntry KernelSelectionComplexSingle = {
const DatabaseEntry KernelSelectionComplexSingle = {
"KernelSelection", Precision::kComplexSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
@ -89,7 +89,7 @@ const Database::DatabaseEntry KernelSelectionComplexSingle = {
// =================================================================================================
const Database::DatabaseEntry KernelSelectionDouble = {
const DatabaseEntry KernelSelectionDouble = {
"KernelSelection", Precision::kDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
@ -111,7 +111,7 @@ const Database::DatabaseEntry KernelSelectionDouble = {
// =================================================================================================
const Database::DatabaseEntry KernelSelectionComplexDouble = {
const DatabaseEntry KernelSelectionComplexDouble = {
"KernelSelection", Precision::kComplexDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, {
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {

View File

@ -1,354 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning parameters for the 'Copy' kernels in half precision (Precision::kHalf).
// Each innermost list holds the values of COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT, in that
// order. The rows are grouped per device type and vendor; every group ends with a "default" device
// row and the last group is the vendor-independent default (kDeviceTypeAll, "default").
const Database::DatabaseEntry CopyHalf = {
"Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 16, 8, 4, 4 } },
{ "default", { 16, 8, 4, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } },
{ "default", { 8, 32, 4, 8 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 4, 4 } },
}
},
}
};
// =================================================================================================
// Tuning parameters for the 'Copy' kernels in single precision (Precision::kSingle).
// Each innermost list holds the values of COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT, in that
// order. The rows are grouped per device type and vendor; every group ends with a "default" device
// row and the last group is the vendor-independent default (kDeviceTypeAll, "default").
const Database::DatabaseEntry CopySingle = {
"Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } },
{ "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
{ "Ellesmere", { 8, 8, 4, 8 } },
{ "Fiji", { 16, 16, 1, 2 } },
{ "Hawaii", { 32, 8, 2, 2 } },
{ "Oland", { 32, 8, 4, 2 } },
{ "Pitcairn", { 8, 16, 4, 1 } },
{ "Tahiti", { 32, 8, 2, 2 } },
{ "Tonga", { 32, 8, 4, 4 } },
{ "Turks", { 8, 8, 4, 2 } },
{ "default", { 8, 16, 4, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 2, 4 } },
{ "default", { 32, 8, 2, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } },
{ "default", { 32, 16, 8, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } },
{ "Iris", { 16, 8, 1, 2 } },
{ "Iris Pro", { 32, 8, 4, 4 } },
{ "default", { 8, 8, 2, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 4, 1 } },
{ "GeForce GT 650M", { 16, 16, 4, 2 } },
{ "GeForce GTX 1070", { 8, 16, 4, 1 } },
{ "GeForce GTX 1080", { 8, 32, 4, 1 } },
{ "GeForce GTX 480", { 8, 8, 4, 1 } },
{ "GeForce GTX 670", { 16, 32, 4, 1 } },
{ "GeForce GTX 680", { 32, 16, 4, 1 } },
{ "GeForce GTX 750", { 32, 8, 2, 2 } },
{ "GeForce GTX 750 Ti", { 16, 32, 2, 2 } },
{ "GeForce GTX 980", { 32, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 32, 8, 2, 4 } },
{ "GeForce GTX TITAN Black", { 8, 32, 4, 8 } },
{ "GeForce GTX TITAN X", { 32, 8, 1, 2 } },
{ "TITAN X (Pascal)", { 8, 32, 4, 1 } },
{ "Tesla K20m", { 8, 8, 4, 4 } },
{ "Tesla K40m", { 8, 8, 4, 2 } },
{ "default", { 8, 32, 4, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 4, 4 } },
}
},
}
};
// =================================================================================================
// Tuning parameters for the 'Copy' kernels in complex single precision
// (Precision::kComplexSingle).
// Each innermost list holds the values of COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT, in that
// order. The rows are grouped per device type and vendor; every group ends with a "default" device
// row and the last group is the vendor-independent default (kDeviceTypeAll, "default").
const Database::DatabaseEntry CopyComplexSingle = {
"Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "ATI Radeon HD 6750M", { 8, 8, 1, 1 } },
{ "Ellesmere", { 16, 16, 1, 4 } },
{ "Fiji", { 16, 8, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 8, 16, 1, 1 } },
{ "Pitcairn", { 8, 8, 1, 2 } },
{ "Tahiti", { 8, 8, 2, 2 } },
{ "Tonga", { 8, 32, 1, 2 } },
{ "Turks", { 32, 8, 4, 1 } },
{ "default", { 16, 8, 1, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } },
{ "Iris", { 16, 8, 1, 2 } },
{ "Iris Pro", { 32, 16, 1, 4 } },
{ "default", { 16, 8, 1, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 1, 1 } },
{ "GeForce GTX 1070", { 16, 8, 1, 1 } },
{ "GeForce GTX 1080", { 32, 8, 1, 2 } },
{ "GeForce GTX 480", { 16, 16, 1, 1 } },
{ "GeForce GTX 670", { 16, 8, 1, 1 } },
{ "GeForce GTX 750", { 16, 8, 1, 2 } },
{ "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
{ "GeForce GTX 980", { 8, 8, 1, 1 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 16, 2, 1 } },
{ "Tesla K20m", { 8, 8, 1, 4 } },
{ "Tesla K40m", { 16, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 1, 2 } },
}
},
}
};
// =================================================================================================
// Tuning database entry for the 'Copy' kernels at double precision (Precision::kDouble).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const Database::DatabaseEntry CopyDouble = {
"Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "Ellesmere", { 32, 8, 1, 4 } },
{ "Fiji", { 16, 8, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 32, 8, 2, 8 } },
{ "Pitcairn", { 32, 8, 1, 1 } },
{ "Tahiti", { 8, 32, 2, 1 } },
{ "Tonga", { 8, 32, 2, 4 } },
{ "default", { 16, 8, 2, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 8, 8, 2 } },
{ "default", { 16, 8, 8, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } },
{ "default", { 16, 8, 8, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } },
{ "default", { 8, 8, 8, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 16, 2, 1 } },
{ "GeForce GTX 1070", { 8, 8, 4, 1 } },
{ "GeForce GTX 1080", { 8, 8, 4, 1 } },
{ "GeForce GTX 480", { 8, 8, 2, 1 } },
{ "GeForce GTX 670", { 8, 8, 2, 1 } },
{ "GeForce GTX 680", { 16, 32, 2, 1 } },
{ "GeForce GTX 750", { 8, 16, 2, 1 } },
{ "GeForce GTX 750 Ti", { 16, 8, 2, 1 } },
{ "GeForce GTX 980", { 32, 8, 2, 1 } },
{ "GeForce GTX TITAN", { 16, 32, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 8, 2, 8 } },
{ "GeForce GTX TITAN X", { 32, 16, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 8, 2, 2 } },
{ "Tesla K20m", { 8, 8, 2, 1 } },
{ "Tesla K40m", { 8, 8, 2, 2 } },
{ "default", { 32, 32, 2, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 2, 1 } },
}
},
}
};
// =================================================================================================
// Tuning database entry for the 'Copy' kernels at complex double precision
// (Precision::kComplexDouble). Layout, as visible in the initializer: kernel family name,
// precision, the list of tuning parameter names, then one group per (device type, vendor) pair.
// Every device row carries four integers -- presumably the values for COPY_DIMX, COPY_DIMY,
// COPY_VW and COPY_WPT in that order (TODO confirm against the code that reads the entry).
// Each group closes with a "default" row.
const Database::DatabaseEntry CopyComplexDouble = {
"Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } },
{ "Ellesmere", { 8, 32, 1, 2 } },
{ "Fiji", { 8, 16, 1, 1 } },
{ "Hawaii", { 32, 8, 2, 8 } },
{ "Oland", { 8, 16, 1, 1 } },
{ "Pitcairn", { 16, 8, 1, 1 } },
{ "Tahiti", { 8, 16, 1, 1 } },
{ "Tonga", { 16, 8, 2, 1 } },
{ "default", { 8, 16, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 1, 2 } },
{ "default", { 32, 8, 1, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } },
{ "default", { 16, 8, 8, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 8, 8, 1, 1 } },
{ "GeForce GTX 1070", { 8, 32, 1, 4 } },
{ "GeForce GTX 1080", { 8, 8, 1, 1 } },
{ "GeForce GTX 480", { 16, 8, 1, 1 } },
{ "GeForce GTX 670", { 16, 8, 1, 1 } },
{ "GeForce GTX 680", { 8, 8, 1, 1 } },
{ "GeForce GTX 750", { 32, 8, 1, 1 } },
{ "GeForce GTX 750 Ti", { 16, 16, 1, 1 } },
{ "GeForce GTX 980", { 8, 8, 1, 1 } },
{ "GeForce GTX TITAN", { 16, 16, 1, 1 } },
{ "GeForce GTX TITAN Black", { 8, 8, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 8, 1, 2 } },
{ "Tesla K20m", { 8, 8, 1, 2 } },
{ "Tesla K40m", { 8, 8, 1, 1 } },
{ "default", { 8, 8, 1, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 1, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels.
//
// =================================================================================================
#include "database/kernels/copy/copy_16.hpp"
#include "database/kernels/copy/copy_32.hpp"
#include "database/kernels/copy/copy_3232.hpp"
#include "database/kernels/copy/copy_64.hpp"
#include "database/kernels/copy/copy_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels in
// half precision (precision code 16).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning database entry for the 'Copy' kernels at half precision (Precision::kHalf).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const DatabaseEntry CopyHalf = {
"Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 16, 8, 4, 4 } },
{ "default", { 16, 8, 4, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } },
{ "default", { 8, 32, 4, 8 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 4, 4 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,101 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels in
// single precision (precision code 32).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning database entry for the 'Copy' kernels at single precision (Precision::kSingle).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const DatabaseEntry CopySingle = {
"Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } },
{ "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
{ "Ellesmere", { 8, 8, 4, 8 } },
{ "Fiji", { 16, 16, 1, 2 } },
{ "Hawaii", { 32, 8, 2, 2 } },
{ "Oland", { 32, 8, 4, 2 } },
{ "Pitcairn", { 8, 16, 4, 1 } },
{ "Tahiti", { 32, 8, 2, 2 } },
{ "Tonga", { 32, 8, 4, 4 } },
{ "Turks", { 8, 8, 4, 2 } },
{ "default", { 8, 16, 4, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 2, 4 } },
{ "default", { 32, 8, 2, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } },
{ "default", { 32, 16, 8, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } },
{ "Iris", { 16, 8, 1, 2 } },
{ "Iris Pro", { 32, 8, 4, 4 } },
{ "default", { 8, 8, 2, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 4, 1 } },
{ "GeForce GT 650M", { 16, 16, 4, 2 } },
{ "GeForce GTX 1070", { 8, 16, 4, 1 } },
{ "GeForce GTX 1080", { 8, 32, 4, 1 } },
{ "GeForce GTX 480", { 8, 8, 4, 1 } },
{ "GeForce GTX 670", { 16, 32, 4, 1 } },
{ "GeForce GTX 680", { 32, 16, 4, 1 } },
{ "GeForce GTX 750", { 32, 8, 2, 2 } },
{ "GeForce GTX 750 Ti", { 16, 32, 2, 2 } },
{ "GeForce GTX 980", { 32, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 32, 8, 2, 4 } },
{ "GeForce GTX TITAN Black", { 8, 32, 4, 8 } },
{ "GeForce GTX TITAN X", { 32, 8, 1, 2 } },
{ "TITAN X (Pascal)", { 8, 32, 4, 1 } },
{ "Tesla K20m", { 8, 8, 4, 4 } },
{ "Tesla K40m", { 8, 8, 4, 2 } },
{ "default", { 8, 32, 4, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 4, 4 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,92 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels in
// complex single precision (precision code 3232).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning database entry for the 'Copy' kernels at complex single precision
// (Precision::kComplexSingle). Layout, as visible in the initializer: kernel family name,
// precision, the list of tuning parameter names, then one group per (device type, vendor) pair.
// Every device row carries four integers -- presumably the values for COPY_DIMX, COPY_DIMY,
// COPY_VW and COPY_WPT in that order (TODO confirm against the code that reads the entry).
// Each group closes with a "default" row.
const DatabaseEntry CopyComplexSingle = {
"Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "ATI Radeon HD 6750M", { 8, 8, 1, 1 } },
{ "Ellesmere", { 16, 16, 1, 4 } },
{ "Fiji", { 16, 8, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 8, 16, 1, 1 } },
{ "Pitcairn", { 8, 8, 1, 2 } },
{ "Tahiti", { 8, 8, 2, 2 } },
{ "Tonga", { 8, 32, 1, 2 } },
{ "Turks", { 32, 8, 4, 1 } },
{ "default", { 16, 8, 1, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } },
{ "Iris", { 16, 8, 1, 2 } },
{ "Iris Pro", { 32, 16, 1, 4 } },
{ "default", { 16, 8, 1, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 1, 1 } },
{ "GeForce GTX 1070", { 16, 8, 1, 1 } },
{ "GeForce GTX 1080", { 32, 8, 1, 2 } },
{ "GeForce GTX 480", { 16, 16, 1, 1 } },
{ "GeForce GTX 670", { 16, 8, 1, 1 } },
{ "GeForce GTX 750", { 16, 8, 1, 2 } },
{ "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
{ "GeForce GTX 980", { 8, 8, 1, 1 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 16, 2, 1 } },
{ "Tesla K20m", { 8, 8, 1, 4 } },
{ "Tesla K40m", { 16, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 1, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels in
// double precision (precision code 64).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning database entry for the 'Copy' kernels at double precision (Precision::kDouble).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for COPY_DIMX, COPY_DIMY, COPY_VW and COPY_WPT in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const DatabaseEntry CopyDouble = {
"Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "Ellesmere", { 32, 8, 1, 4 } },
{ "Fiji", { 16, 8, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 32, 8, 2, 8 } },
{ "Pitcairn", { 32, 8, 1, 1 } },
{ "Tahiti", { 8, 32, 2, 1 } },
{ "Tonga", { 8, 32, 2, 4 } },
{ "default", { 16, 8, 2, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 8, 8, 2 } },
{ "default", { 16, 8, 8, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } },
{ "default", { 16, 8, 8, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } },
{ "default", { 8, 8, 8, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 16, 2, 1 } },
{ "GeForce GTX 1070", { 8, 8, 4, 1 } },
{ "GeForce GTX 1080", { 8, 8, 4, 1 } },
{ "GeForce GTX 480", { 8, 8, 2, 1 } },
{ "GeForce GTX 670", { 8, 8, 2, 1 } },
{ "GeForce GTX 680", { 16, 32, 2, 1 } },
{ "GeForce GTX 750", { 8, 16, 2, 1 } },
{ "GeForce GTX 750 Ti", { 16, 8, 2, 1 } },
{ "GeForce GTX 980", { 32, 8, 2, 1 } },
{ "GeForce GTX TITAN", { 16, 32, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 8, 2, 8 } },
{ "GeForce GTX TITAN X", { 32, 16, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 8, 2, 2 } },
{ "Tesla K20m", { 8, 8, 2, 1 } },
{ "Tesla K40m", { 8, 8, 2, 2 } },
{ "default", { 32, 32, 2, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 2, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Copy' kernels in
// complex double precision (precision code 6464).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning database entry for the 'Copy' kernels at complex double precision
// (Precision::kComplexDouble). Layout, as visible in the initializer: kernel family name,
// precision, the list of tuning parameter names, then one group per (device type, vendor) pair.
// Every device row carries four integers -- presumably the values for COPY_DIMX, COPY_DIMY,
// COPY_VW and COPY_WPT in that order (TODO confirm against the code that reads the entry).
// Each group closes with a "default" row.
const DatabaseEntry CopyComplexDouble = {
"Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } },
{ "Ellesmere", { 8, 32, 1, 2 } },
{ "Fiji", { 8, 16, 1, 1 } },
{ "Hawaii", { 32, 8, 2, 8 } },
{ "Oland", { 8, 16, 1, 1 } },
{ "Pitcairn", { 16, 8, 1, 1 } },
{ "Tahiti", { 8, 16, 1, 1 } },
{ "Tonga", { 16, 8, 2, 1 } },
{ "default", { 8, 16, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 1, 2 } },
{ "default", { 32, 8, 1, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } },
{ "default", { 16, 8, 8, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } },
{ "default", { 32, 8, 8, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 8, 8, 1, 1 } },
{ "GeForce GTX 1070", { 8, 32, 1, 4 } },
{ "GeForce GTX 1080", { 8, 8, 1, 1 } },
{ "GeForce GTX 480", { 16, 8, 1, 1 } },
{ "GeForce GTX 670", { 16, 8, 1, 1 } },
{ "GeForce GTX 680", { 8, 8, 1, 1 } },
{ "GeForce GTX 750", { 32, 8, 1, 1 } },
{ "GeForce GTX 750 Ti", { 16, 16, 1, 1 } },
{ "GeForce GTX 980", { 8, 8, 1, 1 } },
{ "GeForce GTX TITAN", { 16, 16, 1, 1 } },
{ "GeForce GTX TITAN Black", { 8, 8, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 8, 1, 2 } },
{ "Tesla K20m", { 8, 8, 1, 2 } },
{ "Tesla K40m", { 8, 8, 1, 1 } },
{ "default", { 8, 8, 1, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 8, 1, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -15,7 +15,7 @@ namespace clblast {
namespace database {
// =================================================================================================
const Database::DatabaseEntry InvertHalf = {
const DatabaseEntry InvertHalf = {
"Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -27,7 +27,7 @@ const Database::DatabaseEntry InvertHalf = {
// =================================================================================================
const Database::DatabaseEntry InvertSingle = {
const DatabaseEntry InvertSingle = {
"Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -39,7 +39,7 @@ const Database::DatabaseEntry InvertSingle = {
// =================================================================================================
const Database::DatabaseEntry InvertComplexSingle = {
const DatabaseEntry InvertComplexSingle = {
"Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -51,7 +51,7 @@ const Database::DatabaseEntry InvertComplexSingle = {
// =================================================================================================
const Database::DatabaseEntry InvertDouble = {
const DatabaseEntry InvertDouble = {
"Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -63,7 +63,7 @@ const Database::DatabaseEntry InvertDouble = {
// =================================================================================================
const Database::DatabaseEntry InvertComplexDouble = {
const DatabaseEntry InvertComplexDouble = {
"Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {

View File

@ -1,362 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning database entry for the 'Pad' kernels at half precision (Precision::kHalf).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for PAD_DIMX, PAD_DIMY, PAD_WPTX and PAD_WPTY in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const Database::DatabaseEntry PadHalf = {
"Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 16, 8, 1, 2 } },
{ "default", { 16, 8, 1, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } },
{ "default", { 8, 8, 2, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } },
{ "default", { 16, 8, 4, 2 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 8, 4, 1 } },
}
},
}
};
// =================================================================================================
// Tuning database entry for the 'Pad' kernels at single precision (Precision::kSingle).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for PAD_DIMX, PAD_DIMY, PAD_WPTX and PAD_WPTY in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const Database::DatabaseEntry PadSingle = {
"Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "ATI Radeon HD 6750M", { 8, 16, 2, 1 } },
{ "Ellesmere", { 32, 8, 2, 2 } },
{ "Fiji", { 16, 16, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 4 } },
{ "Oland", { 8, 8, 1, 2 } },
{ "Pitcairn", { 32, 8, 1, 2 } },
{ "Tahiti", { 32, 8, 1, 2 } },
{ "Tonga", { 16, 16, 2, 2 } },
{ "Turks", { 32, 8, 2, 1 } },
{ "default", { 8, 16, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 1, 4 } },
{ "default", { 32, 8, 1, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } },
{ "Iris", { 32, 16, 2, 1 } },
{ "Iris Pro", { 16, 8, 2, 1 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } },
{ "default", { 32, 16, 2, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 8, 2, 1 } },
{ "GeForce GT 650M", { 32, 16, 2, 2 } },
{ "GeForce GTX 1070", { 16, 8, 1, 1 } },
{ "GeForce GTX 1080", { 16, 8, 1, 1 } },
{ "GeForce GTX 480", { 32, 8, 1, 4 } },
{ "GeForce GTX 670", { 32, 8, 2, 2 } },
{ "GeForce GTX 680", { 16, 8, 4, 1 } },
{ "GeForce GTX 750", { 32, 16, 4, 2 } },
{ "GeForce GTX 750 Ti", { 16, 8, 4, 1 } },
{ "GeForce GTX 980", { 16, 8, 1, 1 } },
{ "GeForce GTX TITAN", { 32, 8, 2, 1 } },
{ "GeForce GTX TITAN Black", { 32, 8, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 16, 1, 1 } },
{ "TITAN X (Pascal)", { 16, 8, 1, 2 } },
{ "Tesla K20m", { 32, 8, 2, 1 } },
{ "Tesla K40m", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } },
{ "default", { 32, 8, 2, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 2, 1 } },
}
},
}
};
// =================================================================================================
// Tuning database entry for the 'Pad' kernels at complex single precision
// (Precision::kComplexSingle). Layout, as visible in the initializer: kernel family name,
// precision, the list of tuning parameter names, then one group per (device type, vendor) pair.
// Every device row carries four integers -- presumably the values for PAD_DIMX, PAD_DIMY,
// PAD_WPTX and PAD_WPTY in that order (TODO confirm against the code that reads the entry).
// Each group closes with a "default" row.
const Database::DatabaseEntry PadComplexSingle = {
"Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
{ "Ellesmere", { 16, 16, 2, 4 } },
{ "Fiji", { 16, 8, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 8, 32, 1, 1 } },
{ "Pitcairn", { 8, 8, 1, 2 } },
{ "Tahiti", { 16, 16, 1, 1 } },
{ "Tonga", { 16, 8, 1, 2 } },
{ "Turks", { 16, 8, 4, 4 } },
{ "default", { 16, 8, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 1, 4 } },
{ "default", { 32, 8, 1, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } },
{ "Iris", { 32, 16, 2, 4 } },
{ "Iris Pro", { 32, 8, 2, 1 } },
{ "default", { 32, 8, 1, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 16, 1, 1 } },
{ "GeForce GTX 1070", { 8, 32, 1, 1 } },
{ "GeForce GTX 1080", { 32, 8, 1, 1 } },
{ "GeForce GTX 480", { 16, 8, 2, 1 } },
{ "GeForce GTX 670", { 16, 8, 1, 2 } },
{ "GeForce GTX 680", { 16, 32, 1, 2 } },
{ "GeForce GTX 750", { 32, 8, 2, 1 } },
{ "GeForce GTX 750 Ti", { 16, 8, 1, 1 } },
{ "GeForce GTX 980", { 16, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 16, 8, 2, 1 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 32, 32, 1, 2 } },
{ "Tesla K20m", { 32, 8, 1, 2 } },
{ "Tesla K40m", { 16, 8, 1, 1 } },
{ "default", { 32, 8, 1, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 1, 1 } },
}
},
}
};
// =================================================================================================
// Tuning database entry for the 'Pad' kernels at double precision (Precision::kDouble).
// Layout, as visible in the initializer: kernel family name, precision, the list of tuning
// parameter names, then one group per (device type, vendor) pair. Every device row carries four
// integers -- presumably the values for PAD_DIMX, PAD_DIMY, PAD_WPTX and PAD_WPTY in that order
// (TODO confirm against the code that reads the entry). Each group closes with a "default" row.
const Database::DatabaseEntry PadDouble = {
"Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "Ellesmere", { 8, 32, 2, 1 } },
{ "Fiji", { 8, 16, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 8, 32, 1, 1 } },
{ "Pitcairn", { 8, 8, 1, 2 } },
{ "Tahiti", { 32, 8, 1, 1 } },
{ "Tonga", { 32, 8, 4, 1 } },
{ "default", { 16, 16, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 4, 2 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
{ "default", { 32, 16, 4, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 8, 1, 1 } },
{ "GeForce GTX 1070", { 8, 8, 1, 1 } },
{ "GeForce GTX 1080", { 32, 32, 2, 1 } },
{ "GeForce GTX 480", { 16, 8, 1, 1 } },
{ "GeForce GTX 670", { 16, 16, 2, 1 } },
{ "GeForce GTX 680", { 32, 32, 1, 2 } },
{ "GeForce GTX 750", { 32, 16, 1, 1 } },
{ "GeForce GTX 750 Ti", { 8, 16, 1, 1 } },
{ "GeForce GTX 980", { 8, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 32, 8, 1, 1 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 32, 4, 1 } },
{ "Tesla K20m", { 32, 8, 1, 1 } },
{ "Tesla K40m", { 16, 8, 1, 2 } },
{ "default", { 32, 8, 1, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 1, 1 } },
}
},
}
};
// =================================================================================================
// Tuning database entry for the 'Pad' kernels at complex double precision
// (Precision::kComplexDouble). Layout, as visible in the initializer: kernel family name,
// precision, the list of tuning parameter names, then one group per (device type, vendor) pair.
// Every device row carries four integers -- presumably the values for PAD_DIMX, PAD_DIMY,
// PAD_WPTX and PAD_WPTY in that order (TODO confirm against the code that reads the entry).
// Each group closes with a "default" row.
const Database::DatabaseEntry PadComplexDouble = {
"Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } },
{ "Ellesmere", { 8, 16, 1, 2 } },
{ "Fiji", { 32, 8, 2, 1 } },
{ "Hawaii", { 32, 8, 1, 1 } },
{ "Oland", { 8, 16, 2, 1 } },
{ "Pitcairn", { 16, 8, 1, 1 } },
{ "Tahiti", { 8, 16, 1, 1 } },
{ "Tonga", { 8, 16, 1, 1 } },
{ "default", { 8, 16, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 8, 4, 1 } },
{ "default", { 16, 8, 4, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
{ "default", { 32, 8, 2, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 8, 8, 1, 1 } },
{ "GeForce GTX 1070", { 8, 8, 2, 2 } },
{ "GeForce GTX 1080", { 8, 8, 1, 1 } },
{ "GeForce GTX 480", { 16, 8, 1, 1 } },
{ "GeForce GTX 670", { 32, 8, 1, 1 } },
{ "GeForce GTX 680", { 8, 8, 1, 1 } },
{ "GeForce GTX 750", { 8, 8, 1, 1 } },
{ "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
{ "GeForce GTX 980", { 16, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 8, 32, 1, 2 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 4 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 16, 1, 1 } },
{ "Tesla K20m", { 8, 8, 1, 2 } },
{ "Tesla K40m", { 8, 8, 1, 1 } },
{ "default", { 16, 8, 1, 1 } },
}
},
// Group keyed on kDeviceTypeAll and the "default" vendor; presumably the last-resort fallback
// when no vendor group matches -- confirm in the lookup code.
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 1, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels.
//
// =================================================================================================
#include "database/kernels/pad/pad_16.hpp"
#include "database/kernels/pad/pad_32.hpp"
#include "database/kernels/pad/pad_3232.hpp"
#include "database/kernels/pad/pad_64.hpp"
#include "database/kernels/pad/pad_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels at
// half precision (Precision::kHalf, file suffix '16').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Pad' kernels at half precision (Precision::kHalf).
// Every device entry holds four values, in the order PAD_DIMX, PAD_DIMY, PAD_WPTX, PAD_WPTY.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadHalf = {
"Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 16, 8, 1, 2 } },
{ "default", { 16, 8, 1, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } },
{ "default", { 8, 8, 2, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } },
{ "default", { 16, 8, 4, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 8, 4, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,101 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels at
// single precision (Precision::kSingle, file suffix '32').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Pad' kernels at single precision (Precision::kSingle).
// Every device entry holds four values, in the order PAD_DIMX, PAD_DIMY, PAD_WPTX, PAD_WPTY.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadSingle = {
"Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "ATI Radeon HD 6750M", { 8, 16, 2, 1 } },
{ "Ellesmere", { 32, 8, 2, 2 } },
{ "Fiji", { 16, 16, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 4 } },
{ "Oland", { 8, 8, 1, 2 } },
{ "Pitcairn", { 32, 8, 1, 2 } },
{ "Tahiti", { 32, 8, 1, 2 } },
{ "Tonga", { 16, 16, 2, 2 } },
{ "Turks", { 32, 8, 2, 1 } },
{ "default", { 8, 16, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 1, 4 } },
{ "default", { 32, 8, 1, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } },
{ "Iris", { 32, 16, 2, 1 } },
{ "Iris Pro", { 16, 8, 2, 1 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } },
{ "default", { 32, 16, 2, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 8, 2, 1 } },
{ "GeForce GT 650M", { 32, 16, 2, 2 } },
{ "GeForce GTX 1070", { 16, 8, 1, 1 } },
{ "GeForce GTX 1080", { 16, 8, 1, 1 } },
{ "GeForce GTX 480", { 32, 8, 1, 4 } },
{ "GeForce GTX 670", { 32, 8, 2, 2 } },
{ "GeForce GTX 680", { 16, 8, 4, 1 } },
{ "GeForce GTX 750", { 32, 16, 4, 2 } },
{ "GeForce GTX 750 Ti", { 16, 8, 4, 1 } },
{ "GeForce GTX 980", { 16, 8, 1, 1 } },
{ "GeForce GTX TITAN", { 32, 8, 2, 1 } },
{ "GeForce GTX TITAN Black", { 32, 8, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 16, 1, 1 } },
{ "TITAN X (Pascal)", { 16, 8, 1, 2 } },
{ "Tesla K20m", { 32, 8, 2, 1 } },
{ "Tesla K40m", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } },
{ "default", { 32, 8, 2, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 2, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,100 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels at
// complex single precision (Precision::kComplexSingle, file suffix '3232').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Pad' kernels at complex single precision
// (Precision::kComplexSingle).
// Every device entry holds four values, in the order PAD_DIMX, PAD_DIMY, PAD_WPTX, PAD_WPTY.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadComplexSingle = {
"Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "ATI Radeon HD 6750M", { 16, 8, 2, 1 } },
{ "Ellesmere", { 16, 16, 2, 4 } },
{ "Fiji", { 16, 8, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 8, 32, 1, 1 } },
{ "Pitcairn", { 8, 8, 1, 2 } },
{ "Tahiti", { 16, 16, 1, 1 } },
{ "Tonga", { 16, 8, 1, 2 } },
{ "Turks", { 16, 8, 4, 4 } },
{ "default", { 16, 8, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 1, 4 } },
{ "default", { 32, 8, 1, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } },
{ "Iris", { 32, 16, 2, 4 } },
{ "Iris Pro", { 32, 8, 2, 1 } },
{ "default", { 32, 8, 1, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 16, 1, 1 } },
{ "GeForce GTX 1070", { 8, 32, 1, 1 } },
{ "GeForce GTX 1080", { 32, 8, 1, 1 } },
{ "GeForce GTX 480", { 16, 8, 2, 1 } },
{ "GeForce GTX 670", { 16, 8, 1, 2 } },
{ "GeForce GTX 680", { 16, 32, 1, 2 } },
{ "GeForce GTX 750", { 32, 8, 2, 1 } },
{ "GeForce GTX 750 Ti", { 16, 8, 1, 1 } },
{ "GeForce GTX 980", { 16, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 16, 8, 2, 1 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 32, 32, 1, 2 } },
{ "Tesla K20m", { 32, 8, 1, 2 } },
{ "Tesla K40m", { 16, 8, 1, 1 } },
{ "default", { 32, 8, 1, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 1, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels at
// double precision (Precision::kDouble, file suffix '64').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Pad' kernels at double precision (Precision::kDouble).
// Every device entry holds four values, in the order PAD_DIMX, PAD_DIMY, PAD_WPTX, PAD_WPTY.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadDouble = {
"Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } },
{ "Ellesmere", { 8, 32, 2, 1 } },
{ "Fiji", { 8, 16, 1, 2 } },
{ "Hawaii", { 32, 8, 1, 2 } },
{ "Oland", { 8, 32, 1, 1 } },
{ "Pitcairn", { 8, 8, 1, 2 } },
{ "Tahiti", { 32, 8, 1, 1 } },
{ "Tonga", { 32, 8, 4, 1 } },
{ "default", { 16, 16, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 8, 4, 2 } },
{ "default", { 32, 8, 4, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
{ "default", { 32, 16, 4, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 8, 1, 1 } },
{ "GeForce GTX 1070", { 8, 8, 1, 1 } },
{ "GeForce GTX 1080", { 32, 32, 2, 1 } },
{ "GeForce GTX 480", { 16, 8, 1, 1 } },
{ "GeForce GTX 670", { 16, 16, 2, 1 } },
{ "GeForce GTX 680", { 32, 32, 1, 2 } },
{ "GeForce GTX 750", { 32, 16, 1, 1 } },
{ "GeForce GTX 750 Ti", { 8, 16, 1, 1 } },
{ "GeForce GTX 980", { 8, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 32, 8, 1, 1 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 1 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 32, 4, 1 } },
{ "Tesla K20m", { 32, 8, 1, 1 } },
{ "Tesla K40m", { 16, 8, 1, 2 } },
{ "default", { 32, 8, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 1, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Pad' kernels at
// complex double precision (Precision::kComplexDouble, file suffix '6464').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Pad' kernels at complex double precision
// (Precision::kComplexDouble).
// Every device entry holds four values, in the order PAD_DIMX, PAD_DIMY, PAD_WPTX, PAD_WPTY.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadComplexDouble = {
"Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } },
{ "Ellesmere", { 8, 16, 1, 2 } },
{ "Fiji", { 32, 8, 2, 1 } },
{ "Hawaii", { 32, 8, 1, 1 } },
{ "Oland", { 8, 16, 2, 1 } },
{ "Pitcairn", { 16, 8, 1, 1 } },
{ "Tahiti", { 8, 16, 1, 1 } },
{ "Tonga", { 8, 16, 1, 1 } },
{ "default", { 8, 16, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 8, 4, 1 } },
{ "default", { 16, 8, 4, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } },
{ "default", { 32, 8, 2, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } },
{ "default", { 32, 8, 4, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 8, 8, 1, 1 } },
{ "GeForce GTX 1070", { 8, 8, 2, 2 } },
{ "GeForce GTX 1080", { 8, 8, 1, 1 } },
{ "GeForce GTX 480", { 16, 8, 1, 1 } },
{ "GeForce GTX 670", { 32, 8, 1, 1 } },
{ "GeForce GTX 680", { 8, 8, 1, 1 } },
{ "GeForce GTX 750", { 8, 8, 1, 1 } },
{ "GeForce GTX 750 Ti", { 16, 32, 1, 1 } },
{ "GeForce GTX 980", { 16, 16, 1, 1 } },
{ "GeForce GTX TITAN", { 8, 32, 1, 2 } },
{ "GeForce GTX TITAN Black", { 16, 8, 1, 4 } },
{ "GeForce GTX TITAN X", { 16, 8, 1, 1 } },
{ "TITAN X (Pascal)", { 8, 16, 1, 1 } },
{ "Tesla K20m", { 8, 8, 1, 2 } },
{ "Tesla K40m", { 8, 8, 1, 1 } },
{ "default", { 16, 8, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 8, 1, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,361 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the 'Padtranspose' kernels at half precision (Precision::kHalf).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const Database::DatabaseEntry PadtransposeHalf = {
"Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 0, 16, 4 } },
{ "default", { 0, 16, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } },
{ "default", { 0, 8, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 0, 8, 8 } },
{ "default", { 0, 8, 8 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 0, 8, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the 'Padtranspose' kernels at single precision (Precision::kSingle).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const Database::DatabaseEntry PadtransposeSingle = {
"Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
{ "ATI Radeon HD 6750M", { 1, 16, 1 } },
{ "Ellesmere", { 1, 8, 4 } },
{ "Fiji", { 0, 16, 2 } },
{ "Hawaii", { 1, 16, 4 } },
{ "Oland", { 0, 16, 4 } },
{ "Pitcairn", { 0, 16, 4 } },
{ "Tahiti", { 0, 16, 4 } },
{ "Tonga", { 0, 16, 2 } },
{ "Turks", { 1, 16, 1 } },
{ "default", { 0, 16, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 0, 8, 2 } },
{ "default", { 0, 8, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } },
{ "default", { 0, 8, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 16, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } },
{ "Iris", { 1, 16, 2 } },
{ "Iris Pro", { 1, 16, 2 } },
{ "default", { 1, 16, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } },
{ "default", { 0, 16, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 32, 2 } },
{ "GeForce GTX 1070", { 0, 16, 1 } },
{ "GeForce GTX 1080", { 1, 16, 2 } },
{ "GeForce GTX 480", { 1, 16, 2 } },
{ "GeForce GTX 670", { 1, 32, 2 } },
{ "GeForce GTX 680", { 1, 16, 2 } },
{ "GeForce GTX 750", { 1, 32, 2 } },
{ "GeForce GTX 750 Ti", { 1, 32, 2 } },
{ "GeForce GTX 980", { 0, 16, 1 } },
{ "GeForce GTX TITAN", { 1, 16, 2 } },
{ "GeForce GTX TITAN Black", { 1, 32, 2 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 1, 16, 2 } },
{ "Tesla K20m", { 1, 16, 2 } },
{ "Tesla K40m", { 1, 32, 2 } },
{ "default", { 1, 32, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 0, 8, 2 } },
{ "default", { 0, 8, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 16, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the 'Padtranspose' kernels at complex single precision
// (Precision::kComplexSingle).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const Database::DatabaseEntry PadtransposeComplexSingle = {
"Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
{ "ATI Radeon HD 6750M", { 1, 16, 1 } },
{ "Ellesmere", { 0, 8, 4 } },
{ "Fiji", { 1, 16, 2 } },
{ "Hawaii", { 0, 16, 2 } },
{ "Oland", { 0, 8, 4 } },
{ "Pitcairn", { 0, 8, 4 } },
{ "Tahiti", { 0, 16, 2 } },
{ "Tonga", { 0, 16, 2 } },
{ "Turks", { 0, 16, 4 } },
{ "default", { 0, 8, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 1, 16, 2 } },
{ "default", { 1, 16, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } },
{ "default", { 0, 8, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 16, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } },
{ "Iris", { 0, 16, 2 } },
{ "Iris Pro", { 1, 16, 2 } },
{ "default", { 1, 16, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 16, 1 } },
{ "GeForce GTX 1070", { 1, 16, 1 } },
{ "GeForce GTX 1080", { 0, 8, 1 } },
{ "GeForce GTX 480", { 1, 16, 1 } },
{ "GeForce GTX 670", { 1, 16, 1 } },
{ "GeForce GTX 680", { 1, 16, 1 } },
{ "GeForce GTX 750", { 1, 16, 2 } },
{ "GeForce GTX 750 Ti", { 1, 16, 1 } },
{ "GeForce GTX 980", { 0, 16, 1 } },
{ "GeForce GTX TITAN", { 1, 16, 1 } },
{ "GeForce GTX TITAN Black", { 0, 16, 1 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 1, 8, 1 } },
{ "Tesla K20m", { 0, 16, 1 } },
{ "Tesla K40m", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 0, 8, 4 } },
{ "default", { 0, 8, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 8, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the 'Padtranspose' kernels at double precision (Precision::kDouble).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const Database::DatabaseEntry PadtransposeDouble = {
"Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
{ "Ellesmere", { 0, 16, 4 } },
{ "Fiji", { 0, 16, 2 } },
{ "Hawaii", { 0, 16, 2 } },
{ "Oland", { 0, 16, 4 } },
{ "Pitcairn", { 0, 8, 4 } },
{ "Tahiti", { 1, 16, 2 } },
{ "Tonga", { 0, 8, 2 } },
{ "default", { 0, 16, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 0, 16, 2 } },
{ "default", { 0, 16, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } },
{ "default", { 1, 8, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
{ "default", { 0, 16, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 16, 1 } },
{ "GeForce GTX 1070", { 1, 16, 1 } },
{ "GeForce GTX 1080", { 0, 8, 1 } },
{ "GeForce GTX 480", { 1, 16, 1 } },
{ "GeForce GTX 670", { 1, 16, 1 } },
{ "GeForce GTX 680", { 1, 16, 1 } },
{ "GeForce GTX 750", { 1, 16, 2 } },
{ "GeForce GTX 750 Ti", { 1, 32, 2 } },
{ "GeForce GTX 980", { 1, 32, 1 } },
{ "GeForce GTX TITAN", { 0, 16, 1 } },
{ "GeForce GTX TITAN Black", { 0, 16, 1 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 0, 8, 1 } },
{ "Tesla K20m", { 0, 16, 1 } },
{ "Tesla K40m", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 16, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the 'Padtranspose' kernels at complex double precision
// (Precision::kComplexDouble).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const Database::DatabaseEntry PadtransposeComplexDouble = {
"Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } },
{ "Ellesmere", { 0, 8, 4 } },
{ "Fiji", { 0, 8, 2 } },
{ "Hawaii", { 0, 8, 4 } },
{ "Oland", { 0, 8, 4 } },
{ "Pitcairn", { 0, 8, 4 } },
{ "Tahiti", { 0, 8, 2 } },
{ "Tonga", { 0, 8, 2 } },
{ "default", { 0, 8, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 0, 8, 1 } },
{ "default", { 0, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } },
{ "default", { 0, 8, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
{ "default", { 0, 16, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 16, 1 } },
{ "GeForce GTX 1070", { 1, 16, 1 } },
{ "GeForce GTX 1080", { 1, 8, 1 } },
{ "GeForce GTX 480", { 1, 16, 1 } },
{ "GeForce GTX 670", { 1, 16, 1 } },
{ "GeForce GTX 680", { 1, 32, 1 } },
{ "GeForce GTX 750", { 1, 16, 1 } },
{ "GeForce GTX 750 Ti", { 1, 8, 2 } },
{ "GeForce GTX 980", { 0, 16, 1 } },
{ "GeForce GTX TITAN", { 1, 16, 1 } },
{ "GeForce GTX TITAN Black", { 0, 16, 1 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 1, 8, 1 } },
{ "Tesla K20m", { 1, 16, 1 } },
{ "Tesla K40m", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 0, 8, 2 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose' kernels.
//
// =================================================================================================
#include "database/kernels/padtranspose/padtranspose_16.hpp"
#include "database/kernels/padtranspose/padtranspose_32.hpp"
#include "database/kernels/padtranspose/padtranspose_3232.hpp"
#include "database/kernels/padtranspose/padtranspose_64.hpp"
#include "database/kernels/padtranspose/padtranspose_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose'
// kernels at half precision (Precision::kHalf, file suffix '16').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Padtranspose' kernels at half precision (Precision::kHalf).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadtransposeHalf = {
"Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 0, 16, 4 } },
{ "default", { 0, 16, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } },
{ "default", { 0, 8, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 0, 8, 8 } },
{ "default", { 0, 8, 8 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 0, 8, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,100 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose'
// kernels at single precision (Precision::kSingle, file suffix '32').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Padtranspose' kernels at single precision (Precision::kSingle).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadtransposeSingle = {
"Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
{ "ATI Radeon HD 6750M", { 1, 16, 1 } },
{ "Ellesmere", { 1, 8, 4 } },
{ "Fiji", { 0, 16, 2 } },
{ "Hawaii", { 1, 16, 4 } },
{ "Oland", { 0, 16, 4 } },
{ "Pitcairn", { 0, 16, 4 } },
{ "Tahiti", { 0, 16, 4 } },
{ "Tonga", { 0, 16, 2 } },
{ "Turks", { 1, 16, 1 } },
{ "default", { 0, 16, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 0, 8, 2 } },
{ "default", { 0, 8, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } },
{ "default", { 0, 8, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 16, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } },
{ "Iris", { 1, 16, 2 } },
{ "Iris Pro", { 1, 16, 2 } },
{ "default", { 1, 16, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } },
{ "default", { 0, 16, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 32, 2 } },
{ "GeForce GTX 1070", { 0, 16, 1 } },
{ "GeForce GTX 1080", { 1, 16, 2 } },
{ "GeForce GTX 480", { 1, 16, 2 } },
{ "GeForce GTX 670", { 1, 32, 2 } },
{ "GeForce GTX 680", { 1, 16, 2 } },
{ "GeForce GTX 750", { 1, 32, 2 } },
{ "GeForce GTX 750 Ti", { 1, 32, 2 } },
{ "GeForce GTX 980", { 0, 16, 1 } },
{ "GeForce GTX TITAN", { 1, 16, 2 } },
{ "GeForce GTX TITAN Black", { 1, 32, 2 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 1, 16, 2 } },
{ "Tesla K20m", { 1, 16, 2 } },
{ "Tesla K40m", { 1, 32, 2 } },
{ "default", { 1, 32, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 0, 8, 2 } },
{ "default", { 0, 8, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 16, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,100 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose'
// kernels at complex single precision (Precision::kComplexSingle, file suffix '3232').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Padtranspose' kernels at complex single precision
// (Precision::kComplexSingle).
// Every device entry holds three values, in the order PADTRA_PAD, PADTRA_TILE, PADTRA_WPT.
// Entries are grouped by device type and vendor; each group ends with its own "default" entry,
// and the final group (kDeviceTypeAll, "default") holds a single "default" entry.
// NOTE(review): the "default" entries presumably serve as fallbacks for devices not listed by
// name -- confirm against the database lookup code, which is not visible here.
const DatabaseEntry PadtransposeComplexSingle = {
"Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
{ "ATI Radeon HD 6750M", { 1, 16, 1 } },
{ "Ellesmere", { 0, 8, 4 } },
{ "Fiji", { 1, 16, 2 } },
{ "Hawaii", { 0, 16, 2 } },
{ "Oland", { 0, 8, 4 } },
{ "Pitcairn", { 0, 8, 4 } },
{ "Tahiti", { 0, 16, 2 } },
{ "Tonga", { 0, 16, 2 } },
{ "Turks", { 0, 16, 4 } },
{ "default", { 0, 8, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 1, 16, 2 } },
{ "default", { 1, 16, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } },
{ "default", { 0, 8, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 16, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } },
{ "Iris", { 0, 16, 2 } },
{ "Iris Pro", { 1, 16, 2 } },
{ "default", { 1, 16, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 16, 1 } },
{ "GeForce GTX 1070", { 1, 16, 1 } },
{ "GeForce GTX 1080", { 0, 8, 1 } },
{ "GeForce GTX 480", { 1, 16, 1 } },
{ "GeForce GTX 670", { 1, 16, 1 } },
{ "GeForce GTX 680", { 1, 16, 1 } },
{ "GeForce GTX 750", { 1, 16, 2 } },
{ "GeForce GTX 750 Ti", { 1, 16, 1 } },
{ "GeForce GTX 980", { 0, 16, 1 } },
{ "GeForce GTX TITAN", { 1, 16, 1 } },
{ "GeForce GTX TITAN Black", { 0, 16, 1 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 1, 8, 1 } },
{ "Tesla K20m", { 0, 16, 1 } },
{ "Tesla K40m", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 0, 8, 4 } },
{ "default", { 0, 8, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 8, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose'
// kernels in double precision (Precision::kDouble).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Padtranspose" kernel family at Precision::kDouble.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// PADTRA_PAD, PADTRA_TILE, PADTRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry PadtransposeDouble = {
"Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } },
{ "Ellesmere", { 0, 16, 4 } },
{ "Fiji", { 0, 16, 2 } },
{ "Hawaii", { 0, 16, 2 } },
{ "Oland", { 0, 16, 4 } },
{ "Pitcairn", { 0, 8, 4 } },
{ "Tahiti", { 1, 16, 2 } },
{ "Tonga", { 0, 8, 2 } },
{ "default", { 0, 16, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 0, 16, 2 } },
{ "default", { 0, 16, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } },
{ "default", { 1, 8, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
{ "default", { 0, 16, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 16, 1 } },
{ "GeForce GTX 1070", { 1, 16, 1 } },
{ "GeForce GTX 1080", { 0, 8, 1 } },
{ "GeForce GTX 480", { 1, 16, 1 } },
{ "GeForce GTX 670", { 1, 16, 1 } },
{ "GeForce GTX 680", { 1, 16, 1 } },
{ "GeForce GTX 750", { 1, 16, 2 } },
{ "GeForce GTX 750 Ti", { 1, 32, 2 } },
{ "GeForce GTX 980", { 1, 32, 1 } },
{ "GeForce GTX TITAN", { 0, 16, 1 } },
{ "GeForce GTX TITAN Black", { 0, 16, 1 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 0, 8, 1 } },
{ "Tesla K20m", { 0, 16, 1 } },
{ "Tesla K40m", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 16, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Padtranspose'
// kernels in complex double precision (Precision::kComplexDouble).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Padtranspose" kernel family at Precision::kComplexDouble.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// PADTRA_PAD, PADTRA_TILE, PADTRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry PadtransposeComplexDouble = {
"Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } },
{ "Ellesmere", { 0, 8, 4 } },
{ "Fiji", { 0, 8, 2 } },
{ "Hawaii", { 0, 8, 4 } },
{ "Oland", { 0, 8, 4 } },
{ "Pitcairn", { 0, 8, 4 } },
{ "Tahiti", { 0, 8, 2 } },
{ "Tonga", { 0, 8, 2 } },
{ "default", { 0, 8, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 0, 8, 1 } },
{ "default", { 0, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } },
{ "default", { 0, 8, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } },
{ "default", { 0, 16, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 16, 1 } },
{ "GeForce GTX 1070", { 1, 16, 1 } },
{ "GeForce GTX 1080", { 1, 8, 1 } },
{ "GeForce GTX 480", { 1, 16, 1 } },
{ "GeForce GTX 670", { 1, 16, 1 } },
{ "GeForce GTX 680", { 1, 32, 1 } },
{ "GeForce GTX 750", { 1, 16, 1 } },
{ "GeForce GTX 750 Ti", { 1, 8, 2 } },
{ "GeForce GTX 980", { 0, 16, 1 } },
{ "GeForce GTX TITAN", { 1, 16, 1 } },
{ "GeForce GTX TITAN Black", { 0, 16, 1 } },
{ "GeForce GTX TITAN X", { 1, 32, 1 } },
{ "TITAN X (Pascal)", { 1, 8, 1 } },
{ "Tesla K20m", { 1, 16, 1 } },
{ "Tesla K40m", { 1, 16, 1 } },
{ "default", { 1, 16, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 0, 8, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,350 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Transpose' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "Transpose" kernel family at Precision::kHalf.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry TransposeHalf = {
"Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 4, 0, 1, 8 } },
{ "default", { 4, 0, 1, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } },
{ "default", { 8, 1, 0, 8 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } },
{ "default", { 8, 0, 0, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 0, 1, 8 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Transpose" kernel family at Precision::kSingle.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry TransposeSingle = {
"Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } },
{ "ATI Radeon HD 6750M", { 8, 0, 1, 2 } },
{ "Ellesmere", { 16, 0, 1, 4 } },
{ "Fiji", { 16, 0, 1, 2 } },
{ "Hawaii", { 4, 0, 1, 8 } },
{ "Oland", { 8, 0, 1, 4 } },
{ "Pitcairn", { 16, 0, 1, 1 } },
{ "Tahiti", { 4, 0, 1, 4 } },
{ "Tonga", { 8, 1, 1, 2 } },
{ "Turks", { 8, 0, 1, 2 } },
{ "default", { 8, 0, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 8, 0, 1, 4 } },
{ "default", { 8, 0, 1, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
{ "default", { 4, 0, 0, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } },
{ "Iris", { 8, 1, 0, 4 } },
{ "Iris Pro", { 16, 1, 0, 4 } },
{ "default", { 16, 0, 0, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } },
{ "default", { 16, 1, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 1, 1, 2 } },
{ "GeForce GT 650M", { 8, 1, 0, 4 } },
{ "GeForce GTX 1070", { 8, 0, 1, 4 } },
{ "GeForce GTX 1080", { 4, 0, 0, 4 } },
{ "GeForce GTX 480", { 16, 1, 0, 2 } },
{ "GeForce GTX 670", { 16, 1, 1, 2 } },
{ "GeForce GTX 680", { 16, 1, 1, 2 } },
{ "GeForce GTX 750", { 4, 0, 0, 8 } },
{ "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
{ "GeForce GTX 980", { 16, 0, 0, 1 } },
{ "GeForce GTX TITAN", { 8, 1, 0, 4 } },
{ "GeForce GTX TITAN Black", { 8, 1, 0, 4 } },
{ "GeForce GTX TITAN X", { 16, 0, 0, 4 } },
{ "TITAN X (Pascal)", { 8, 0, 0, 4 } },
{ "Tesla K20m", { 8, 0, 0, 4 } },
{ "Tesla K40m", { 8, 1, 0, 4 } },
{ "default", { 8, 1, 0, 4 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } },
{ "default", { 8, 1, 1, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 0, 1, 4 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Transpose" kernel family at Precision::kComplexSingle.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry TransposeComplexSingle = {
"Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } },
{ "ATI Radeon HD 6750M", { 16, 1, 1, 1 } },
{ "Ellesmere", { 4, 0, 1, 4 } },
{ "Fiji", { 8, 1, 1, 2 } },
{ "Hawaii", { 16, 0, 1, 1 } },
{ "Oland", { 4, 0, 1, 2 } },
{ "Pitcairn", { 8, 0, 1, 1 } },
{ "Tahiti", { 16, 0, 1, 1 } },
{ "Tonga", { 16, 0, 1, 1 } },
{ "Turks", { 8, 1, 1, 4 } },
{ "default", { 8, 0, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 0, 0, 2 } },
{ "default", { 16, 0, 0, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } },
{ "default", { 4, 1, 0, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } },
{ "Iris", { 8, 0, 0, 2 } },
{ "Iris Pro", { 16, 1, 0, 2 } },
{ "default", { 16, 1, 0, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 1, 1, 1 } },
{ "GeForce GTX 1070", { 16, 1, 1, 1 } },
{ "GeForce GTX 1080", { 16, 1, 0, 1 } },
{ "GeForce GTX 480", { 16, 1, 0, 1 } },
{ "GeForce GTX 670", { 16, 1, 1, 1 } },
{ "GeForce GTX 680", { 16, 1, 1, 1 } },
{ "GeForce GTX 750", { 16, 1, 0, 1 } },
{ "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
{ "GeForce GTX 980", { 16, 1, 0, 1 } },
{ "GeForce GTX TITAN", { 16, 0, 0, 1 } },
{ "GeForce GTX TITAN Black", { 16, 1, 0, 1 } },
{ "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
{ "TITAN X (Pascal)", { 8, 1, 0, 2 } },
{ "Tesla K20m", { 16, 0, 0, 1 } },
{ "Tesla K40m", { 16, 1, 0, 1 } },
{ "default", { 16, 1, 0, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } },
{ "default", { 16, 1, 0, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 1, 1, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Transpose" kernel family at Precision::kDouble.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry TransposeDouble = {
"Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } },
{ "Ellesmere", { 4, 0, 1, 4 } },
{ "Fiji", { 8, 1, 1, 2 } },
{ "Hawaii", { 16, 0, 1, 1 } },
{ "Oland", { 8, 1, 1, 2 } },
{ "Pitcairn", { 4, 0, 1, 2 } },
{ "Tahiti", { 4, 1, 1, 4 } },
{ "Tonga", { 4, 0, 1, 4 } },
{ "default", { 4, 0, 1, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 8, 0, 0, 1 } },
{ "default", { 8, 0, 0, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
{ "default", { 4, 1, 0, 8 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } },
{ "default", { 32, 1, 0, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 1, 1, 2 } },
{ "GeForce GTX 1070", { 8, 0, 1, 2 } },
{ "GeForce GTX 1080", { 8, 0, 0, 2 } },
{ "GeForce GTX 480", { 8, 1, 0, 2 } },
{ "GeForce GTX 670", { 16, 1, 1, 2 } },
{ "GeForce GTX 680", { 16, 1, 1, 2 } },
{ "GeForce GTX 750", { 16, 1, 0, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
{ "GeForce GTX 980", { 16, 0, 0, 2 } },
{ "GeForce GTX TITAN", { 8, 0, 0, 2 } },
{ "GeForce GTX TITAN Black", { 16, 1, 0, 2 } },
{ "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
{ "TITAN X (Pascal)", { 16, 1, 0, 2 } },
{ "Tesla K20m", { 16, 1, 0, 2 } },
{ "Tesla K40m", { 16, 1, 1, 2 } },
{ "default", { 16, 1, 1, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 1, 1, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Transpose" kernel family at Precision::kComplexDouble.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry TransposeComplexDouble = {
"Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } },
{ "Ellesmere", { 16, 0, 1, 1 } },
{ "Fiji", { 16, 0, 1, 1 } },
{ "Hawaii", { 4, 0, 1, 2 } },
{ "Oland", { 16, 0, 1, 1 } },
{ "Pitcairn", { 4, 0, 1, 1 } },
{ "Tahiti", { 16, 0, 1, 1 } },
{ "Tonga", { 8, 1, 1, 2 } },
{ "default", { 16, 0, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 8, 0, 0, 1 } },
{ "default", { 8, 0, 0, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } },
{ "default", { 4, 0, 0, 8 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 1, 1, 1 } },
{ "GeForce GTX 1070", { 8, 0, 0, 1 } },
{ "GeForce GTX 1080", { 8, 0, 0, 1 } },
{ "GeForce GTX 480", { 8, 1, 0, 1 } },
{ "GeForce GTX 670", { 16, 1, 1, 1 } },
{ "GeForce GTX 680", { 16, 1, 1, 1 } },
{ "GeForce GTX 750", { 16, 1, 0, 1 } },
{ "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
{ "GeForce GTX 980", { 32, 1, 0, 1 } },
{ "GeForce GTX TITAN", { 16, 1, 0, 1 } },
{ "GeForce GTX TITAN Black", { 16, 0, 0, 1 } },
{ "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
{ "TITAN X (Pascal)", { 8, 0, 0, 1 } },
{ "Tesla K20m", { 16, 1, 0, 1 } },
{ "Tesla K40m", { 16, 1, 0, 1 } },
{ "default", { 16, 1, 0, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 1, 1, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Transpose' kernels.
//
// =================================================================================================
#include "database/kernels/transpose/transpose_16.hpp"
#include "database/kernels/transpose/transpose_32.hpp"
#include "database/kernels/transpose/transpose_3232.hpp"
#include "database/kernels/transpose/transpose_64.hpp"
#include "database/kernels/transpose/transpose_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Transpose'
// kernels in half precision (Precision::kHalf).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Transpose" kernel family at Precision::kHalf.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry TransposeHalf = {
"Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 4, 0, 1, 8 } },
{ "default", { 4, 0, 1, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } },
{ "default", { 8, 1, 0, 8 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } },
{ "default", { 8, 0, 0, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 0, 1, 8 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,101 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Transpose'
// kernels in single precision (Precision::kSingle).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Transpose" kernel family at Precision::kSingle.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry TransposeSingle = {
"Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } },
{ "ATI Radeon HD 6750M", { 8, 0, 1, 2 } },
{ "Ellesmere", { 16, 0, 1, 4 } },
{ "Fiji", { 16, 0, 1, 2 } },
{ "Hawaii", { 4, 0, 1, 8 } },
{ "Oland", { 8, 0, 1, 4 } },
{ "Pitcairn", { 16, 0, 1, 1 } },
{ "Tahiti", { 4, 0, 1, 4 } },
{ "Tonga", { 8, 1, 1, 2 } },
{ "Turks", { 8, 0, 1, 2 } },
{ "default", { 8, 0, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 8, 0, 1, 4 } },
{ "default", { 8, 0, 1, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
{ "default", { 4, 0, 0, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } },
{ "Iris", { 8, 1, 0, 4 } },
{ "Iris Pro", { 16, 1, 0, 4 } },
{ "default", { 16, 0, 0, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } },
{ "default", { 16, 1, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 1, 1, 2 } },
{ "GeForce GT 650M", { 8, 1, 0, 4 } },
{ "GeForce GTX 1070", { 8, 0, 1, 4 } },
{ "GeForce GTX 1080", { 4, 0, 0, 4 } },
{ "GeForce GTX 480", { 16, 1, 0, 2 } },
{ "GeForce GTX 670", { 16, 1, 1, 2 } },
{ "GeForce GTX 680", { 16, 1, 1, 2 } },
{ "GeForce GTX 750", { 4, 0, 0, 8 } },
{ "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
{ "GeForce GTX 980", { 16, 0, 0, 1 } },
{ "GeForce GTX TITAN", { 8, 1, 0, 4 } },
{ "GeForce GTX TITAN Black", { 8, 1, 0, 4 } },
{ "GeForce GTX TITAN X", { 16, 0, 0, 4 } },
{ "TITAN X (Pascal)", { 8, 0, 0, 4 } },
{ "Tesla K20m", { 8, 0, 0, 4 } },
{ "Tesla K40m", { 8, 1, 0, 4 } },
{ "default", { 8, 1, 0, 4 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } },
{ "default", { 8, 1, 1, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 0, 1, 4 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,94 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Transpose'
// kernels in complex single precision (Precision::kComplexSingle).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Transpose" kernel family at Precision::kComplexSingle.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry TransposeComplexSingle = {
"Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } },
{ "ATI Radeon HD 6750M", { 16, 1, 1, 1 } },
{ "Ellesmere", { 4, 0, 1, 4 } },
{ "Fiji", { 8, 1, 1, 2 } },
{ "Hawaii", { 16, 0, 1, 1 } },
{ "Oland", { 4, 0, 1, 2 } },
{ "Pitcairn", { 8, 0, 1, 1 } },
{ "Tahiti", { 16, 0, 1, 1 } },
{ "Tonga", { 16, 0, 1, 1 } },
{ "Turks", { 8, 1, 1, 4 } },
{ "default", { 8, 0, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 0, 0, 2 } },
{ "default", { 16, 0, 0, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } },
{ "default", { 4, 1, 0, 8 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } },
{ "Iris", { 8, 0, 0, 2 } },
{ "Iris Pro", { 16, 1, 0, 2 } },
{ "default", { 16, 1, 0, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 1, 1, 1 } },
{ "GeForce GTX 1070", { 16, 1, 1, 1 } },
{ "GeForce GTX 1080", { 16, 1, 0, 1 } },
{ "GeForce GTX 480", { 16, 1, 0, 1 } },
{ "GeForce GTX 670", { 16, 1, 1, 1 } },
{ "GeForce GTX 680", { 16, 1, 1, 1 } },
{ "GeForce GTX 750", { 16, 1, 0, 1 } },
{ "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
{ "GeForce GTX 980", { 16, 1, 0, 1 } },
{ "GeForce GTX TITAN", { 16, 0, 0, 1 } },
{ "GeForce GTX TITAN Black", { 16, 1, 0, 1 } },
{ "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
{ "TITAN X (Pascal)", { 8, 1, 0, 2 } },
{ "Tesla K20m", { 16, 0, 0, 1 } },
{ "Tesla K40m", { 16, 1, 0, 1 } },
{ "default", { 16, 1, 0, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } },
{ "default", { 16, 1, 0, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 1, 1, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Transpose'
// kernels in double precision (Precision::kDouble).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Transpose" kernel family at Precision::kDouble.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry TransposeDouble = {
"Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } },
{ "Ellesmere", { 4, 0, 1, 4 } },
{ "Fiji", { 8, 1, 1, 2 } },
{ "Hawaii", { 16, 0, 1, 1 } },
{ "Oland", { 8, 1, 1, 2 } },
{ "Pitcairn", { 4, 0, 1, 2 } },
{ "Tahiti", { 4, 1, 1, 4 } },
{ "Tonga", { 4, 0, 1, 4 } },
{ "default", { 4, 0, 1, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 8, 0, 0, 1 } },
{ "default", { 8, 0, 0, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } },
{ "default", { 4, 1, 0, 8 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } },
{ "default", { 32, 1, 0, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 1, 1, 2 } },
{ "GeForce GTX 1070", { 8, 0, 1, 2 } },
{ "GeForce GTX 1080", { 8, 0, 0, 2 } },
{ "GeForce GTX 480", { 8, 1, 0, 2 } },
{ "GeForce GTX 670", { 16, 1, 1, 2 } },
{ "GeForce GTX 680", { 16, 1, 1, 2 } },
{ "GeForce GTX 750", { 16, 1, 0, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1, 0, 2 } },
{ "GeForce GTX 980", { 16, 0, 0, 2 } },
{ "GeForce GTX TITAN", { 8, 0, 0, 2 } },
{ "GeForce GTX TITAN Black", { 16, 1, 0, 2 } },
{ "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
{ "TITAN X (Pascal)", { 16, 1, 0, 2 } },
{ "Tesla K20m", { 16, 1, 0, 2 } },
{ "Tesla K40m", { 16, 1, 1, 2 } },
{ "default", { 16, 1, 1, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 1, 1, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,74 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Transpose'
// kernels in complex double precision (Precision::kComplexDouble).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Transpose" kernel family at Precision::kComplexDouble.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// TRA_DIM, TRA_PAD, TRA_SHUFFLE, TRA_WPT -- confirm against the DatabaseEntry definition.
const DatabaseEntry TransposeComplexDouble = {
"Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } },
{ "Ellesmere", { 16, 0, 1, 1 } },
{ "Fiji", { 16, 0, 1, 1 } },
{ "Hawaii", { 4, 0, 1, 2 } },
{ "Oland", { 16, 0, 1, 1 } },
{ "Pitcairn", { 4, 0, 1, 1 } },
{ "Tahiti", { 16, 0, 1, 1 } },
{ "Tonga", { 8, 1, 1, 2 } },
{ "default", { 16, 0, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 8, 0, 0, 1 } },
{ "default", { 8, 0, 0, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } },
{ "default", { 4, 0, 0, 8 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 1, 1, 1 } },
{ "GeForce GTX 1070", { 8, 0, 0, 1 } },
{ "GeForce GTX 1080", { 8, 0, 0, 1 } },
{ "GeForce GTX 480", { 8, 1, 0, 1 } },
{ "GeForce GTX 670", { 16, 1, 1, 1 } },
{ "GeForce GTX 680", { 16, 1, 1, 1 } },
{ "GeForce GTX 750", { 16, 1, 0, 1 } },
{ "GeForce GTX 750 Ti", { 16, 1, 0, 1 } },
{ "GeForce GTX 980", { 32, 1, 0, 1 } },
{ "GeForce GTX TITAN", { 16, 1, 0, 1 } },
{ "GeForce GTX TITAN Black", { 16, 0, 0, 1 } },
{ "GeForce GTX TITAN X", { 32, 1, 0, 1 } },
{ "TITAN X (Pascal)", { 8, 0, 0, 1 } },
{ "Tesla K20m", { 16, 1, 0, 1 } },
{ "Tesla K40m", { 16, 1, 0, 1 } },
{ "default", { 16, 1, 0, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 16, 1, 1, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,362 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "Xaxpy" kernel family at Precision::kHalf.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// VW, WGS, WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry XaxpyHalf = {
"Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 4, 128, 4 } },
{ "default", { 4, 128, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } },
{ "default", { 8, 64, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 8, 64, 1 } },
{ "default", { 8, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xaxpy" kernel family at Precision::kSingle.
// Rows are grouped by device type and vendor; every group ends with a "default" row, and the
// last group (kDeviceTypeAll, "default") holds a single catch-all row.
// NOTE(review): each row's values presumably map positionally onto the parameter names
// VW, WGS, WPT -- confirm against Database::DatabaseEntry.
const Database::DatabaseEntry XaxpySingle = {
"Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
{ "ATI Radeon HD 6750M", { 1, 256, 2 } },
{ "Ellesmere", { 1, 64, 4 } },
{ "Fiji", { 4, 64, 1 } },
{ "Hawaii", { 2, 64, 2 } },
{ "Oland", { 1, 128, 1 } },
{ "Pitcairn", { 2, 128, 1 } },
{ "Tahiti", { 2, 64, 1 } },
{ "Tonga", { 1, 256, 8 } },
{ "Turks", { 2, 256, 1 } },
{ "default", { 2, 256, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 4, 256, 1 } },
{ "default", { 4, 256, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } },
{ "default", { 8, 512, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 128, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } },
{ "Iris", { 1, 64, 1 } },
{ "Iris Pro", { 1, 128, 2 } },
{ "default", { 4, 256, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } },
{ "default", { 2, 1024, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 2, 64, 1 } },
{ "GeForce GT 650M", { 2, 1024, 1 } },
{ "GeForce GTX 1070", { 1, 64, 4 } },
{ "GeForce GTX 1080", { 1, 256, 1 } },
{ "GeForce GTX 480", { 2, 128, 1 } },
{ "GeForce GTX 670", { 2, 64, 1 } },
{ "GeForce GTX 680", { 1, 128, 1 } },
{ "GeForce GTX 750", { 1, 64, 1 } },
{ "GeForce GTX 750 Ti", { 2, 64, 1 } },
{ "GeForce GTX 980", { 1, 1024, 1 } },
{ "GeForce GTX TITAN", { 4, 256, 1 } },
{ "GeForce GTX TITAN Black", { 4, 128, 4 } },
{ "GeForce GTX TITAN X", { 1, 64, 1 } },
{ "TITAN X (Pascal)", { 4, 128, 1 } },
{ "Tesla K20m", { 4, 128, 1 } },
{ "Tesla K40m", { 4, 128, 1 } },
{ "default", { 4, 1024, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 4, 128, 2 } },
{ "default", { 4, 128, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 4, 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kComplexSingle.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
const Database::DatabaseEntry XaxpyComplexSingle = {
"Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } },
{ "ATI Radeon HD 6750M", { 1, 64, 1 } },
{ "Ellesmere", { 2, 256, 1 } },
{ "Fiji", { 1, 128, 2 } },
{ "Hawaii", { 1, 128, 2 } },
{ "Oland", { 1, 128, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 256, 8 } },
{ "Turks", { 2, 256, 1 } },
{ "default", { 1, 128, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 1, 256, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } },
{ "default", { 8, 1024, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 4, 64, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } },
{ "Iris", { 2, 128, 1 } },
{ "Iris Pro", { 1, 256, 8 } },
{ "default", { 4, 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
{ "default", { 1, 1024, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 512, 1 } },
{ "GeForce GTX 1070", { 1, 64, 2 } },
{ "GeForce GTX 1080", { 2, 64, 1 } },
{ "GeForce GTX 480", { 1, 256, 1 } },
{ "GeForce GTX 670", { 1, 256, 1 } },
{ "GeForce GTX 680", { 1, 256, 1 } },
{ "GeForce GTX 750", { 1, 512, 1 } },
{ "GeForce GTX 750 Ti", { 1, 512, 1 } },
{ "GeForce GTX 980", { 1, 64, 1 } },
{ "GeForce GTX TITAN", { 1, 256, 1 } },
{ "GeForce GTX TITAN Black", { 1, 128, 2 } },
{ "GeForce GTX TITAN X", { 1, 512, 1 } },
{ "TITAN X (Pascal)", { 2, 512, 1 } },
{ "Tesla K20m", { 1, 128, 1 } },
{ "Tesla K40m", { 1, 128, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 128, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kDouble.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
// NOTE(review): this precision has no Intel GPU or QUALCOMM group; such devices presumably fall
// through to the catch-all group - the lookup logic is not visible here, confirm.
const Database::DatabaseEntry XaxpyDouble = {
"Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
{ "Ellesmere", { 2, 64, 4 } },
{ "Fiji", { 2, 64, 4 } },
{ "Hawaii", { 1, 64, 2 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 128, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 128, 4 } },
{ "default", { 2, 64, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 2, 128, 2 } },
{ "default", { 2, 128, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } },
{ "default", { 8, 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } },
{ "default", { 2, 512, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 64, 1 } },
{ "GeForce GTX 1070", { 1, 64, 8 } },
{ "GeForce GTX 1080", { 1, 128, 1 } },
{ "GeForce GTX 480", { 1, 128, 1 } },
{ "GeForce GTX 670", { 1, 64, 1 } },
{ "GeForce GTX 680", { 1, 64, 1 } },
{ "GeForce GTX 750", { 1, 128, 1 } },
{ "GeForce GTX 750 Ti", { 1, 256, 2 } },
{ "GeForce GTX 980", { 1, 256, 1 } },
{ "GeForce GTX TITAN", { 2, 1024, 1 } },
{ "GeForce GTX TITAN Black", { 2, 128, 1 } },
{ "GeForce GTX TITAN X", { 1, 512, 1 } },
{ "TITAN X (Pascal)", { 2, 512, 1 } },
{ "Tesla K20m", { 2, 128, 1 } },
{ "Tesla K40m", { 2, 128, 1 } },
{ "default", { 1, 128, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 256, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kComplexDouble.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
// NOTE(review): this precision has no Intel GPU or QUALCOMM group; such devices presumably fall
// through to the catch-all group - the lookup logic is not visible here, confirm.
const Database::DatabaseEntry XaxpyComplexDouble = {
"Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
{ "Ellesmere", { 1, 128, 1 } },
{ "Fiji", { 1, 64, 1 } },
{ "Hawaii", { 2, 64, 1 } },
{ "Oland", { 1, 256, 1 } },
{ "Pitcairn", { 1, 128, 1 } },
{ "Tahiti", { 1, 128, 1 } },
{ "Tonga", { 1, 64, 1 } },
{ "default", { 1, 128, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 1, 64, 8 } },
{ "default", { 1, 64, 8 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } },
{ "default", { 8, 256, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
{ "default", { 1, 1024, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 64, 1 } },
{ "GeForce GTX 1070", { 1, 64, 2 } },
{ "GeForce GTX 1080", { 1, 256, 1 } },
{ "GeForce GTX 480", { 1, 128, 1 } },
{ "GeForce GTX 670", { 1, 256, 1 } },
{ "GeForce GTX 680", { 1, 64, 1 } },
{ "GeForce GTX 750", { 1, 1024, 1 } },
{ "GeForce GTX 750 Ti", { 1, 64, 2 } },
{ "GeForce GTX 980", { 1, 1024, 1 } },
{ "GeForce GTX TITAN", { 1, 64, 4 } },
{ "GeForce GTX TITAN Black", { 1, 128, 4 } },
{ "GeForce GTX TITAN X", { 1, 1024, 1 } },
{ "TITAN X (Pascal)", { 1, 256, 2 } },
{ "Tesla K20m", { 1, 64, 1 } },
{ "Tesla K40m", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 256, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels.
//
// =================================================================================================
#include "database/kernels/xaxpy/xaxpy_16.hpp"
#include "database/kernels/xaxpy/xaxpy_32.hpp"
#include "database/kernels/xaxpy/xaxpy_3232.hpp"
#include "database/kernels/xaxpy/xaxpy_64.hpp"
#include "database/kernels/xaxpy/xaxpy_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels at
// half precision (Precision::kHalf; the '_16' file suffix denotes this precision).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kHalf.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Only AMD, Intel and QUALCOMM GPU groups are present for this precision, followed by the
// vendor-independent catch-all group (kDeviceTypeAll, "default"). Each group ends with a
// "default" row.
const DatabaseEntry XaxpyHalf = {
"Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 4, 128, 4 } },
{ "default", { 4, 128, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } },
{ "default", { 8, 64, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 8, 64, 1 } },
{ "default", { 8, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 8, 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,101 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels at
// single precision (Precision::kSingle; the '_32' file suffix denotes this precision).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kSingle.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
const DatabaseEntry XaxpySingle = {
"Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
{ "ATI Radeon HD 6750M", { 1, 256, 2 } },
{ "Ellesmere", { 1, 64, 4 } },
{ "Fiji", { 4, 64, 1 } },
{ "Hawaii", { 2, 64, 2 } },
{ "Oland", { 1, 128, 1 } },
{ "Pitcairn", { 2, 128, 1 } },
{ "Tahiti", { 2, 64, 1 } },
{ "Tonga", { 1, 256, 8 } },
{ "Turks", { 2, 256, 1 } },
{ "default", { 2, 256, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 4, 256, 1 } },
{ "default", { 4, 256, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } },
{ "default", { 8, 512, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 128, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } },
{ "Iris", { 1, 64, 1 } },
{ "Iris Pro", { 1, 128, 2 } },
{ "default", { 4, 256, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } },
{ "default", { 2, 1024, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 2, 64, 1 } },
{ "GeForce GT 650M", { 2, 1024, 1 } },
{ "GeForce GTX 1070", { 1, 64, 4 } },
{ "GeForce GTX 1080", { 1, 256, 1 } },
{ "GeForce GTX 480", { 2, 128, 1 } },
{ "GeForce GTX 670", { 2, 64, 1 } },
{ "GeForce GTX 680", { 1, 128, 1 } },
{ "GeForce GTX 750", { 1, 64, 1 } },
{ "GeForce GTX 750 Ti", { 2, 64, 1 } },
{ "GeForce GTX 980", { 1, 1024, 1 } },
{ "GeForce GTX TITAN", { 4, 256, 1 } },
{ "GeForce GTX TITAN Black", { 4, 128, 4 } },
{ "GeForce GTX TITAN X", { 1, 64, 1 } },
{ "TITAN X (Pascal)", { 4, 128, 1 } },
{ "Tesla K20m", { 4, 128, 1 } },
{ "Tesla K40m", { 4, 128, 1 } },
{ "default", { 4, 1024, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 4, 128, 2 } },
{ "default", { 4, 128, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 4, 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,100 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels at
// complex single precision (Precision::kComplexSingle; the '_3232' file suffix denotes this).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kComplexSingle.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
const DatabaseEntry XaxpyComplexSingle = {
"Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } },
{ "ATI Radeon HD 6750M", { 1, 64, 1 } },
{ "Ellesmere", { 2, 256, 1 } },
{ "Fiji", { 1, 128, 2 } },
{ "Hawaii", { 1, 128, 2 } },
{ "Oland", { 1, 128, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 256, 8 } },
{ "Turks", { 2, 256, 1 } },
{ "default", { 1, 128, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 1, 256, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } },
{ "default", { 8, 1024, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 4, 64, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } },
{ "Iris", { 2, 128, 1 } },
{ "Iris Pro", { 1, 256, 8 } },
{ "default", { 4, 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
{ "default", { 1, 1024, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 512, 1 } },
{ "GeForce GTX 1070", { 1, 64, 2 } },
{ "GeForce GTX 1080", { 2, 64, 1 } },
{ "GeForce GTX 480", { 1, 256, 1 } },
{ "GeForce GTX 670", { 1, 256, 1 } },
{ "GeForce GTX 680", { 1, 256, 1 } },
{ "GeForce GTX 750", { 1, 512, 1 } },
{ "GeForce GTX 750 Ti", { 1, 512, 1 } },
{ "GeForce GTX 980", { 1, 64, 1 } },
{ "GeForce GTX TITAN", { 1, 256, 1 } },
{ "GeForce GTX TITAN Black", { 1, 128, 2 } },
{ "GeForce GTX TITAN X", { 1, 512, 1 } },
{ "TITAN X (Pascal)", { 2, 512, 1 } },
{ "Tesla K20m", { 1, 128, 1 } },
{ "Tesla K40m", { 1, 128, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 128, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels at
// double precision (Precision::kDouble; the '_64' file suffix denotes this precision).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kDouble.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
// NOTE(review): this precision has no Intel GPU or QUALCOMM group; such devices presumably fall
// through to the catch-all group - the lookup logic is not visible here, confirm.
const DatabaseEntry XaxpyDouble = {
"Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
{ "Ellesmere", { 2, 64, 4 } },
{ "Fiji", { 2, 64, 4 } },
{ "Hawaii", { 1, 64, 2 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 128, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 128, 4 } },
{ "default", { 2, 64, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 2, 128, 2 } },
{ "default", { 2, 128, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } },
{ "default", { 8, 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } },
{ "default", { 2, 512, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 64, 1 } },
{ "GeForce GTX 1070", { 1, 64, 8 } },
{ "GeForce GTX 1080", { 1, 128, 1 } },
{ "GeForce GTX 480", { 1, 128, 1 } },
{ "GeForce GTX 670", { 1, 64, 1 } },
{ "GeForce GTX 680", { 1, 64, 1 } },
{ "GeForce GTX 750", { 1, 128, 1 } },
{ "GeForce GTX 750 Ti", { 1, 256, 2 } },
{ "GeForce GTX 980", { 1, 256, 1 } },
{ "GeForce GTX TITAN", { 2, 1024, 1 } },
{ "GeForce GTX TITAN Black", { 2, 128, 1 } },
{ "GeForce GTX TITAN X", { 1, 512, 1 } },
{ "TITAN X (Pascal)", { 2, 512, 1 } },
{ "Tesla K20m", { 2, 128, 1 } },
{ "Tesla K40m", { 2, 128, 1 } },
{ "default", { 1, 128, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 256, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xaxpy' kernels at
// complex double precision (Precision::kComplexDouble; the '_6464' file suffix denotes this).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xaxpy" kernel at Precision::kComplexDouble.
// Every device row carries three values, one per parameter name listed below (VW, WGS, WPT);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
// NOTE(review): this precision has no Intel GPU or QUALCOMM group; such devices presumably fall
// through to the catch-all group - the lookup logic is not visible here, confirm.
const DatabaseEntry XaxpyComplexDouble = {
"Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
{ "Ellesmere", { 1, 128, 1 } },
{ "Fiji", { 1, 64, 1 } },
{ "Hawaii", { 2, 64, 1 } },
{ "Oland", { 1, 256, 1 } },
{ "Pitcairn", { 1, 128, 1 } },
{ "Tahiti", { 1, 128, 1 } },
{ "Tonga", { 1, 64, 1 } },
{ "default", { 1, 128, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 1, 64, 8 } },
{ "default", { 1, 64, 8 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } },
{ "default", { 8, 256, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } },
{ "default", { 1, 1024, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 64, 1 } },
{ "GeForce GTX 1070", { 1, 64, 2 } },
{ "GeForce GTX 1080", { 1, 256, 1 } },
{ "GeForce GTX 480", { 1, 128, 1 } },
{ "GeForce GTX 670", { 1, 256, 1 } },
{ "GeForce GTX 680", { 1, 64, 1 } },
{ "GeForce GTX 750", { 1, 1024, 1 } },
{ "GeForce GTX 750 Ti", { 1, 64, 2 } },
{ "GeForce GTX 980", { 1, 1024, 1 } },
{ "GeForce GTX TITAN", { 1, 64, 4 } },
{ "GeForce GTX TITAN Black", { 1, 128, 4 } },
{ "GeForce GTX TITAN X", { 1, 1024, 1 } },
{ "TITAN X (Pascal)", { 1, 256, 2 } },
{ "Tesla K20m", { 1, 64, 1 } },
{ "Tesla K40m", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 256, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,292 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xdot' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "Xdot" kernel at Precision::kHalf.
// Every device row carries two values, one per parameter name listed below (WGS1, WGS2);
// presumably in that order - confirm against the DatabaseEntry definition.
// Only AMD, Intel and QUALCOMM GPU groups are present for this precision, followed by the
// vendor-independent catch-all group (kDeviceTypeAll, "default"). Each group ends with a
// "default" row.
const Database::DatabaseEntry XdotHalf = {
"Xdot", Precision::kHalf, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 256, 64 } },
{ "default", { 256, 64 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } },
{ "default", { 128, 32 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 64 } },
{ "default", { 64, 64 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 64 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xdot" kernel at Precision::kSingle.
// Every device row carries two values, one per parameter name listed below (WGS1, WGS2);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
const Database::DatabaseEntry XdotSingle = {
"Xdot", Precision::kSingle, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 128, 32 } },
{ "ATI Radeon HD 6750M", { 256, 32 } },
{ "Ellesmere", { 128, 32 } },
{ "Fiji", { 256, 32 } },
{ "Oland", { 256, 32 } },
{ "Pitcairn", { 128, 32 } },
{ "Tahiti", { 128, 32 } },
{ "Tonga", { 64, 32 } },
{ "Turks", { 128, 64 } },
{ "default", { 256, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } },
{ "default", { 64, 64 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 64, 32 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } },
{ "Iris Pro", { 512, 64 } },
{ "default", { 64, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 32 } },
{ "GeForce GT 650M", { 128, 64 } },
{ "GeForce GTX 1070", { 128, 1024 } },
{ "GeForce GTX 1080", { 512, 64 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 512, 1024 } },
{ "GeForce GTX 680", { 128, 128 } },
{ "GeForce GTX 750", { 128, 32 } },
{ "GeForce GTX 750 Ti", { 64, 32 } },
{ "GeForce GTX 980", { 256, 32 } },
{ "GeForce GTX TITAN Black", { 512, 64 } },
{ "GeForce GTX TITAN X", { 256, 32 } },
{ "TITAN X (Pascal)", { 1024, 32 } },
{ "Tesla K20m", { 1024, 32 } },
{ "default", { 256, 64 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 128, 64 } },
{ "default", { 128, 64 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 32 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xdot" kernel at Precision::kComplexSingle.
// Every device row carries two values, one per parameter name listed below (WGS1, WGS2);
// presumably in that order - confirm against the DatabaseEntry definition.
// Rows are grouped by device type and vendor. Each group ends with a "default" row, and the last
// group (kDeviceTypeAll, "default") is the vendor-independent catch-all.
const Database::DatabaseEntry XdotComplexSingle = {
"Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
{ "ATI Radeon HD 6750M", { 256, 256 } },
{ "Ellesmere", { 256, 32 } },
{ "Fiji", { 256, 64 } },
{ "Oland", { 128, 32 } },
{ "Pitcairn", { 256, 32 } },
{ "Tahiti", { 64, 32 } },
{ "Tonga", { 256, 64 } },
{ "Turks", { 128, 32 } },
{ "default", { 256, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
{ "default", { 256, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 256, 32 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } },
{ "Iris Pro", { 32, 32 } },
{ "default", { 32, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 64, 32 } },
{ "GeForce GTX 1070", { 128, 32 } },
{ "GeForce GTX 1080", { 128, 64 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 256, 32 } },
{ "GeForce GTX 680", { 128, 64 } },
{ "GeForce GTX 750", { 64, 32 } },
{ "GeForce GTX 750 Ti", { 64, 32 } },
{ "GeForce GTX 980", { 256, 64 } },
{ "GeForce GTX TITAN Black", { 128, 64 } },
{ "GeForce GTX TITAN X", { 256, 32 } },
{ "TITAN X (Pascal)", { 256, 32 } },
{ "Tesla K20m", { 512, 32 } },
{ "default", { 512, 64 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 256 } },
{ "default", { 64, 256 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 256, 32 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xdot" kernel at Precision::kDouble.
// Every device row carries two values, one per parameter name listed below (WGS1, WGS2);
// presumably in that order - confirm against the DatabaseEntry definition.
// Only AMD GPU, Intel CPU and NVIDIA GPU groups are present for this precision, followed by the
// vendor-independent catch-all group (kDeviceTypeAll, "default"). Each group ends with a
// "default" row.
const Database::DatabaseEntry XdotDouble = {
"Xdot", Precision::kDouble, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 128 } },
{ "Ellesmere", { 128, 64 } },
{ "Fiji", { 256, 32 } },
{ "Oland", { 256, 32 } },
{ "Pitcairn", { 128, 32 } },
{ "Tahiti", { 256, 32 } },
{ "Tonga", { 128, 64 } },
{ "default", { 128, 64 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
{ "default", { 256, 64 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 32 } },
{ "GeForce GTX 1070", { 128, 512 } },
{ "GeForce GTX 1080", { 128, 128 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 256, 32 } },
{ "GeForce GTX 680", { 128, 64 } },
{ "GeForce GTX 750", { 64, 256 } },
{ "GeForce GTX 750 Ti", { 128, 64 } },
{ "GeForce GTX 980", { 128, 32 } },
{ "GeForce GTX TITAN Black", { 128, 64 } },
{ "GeForce GTX TITAN X", { 256, 32 } },
{ "TITAN X (Pascal)", { 128, 32 } },
{ "Tesla K20m", { 512, 32 } },
{ "default", { 128, 128 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 64 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xdot" kernel at Precision::kComplexDouble.
// Every device row carries two values, one per parameter name listed below (WGS1, WGS2);
// presumably in that order - confirm against the DatabaseEntry definition.
// Only AMD GPU, Intel CPU and NVIDIA GPU groups are present for this precision, followed by the
// vendor-independent catch-all group (kDeviceTypeAll, "default"). Each group ends with a
// "default" row.
const Database::DatabaseEntry XdotComplexDouble = {
"Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
{ "Ellesmere", { 256, 32 } },
{ "Fiji", { 256, 32 } },
{ "Oland", { 256, 32 } },
{ "Pitcairn", { 256, 32 } },
{ "Tahiti", { 256, 32 } },
{ "Tonga", { 128, 64 } },
{ "default", { 256, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } },
{ "default", { 128, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 64, 32 } },
{ "GeForce GTX 1070", { 128, 64 } },
{ "GeForce GTX 1080", { 128, 32 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 512, 128 } },
{ "GeForce GTX 680", { 256, 64 } },
{ "GeForce GTX 750", { 256, 32 } },
{ "GeForce GTX 750 Ti", { 64, 32 } },
{ "GeForce GTX 980", { 64, 32 } },
{ "GeForce GTX TITAN Black", { 128, 32 } },
{ "GeForce GTX TITAN X", { 128, 32 } },
{ "TITAN X (Pascal)", { 128, 64 } },
{ "Tesla K20m", { 128, 32 } },
{ "default", { 128, 64 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 256, 32 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xdot' kernels.
//
// =================================================================================================
#include "database/kernels/xdot/xdot_16.hpp"
#include "database/kernels/xdot/xdot_32.hpp"
#include "database/kernels/xdot/xdot_3232.hpp"
#include "database/kernels/xdot/xdot_64.hpp"
#include "database/kernels/xdot/xdot_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xdot' kernels at
// half precision (Precision::kHalf; the '_16' file suffix denotes this precision).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xdot" kernel at Precision::kHalf.
// Every device row carries two values, one per parameter name listed below (WGS1, WGS2);
// presumably in that order - confirm against the DatabaseEntry definition.
// Only AMD, Intel and QUALCOMM GPU groups are present for this precision, followed by the
// vendor-independent catch-all group (kDeviceTypeAll, "default"). Each group ends with a
// "default" row.
const DatabaseEntry XdotHalf = {
"Xdot", Precision::kHalf, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 256, 64 } },
{ "default", { 256, 64 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } },
{ "default", { 128, 32 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 64 } },
{ "default", { 64, 64 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 64 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,83 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xdot' kernels at
// single precision (Precision::kSingle; the '_32' file suffix denotes this precision).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xdot" kernel at Precision::kSingle.
//
// Initializer layout, in order: kernel name, precision, the ordered list of parameter names, and
// then one group per (device type, vendor) pair. Inside a group every row maps a device name to a
// value list holding one value per parameter name, in the same order (here: WGS1, WGS2). Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
//
// NOTE(review): the order in which a lookup falls back from device to vendor default to the global
// default is decided by the database code, which is not visible here -- confirm before relying on it.
const DatabaseEntry XdotSingle = {
"Xdot", Precision::kSingle, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 128, 32 } },
{ "ATI Radeon HD 6750M", { 256, 32 } },
{ "Ellesmere", { 128, 32 } },
{ "Fiji", { 256, 32 } },
{ "Oland", { 256, 32 } },
{ "Pitcairn", { 128, 32 } },
{ "Tahiti", { 128, 32 } },
{ "Tonga", { 64, 32 } },
{ "Turks", { 128, 64 } },
{ "default", { 256, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } },
{ "default", { 64, 64 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 64, 32 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } },
{ "Iris Pro", { 512, 64 } },
{ "default", { 64, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 32 } },
{ "GeForce GT 650M", { 128, 64 } },
{ "GeForce GTX 1070", { 128, 1024 } },
{ "GeForce GTX 1080", { 512, 64 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 512, 1024 } },
{ "GeForce GTX 680", { 128, 128 } },
{ "GeForce GTX 750", { 128, 32 } },
{ "GeForce GTX 750 Ti", { 64, 32 } },
{ "GeForce GTX 980", { 256, 32 } },
{ "GeForce GTX TITAN Black", { 512, 64 } },
{ "GeForce GTX TITAN X", { 256, 32 } },
{ "TITAN X (Pascal)", { 1024, 32 } },
{ "Tesla K20m", { 1024, 32 } },
{ "default", { 256, 64 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 128, 64 } },
{ "default", { 128, 64 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 32 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,82 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xdot3232' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xdot" kernel at Precision::kComplexSingle.
//
// Initializer layout, in order: kernel name, precision, the ordered list of parameter names, and
// then one group per (device type, vendor) pair. Inside a group every row maps a device name to a
// value list holding one value per parameter name, in the same order (here: WGS1, WGS2). Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
//
// NOTE(review): the order in which a lookup falls back from device to vendor default to the global
// default is decided by the database code, which is not visible here -- confirm before relying on it.
const DatabaseEntry XdotComplexSingle = {
"Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
{ "ATI Radeon HD 6750M", { 256, 256 } },
{ "Ellesmere", { 256, 32 } },
{ "Fiji", { 256, 64 } },
{ "Oland", { 128, 32 } },
{ "Pitcairn", { 256, 32 } },
{ "Tahiti", { 64, 32 } },
{ "Tonga", { 256, 64 } },
{ "Turks", { 128, 32 } },
{ "default", { 256, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
{ "default", { 256, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 256, 32 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } },
{ "Iris Pro", { 32, 32 } },
{ "default", { 32, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 64, 32 } },
{ "GeForce GTX 1070", { 128, 32 } },
{ "GeForce GTX 1080", { 128, 64 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 256, 32 } },
{ "GeForce GTX 680", { 128, 64 } },
{ "GeForce GTX 750", { 64, 32 } },
{ "GeForce GTX 750 Ti", { 64, 32 } },
{ "GeForce GTX 980", { 256, 64 } },
{ "GeForce GTX TITAN Black", { 128, 64 } },
{ "GeForce GTX TITAN X", { 256, 32 } },
{ "TITAN X (Pascal)", { 256, 32 } },
{ "Tesla K20m", { 512, 32 } },
{ "default", { 512, 64 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 256 } },
{ "default", { 64, 256 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 256, 32 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,63 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xdot64' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xdot" kernel at Precision::kDouble.
//
// Initializer layout, in order: kernel name, precision, the ordered list of parameter names, and
// then one group per (device type, vendor) pair. Inside a group every row maps a device name to a
// value list holding one value per parameter name, in the same order (here: WGS1, WGS2). Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values. This table lists AMD GPUs, Intel CPUs and NVIDIA GPUs only.
//
// NOTE(review): the order in which a lookup falls back from device to vendor default to the global
// default is decided by the database code, which is not visible here -- confirm before relying on it.
const DatabaseEntry XdotDouble = {
"Xdot", Precision::kDouble, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 128 } },
{ "Ellesmere", { 128, 64 } },
{ "Fiji", { 256, 32 } },
{ "Oland", { 256, 32 } },
{ "Pitcairn", { 128, 32 } },
{ "Tahiti", { 256, 32 } },
{ "Tonga", { 128, 64 } },
{ "default", { 128, 64 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } },
{ "default", { 256, 64 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 32 } },
{ "GeForce GTX 1070", { 128, 512 } },
{ "GeForce GTX 1080", { 128, 128 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 256, 32 } },
{ "GeForce GTX 680", { 128, 64 } },
{ "GeForce GTX 750", { 64, 256 } },
{ "GeForce GTX 750 Ti", { 128, 64 } },
{ "GeForce GTX 980", { 128, 32 } },
{ "GeForce GTX TITAN Black", { 128, 64 } },
{ "GeForce GTX TITAN X", { 256, 32 } },
{ "TITAN X (Pascal)", { 128, 32 } },
{ "Tesla K20m", { 512, 32 } },
{ "default", { 128, 128 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 64 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,63 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xdot6464' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xdot" kernel at Precision::kComplexDouble.
//
// Initializer layout, in order: kernel name, precision, the ordered list of parameter names, and
// then one group per (device type, vendor) pair. Inside a group every row maps a device name to a
// value list holding one value per parameter name, in the same order (here: WGS1, WGS2). Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values. This table lists AMD GPUs, Intel CPUs and NVIDIA GPUs only.
//
// NOTE(review): the order in which a lookup falls back from device to vendor default to the global
// default is decided by the database code, which is not visible here -- confirm before relying on it.
const DatabaseEntry XdotComplexDouble = {
"Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 32 } },
{ "Ellesmere", { 256, 32 } },
{ "Fiji", { 256, 32 } },
{ "Oland", { 256, 32 } },
{ "Pitcairn", { 256, 32 } },
{ "Tahiti", { 256, 32 } },
{ "Tonga", { 128, 64 } },
{ "default", { 256, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } },
{ "default", { 128, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 64, 32 } },
{ "GeForce GTX 1070", { 128, 64 } },
{ "GeForce GTX 1080", { 128, 32 } },
{ "GeForce GTX 480", { 512, 32 } },
{ "GeForce GTX 670", { 512, 128 } },
{ "GeForce GTX 680", { 256, 64 } },
{ "GeForce GTX 750", { 256, 32 } },
{ "GeForce GTX 750 Ti", { 64, 32 } },
{ "GeForce GTX 980", { 64, 32 } },
{ "GeForce GTX TITAN Black", { 128, 32 } },
{ "GeForce GTX TITAN X", { 128, 32 } },
{ "TITAN X (Pascal)", { 128, 64 } },
{ "Tesla K20m", { 128, 32 } },
{ "default", { 128, 64 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 256, 32 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,348 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "Xgemm" kernel at Precision::kHalf.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values. All rows of this table currently hold the same value list.
const Database::DatabaseEntry XgemmHalf = {
"Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemm" kernel at Precision::kSingle.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
const Database::DatabaseEntry XgemmSingle = {
"Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } },
{ "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } },
{ "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } },
{ "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } },
{ "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } },
{ "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } },
{ "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } },
{ "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } },
{ "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
{ "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
{ "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } },
{ "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
{ "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } },
{ "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } },
{ "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } },
{ "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } },
{ "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } },
{ "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } },
{ "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } },
{ "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } },
{ "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } },
{ "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } },
{ "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } },
{ "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } },
{ "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } },
{ "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
{ "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemm" kernel at Precision::kComplexSingle.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
const Database::DatabaseEntry XgemmComplexSingle = {
"Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
{ "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } },
{ "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
{ "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } },
{ "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } },
{ "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } },
{ "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } },
{ "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } },
{ "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
{ "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } },
{ "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } },
{ "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
{ "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } },
{ "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } },
{ "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } },
{ "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } },
{ "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
{ "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } },
{ "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } },
{ "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } },
{ "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } },
{ "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } },
{ "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } },
{ "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } },
{ "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } },
{ "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } },
{ "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemm" kernel at Precision::kDouble.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
const Database::DatabaseEntry XgemmDouble = {
"Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } },
{ "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } },
{ "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
{ "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } },
{ "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } },
{ "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } },
{ "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
{ "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
{ "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
{ "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } },
{ "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
{ "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
{ "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } },
{ "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } },
{ "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } },
{ "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } },
{ "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } },
{ "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } },
{ "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } },
{ "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
{ "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } },
{ "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } },
{ "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemm" kernel at Precision::kComplexDouble.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
const Database::DatabaseEntry XgemmComplexDouble = {
"Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } },
{ "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
{ "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
{ "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } },
{ "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
{ "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } },
{ "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } },
{ "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } },
{ "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
{ "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } },
{ "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
{ "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } },
{ "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
{ "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } },
{ "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } },
{ "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } },
{ "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } },
{ "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } },
{ "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } },
{ "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } },
{ "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } },
{ "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
{ "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } },
{ "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels.
//
// =================================================================================================
#include "database/kernels/xgemm/xgemm_16.hpp"
#include "database/kernels/xgemm/xgemm_32.hpp"
#include "database/kernels/xgemm/xgemm_3232.hpp"
#include "database/kernels/xgemm/xgemm_64.hpp"
#include "database/kernels/xgemm/xgemm_6464.hpp"

View File

@ -0,0 +1,36 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm16' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemm" kernel at Precision::kHalf.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values. All rows of this table currently hold the same value list.
//
// NOTE(review): the order in which a lookup falls back from device to vendor default to the global
// default is decided by the database code, which is not visible here -- confirm before relying on it.
const DatabaseEntry XgemmHalf = {
"Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,101 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm32' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemm" kernel at Precision::kSingle.
//
// Initializer layout, in order: kernel name, precision, the ordered list of 14 parameter names,
// and then one group per (device type, vendor) pair. Inside a group every row maps a device name
// to a value list holding one value per parameter name, in the same order as the name list. Every
// group closes with a "default" row, and the last group (kDeviceTypeAll, "default") carries the
// catch-all values.
//
// NOTE(review): the order in which a lookup falls back from device to vendor default to the global
// default is decided by the database code, which is not visible here -- confirm before relying on it.
const DatabaseEntry XgemmSingle = {
"Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } },
{ "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } },
{ "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } },
{ "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } },
{ "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } },
{ "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } },
{ "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } },
{ "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } },
{ "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
{ "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
{ "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } },
{ "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
{ "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } },
{ "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } },
{ "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } },
{ "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } },
{ "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } },
{ "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } },
{ "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } },
{ "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } },
{ "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } },
{ "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } },
{ "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } },
{ "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } },
{ "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } },
{ "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } },
{ "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,94 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels at
// complex single precision (file suffix '_3232').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemm" kernel family at Precision::kComplexSingle.
// The third field names the 14 parameters; every device row below carries 14 integers, presumably
// in that same order (KWG, KWI, MDIMA, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, and each group closes with a "default" row.
// The final group (kDeviceTypeAll, "default") holds a single catch-all row; how lookups fall back
// between these levels is decided outside this file.
const DatabaseEntry XgemmComplexSingle = {
"Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
{ "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } },
{ "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
{ "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } },
{ "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } },
{ "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } },
{ "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } },
{ "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } },
{ "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
{ "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } },
{ "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } },
{ "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
{ "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
{ "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } },
{ "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } },
{ "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } },
{ "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } },
{ "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } },
{ "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } },
{ "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } },
{ "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } },
{ "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } },
{ "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } },
{ "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } },
{ "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } },
{ "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } },
{ "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } },
{ "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } },
{ "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,80 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels at
// double precision (file suffix '_64').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemm" kernel family at Precision::kDouble.
// The third field names the 14 parameters; every device row below carries 14 integers, presumably
// in that same order (KWG, KWI, MDIMA, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, and each group closes with a "default" row.
// The final group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmDouble = {
"Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } },
{ "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } },
{ "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
{ "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } },
{ "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } },
{ "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } },
{ "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
{ "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
{ "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } },
{ "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
{ "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } },
{ "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } },
{ "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
{ "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } },
{ "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } },
{ "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } },
{ "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } },
{ "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } },
{ "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } },
{ "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } },
{ "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
{ "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } },
{ "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } },
{ "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,79 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm' kernels at
// complex double precision (file suffix '_6464').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemm" kernel family at Precision::kComplexDouble.
// The third field names the 14 parameters; every device row below carries 14 integers, presumably
// in that same order (KWG, KWI, MDIMA, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, and each group closes with a "default" row.
// The final group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmComplexDouble = {
"Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } },
{ "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
{ "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
{ "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } },
{ "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } },
{ "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } },
{ "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } },
{ "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } },
{ "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
{ "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } },
{ "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
{ "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } },
{ "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } },
{ "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } },
{ "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } },
{ "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } },
{ "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } },
{ "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } },
{ "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } },
{ "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } },
{ "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } },
{ "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } },
{ "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } },
{ "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } },
{ "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } },
{ "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,218 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kHalf.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const Database::DatabaseEntry XgemmDirectHalf = {
"XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
{ "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kSingle.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const Database::DatabaseEntry XgemmDirectSingle = {
"XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } },
{ "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } },
{ "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } },
{ "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } },
{ "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
{ "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } },
{ "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } },
{ "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
{ "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
{ "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } },
{ "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kComplexSingle.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const Database::DatabaseEntry XgemmDirectComplexSingle = {
"XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
{ "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } },
{ "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } },
{ "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } },
{ "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kDouble.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const Database::DatabaseEntry XgemmDirectDouble = {
"XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } },
{ "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } },
{ "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
{ "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } },
{ "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } },
{ "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kComplexDouble.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const Database::DatabaseEntry XgemmDirectComplexDouble = {
"XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } },
{ "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
{ "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
{ "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct' kernels.
//
// =================================================================================================
#include "database/kernels/xgemm_direct/xgemm_direct_16.hpp"
#include "database/kernels/xgemm_direct/xgemm_direct_32.hpp"
#include "database/kernels/xgemm_direct/xgemm_direct_3232.hpp"
#include "database/kernels/xgemm_direct/xgemm_direct_64.hpp"
#include "database/kernels/xgemm_direct/xgemm_direct_6464.hpp"

View File

@ -0,0 +1,36 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct'
// kernels at half precision (file suffix '_16').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kHalf.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmDirectHalf = {
"XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
{ "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,66 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct'
// kernels at single precision (file suffix '_32').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kSingle.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmDirectSingle = {
"XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } },
{ "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } },
{ "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } },
{ "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } },
{ "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
{ "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } },
{ "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } },
{ "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
{ "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
{ "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } },
{ "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,58 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct'
// kernels at complex single precision (file suffix '_3232').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kComplexSingle.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmDirectComplexSingle = {
"XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
{ "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } },
{ "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } },
{ "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } },
{ "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,50 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct'
// kernels at double precision (file suffix '_64').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kDouble.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmDirectDouble = {
"XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } },
{ "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } },
{ "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
{ "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } },
{ "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } },
{ "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,50 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemm_Direct'
// kernels at complex double precision (file suffix '_6464').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemmDirect" kernel family at Precision::kComplexDouble.
// The third field names the 10 parameters; each device row supplies 10 integers, presumably in
// that same order (KWID, MDIMAD, ...) -- confirm against the code that consumes DatabaseEntry.
// Rows are grouped per device type and vendor string, each group closing with a "default" row;
// the last group (kDeviceTypeAll, "default") holds a single catch-all row.
const DatabaseEntry XgemmDirectComplexDouble = {
"XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } },
{ "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
{ "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } },
{ "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
{ "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } },
{ "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } },
{ "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } },
{ "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,306 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "Xgemv" kernel at Precision::kHalf.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvHalf = {
"Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 256, 1 } },
{ "default", { 256, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemv" kernel at Precision::kSingle.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvSingle = {
"Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 128, 1 } },
{ "ATI Radeon HD 6750M", { 32, 1 } },
{ "Ellesmere", { 256, 1 } },
{ "Fiji", { 128, 1 } },
{ "Hawaii", { 128, 1 } },
{ "Oland", { 128, 1 } },
{ "Pitcairn", { 256, 1 } },
{ "Tahiti", { 256, 1 } },
{ "Tonga", { 128, 2 } },
{ "Turks", { 32, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
{ "default", { 64, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 256, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } },
{ "Iris", { 64, 2 } },
{ "Iris Pro", { 128, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 256, 1 } },
{ "GeForce GT 650M", { 256, 1 } },
{ "GeForce GTX 1070", { 128, 1 } },
{ "GeForce GTX 1080", { 32, 1 } },
{ "GeForce GTX 480", { 64, 1 } },
{ "GeForce GTX 670", { 64, 1 } },
{ "GeForce GTX 680", { 256, 1 } },
{ "GeForce GTX 750", { 256, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1 } },
{ "GeForce GTX 980", { 128, 1 } },
{ "GeForce GTX TITAN", { 256, 1 } },
{ "GeForce GTX TITAN Black", { 256, 1 } },
{ "GeForce GTX TITAN X", { 256, 1 } },
{ "TITAN X (Pascal)", { 32, 1 } },
{ "Tesla K20m", { 128, 1 } },
{ "Tesla K40m", { 256, 1 } },
{ "default", { 256, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemv" kernel at Precision::kComplexSingle.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvComplexSingle = {
"Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
{ "ATI Radeon HD 6750M", { 64, 1 } },
{ "Ellesmere", { 32, 1 } },
{ "Fiji", { 32, 1 } },
{ "Hawaii", { 64, 1 } },
{ "Oland", { 64, 1 } },
{ "Pitcairn", { 64, 1 } },
{ "Tahiti", { 64, 1 } },
{ "Tonga", { 32, 1 } },
{ "Turks", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
{ "default", { 64, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 64, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } },
{ "Iris", { 256, 1 } },
{ "Iris Pro", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 256, 1 } },
{ "GeForce GTX 1070", { 64, 1 } },
{ "GeForce GTX 1080", { 32, 1 } },
{ "GeForce GTX 480", { 64, 1 } },
{ "GeForce GTX 670", { 64, 1 } },
{ "GeForce GTX 680", { 64, 1 } },
{ "GeForce GTX 750", { 128, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1 } },
{ "GeForce GTX TITAN", { 256, 1 } },
{ "GeForce GTX TITAN Black", { 32, 1 } },
{ "TITAN X (Pascal)", { 32, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemv" kernel at Precision::kDouble.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvDouble = {
"Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
{ "Ellesmere", { 32, 1 } },
{ "Fiji", { 32, 1 } },
{ "Hawaii", { 128, 1 } },
{ "Oland", { 256, 1 } },
{ "Pitcairn", { 256, 1 } },
{ "Tahiti", { 256, 1 } },
{ "Tonga", { 32, 1 } },
{ "default", { 256, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
{ "default", { 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 1 } },
{ "GeForce GTX 1070", { 64, 1 } },
{ "GeForce GTX 1080", { 32, 1 } },
{ "GeForce GTX 480", { 256, 1 } },
{ "GeForce GTX 670", { 128, 1 } },
{ "GeForce GTX 680", { 128, 1 } },
{ "GeForce GTX 750", { 64, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1 } },
{ "GeForce GTX 980", { 64, 1 } },
{ "GeForce GTX TITAN", { 256, 1 } },
{ "GeForce GTX TITAN Black", { 32, 1 } },
{ "GeForce GTX TITAN X", { 64, 1 } },
{ "TITAN X (Pascal)", { 32, 1 } },
{ "Tesla K20m", { 256, 1 } },
{ "Tesla K40m", { 256, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xgemv" kernel at Precision::kComplexDouble.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvComplexDouble = {
"Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
{ "Ellesmere", { 32, 1 } },
{ "Fiji", { 64, 1 } },
{ "Hawaii", { 64, 1 } },
{ "Oland", { 256, 1 } },
{ "Pitcairn", { 256, 1 } },
{ "Tahiti", { 256, 1 } },
{ "Tonga", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } },
{ "default", { 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 1 } },
{ "GeForce GTX 480", { 64, 1 } },
{ "GeForce GTX 670", { 128, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels.
//
// =================================================================================================
#include "database/kernels/xgemv/xgemv_16.hpp"
#include "database/kernels/xgemv/xgemv_32.hpp"
#include "database/kernels/xgemv/xgemv_3232.hpp"
#include "database/kernels/xgemv/xgemv_64.hpp"
#include "database/kernels/xgemv/xgemv_6464.hpp"

View File

@ -0,0 +1,37 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels
// in half precision (Precision::kHalf).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemv" kernel at Precision::kHalf.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const DatabaseEntry XgemvHalf = {
"Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 256, 1 } },
{ "default", { 256, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,94 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels
// in single precision (Precision::kSingle).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemv" kernel at Precision::kSingle.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const DatabaseEntry XgemvSingle = {
"Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 128, 1 } },
{ "ATI Radeon HD 6750M", { 32, 1 } },
{ "Ellesmere", { 256, 1 } },
{ "Fiji", { 128, 1 } },
{ "Hawaii", { 128, 1 } },
{ "Oland", { 128, 1 } },
{ "Pitcairn", { 256, 1 } },
{ "Tahiti", { 256, 1 } },
{ "Tonga", { 128, 2 } },
{ "Turks", { 32, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
{ "default", { 64, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 256, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } },
{ "Iris", { 64, 2 } },
{ "Iris Pro", { 128, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 256, 1 } },
{ "GeForce GT 650M", { 256, 1 } },
{ "GeForce GTX 1070", { 128, 1 } },
{ "GeForce GTX 1080", { 32, 1 } },
{ "GeForce GTX 480", { 64, 1 } },
{ "GeForce GTX 670", { 64, 1 } },
{ "GeForce GTX 680", { 256, 1 } },
{ "GeForce GTX 750", { 256, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1 } },
{ "GeForce GTX 980", { 128, 1 } },
{ "GeForce GTX TITAN", { 256, 1 } },
{ "GeForce GTX TITAN Black", { 256, 1 } },
{ "GeForce GTX TITAN X", { 256, 1 } },
{ "TITAN X (Pascal)", { 32, 1 } },
{ "Tesla K20m", { 128, 1 } },
{ "Tesla K40m", { 256, 1 } },
{ "default", { 256, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,83 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels
// in complex single precision (Precision::kComplexSingle).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemv" kernel at Precision::kComplexSingle.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const DatabaseEntry XgemvComplexSingle = {
"Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
{ "ATI Radeon HD 6750M", { 64, 1 } },
{ "Ellesmere", { 32, 1 } },
{ "Fiji", { 32, 1 } },
{ "Hawaii", { 64, 1 } },
{ "Oland", { 64, 1 } },
{ "Pitcairn", { 64, 1 } },
{ "Tahiti", { 64, 1 } },
{ "Tonga", { 32, 1 } },
{ "Turks", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
{ "default", { 64, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 64, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } },
{ "Iris", { 256, 1 } },
{ "Iris Pro", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 256, 1 } },
{ "GeForce GTX 1070", { 64, 1 } },
{ "GeForce GTX 1080", { 32, 1 } },
{ "GeForce GTX 480", { 64, 1 } },
{ "GeForce GTX 670", { 64, 1 } },
{ "GeForce GTX 680", { 64, 1 } },
{ "GeForce GTX 750", { 128, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1 } },
{ "GeForce GTX TITAN", { 256, 1 } },
{ "GeForce GTX TITAN Black", { 32, 1 } },
{ "TITAN X (Pascal)", { 32, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,73 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels
// in double precision (Precision::kDouble).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemv" kernel at Precision::kDouble.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const DatabaseEntry XgemvDouble = {
"Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
{ "Ellesmere", { 32, 1 } },
{ "Fiji", { 32, 1 } },
{ "Hawaii", { 128, 1 } },
{ "Oland", { 256, 1 } },
{ "Pitcairn", { 256, 1 } },
{ "Tahiti", { 256, 1 } },
{ "Tonga", { 32, 1 } },
{ "default", { 256, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } },
{ "default", { 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 1 } },
{ "GeForce GTX 1070", { 64, 1 } },
{ "GeForce GTX 1080", { 32, 1 } },
{ "GeForce GTX 480", { 256, 1 } },
{ "GeForce GTX 670", { 128, 1 } },
{ "GeForce GTX 680", { 128, 1 } },
{ "GeForce GTX 750", { 64, 1 } },
{ "GeForce GTX 750 Ti", { 32, 1 } },
{ "GeForce GTX 980", { 64, 1 } },
{ "GeForce GTX TITAN", { 256, 1 } },
{ "GeForce GTX TITAN Black", { 32, 1 } },
{ "GeForce GTX TITAN X", { 64, 1 } },
{ "TITAN X (Pascal)", { 32, 1 } },
{ "Tesla K20m", { 256, 1 } },
{ "Tesla K40m", { 256, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,61 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv' kernels
// in complex double precision (Precision::kComplexDouble).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "Xgemv" kernel at Precision::kComplexDouble.
// Each device row lists two values, one per name in {"WGS1", "WPT1"} (presumably in that order;
// the code consuming this entry is not visible here). Rows are grouped by device type and
// vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains only a
// "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const DatabaseEntry XgemvComplexDouble = {
"Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1 } },
{ "Ellesmere", { 32, 1 } },
{ "Fiji", { 64, 1 } },
{ "Hawaii", { 64, 1 } },
{ "Oland", { 256, 1 } },
{ "Pitcairn", { 256, 1 } },
{ "Tahiti", { 256, 1 } },
{ "Tonga", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } },
{ "default", { 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } },
{ "default", { 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 1 } },
{ "GeForce GTX 480", { 64, 1 } },
{ "GeForce GTX 670", { 128, 1 } },
{ "default", { 128, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,300 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "XgemvFast" kernel at Precision::kHalf.
// Each device row lists three values, one per name in {"VW2", "WGS2", "WPT2"} (presumably in
// that order; the code consuming this entry is not visible here). Rows are grouped by device
// type and vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains
// only a "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvFastHalf = {
"XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 1, 32, 1 } },
{ "default", { 1, 32, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } },
{ "default", { 1, 16, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 16, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFast" kernel at Precision::kSingle.
// Each device row lists three values, one per name in {"VW2", "WGS2", "WPT2"} (presumably in
// that order; the code consuming this entry is not visible here). Rows are grouped by device
// type and vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains
// only a "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvFastSingle = {
"XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
{ "ATI Radeon HD 6750M", { 2, 64, 2 } },
{ "Ellesmere", { 1, 64, 1 } },
{ "Fiji", { 1, 64, 2 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 16, 4 } },
{ "Turks", { 1, 256, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } },
{ "default", { 4, 128, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 256, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } },
{ "Iris", { 1, 128, 2 } },
{ "Iris Pro", { 4, 64, 4 } },
{ "default", { 2, 256, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 2, 256, 2 } },
{ "GeForce GT 650M", { 2, 32, 2 } },
{ "GeForce GTX 1070", { 1, 256, 1 } },
{ "GeForce GTX 1080", { 1, 128, 1 } },
{ "GeForce GTX 480", { 1, 128, 1 } },
{ "GeForce GTX 670", { 2, 256, 2 } },
{ "GeForce GTX 680", { 1, 128, 1 } },
{ "GeForce GTX 750", { 1, 256, 1 } },
{ "GeForce GTX 750 Ti", { 2, 32, 2 } },
{ "GeForce GTX 980", { 1, 256, 1 } },
{ "GeForce GTX TITAN", { 1, 256, 1 } },
{ "GeForce GTX TITAN Black", { 1, 256, 1 } },
{ "GeForce GTX TITAN X", { 1, 64, 1 } },
{ "TITAN X (Pascal)", { 1, 64, 1 } },
{ "Tesla K20m", { 1, 256, 1 } },
{ "Tesla K40m", { 1, 256, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 1, 64, 4 } },
{ "default", { 1, 64, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFast" kernel at Precision::kComplexSingle.
// Each device row lists three values, one per name in {"VW2", "WGS2", "WPT2"} (presumably in
// that order; the code consuming this entry is not visible here). Rows are grouped by device
// type and vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains
// only a "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvFastComplexSingle = {
"XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } },
{ "ATI Radeon HD 6750M", { 1, 128, 1 } },
{ "Ellesmere", { 1, 64, 1 } },
{ "Fiji", { 1, 16, 1 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 128, 1 } },
{ "Tonga", { 2, 32, 2 } },
{ "Turks", { 1, 16, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } },
{ "default", { 1, 64, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 2, 128, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } },
{ "Iris", { 1, 64, 1 } },
{ "Iris Pro", { 4, 128, 4 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 256, 1 } },
{ "GeForce GTX 1070", { 1, 64, 1 } },
{ "GeForce GTX 480", { 1, 64, 1 } },
{ "GeForce GTX 670", { 1, 64, 1 } },
{ "GeForce GTX 680", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFast" kernel at Precision::kDouble.
// Each device row lists three values, one per name in {"VW2", "WGS2", "WPT2"} (presumably in
// that order; the code consuming this entry is not visible here). Rows are grouped by device
// type and vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains
// only a "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvFastDouble = {
"XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
{ "Ellesmere", { 1, 128, 1 } },
{ "Fiji", { 1, 32, 1 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 2, 32, 2 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } },
{ "default", { 1, 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 256, 1 } },
{ "GeForce GTX 1070", { 1, 256, 1 } },
{ "GeForce GTX 1080", { 1, 32, 2 } },
{ "GeForce GTX 480", { 1, 64, 1 } },
{ "GeForce GTX 670", { 1, 128, 1 } },
{ "GeForce GTX 680", { 1, 128, 1 } },
{ "GeForce GTX 750", { 2, 256, 2 } },
{ "GeForce GTX 750 Ti", { 1, 32, 2 } },
{ "GeForce GTX 980", { 1, 64, 1 } },
{ "GeForce GTX TITAN", { 1, 256, 1 } },
{ "GeForce GTX TITAN Black", { 1, 256, 1 } },
{ "GeForce GTX TITAN X", { 1, 128, 1 } },
{ "TITAN X (Pascal)", { 1, 32, 1 } },
{ "Tesla K20m", { 1, 128, 1 } },
{ "Tesla K40m", { 1, 256, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFast" kernel at Precision::kComplexDouble.
// Each device row lists three values, one per name in {"VW2", "WGS2", "WPT2"} (presumably in
// that order; the code consuming this entry is not visible here). Rows are grouped by device
// type and vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains
// only a "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const Database::DatabaseEntry XgemvFastComplexDouble = {
"XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
{ "Ellesmere", { 1, 16, 1 } },
{ "Fiji", { 1, 16, 1 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 256, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 32, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } },
{ "default", { 4, 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 128, 1 } },
{ "GeForce GTX 480", { 1, 64, 1 } },
{ "GeForce GTX 670", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels.
//
// =================================================================================================
#include "database/kernels/xgemv_fast/xgemv_fast_16.hpp"
#include "database/kernels/xgemv_fast/xgemv_fast_32.hpp"
#include "database/kernels/xgemv_fast/xgemv_fast_3232.hpp"
#include "database/kernels/xgemv_fast/xgemv_fast_64.hpp"
#include "database/kernels/xgemv_fast/xgemv_fast_6464.hpp"

View File

@ -0,0 +1,37 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast'
// kernels in half precision (Precision::kHalf).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFast" kernel at Precision::kHalf.
// Each device row lists three values, one per name in {"VW2", "WGS2", "WPT2"} (presumably in
// that order; the code consuming this entry is not visible here). Rows are grouped by device
// type and vendor, each group ending in a "default" row; the final kDeviceTypeAll group contains
// only a "default" row. NOTE(review): looks like the generic fallback -- confirm in the lookup code.
const DatabaseEntry XgemvFastHalf = {
"XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 1, 32, 1 } },
{ "default", { 1, 32, 1 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } },
{ "default", { 1, 16, 1 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 1, 16, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,94 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast'
// kernels in single precision (Precision::kSingle).
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFast" kernel family at Precision::kSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW2", "WGS2", "WPT2"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel GPUs, Intel accelerators,
// NVIDIA GPUs, QUALCOMM GPUs, then an all-device default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastSingle = {
"XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } },
{ "ATI Radeon HD 6750M", { 2, 64, 2 } },
{ "Ellesmere", { 1, 64, 1 } },
{ "Fiji", { 1, 64, 2 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 16, 4 } },
{ "Turks", { 1, 256, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } },
{ "default", { 4, 128, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 1, 256, 1 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } },
{ "Iris", { 1, 128, 2 } },
{ "Iris Pro", { 4, 64, 4 } },
{ "default", { 2, 256, 2 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 2, 256, 2 } },
{ "GeForce GT 650M", { 2, 32, 2 } },
{ "GeForce GTX 1070", { 1, 256, 1 } },
{ "GeForce GTX 1080", { 1, 128, 1 } },
{ "GeForce GTX 480", { 1, 128, 1 } },
{ "GeForce GTX 670", { 2, 256, 2 } },
{ "GeForce GTX 680", { 1, 128, 1 } },
{ "GeForce GTX 750", { 1, 256, 1 } },
{ "GeForce GTX 750 Ti", { 2, 32, 2 } },
{ "GeForce GTX 980", { 1, 256, 1 } },
{ "GeForce GTX TITAN", { 1, 256, 1 } },
{ "GeForce GTX TITAN Black", { 1, 256, 1 } },
{ "GeForce GTX TITAN X", { 1, 64, 1 } },
{ "TITAN X (Pascal)", { 1, 64, 1 } },
{ "Tesla K20m", { 1, 256, 1 } },
{ "Tesla K40m", { 1, 256, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 1, 64, 4 } },
{ "default", { 1, 64, 4 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,77 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels
// in 2x32-bit (complex single) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFast" kernel family at Precision::kComplexSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW2", "WGS2", "WPT2"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel GPUs, Intel accelerators,
// NVIDIA GPUs, then an all-device default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastComplexSingle = {
"XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } },
{ "ATI Radeon HD 6750M", { 1, 128, 1 } },
{ "Ellesmere", { 1, 64, 1 } },
{ "Fiji", { 1, 16, 1 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 128, 1 } },
{ "Tonga", { 2, 32, 2 } },
{ "Turks", { 1, 16, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } },
{ "default", { 1, 64, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 2, 128, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } },
{ "Iris", { 1, 64, 1 } },
{ "Iris Pro", { 4, 128, 4 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 256, 1 } },
{ "GeForce GTX 1070", { 1, 64, 1 } },
{ "GeForce GTX 480", { 1, 64, 1 } },
{ "GeForce GTX 670", { 1, 64, 1 } },
{ "GeForce GTX 680", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,73 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels
// in 64-bit (double) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFast" kernel family at Precision::kDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW2", "WGS2", "WPT2"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel accelerators, NVIDIA GPUs,
// then an all-device default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastDouble = {
"XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
{ "Ellesmere", { 1, 128, 1 } },
{ "Fiji", { 1, 32, 1 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 64, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 2, 32, 2 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } },
{ "default", { 1, 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 256, 1 } },
{ "GeForce GTX 1070", { 1, 256, 1 } },
{ "GeForce GTX 1080", { 1, 32, 2 } },
{ "GeForce GTX 480", { 1, 64, 1 } },
{ "GeForce GTX 670", { 1, 128, 1 } },
{ "GeForce GTX 680", { 1, 128, 1 } },
{ "GeForce GTX 750", { 2, 256, 2 } },
{ "GeForce GTX 750 Ti", { 1, 32, 2 } },
{ "GeForce GTX 980", { 1, 64, 1 } },
{ "GeForce GTX TITAN", { 1, 256, 1 } },
{ "GeForce GTX TITAN Black", { 1, 256, 1 } },
{ "GeForce GTX TITAN X", { 1, 128, 1 } },
{ "TITAN X (Pascal)", { 1, 32, 1 } },
{ "Tesla K20m", { 1, 128, 1 } },
{ "Tesla K40m", { 1, 256, 1 } },
{ "default", { 1, 256, 1 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,61 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast' kernels
// in 2x64-bit (complex double) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFast" kernel family at Precision::kComplexDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW2", "WGS2", "WPT2"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel accelerators, NVIDIA GPUs,
// then an all-device default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastComplexDouble = {
"XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } },
{ "Ellesmere", { 1, 16, 1 } },
{ "Fiji", { 1, 16, 1 } },
{ "Hawaii", { 1, 64, 1 } },
{ "Oland", { 1, 256, 1 } },
{ "Pitcairn", { 1, 64, 1 } },
{ "Tahiti", { 1, 64, 1 } },
{ "Tonga", { 1, 32, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } },
{ "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } },
{ "default", { 4, 64, 4 } },
}
},
{ // Intel accelerators
kDeviceTypeAccelerator, "Intel", {
{ "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 1, 128, 1 } },
{ "GeForce GTX 480", { 1, 64, 1 } },
{ "GeForce GTX 670", { 1, 64, 1 } },
{ "default", { 1, 64, 1 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 1, 64, 1 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,213 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kHalf.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel GPUs, then an all-device default.
const Database::DatabaseEntry XgemvFastRotHalf = {
"XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 8, 32, 32 } },
{ "default", { 8, 32, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } },
{ "default", { 8, 128, 32 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 8, 128, 32 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel GPUs, NVIDIA GPUs,
// QUALCOMM GPUs, then an all-device default. Every group ends with a "default" row.
const Database::DatabaseEntry XgemvFastRotSingle = {
"XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } },
{ "ATI Radeon HD 6750M", { 8, 128, 16 } },
{ "Ellesmere", { 8, 32, 32 } },
{ "Fiji", { 4, 32, 16 } },
{ "Tonga", { 8, 128, 32 } },
{ "Turks", { 8, 128, 16 } },
{ "default", { 8, 32, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
{ "default", { 8, 32, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
{ "Iris Pro", { 4, 16, 16 } },
{ "default", { 4, 64, 16 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GT 650M", { 8, 32, 16 } },
{ "GeForce GTX 1080", { 8, 32, 32 } },
{ "GeForce GTX 750 Ti", { 8, 32, 32 } },
{ "GeForce GTX TITAN", { 1, 16, 16 } },
{ "GeForce GTX TITAN Black", { 4, 128, 16 } },
{ "TITAN X (Pascal)", { 8, 64, 32 } },
{ "default", { 8, 32, 32 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 4, 64, 16 } },
{ "default", { 4, 64, 16 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 8, 32, 32 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kComplexSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel GPUs, then an all-device
// default. Every group ends with a "default" row.
const Database::DatabaseEntry XgemvFastRotComplexSingle = {
"XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } },
{ "ATI Radeon HD 6750M", { 8, 32, 8 } },
{ "Ellesmere", { 2, 32, 16 } },
{ "Fiji", { 4, 32, 32 } },
{ "Tonga", { 4, 32, 32 } },
{ "Turks", { 4, 32, 8 } },
{ "default", { 8, 16, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } },
{ "default", { 4, 32, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
{ "Iris Pro", { 4, 16, 16 } },
{ "default", { 2, 32, 8 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 4, 16, 16 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, NVIDIA GPUs, then an all-device
// default. Every group ends with a "default" row.
const Database::DatabaseEntry XgemvFastRotDouble = {
"XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } },
{ "Ellesmere", { 4, 16, 16 } },
{ "Fiji", { 4, 32, 32 } },
{ "Tonga", { 4, 16, 16 } },
{ "default", { 4, 16, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
{ "default", { 8, 32, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 8, 32, 32 } },
{ "GeForce GTX 750 Ti", { 4, 32, 16 } },
{ "GeForce GTX TITAN", { 1, 16, 16 } },
{ "GeForce GTX TITAN Black", { 1, 16, 16 } },
{ "TITAN X (Pascal)", { 8, 32, 32 } },
{ "default", { 4, 32, 16 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 4, 16, 16 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kComplexDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, then an all-device default.
const Database::DatabaseEntry XgemvFastRotComplexDouble = {
"XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } },
{ "Ellesmere", { 4, 16, 16 } },
{ "Fiji", { 4, 32, 8 } },
{ "Tonga", { 4, 16, 8 } },
{ "default", { 8, 32, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } },
{ "default", { 8, 16, 16 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 4, 16, 16 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot' kernels.
//
// =================================================================================================
#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp"
#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp"
#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp"
#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp"
#include "database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp"

View File

@ -0,0 +1,36 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot'
// kernels in 16-bit (half) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kHalf.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel GPUs, then an all-device default.
const DatabaseEntry XgemvFastRotHalf = {
"XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 8, 32, 32 } },
{ "default", { 8, 32, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } },
{ "default", { 8, 128, 32 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 8, 128, 32 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,71 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot'
// kernels in 32-bit (single) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel GPUs, NVIDIA GPUs,
// QUALCOMM GPUs, then an all-device default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastRotSingle = {
"XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } },
{ "ATI Radeon HD 6750M", { 8, 128, 16 } },
{ "Ellesmere", { 8, 32, 32 } },
{ "Fiji", { 4, 32, 16 } },
{ "Tonga", { 8, 128, 32 } },
{ "Turks", { 8, 128, 16 } },
{ "default", { 8, 32, 32 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
{ "default", { 8, 32, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
{ "Iris Pro", { 4, 16, 16 } },
{ "default", { 4, 64, 16 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GT 650M", { 8, 32, 16 } },
{ "GeForce GTX 1080", { 8, 32, 32 } },
{ "GeForce GTX 750 Ti", { 8, 32, 32 } },
{ "GeForce GTX TITAN", { 1, 16, 16 } },
{ "GeForce GTX TITAN Black", { 4, 128, 16 } },
{ "TITAN X (Pascal)", { 8, 64, 32 } },
{ "default", { 8, 32, 32 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 4, 64, 16 } },
{ "default", { 4, 64, 16 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 8, 32, 32 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,54 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot'
// kernels in 2x32-bit (complex single) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kComplexSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, Intel GPUs, then an all-device
// default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastRotComplexSingle = {
"XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } },
{ "ATI Radeon HD 6750M", { 8, 32, 8 } },
{ "Ellesmere", { 2, 32, 16 } },
{ "Fiji", { 4, 32, 32 } },
{ "Tonga", { 4, 32, 32 } },
{ "Turks", { 4, 32, 8 } },
{ "default", { 8, 16, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } },
{ "default", { 4, 32, 32 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } },
{ "Iris Pro", { 4, 16, 16 } },
{ "default", { 2, 32, 8 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 4, 16, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,52 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot'
// kernels in 64-bit (double) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, NVIDIA GPUs, then an all-device
// default. Every group ends with a "default" row.
const DatabaseEntry XgemvFastRotDouble = {
"XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } },
{ "Ellesmere", { 4, 16, 16 } },
{ "Fiji", { 4, 32, 32 } },
{ "Tonga", { 4, 16, 16 } },
{ "default", { 4, 16, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } },
{ "default", { 8, 32, 32 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GeForce GTX 1080", { 8, 32, 32 } },
{ "GeForce GTX 750 Ti", { 4, 32, 16 } },
{ "GeForce GTX TITAN", { 1, 16, 16 } },
{ "GeForce GTX TITAN Black", { 1, 16, 16 } },
{ "TITAN X (Pascal)", { 8, 32, 32 } },
{ "default", { 4, 32, 16 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 4, 16, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,42 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xgemv_Fast_Rot'
// kernels in 2x64-bit (complex double) precision.
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the "XgemvFastRot" kernel family at Precision::kComplexDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("VW3", "WGS3", "WPT3"); presumably matched by position — confirm against the
// DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel CPUs, then an all-device default.
const DatabaseEntry XgemvFastRotComplexDouble = {
"XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } },
{ "Ellesmere", { 4, 16, 16 } },
{ "Fiji", { 4, 32, 8 } },
{ "Tonga", { 4, 16, 8 } },
{ "default", { 8, 32, 16 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } },
{ "default", { 8, 16, 16 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 4, 16, 16 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -1,316 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Database generator <database.py>
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels.
//
// =================================================================================================
namespace clblast {
namespace database {
// =================================================================================================
// Tuning-parameter table for the "Xger" kernel family at Precision::kHalf.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("WGS1", "WGS2", "WPT"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, Intel GPUs, QUALCOMM GPUs, then an
// all-device default. Every group ends with a "default" row.
const Database::DatabaseEntry XgerHalf = {
"Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 64, 1, 2 } },
{ "default", { 64, 1, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } },
{ "default", { 4, 8, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 4, 2 } },
{ "default", { 64, 4, 2 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 64, 1, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xger" kernel family at Precision::kSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("WGS1", "WGS2", "WPT"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, ARM GPUs, Intel CPUs, Intel GPUs,
// NVIDIA GPUs, QUALCOMM GPUs, then an all-device default. Every group ends with a "default" row.
const Database::DatabaseEntry XgerSingle = {
"Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } },
{ "ATI Radeon HD 6750M", { 16, 16, 4 } },
{ "Ellesmere", { 64, 4, 2 } },
{ "Fiji", { 256, 1, 1 } },
{ "Hawaii", { 64, 2, 1 } },
{ "Oland", { 32, 4, 2 } },
{ "Pitcairn", { 64, 1, 1 } },
{ "Tahiti", { 256, 1, 1 } },
{ "Tonga", { 256, 1, 2 } },
{ "Turks", { 64, 4, 2 } },
{ "default", { 16, 16, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 64, 4, 4 } },
{ "default", { 64, 4, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } },
{ "default", { 128, 8, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } },
{ "Iris Pro", { 64, 1, 4 } },
{ "default", { 32, 4, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 1, 2 } },
{ "GeForce GT 650M", { 32, 16, 4 } },
{ "GeForce GTX 1070", { 512, 1, 1 } },
{ "GeForce GTX 1080", { 16, 4, 1 } },
{ "GeForce GTX 480", { 256, 1, 4 } },
{ "GeForce GTX 670", { 32, 8, 2 } },
{ "GeForce GTX 680", { 128, 1, 4 } },
{ "GeForce GTX 750", { 64, 16, 4 } },
{ "GeForce GTX 750 Ti", { 64, 1, 2 } },
{ "GeForce GTX TITAN", { 32, 4, 2 } },
{ "GeForce GTX TITAN Black", { 32, 4, 2 } },
{ "TITAN X (Pascal)", { 512, 2, 1 } },
{ "default", { 128, 1, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 128, 1, 2 } },
{ "default", { 128, 1, 2 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 32, 4, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xger" kernel family at Precision::kComplexSingle.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("WGS1", "WGS2", "WPT"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, ARM GPUs, Intel CPUs, Intel GPUs,
// NVIDIA GPUs, QUALCOMM GPUs, then an all-device default. Every group ends with a "default" row.
const Database::DatabaseEntry XgerComplexSingle = {
"Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } },
{ "ATI Radeon HD 6750M", { 16, 16, 1 } },
{ "Ellesmere", { 16, 8, 2 } },
{ "Fiji", { 128, 2, 1 } },
{ "Hawaii", { 64, 1, 2 } },
{ "Oland", { 4, 8, 1 } },
{ "Pitcairn", { 128, 2, 1 } },
{ "Tahiti", { 64, 2, 1 } },
{ "Tonga", { 64, 1, 1 } },
{ "Turks", { 128, 2, 1 } },
{ "default", { 128, 2, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 128, 1, 1 } },
{ "default", { 128, 1, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } },
{ "default", { 256, 2, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } },
{ "Iris Pro", { 16, 2, 4 } },
{ "default", { 128, 2, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 64, 4, 2 } },
{ "GeForce GTX 1070", { 16, 64, 2 } },
{ "GeForce GTX 1080", { 32, 2, 1 } },
{ "GeForce GTX 480", { 128, 2, 2 } },
{ "GeForce GTX 670", { 16, 32, 2 } },
{ "GeForce GTX 680", { 32, 4, 2 } },
{ "GeForce GTX 750", { 32, 16, 4 } },
{ "GeForce GTX 750 Ti", { 32, 8, 2 } },
{ "GeForce GTX TITAN", { 16, 16, 2 } },
{ "GeForce GTX TITAN Black", { 16, 16, 2 } },
{ "TITAN X (Pascal)", { 32, 2, 1 } },
{ "default", { 128, 2, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 1, 4 } },
{ "default", { 64, 1, 4 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 64, 2, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the "Xger" kernel family at Precision::kDouble.
// Each device row pairs a device-name string with three integers, one per parameter name in the
// list below ("WGS1", "WGS2", "WPT"); presumably matched by position — confirm against the
// Database::DatabaseEntry definition, which is not part of this file.
// Rows are grouped by (device type, vendor): AMD GPUs, ARM GPUs, Intel CPUs, NVIDIA GPUs, then an
// all-device default. Every group ends with a "default" row.
const Database::DatabaseEntry XgerDouble = {
"Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } },
{ "Ellesmere", { 64, 1, 4 } },
{ "Fiji", { 256, 1, 2 } },
{ "Hawaii", { 32, 4, 2 } },
{ "Oland", { 128, 1, 2 } },
{ "Pitcairn", { 64, 1, 1 } },
{ "Tahiti", { 64, 2, 1 } },
{ "Tonga", { 8, 16, 2 } },
{ "default", { 128, 2, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 64, 4, 1 } },
{ "default", { 64, 4, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } },
{ "default", { 256, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 8, 2 } },
{ "GeForce GTX 1070", { 32, 8, 1 } },
{ "GeForce GTX 1080", { 32, 2, 1 } },
{ "GeForce GTX 480", { 32, 4, 2 } },
{ "GeForce GTX 670", { 32, 32, 2 } },
{ "GeForce GTX 680", { 128, 4, 2 } },
{ "GeForce GTX 750", { 256, 2, 2 } },
{ "GeForce GTX 750 Ti", { 32, 16, 1 } },
{ "GeForce GTX TITAN", { 16, 8, 2 } },
{ "GeForce GTX TITAN Black", { 32, 4, 2 } },
{ "TITAN X (Pascal)", { 32, 2, 1 } },
{ "default", { 128, 1, 2 } },
}
},
{ // Default
// NOTE(review): presumably the catch-all for vendors without a group above — confirm in lookup code.
kDeviceTypeAll, "default", {
{ "default", { 128, 1, 2 } },
}
},
}
};
// =================================================================================================
// Tuning-parameter table for the 'Xger' kernel at complex double precision
// (Precision::kComplexDouble). Every device row holds three values, listed in the order of the
// parameter names declared on the next line: WGS1, WGS2, WPT. Rows are grouped per device type and
// vendor; each group ends with its own "default" row, and the final group (kDeviceTypeAll /
// "default") is not tied to any vendor. This table has no Intel GPU or QUALCOMM group.
// NOTE(review): the "default" rows are presumably the fallback for device names that are not
// listed - confirm against the database look-up code, which is not part of this file.
const Database::DatabaseEntry XgerComplexDouble = {
"Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } },
{ "Ellesmere", { 8, 16, 1 } },
{ "Fiji", { 64, 4, 2 } },
{ "Hawaii", { 128, 1, 1 } },
{ "Oland", { 16, 16, 2 } },
{ "Pitcairn", { 64, 4, 1 } },
{ "Tahiti", { 32, 4, 1 } },
{ "Tonga", { 16, 4, 1 } },
{ "default", { 32, 4, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 64, 2, 4 } },
{ "default", { 64, 2, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } },
{ "default", { 256, 2, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 2 } },
{ "GeForce GTX 1070", { 8, 128, 1 } },
{ "GeForce GTX 1080", { 8, 4, 1 } },
{ "GeForce GTX 480", { 64, 2, 2 } },
{ "GeForce GTX 670", { 8, 16, 2 } },
{ "GeForce GTX 680", { 8, 16, 1 } },
{ "GeForce GTX 750", { 8, 32, 4 } },
{ "GeForce GTX 750 Ti", { 32, 8, 2 } },
{ "GeForce GTX TITAN", { 32, 4, 2 } },
{ "GeForce GTX TITAN Black", { 16, 16, 2 } },
{ "TITAN X (Pascal)", { 4, 8, 1 } },
{ "default", { 16, 8, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 2, 2 } },
}
},
}
};
// =================================================================================================
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,14 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels.
//
// =================================================================================================
#include "database/kernels/xger/xger_16.hpp"
#include "database/kernels/xger/xger_32.hpp"
#include "database/kernels/xger/xger_3232.hpp"
#include "database/kernels/xger/xger_64.hpp"
#include "database/kernels/xger/xger_6464.hpp"

View File

@ -0,0 +1,43 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels
// at half precision (Precision::kHalf, file suffix '16').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Xger' kernel at half precision (Precision::kHalf).
// Every device row holds three values, listed in the order of the parameter names declared on the
// next line: WGS1, WGS2, WPT. Rows are grouped per device type and vendor; each group ends with its
// own "default" row, and the final group (kDeviceTypeAll / "default") is not tied to any vendor.
// Only AMD, Intel and QUALCOMM GPU groups are present for this precision.
// NOTE(review): the "default" rows are presumably the fallback for device names that are not
// listed - confirm against the database look-up code, which is not part of this file.
const DatabaseEntry XgerHalf = {
"Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "Ellesmere", { 64, 1, 2 } },
{ "default", { 64, 1, 2 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } },
{ "default", { 4, 8, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 4, 2 } },
{ "default", { 64, 4, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 1, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,89 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels
// at single precision (Precision::kSingle, file suffix '32').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Xger' kernel at single precision (Precision::kSingle).
// Every device row holds three values, listed in the order of the parameter names declared on the
// next line: WGS1, WGS2, WPT. Rows are grouped per device type and vendor; each group ends with its
// own "default" row, and the final group (kDeviceTypeAll / "default") is not tied to any vendor.
// Note that "Intel" appears twice: once as kDeviceTypeCPU and once as kDeviceTypeGPU.
// NOTE(review): the "default" rows are presumably the fallback for device names that are not
// listed - confirm against the database look-up code, which is not part of this file.
const DatabaseEntry XgerSingle = {
"Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } },
{ "ATI Radeon HD 6750M", { 16, 16, 4 } },
{ "Ellesmere", { 64, 4, 2 } },
{ "Fiji", { 256, 1, 1 } },
{ "Hawaii", { 64, 2, 1 } },
{ "Oland", { 32, 4, 2 } },
{ "Pitcairn", { 64, 1, 1 } },
{ "Tahiti", { 256, 1, 1 } },
{ "Tonga", { 256, 1, 2 } },
{ "Turks", { 64, 4, 2 } },
{ "default", { 16, 16, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 64, 4, 4 } },
{ "default", { 64, 4, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } },
{ "default", { 128, 8, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } },
{ "Iris Pro", { 64, 1, 4 } },
{ "default", { 32, 4, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 1, 2 } },
{ "GeForce GT 650M", { 32, 16, 4 } },
{ "GeForce GTX 1070", { 512, 1, 1 } },
{ "GeForce GTX 1080", { 16, 4, 1 } },
{ "GeForce GTX 480", { 256, 1, 4 } },
{ "GeForce GTX 670", { 32, 8, 2 } },
{ "GeForce GTX 680", { 128, 1, 4 } },
{ "GeForce GTX 750", { 64, 16, 4 } },
{ "GeForce GTX 750 Ti", { 64, 1, 2 } },
{ "GeForce GTX TITAN", { 32, 4, 2 } },
{ "GeForce GTX TITAN Black", { 32, 4, 2 } },
{ "TITAN X (Pascal)", { 512, 2, 1 } },
{ "default", { 128, 1, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 128, 1, 2 } },
{ "default", { 128, 1, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 32, 4, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,88 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels
// at complex single precision (Precision::kComplexSingle, file suffix '3232').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Xger' kernel at complex single precision
// (Precision::kComplexSingle). Every device row holds three values, listed in the order of the
// parameter names declared on the next line: WGS1, WGS2, WPT. Rows are grouped per device type and
// vendor; each group ends with its own "default" row, and the final group (kDeviceTypeAll /
// "default") is not tied to any vendor.
// Note that "Intel" appears twice: once as kDeviceTypeCPU and once as kDeviceTypeGPU.
// NOTE(review): the "default" rows are presumably the fallback for device names that are not
// listed - confirm against the database look-up code, which is not part of this file.
const DatabaseEntry XgerComplexSingle = {
"Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } },
{ "ATI Radeon HD 6750M", { 16, 16, 1 } },
{ "Ellesmere", { 16, 8, 2 } },
{ "Fiji", { 128, 2, 1 } },
{ "Hawaii", { 64, 1, 2 } },
{ "Oland", { 4, 8, 1 } },
{ "Pitcairn", { 128, 2, 1 } },
{ "Tahiti", { 64, 2, 1 } },
{ "Tonga", { 64, 1, 1 } },
{ "Turks", { 128, 2, 1 } },
{ "default", { 128, 2, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 128, 1, 1 } },
{ "default", { 128, 1, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } },
{ "default", { 256, 2, 4 } },
}
},
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 530", { 32, 1, 2 } },
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } },
{ "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } },
{ "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } },
{ "Iris Pro", { 16, 2, 4 } },
{ "default", { 128, 2, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 64, 4, 2 } },
{ "GeForce GTX 1070", { 16, 64, 2 } },
{ "GeForce GTX 1080", { 32, 2, 1 } },
{ "GeForce GTX 480", { 128, 2, 2 } },
{ "GeForce GTX 670", { 16, 32, 2 } },
{ "GeForce GTX 680", { 32, 4, 2 } },
{ "GeForce GTX 750", { 32, 16, 4 } },
{ "GeForce GTX 750 Ti", { 32, 8, 2 } },
{ "GeForce GTX TITAN", { 16, 16, 2 } },
{ "GeForce GTX TITAN Black", { 16, 16, 2 } },
{ "TITAN X (Pascal)", { 32, 2, 1 } },
{ "default", { 128, 2, 2 } },
}
},
{ // QUALCOMM GPUs
kDeviceTypeGPU, "QUALCOMM", {
{ "QUALCOMM Adreno(TM)", { 64, 1, 4 } },
{ "default", { 64, 1, 4 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 2, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,69 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels
// at double precision (Precision::kDouble, file suffix '64').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Xger' kernel at double precision (Precision::kDouble).
// Every device row holds three values, listed in the order of the parameter names declared on the
// next line: WGS1, WGS2, WPT. Rows are grouped per device type and vendor; each group ends with its
// own "default" row, and the final group (kDeviceTypeAll / "default") is not tied to any vendor.
// This table has no Intel GPU or QUALCOMM group.
// NOTE(review): the "default" rows are presumably the fallback for device names that are not
// listed - confirm against the database look-up code, which is not part of this file.
const DatabaseEntry XgerDouble = {
"Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } },
{ "Ellesmere", { 64, 1, 4 } },
{ "Fiji", { 256, 1, 2 } },
{ "Hawaii", { 32, 4, 2 } },
{ "Oland", { 128, 1, 2 } },
{ "Pitcairn", { 64, 1, 1 } },
{ "Tahiti", { 64, 2, 1 } },
{ "Tonga", { 8, 16, 2 } },
{ "default", { 128, 2, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 64, 4, 1 } },
{ "default", { 64, 4, 1 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } },
{ "default", { 256, 1, 4 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 128, 8, 2 } },
{ "GeForce GTX 1070", { 32, 8, 1 } },
{ "GeForce GTX 1080", { 32, 2, 1 } },
{ "GeForce GTX 480", { 32, 4, 2 } },
{ "GeForce GTX 670", { 32, 32, 2 } },
{ "GeForce GTX 680", { 128, 4, 2 } },
{ "GeForce GTX 750", { 256, 2, 2 } },
{ "GeForce GTX 750 Ti", { 32, 16, 1 } },
{ "GeForce GTX TITAN", { 16, 8, 2 } },
{ "GeForce GTX TITAN Black", { 32, 4, 2 } },
{ "TITAN X (Pascal)", { 32, 2, 1 } },
{ "default", { 128, 1, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 128, 1, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -0,0 +1,69 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
// is auto-generated by the 'scripts/database/database.py' Python script.
//
// This file populates the database with best-found tuning parameters for the 'Xger' kernels
// at complex double precision (Precision::kComplexDouble, file suffix '6464').
//
// =================================================================================================
namespace clblast {
namespace database {
// Tuning-parameter table for the 'Xger' kernel at complex double precision
// (Precision::kComplexDouble). Every device row holds three values, listed in the order of the
// parameter names declared on the next line: WGS1, WGS2, WPT. Rows are grouped per device type and
// vendor; each group ends with its own "default" row, and the final group (kDeviceTypeAll /
// "default") is not tied to any vendor. This table has no Intel GPU or QUALCOMM group.
// NOTE(review): the "default" rows are presumably the fallback for device names that are not
// listed - confirm against the database look-up code, which is not part of this file.
const DatabaseEntry XgerComplexDouble = {
"Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } },
{ "Ellesmere", { 8, 16, 1 } },
{ "Fiji", { 64, 4, 2 } },
{ "Hawaii", { 128, 1, 1 } },
{ "Oland", { 16, 16, 2 } },
{ "Pitcairn", { 64, 4, 1 } },
{ "Tahiti", { 32, 4, 1 } },
{ "Tonga", { 16, 4, 1 } },
{ "default", { 32, 4, 1 } },
}
},
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "Mali-T628", { 64, 2, 4 } },
{ "default", { 64, 2, 4 } },
}
},
{ // Intel CPUs
kDeviceTypeCPU, "Intel", {
{ "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } },
{ "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } },
{ "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } },
{ "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } },
{ "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } },
{ "default", { 256, 2, 2 } },
}
},
{ // NVIDIA GPUs
kDeviceTypeGPU, "NVIDIA", {
{ "GRID K520", { 16, 8, 2 } },
{ "GeForce GTX 1070", { 8, 128, 1 } },
{ "GeForce GTX 1080", { 8, 4, 1 } },
{ "GeForce GTX 480", { 64, 2, 2 } },
{ "GeForce GTX 670", { 8, 16, 2 } },
{ "GeForce GTX 680", { 8, 16, 1 } },
{ "GeForce GTX 750", { 8, 32, 4 } },
{ "GeForce GTX 750 Ti", { 32, 8, 2 } },
{ "GeForce GTX TITAN", { 32, 4, 2 } },
{ "GeForce GTX TITAN Black", { 16, 16, 2 } },
{ "TITAN X (Pascal)", { 4, 8, 1 } },
{ "default", { 16, 8, 2 } },
}
},
{ // Default
kDeviceTypeAll, "default", {
{ "default", { 64, 2, 2 } },
}
},
}
};
} // namespace database
} // namespace clblast

View File

@ -15,7 +15,7 @@ namespace clblast {
namespace database {
// =================================================================================================
const Database::DatabaseEntry XtrsvHalf = {
const DatabaseEntry XtrsvHalf = {
"Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -27,7 +27,7 @@ const Database::DatabaseEntry XtrsvHalf = {
// =================================================================================================
const Database::DatabaseEntry XtrsvSingle = {
const DatabaseEntry XtrsvSingle = {
"Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -39,7 +39,7 @@ const Database::DatabaseEntry XtrsvSingle = {
// =================================================================================================
const Database::DatabaseEntry XtrsvComplexSingle = {
const DatabaseEntry XtrsvComplexSingle = {
"Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -51,7 +51,7 @@ const Database::DatabaseEntry XtrsvComplexSingle = {
// =================================================================================================
const Database::DatabaseEntry XtrsvDouble = {
const DatabaseEntry XtrsvDouble = {
"Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {
@ -63,7 +63,7 @@ const Database::DatabaseEntry XtrsvDouble = {
// =================================================================================================
const Database::DatabaseEntry XtrsvComplexDouble = {
const DatabaseEntry XtrsvComplexDouble = {
"Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, {
{ // Default
kDeviceTypeAll, "default", {

View File

@ -51,7 +51,7 @@ const std::unordered_map<std::string, const std::vector<std::string>> Routine::r
// The constructor does all heavy work, errors are returned as exceptions
Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &kernel_names, const Precision precision,
const std::vector<Database::DatabaseEntry> &userDatabase,
const std::vector<database::DatabaseEntry> &userDatabase,
std::initializer_list<const char *> source):
precision_(precision),
routine_name_(name),
@ -67,7 +67,7 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
InitProgram(source);
}
void Routine::InitDatabase(const std::vector<Database::DatabaseEntry> &userDatabase) {
void Routine::InitDatabase(const std::vector<database::DatabaseEntry> &userDatabase) {
for (const auto &kernel_name : kernel_names_) {
// Queries the cache to see whether or not the kernel parameter database is already there

View File

@ -40,7 +40,7 @@ class Routine {
// and routine list, otherwise the caching logic will break.
explicit Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
const std::vector<Database::DatabaseEntry> &userDatabase,
const std::vector<database::DatabaseEntry> &userDatabase,
std::initializer_list<const char *> source);
// List of kernel-routine look-ups
@ -59,7 +59,7 @@ class Routine {
void InitProgram(std::initializer_list<const char *> source);
// Initializes db_, fetching cached database or building one
void InitDatabase(const std::vector<Database::DatabaseEntry> &userDatabase);
void InitDatabase(const std::vector<database::DatabaseEntry> &userDatabase);
protected: