diff --git a/CHANGELOG b/CHANGELOG index d7d70b7a..76a83826 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ Development (next version) +- The tuning database now has defaults per architecture (e.g. NVIDIA Kepler SM3.5, AMD Fiji) +- The tuning database now has a dictionary to translate vendor/device names to a common set +- The tuners can now distinguish between different AMD GPU board names of the same architecture - The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian') - Various minor fixes and enhancements - Added non-BLAS routines: diff --git a/scripts/database/database/clblast.py b/scripts/database/database/clblast.py index 9ce502ee..d0245dec 100644 --- a/scripts/database/database/clblast.py +++ b/scripts/database/database/clblast.py @@ -11,6 +11,7 @@ import os VENDOR_DEFAULT = "default" DEVICE_TYPE_DEFAULT = "All" DEVICE_NAME_DEFAULT = "default" +DEVICE_ARCHITECTURE_DEFAULT = "default" # List of attributes DEVICE_TYPE_ATTRIBUTES = ["clblast_device_vendor", "clblast_device_type"] @@ -141,35 +142,45 @@ def print_cpp_database(database, output_dir): type_database = [s for s in vendor_database if s["clblast_device_type"] == device_type] f.write(get_cpp_device_vendor(vendor, device_type)) - # Loops over every device of this vendor-type combination - devices = sorted(set([s["clblast_device_name"] for s in type_database])) - for device_name in devices: - device_database = [s for s in type_database if s["clblast_device_name"] == device_name] - device_name_quoted = "\"%s\"," % device_name.strip() - device_name_cpp = " { %-50s { " % device_name_quoted - f.write(device_name_cpp) + # Loops over every architecture of this vendor-type combination + architectures = sorted(set([s["clblast_device_architecture"] for s in type_database])) + for architecture in architectures: + architecture_database = [s for s in type_database if s["clblast_device_architecture"] == architecture] + architecture_string = DEVICE_ARCHITECTURE_DEFAULT if architecture == "" else architecture + f.write(" { \"%s\", {\n" % architecture_string) - # Collects the parameters for this entry - parameters = [] - parameter_index = 0 - kernels = sorted(set([s["kernel"] for s in device_database])) - for kernel in kernels: - kernel_database = [s for s in device_database if s["kernel"] == kernel] + # Loops over every device of this vendor-type combination + devices = sorted(set([s["clblast_device_name"] for s in architecture_database])) + for device_name in devices: + device_database = [s for s in architecture_database if s["clblast_device_name"] == device_name] + device_name_quoted = "\"%s\"," % device_name.strip() + device_name_cpp = " { %-50s { " % device_name_quoted + f.write(device_name_cpp) - assert len(kernel_database) == 1 - results = kernel_database[0]["results"] + # Collects the parameters for this entry + parameters = [] + parameter_index = 0 + kernels = sorted(set([s["kernel"] for s in device_database])) + for kernel in kernels: + kernel_database = [s for s in device_database if s["kernel"] == kernel] - assert len(results) == 1 - new_parameters = results[0]["parameters"] - for parameter_name in sorted(new_parameters): - assert parameter_name == parameter_names[parameter_index] - parameter_value = new_parameters[parameter_name] - parameters.append(str(parameter_value)) - parameter_index += 1 + assert len(kernel_database) == 1 + results = kernel_database[0]["results"] - # Prints the entry - f.write(", ".join(parameters)) - f.write(" } },\n") + assert len(results) == 1 + new_parameters = results[0]["parameters"] + for parameter_name in sorted(new_parameters): + assert parameter_name == parameter_names[parameter_index] + parameter_value = new_parameters[parameter_name] + parameters.append(str(parameter_value)) + parameter_index += 1 + + # Prints the entry + f.write(", ".join(parameters)) + f.write(" } },\n") + + # Prints the architecture footer + f.write(" } },\n") # Prints the vendor-type combination footer f.write(" }\n },\n") diff --git a/scripts/database/database/defaults.py b/scripts/database/database/defaults.py index 3d11de34..6042c374 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -12,14 +12,6 @@ import clblast import bests -def set_default_device(section): - """Sets the device name and parameters to some default values""" - section["clblast_device_name"] = clblast.DEVICE_NAME_DEFAULT - section["clblast_device_compute_units"] = 0 - section["clblast_device_core_clock"] = 0 - return section - - def set_identifiers(database, group_by_attributes, identifier_name): """Sets a group-identifier based on a given set of attributes. Modifies the database but also returns a list of unique identifiers.""" @@ -55,32 +47,56 @@ def get_groups_by_identifier(database, group_identifiers, identifier_name): return groups -def calculate_defaults(database, verbose): - """Sets defaults for devices of the same type/vendor""" +def add_default_sections(database, grouping, verbose, values_dict, condition, enable_warning): + default_sections = [] - # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - group_identifiers = set_identifiers(database, clblast.GROUP_ATTRIBUTES, "group_identifier") + # Groups the database by a certain grouping + group_identifiers = set_identifiers(database, grouping, "group_identifier") groups = get_groups_by_identifier(database, group_identifiers, "group_identifier") # Loops over all groups - default_sections = {"sections": []} for group, group_identifier in groups: # Computes the best parameters - default_parameters = get_common_best_parameters(group, group_identifier, verbose) + default_parameters = get_common_best_parameters(group, group_identifier, verbose, enable_warning) + assert len(group) > 0 + if condition(group[0]): # Stores all the section's data - assert len(group) > 0 - default_section = {} - for attribute in group[0].keys(): - if attribute != "results" and attribute != "group_identifier": - default_section[attribute] = group[0][attribute] - default_section = set_default_device(default_section) - default_section["results"] = [{"time": 0.0, "parameters": default_parameters}] - default_sections["sections"].append(default_section) + default_section = {} + for attribute in group[0].keys(): + if attribute != "results" and attribute != "group_identifier": + default_section[attribute] = group[0][attribute] + default_section["clblast_device_compute_units"] = 0 + default_section["clblast_device_core_clock"] = 0 + for key in values_dict.keys(): + default_section[key] = values_dict[key] + default_section["results"] = [{"time": 0.0, "parameters": default_parameters}] + default_sections.append(default_section) + return default_sections - # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! This is to check for - # mis-matched arguments. + +def calculate_defaults(database, verbose): + """Sets defaults for devices of the same type/vendor""" + default_sections = {"sections": []} + + # Groups the database by kernel, vendor and device architecture (e.g. AMD GPU "Fiji") + architecture_group = clblast.GROUP_ATTRIBUTES + ["clblast_device_architecture"] + architecture_defaults = add_default_sections(database, architecture_group, verbose, + {"clblast_device_name": clblast.DEVICE_NAME_DEFAULT}, + lambda entry: True, enable_warning=False) + + # Groups the database by kernel, vendor and device type (e.g. AMD GPU) + device_defaults = add_default_sections(database, clblast.GROUP_ATTRIBUTES, verbose, + {"clblast_device_name": clblast.DEVICE_NAME_DEFAULT, + "clblast_device_architecture": clblast.DEVICE_ARCHITECTURE_DEFAULT}, + lambda entry: entry["clblast_device_architecture"] != "", + enable_warning=True) + default_sections["sections"].extend(device_defaults) + + # Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! + # This is to check for mis-matched arguments in the database. Note: this is not a check on the + # architecture defaults attributes = clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] group_identifiers = set_identifiers(default_sections, attributes, "temp_identifier") groups = get_groups_by_identifier(default_sections, group_identifiers, "temp_identifier") @@ -90,6 +106,9 @@ def calculate_defaults(database, verbose): assert len(group) == 1 remove_identifiers(default_sections, "temp_identifier") + # Adds the architecture defaults only after running the above check + default_sections["sections"].extend(architecture_defaults) + # Groups the database by kernel only group_identifiers = set_identifiers(database, clblast.KERNEL_ATTRIBUTES + ["kernel"], "group_identifier") groups = get_groups_by_identifier(database, group_identifiers, "group_identifier") @@ -98,7 +117,8 @@ def calculate_defaults(database, verbose): for group, group_identifier in groups: # Computes the best parameters - default_parameters = get_common_best_parameters(group, group_identifier, verbose) + default_parameters = get_common_best_parameters(group, group_identifier, verbose, + enable_warning=True) # Stores all the section's data assert len(group) > 0 @@ -106,9 +126,12 @@ def calculate_defaults(database, verbose): for attribute in group[0].keys(): if attribute != "results" and attribute != "group_identifier": default_section[attribute] = group[0][attribute] - default_section = set_default_device(default_section) + default_section["clblast_device_name"] = clblast.DEVICE_NAME_DEFAULT + default_section["clblast_device_architecture"] = clblast.DEVICE_ARCHITECTURE_DEFAULT default_section["clblast_device_vendor"] = clblast.VENDOR_DEFAULT default_section["clblast_device_type"] = clblast.DEVICE_TYPE_DEFAULT + default_section["clblast_device_compute_units"] = 0 + default_section["clblast_device_core_clock"] = 0 default_section["results"] = [{"time": 0.0, "parameters": default_parameters}] default_sections["sections"].append(default_section) @@ -143,7 +166,7 @@ def get_parameter_names(section): return [result["parameters"] for result in section["results"]] -def get_common_best_parameters(group, group_identifier, verbose): +def get_common_best_parameters(group, group_identifier, verbose, enable_warning): """Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve the smallest best execution time""" @@ -179,7 +202,8 @@ def get_common_best_parameters(group, group_identifier, verbose): # Fall back method in case there are no shared entries at all across devices if num_devices_common == 1: - print("[database] Warning: No common kernels for: " + str(group_identifier) + " at all") + if enable_warning: + print("[database] Warning: No common kernels for: " + str(group_identifier) + " at all") smallest_best_parameters = get_smallest_best_parameters(group) if verbose: print("[database] " + str(group_identifier)) diff --git a/src/clblast.cpp b/src/clblast.cpp index ba2feb05..1c1100c0 100644 --- a/src/clblast.cpp +++ b/src/clblast.cpp @@ -2521,7 +2521,8 @@ StatusCode OverrideParameters(const cl_device_id device, const std::string &kern // Creates a small custom database based on the provided parameters const auto database_device = database::DatabaseDevice{"default", parameter_values}; - const auto database_vendor = database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}}; + const auto database_architecture = database::DatabaseArchitecture{"default", {database_device}}; + const auto database_vendor = database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_architecture}}; const auto database_entry = database::DatabaseEntry{kernel_name, precision, parameter_names, {database_vendor}}; const auto database_entries = std::vector{database_entry}; const auto database = Database(device_cpp, kernel_name, precision, database_entries); diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp index c5434dd5..424e9e8b 100644 --- a/src/database/apple_cpu_fallback.hpp +++ b/src/database/apple_cpu_fallback.hpp @@ -23,46 +23,46 @@ namespace database { // ================================================================================================= const DatabaseEntry XaxpyApple = { - "Xaxpy", Precision::kAny, {"VW", "WGS", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { 8, 1, 4 } } } } } + "Xaxpy", Precision::kAny, {"VW", "WGS", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 8, 1, 4 } } } } } } } }; const DatabaseEntry XdotApple = { - "Xdot", Precision::kAny, {"WGS1", "WGS2"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1 } } } } } + "Xdot", Precision::kAny, {"WGS1", "WGS2"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1 } } } } } } } }; const DatabaseEntry XgemvApple = { - "Xgemv", Precision::kAny, {"WGS1", "WPT1", "UNROLL1"}, { { kDeviceTypeAll, "default", { { "default", { 1, 4, 1 } } } } } + "Xgemv", Precision::kAny, {"WGS1", "WPT1", "UNROLL1"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 4, 1 } } } } } } } }; const DatabaseEntry XgemvFastApple = { - "XgemvFast", Precision::kAny, {"VW2", "WGS2", "WPT2"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1 } } } } } + "XgemvFast", Precision::kAny, {"VW2", "WGS2", "WPT2"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1, 1 } } } } } } } }; const DatabaseEntry XgemvFastRotApple = { - "XgemvFastRot", Precision::kAny, {"VW3", "WGS3", "WPT3"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1 } } } } } + "XgemvFastRot", Precision::kAny, {"VW3", "WGS3", "WPT3"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1, 1 } } } } } } } }; const DatabaseEntry XgerApple = { - "Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { 64, 1, 2 } } } } } + "Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 64, 1, 2 } } } } } } } }; const DatabaseEntry XtrsvApple = { - "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { 32 } } } } } + "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 32 } } } } } } } }; const DatabaseEntry XgemmApple = { - "Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } + "Xgemm", Precision::kAny, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } } } }; const DatabaseEntry XgemmDirectApple = { - "XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1 } } } } } + "XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1, 1, 1, 1, 0, 0, 1, 1, 1 } } } } } } } }; const DatabaseEntry CopyApple = { - "Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1 } } } } } + "Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1, 1, 1 } } } } } } } }; const DatabaseEntry PadApple = { - "Pad", Precision::kAny, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { kDeviceTypeAll, "default", { { "default", { 1, 1, 1, 1 } } } } } + "Pad", Precision::kAny, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 1, 1, 1 } } } } } } } }; const DatabaseEntry TransposeApple = { - "Transpose", Precision::kAny, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 1, 0, 0, 1 } } } } } + "Transpose", Precision::kAny, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 1, 0, 0, 1 } } } } } } } }; const DatabaseEntry PadtransposeApple = { - "Padtranspose", Precision::kAny, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { 0, 1, 1 } } } } } + "Padtranspose", Precision::kAny, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 0, 1, 1 } } } } } } } }; const DatabaseEntry InvertApple = { - "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { 16 } } } } } + "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { "default", { 16 } } } } } } } }; // ================================================================================================= diff --git a/src/database/database.cpp b/src/database/database.cpp index 2d78e65c..9d9898e2 100644 --- a/src/database/database.cpp +++ b/src/database/database.cpp @@ -86,10 +86,8 @@ Database::Database(const Device &device, const std::string &kernel_name, const auto device_name = GetDeviceName(device); // Prints the obtained information in verbose mode - #ifdef VERBOSE - printf("[DEBUG] Device type '%s'; vendor '%s'\n", device_type.c_str(), device_vendor.c_str()); - printf("[DEBUG] Device name '%s'; architecture '%s'\n", device_name.c_str(), device_architecture.c_str()); - #endif + log_debug("Device type '" + device_type + "'; vendor '" + device_vendor + "'"); + log_debug("Device name '" + device_name + "'; architecture '" + device_architecture + "'"); // Sets the databases to search through const auto databases = std::list>{overlay, database}; @@ -172,14 +170,29 @@ database::Parameters Database::SearchVendorAndType(const std::string &target_ven const std::vector ¶meter_names) const { for (auto &vendor: vendors) { if ((vendor.name == target_vendor) && (vendor.type == target_type)) { + log_debug("Found architectures of vendor '" + target_vendor + "' and type '" + target_type + "'"); - // Searches the device; if unavailable, searches the architecture; if unavailable returns the - // vendor's default parameters - auto parameters = SearchDevice(this_device, vendor.devices, parameter_names); + // Searches the architecture; if unavailable returns the vendor's default parameters + auto parameters = SearchArchitecture(this_architecture, this_device, vendor.architectures, parameter_names); if (parameters.size() != 0) { return parameters; } - parameters = SearchDevice(this_architecture, vendor.devices, parameter_names); + return SearchArchitecture("default", this_device, vendor.architectures, parameter_names); + } + } + return database::Parameters(); +} + +database::Parameters Database::SearchArchitecture(const std::string &target_architecture, + const std::string &this_device, + const std::vector &architectures, + const std::vector ¶meter_names) const { + for (auto &architecture: architectures) { + if (architecture.name == target_architecture) { + log_debug("Found devices of architecture type '" + target_architecture + "'"); + + // Searches the device; if unavailable returns the architecture's default parameters + auto parameters = SearchDevice(this_device, architecture.devices, parameter_names); if (parameters.size() != 0) { return parameters; } - return SearchDevice("default", vendor.devices, parameter_names); + return SearchDevice("default", architecture.devices, parameter_names); } } return database::Parameters(); @@ -190,6 +203,7 @@ database::Parameters Database::SearchDevice(const std::string &target_device, const std::vector ¶meter_names) const { for (auto &device: devices) { if (device.name == target_device) { + log_debug("Found parameters for device type '" + target_device + "'"); // Sets the parameters accordingly auto parameters = database::Parameters(); diff --git a/src/database/database.hpp b/src/database/database.hpp index 7efcb7c4..e4764dea 100644 --- a/src/database/database.hpp +++ b/src/database/database.hpp @@ -64,8 +64,12 @@ class Database { const Precision this_precision, const std::vector &db) const; database::Parameters SearchDevice(const std::string &target_device, - const std::vector &devices, - const std::vector ¶meter_names) const; + const std::vector &devices, + const std::vector ¶meter_names) const; + database::Parameters SearchArchitecture(const std::string &target_architecture, + const std::string &this_device, + const std::vector &architectures, + const std::vector ¶meter_names) const; database::Parameters SearchVendorAndType(const std::string &target_vendor, const std::string &target_type, const std::string &this_device, const std::string &this_architecture, diff --git a/src/database/database_structure.hpp b/src/database/database_structure.hpp index d9ee95fb..35d73ffe 100644 --- a/src/database/database_structure.hpp +++ b/src/database/database_structure.hpp @@ -38,10 +38,14 @@ struct DatabaseDevice { const std::string name; const std::vector parameters; // parameter values }; +struct DatabaseArchitecture { + const std::string name; + const std::vector devices; +}; struct DatabaseVendor { const std::string type; const std::string name; - const std::vector devices; + const std::vector architectures; }; struct DatabaseEntry { const std::string kernel; diff --git a/src/database/kernel_selection.hpp b/src/database/kernel_selection.hpp index b492bd82..50d3c03e 100644 --- a/src/database/kernel_selection.hpp +++ b/src/database/kernel_selection.hpp @@ -22,17 +22,17 @@ const DatabaseEntry KernelSelectionHalf = { "KernelSelection", Precision::kHalf, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "default", { 1*1*1 } }, + { "default", { { "default", { 1*1*1 } } } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "default", { 1280*1280*1280 } }, + { "default", { { "default", { 1280*1280*1280 } } } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 512*512*512 } }, + { "default", { { "default", { 512*512*512 } } } }, } }, } @@ -44,22 +44,22 @@ const DatabaseEntry KernelSelectionSingle = { "KernelSelection", Precision::kSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "default", { 1*1*1 } }, + { "default", { { "default", { 1*1*1 } } } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "default", { 1280*1280*1280 } }, + { "default", { { "default", { 1280*1280*1280 } } } }, } }, { kDeviceTypeGPU, "ARM", { - { "default", { 128*128*128} }, + { "default", { { "default", { 128*128*128} } } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 512*512*512 } }, + { "default", { { "default", { 512*512*512 } } } }, } }, } @@ -71,17 +71,17 @@ const DatabaseEntry KernelSelectionComplexSingle = { "KernelSelection", Precision::kComplexSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "default", { 1*1*1 } }, + { "default", { { "default", { 1*1*1 } } } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "default", { 1280*1280*1280 } }, + { "default", { { "default", { 1280*1280*1280 } } } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 512*512*512 } }, + { "default", { { "default", { 512*512*512 } } } }, } }, } @@ -93,17 +93,17 @@ const DatabaseEntry KernelSelectionDouble = { "KernelSelection", Precision::kDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "default", { 1*1*1 } }, + { "default", { { "default", { 1*1*1 } } } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "default", { 1280*1280*1280 } }, + { "default", { { "default", { 1280*1280*1280 } } } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 512*512*512 } }, + { "default", { { "default", { 512*512*512 } } } }, } }, } @@ -115,17 +115,17 @@ const DatabaseEntry KernelSelectionComplexDouble = { "KernelSelection", Precision::kComplexDouble, {"XGEMM_MIN_INDIRECT_SIZE"}, { { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "default", { 1*1*1 } }, + { "default", { { "default", { 1*1*1 } } } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "default", { 1280*1280*1280 } }, + { "default", { { "default", { 1280*1280*1280 } } } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 512*512*512 } }, + { "default", { { "default", { 512*512*512 } } } }, } }, } diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp index dea61ca6..7f1c1825 100644 --- a/src/database/kernels/copy/copy_16.hpp +++ b/src/database/kernels/copy/copy_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry CopyHalf = { "Copy", Precision::kHalf, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 16, 8, 4, 4 } }, - { "default", { 16, 8, 4, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 8, 4, 4 } }, + { "default", { 16, 8, 4, 4 } }, + } }, + { "default", { + { "default", { 16, 8, 4, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } }, - { "default", { 8, 32, 4, 8 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 16, 8, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 4, 8 } }, + { "default", { 8, 32, 4, 8 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 16, 8, 4, 4 } }, + { "default", { + { "default", { 16, 8, 4, 4 } }, + } }, } }, } diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index 254c2b38..111e3f89 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -14,84 +14,145 @@ const DatabaseEntry CopySingle = { "Copy", Precision::kSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } }, - { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, - { "Ellesmere", { 8, 8, 4, 8 } }, - { "Fiji", { 16, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 2, 2 } }, - { "Oland", { 32, 8, 4, 2 } }, - { "Pitcairn", { 8, 16, 4, 1 } }, - { "Tahiti", { 32, 8, 2, 2 } }, - { "Tonga", { 32, 8, 4, 4 } }, - { "Turks", { 8, 8, 4, 2 } }, - { "default", { 8, 16, 4, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 8, 4, 8 } }, + { "default", { 8, 8, 4, 8 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 16, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 4, 1 } }, + { "default", { 16, 16, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 2, 2 } }, + { "default", { 32, 8, 2, 2 } }, + } }, + { "Oland", { + { "Oland", { 32, 8, 4, 2 } }, + { "default", { 32, 8, 4, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 8, 16, 4, 1 } }, + { "default", { 8, 16, 4, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 32, 8, 2, 2 } }, + { "default", { 32, 8, 2, 2 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 32, 8, 4, 4 } }, + { "default", { 32, 8, 4, 4 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 8, 8, 4, 2 } }, + { "default", { 8, 8, 4, 2 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, + { "default", { 16, 8, 2, 1 } }, + } }, + { "default", { + { "default", { 8, 16, 4, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 2, 4 } }, - { "default", { 32, 8, 2, 4 } }, + { "default", { + { "Mali-T628", { 32, 8, 2, 4 } }, + { "default", { 32, 8, 2, 4 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } }, - { "default", { 32, 16, 8, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 8, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 8, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 16, 8, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 2 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 1 } }, + { "default", { 32, 16, 8, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } }, - { "Iris", { 16, 8, 1, 2 } }, - { "Iris Pro", { 32, 8, 4, 4 } }, - { "default", { 8, 8, 2, 1 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 8, 8, 2, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 16, 4, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 16, 4, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 2, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 8, 4, 8 } }, + { "Iris", { 16, 8, 1, 2 } }, + { "Iris Pro", { 32, 8, 4, 4 } }, + { "default", { 8, 8, 2, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 4, 1 } }, - { "GeForce GT 650M", { 16, 16, 4, 2 } }, - { "GeForce GTX 1070", { 8, 16, 4, 1 } }, - { "GeForce GTX 1080", { 8, 32, 4, 1 } }, - { "GeForce GTX 480", { 8, 8, 4, 1 } }, - { "GeForce GTX 670", { 16, 32, 4, 1 } }, - { "GeForce GTX 680", { 32, 16, 4, 1 } }, - { "GeForce GTX 750", { 32, 8, 2, 2 } }, - { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } }, - { "GeForce GTX 980", { 32, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 2, 4 } }, - { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } }, - { "GeForce GTX TITAN X", { 32, 8, 1, 2 } }, - { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, - { "Tesla K20m", { 8, 8, 4, 4 } }, - { "Tesla K40m", { 8, 8, 4, 2 } }, - { "default", { 8, 32, 4, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 8, 8, 4, 1 } }, + { "default", { 8, 8, 4, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 8, 4, 1 } }, + { "GeForce GT 650M", { 16, 16, 4, 2 } }, + { "GeForce GTX 670", { 16, 32, 4, 1 } }, + { "GeForce GTX 680", { 32, 16, 4, 1 } }, + { "default", { 8, 16, 4, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 32, 8, 2, 4 } }, + { "GeForce GTX TITAN Black", { 8, 32, 4, 8 } }, + { "Tesla K20m", { 8, 8, 4, 4 } }, + { "Tesla K40m", { 8, 8, 4, 2 } }, + { "default", { 16, 16, 4, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 8, 2, 2 } }, + { "GeForce GTX 750 Ti", { 16, 32, 2, 2 } }, + { "default", { 32, 8, 2, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 32, 16, 1, 1 } }, + { "GeForce GTX TITAN X", { 32, 8, 1, 2 } }, + { "default", { 32, 16, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 16, 4, 1 } }, + { "GeForce GTX 1080", { 8, 32, 4, 1 } }, + { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, + { "default", { 8, 32, 4, 1 } }, + } }, + { "default", { + { "default", { 8, 32, 4, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 8, 4, 4 } }, + { "default", { + { "default", { 32, 8, 4, 4 } }, + } }, } }, } diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index 7af25017..985892ef 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -14,75 +14,134 @@ const DatabaseEntry CopyComplexSingle = { "Copy", Precision::kComplexSingle, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } }, - { "Ellesmere", { 16, 16, 1, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 16, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 8, 8, 2, 2 } }, - { "Tonga", { 8, 32, 1, 2 } }, - { "Turks", { 32, 8, 4, 1 } }, - { "default", { 16, 8, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 16, 1, 4 } }, + { "default", { 16, 16, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 8, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 8, 8, 1, 2 } }, + { "default", { 8, 8, 1, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 8, 8, 2, 2 } }, + { "default", { 8, 8, 2, 2 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 32, 1, 2 } }, + { "default", { 8, 32, 1, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 8, 8, 1, 1 } }, + { "default", { 8, 8, 1, 1 } }, + } }, + { "default", { + { "default", { 16, 8, 1, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 16, 4, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 16, 8, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 4, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 8, 2, 2 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } }, - { "Iris", { 16, 8, 1, 2 } }, - { "Iris Pro", { 32, 16, 1, 4 } }, - { "default", { 16, 8, 1, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 16, 8, 2, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 16, 2, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 32, 2, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 8, 2, 1 } }, + { "Iris", { 16, 8, 1, 2 } }, + { "Iris Pro", { 32, 16, 1, 4 } }, + { "default", { 16, 8, 1, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 1, 1 } }, - { "GeForce GTX 1070", { 16, 8, 1, 1 } }, - { "GeForce GTX 1080", { 32, 8, 1, 2 } }, - { "GeForce GTX 480", { 16, 16, 1, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 1 } }, - { "GeForce GTX 750", { 16, 8, 1, 2 } }, - { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, - { "GeForce GTX 980", { 8, 8, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 16, 2, 1 } }, - { "Tesla K20m", { 8, 8, 1, 4 } }, - { "Tesla K40m", { 16, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 16, 1, 1 } }, + { "default", { 16, 16, 1, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 8, 1, 1 } }, + { "GeForce GTX 670", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, + { "Tesla K20m", { 8, 8, 1, 4 } }, + { "Tesla K40m", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 16, 8, 1, 2 } }, + { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 8, 8, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 8, 1, 1 } }, + { "GeForce GTX 1080", { 32, 8, 1, 2 } }, + { "TITAN X (Pascal)", { 8, 16, 2, 1 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "default", { + { "default", { 32, 8, 1, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 16, 8, 1, 2 } }, + { "default", { + { "default", { 16, 8, 1, 2 } }, + } }, } }, } diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index 5c00407b..6f18f608 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -14,63 +14,114 @@ const DatabaseEntry CopyDouble = { "Copy", Precision::kDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "Ellesmere", { 32, 8, 1, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 32, 8, 2, 8 } }, - { "Pitcairn", { 32, 8, 1, 1 } }, - { "Tahiti", { 8, 32, 2, 1 } }, - { "Tonga", { 8, 32, 2, 4 } }, - { "default", { 16, 8, 2, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 8, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 32, 8, 2, 8 } }, + { "default", { 32, 8, 2, 8 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 8, 32, 2, 1 } }, + { "default", { 8, 32, 2, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 32, 2, 4 } }, + { "default", { 8, 32, 2, 4 } }, + } }, + { "default", { + { "default", { 16, 8, 2, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 8, 8, 2 } }, - { "default", { 16, 8, 8, 2 } }, + { "default", { + { "Mali-T628", { 16, 8, 8, 2 } }, + { "default", { 16, 8, 8, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } }, - { "default", { 16, 8, 8, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 32, 8, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 16, 8, 8, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 8, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 2, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 32, 8, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 16, 8, 1 } }, + { "default", { 16, 8, 8, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } }, - { "default", { 8, 8, 8, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 8, 8, 8, 1 } }, + { "default", { 8, 8, 8, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 16, 2, 1 } }, - { "GeForce GTX 1070", { 8, 8, 4, 1 } }, - { "GeForce GTX 1080", { 8, 8, 4, 1 } }, - { "GeForce GTX 480", { 8, 8, 2, 1 } }, - { "GeForce GTX 670", { 8, 8, 2, 1 } }, - { "GeForce GTX 680", { 16, 32, 2, 1 } }, - { "GeForce GTX 750", { 8, 16, 2, 1 } }, - { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } }, - { "GeForce GTX 980", { 32, 8, 2, 1 } }, - { "GeForce GTX TITAN", { 16, 32, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } }, - { "GeForce GTX TITAN X", { 32, 16, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 8, 2, 2 } }, - { "Tesla K20m", { 8, 8, 2, 1 } }, - { "Tesla K40m", { 8, 8, 2, 2 } }, - { "default", { 32, 32, 2, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 8, 8, 2, 1 } }, + { "default", { 8, 8, 2, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 32, 16, 2, 1 } }, + { "GeForce GTX 670", { 8, 8, 2, 1 } }, + { "GeForce GTX 680", { 16, 32, 2, 1 } }, + { "default", { 32, 32, 2, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 32, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 8, 2, 8 } }, + { "Tesla K20m", { 8, 8, 2, 1 } }, + { "Tesla K40m", { 8, 8, 2, 2 } }, + { "default", { 32, 16, 2, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 8, 16, 2, 1 } }, + { "GeForce GTX 750 Ti", { 16, 8, 2, 1 } }, + { "default", { 16, 8, 2, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 32, 8, 2, 1 } }, + { "GeForce GTX TITAN X", { 32, 16, 1, 1 } }, + { "default", { 32, 16, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 8, 4, 1 } }, + { "GeForce GTX 1080", { 8, 8, 4, 1 } }, + { "TITAN X (Pascal)", { 8, 8, 2, 2 } }, + { "default", { 8, 8, 4, 1 } }, + } }, + { "default", { + { "default", { 32, 32, 2, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 16, 8, 2, 1 } }, + { "default", { + { "default", { 16, 8, 2, 1 } }, + } }, } }, } diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index c7f74855..ce405135 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -14,63 +14,114 @@ const DatabaseEntry CopyComplexDouble = { "Copy", Precision::kComplexDouble, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } }, - { "Ellesmere", { 8, 32, 1, 2 } }, - { "Fiji", { 8, 16, 1, 1 } }, - { "Hawaii", { 32, 8, 2, 8 } }, - { "Oland", { 8, 16, 1, 1 } }, - { "Pitcairn", { 16, 8, 1, 1 } }, - { "Tahiti", { 8, 16, 1, 1 } }, - { "Tonga", { 16, 8, 2, 1 } }, - { "default", { 8, 16, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 32, 1, 2 } }, + { "default", { 8, 32, 1, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 8, 16, 1, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 2, 8 } }, + { "default", { 32, 8, 2, 8 } }, + } }, + { "Oland", { + { "Oland", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 8, 2, 1 } }, + { "default", { 16, 8, 2, 1 } }, + } }, + { "default", { + { "default", { 8, 16, 1, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 2 } }, - { "default", { 32, 8, 1, 2 } }, + { "default", { + { "Mali-T628", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } }, - { "default", { 16, 8, 8, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 8, 8, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 8, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 8, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 8, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 8, 8, 1 } }, + { "default", { 16, 8, 8, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, - { "default", { 32, 8, 8, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 1 } }, + { "default", { 32, 8, 8, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 8, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 32, 1, 4 } }, - { "GeForce GTX 1080", { 8, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 1 } }, - { "GeForce GTX 680", { 8, 8, 1, 1 } }, - { "GeForce GTX 750", { 32, 8, 1, 1 } }, - { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } }, - { "GeForce GTX 980", { 8, 8, 1, 1 } }, - { "GeForce GTX TITAN", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 8, 1, 2 } }, - { "Tesla K20m", { 8, 8, 1, 2 } }, - { "Tesla K40m", { 8, 8, 1, 1 } }, - { "default", { 8, 8, 1, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 8, 8, 1, 1 } }, + { "GeForce GTX 670", { 16, 8, 1, 1 } }, + { "GeForce GTX 680", { 8, 8, 1, 1 } }, + { "default", { 8, 8, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 16, 1, 1 } }, + { "GeForce GTX TITAN Black", { 8, 8, 1, 2 } }, + { "Tesla K20m", { 8, 8, 1, 2 } }, + { "Tesla K40m", { 8, 8, 1, 1 } }, + { "default", { 8, 8, 1, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 8, 1, 1 } }, + { "GeForce GTX 750 Ti", { 16, 16, 1, 1 } }, + { "default", { 16, 16, 1, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 8, 8, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 32, 1, 4 } }, + { "GeForce GTX 1080", { 8, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 8, 1, 2 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "default", { + { "default", { 8, 8, 1, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 16, 8, 1, 1 } }, + { "default", { + { "default", { 16, 8, 1, 1 } }, + } }, } }, } diff --git a/src/database/kernels/invert.hpp b/src/database/kernels/invert.hpp index e736c864..d6e316b2 100644 --- a/src/database/kernels/invert.hpp +++ b/src/database/kernels/invert.hpp @@ -19,7 +19,7 @@ const DatabaseEntry InvertHalf = { "Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { "default", { 16 } } } }, } }, } @@ -31,7 +31,7 @@ const DatabaseEntry InvertSingle = { "Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { "default", { 16 } } } }, } }, } @@ -43,7 +43,7 @@ const DatabaseEntry InvertComplexSingle = { "Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { "default", { 16 } } } }, } }, } @@ -55,7 +55,7 @@ const DatabaseEntry InvertDouble = { "Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { "default", { 16 } } } }, } }, } @@ -67,7 +67,7 @@ const DatabaseEntry InvertComplexDouble = { "Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 16 } }, + { "default", { { "default", { 16 } } } }, } }, } diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp index 8f31c31e..20fe1716 100644 --- a/src/database/kernels/pad/pad_16.hpp +++ b/src/database/kernels/pad/pad_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry PadHalf = { "Pad", Precision::kHalf, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 16, 8, 1, 2 } }, - { "default", { 16, 8, 1, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 8, 1, 2 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "default", { + { "default", { 16, 8, 1, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } }, - { "default", { 8, 8, 2, 1 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 4, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 32, 2, 2 } }, + { "default", { 8, 8, 2, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } }, - { "default", { 16, 8, 4, 2 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 16, 8, 4, 2 } }, + { "default", { 16, 8, 4, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 8, 4, 1 } }, + { "default", { + { "default", { 8, 8, 4, 1 } }, + } }, } }, } diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index eda85e8b..ee00d2a1 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -14,84 +14,145 @@ const DatabaseEntry PadSingle = { "Pad", Precision::kSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } }, - { "Ellesmere", { 32, 8, 2, 2 } }, - { "Fiji", { 16, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 4 } }, - { "Oland", { 8, 8, 1, 2 } }, - { "Pitcairn", { 32, 8, 1, 2 } }, - { "Tahiti", { 32, 8, 1, 2 } }, - { "Tonga", { 16, 16, 2, 2 } }, - { "Turks", { 32, 8, 2, 1 } }, - { "default", { 8, 16, 1, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 8, 2, 2 } }, + { "default", { 32, 8, 2, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 16, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } }, + { "Oland", { + { "Oland", { 8, 8, 1, 2 } }, + { "default", { 8, 8, 1, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 16, 2, 2 } }, + { "default", { 16, 16, 2, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 2, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 8, 16, 2, 1 } }, + { "default", { 8, 16, 2, 1 } }, + } }, + { "default", { + { "default", { 8, 16, 1, 2 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 4 } }, - { "default", { 32, 8, 1, 4 } }, + { "default", { + { "Mali-T628", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32, 4, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 16, 4, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 16, 4, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } }, - { "Iris", { 32, 16, 2, 1 } }, - { "Iris Pro", { 16, 8, 2, 1 } }, - { "default", { 32, 8, 4, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 32, 8, 2, 4 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 2, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 8, 1, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 8, 4, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 4, 2 } }, + { "Iris", { 32, 16, 2, 1 } }, + { "Iris Pro", { 16, 8, 2, 1 } }, + { "default", { 32, 8, 4, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } }, - { "default", { 32, 16, 2, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 16, 2, 1 } }, + { "default", { 32, 16, 2, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 2, 1 } }, - { "GeForce GT 650M", { 32, 16, 2, 2 } }, - { "GeForce GTX 1070", { 16, 8, 1, 1 } }, - { "GeForce GTX 1080", { 16, 8, 1, 1 } }, - { "GeForce GTX 480", { 32, 8, 1, 4 } }, - { "GeForce GTX 670", { 32, 8, 2, 2 } }, - { "GeForce GTX 680", { 16, 8, 4, 1 } }, - { "GeForce GTX 750", { 32, 16, 4, 2 } }, - { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } }, - { "GeForce GTX 980", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 2, 1 } }, - { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 16, 1, 1 } }, - { "TITAN X (Pascal)", { 16, 8, 1, 2 } }, - { "Tesla K20m", { 32, 8, 2, 1 } }, - { "Tesla K40m", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 4, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } }, + { "SM3.0", { + { "GRID K520", { 32, 8, 2, 1 } }, + { "GeForce GT 650M", { 32, 16, 2, 2 } }, + { "GeForce GTX 670", { 32, 8, 2, 2 } }, + { "GeForce GTX 680", { 16, 8, 4, 1 } }, + { "default", { 32, 8, 2, 4 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 32, 8, 2, 1 } }, + { "GeForce GTX TITAN Black", { 32, 8, 1, 2 } }, + { "Tesla K20m", { 32, 8, 2, 1 } }, + { "Tesla K40m", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 16, 4, 2 } }, + { "GeForce GTX 750 Ti", { 16, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 8, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 16, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 8, 1, 1 } }, + { "GeForce GTX 1080", { 16, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 16, 8, 1, 2 } }, + { "default", { 16, 32, 1, 2 } }, + } }, + { "default", { + { "default", { 32, 8, 4, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 2, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 2, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 8, 2, 1 } }, + { "default", { + { "default", { 32, 8, 2, 1 } }, + } }, } }, } diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index bc6ee662..62bfb112 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -14,83 +14,144 @@ const DatabaseEntry PadComplexSingle = { "Pad", Precision::kComplexSingle, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, - { "Ellesmere", { 16, 16, 2, 4 } }, - { "Fiji", { 16, 8, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 32, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 16, 16, 1, 1 } }, - { "Tonga", { 16, 8, 1, 2 } }, - { "Turks", { 16, 8, 4, 4 } }, - { "default", { 16, 8, 1, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 16, 2, 4 } }, + { "default", { 16, 16, 2, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 8, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 8, 32, 1, 1 } }, + { "default", { 8, 32, 1, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 8, 8, 1, 2 } }, + { "default", { 8, 8, 1, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 16, 16, 1, 1 } }, + { "default", { 16, 16, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 8, 1, 2 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 16, 8, 4, 4 } }, + { "default", { 16, 8, 4, 4 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 16, 8, 2, 1 } }, + { "default", { 16, 8, 2, 1 } }, + } }, + { "default", { + { "default", { 16, 8, 1, 2 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 1, 4 } }, - { "default", { 32, 8, 1, 4 } }, + { "default", { + { "Mali-T628", { 32, 8, 1, 4 } }, + { "default", { 32, 8, 1, 4 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } }, - { "default", { 32, 8, 4, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 1, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 8, 2, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 16, 4, 1 } }, + { "default", { 32, 8, 4, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } }, - { "Iris", { 32, 16, 2, 4 } }, - { "Iris Pro", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 1, 4 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 8, 8, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 8, 1, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 8, 1, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 8, 1, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 8, 1, 1 } }, + { "Iris", { 32, 16, 2, 4 } }, + { "Iris Pro", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 1, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 16, 1, 1 } }, - { "GeForce GTX 1070", { 8, 32, 1, 1 } }, - { "GeForce GTX 1080", { 32, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 2, 1 } }, - { "GeForce GTX 670", { 16, 8, 1, 2 } }, - { "GeForce GTX 680", { 16, 32, 1, 2 } }, - { "GeForce GTX 750", { 32, 8, 2, 1 } }, - { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } }, - { "GeForce GTX 980", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 2, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 32, 1, 2 } }, - { "Tesla K20m", { 32, 8, 1, 2 } }, - { "Tesla K40m", { 16, 8, 1, 1 } }, - { "default", { 32, 8, 1, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 8, 2, 1 } }, + { "default", { 16, 8, 2, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 16, 1, 1 } }, + { "GeForce GTX 670", { 16, 8, 1, 2 } }, + { "GeForce GTX 680", { 16, 32, 1, 2 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 8, 2, 1 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 2 } }, + { "Tesla K20m", { 32, 8, 1, 2 } }, + { "Tesla K40m", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 8, 2, 1 } }, + { "GeForce GTX 750 Ti", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 16, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "default", { 16, 16, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 32, 1, 1 } }, + { "GeForce GTX 1080", { 32, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 32, 32, 1, 2 } }, + { "default", { 16, 8, 4, 1 } }, + } }, + { "default", { + { "default", { 32, 8, 1, 2 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, + { "default", { + { "default", { 32, 8, 1, 1 } }, + } }, } }, } diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index 94008efe..a8478616 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -14,63 +14,114 @@ const DatabaseEntry PadDouble = { "Pad", Precision::kDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, - { "Ellesmere", { 8, 32, 2, 1 } }, - { "Fiji", { 8, 16, 1, 2 } }, - { "Hawaii", { 32, 8, 1, 2 } }, - { "Oland", { 8, 32, 1, 1 } }, - { "Pitcairn", { 8, 8, 1, 2 } }, - { "Tahiti", { 32, 8, 1, 1 } }, - { "Tonga", { 32, 8, 4, 1 } }, - { "default", { 16, 16, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 32, 2, 1 } }, + { "default", { 8, 32, 2, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 8, 16, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 1, 1 } }, + { "default", { 8, 16, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 1, 2 } }, + { "default", { 32, 8, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 8, 32, 1, 1 } }, + { "default", { 8, 32, 1, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 8, 8, 1, 2 } }, + { "default", { 8, 8, 1, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } }, + { "default", { + { "default", { 16, 16, 1, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 8, 4, 2 } }, - { "default", { 32, 8, 4, 2 } }, + { "default", { + { "Mali-T628", { 32, 8, 4, 2 } }, + { "default", { 32, 8, 4, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, - { "default", { 32, 16, 4, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 8, 4, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 4, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 16, 2, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, + { "default", { 32, 16, 4, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, - { "default", { 32, 8, 1, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 8, 1, 1 } }, - { "GeForce GTX 1080", { 32, 32, 2, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 16, 16, 2, 1 } }, - { "GeForce GTX 680", { 32, 32, 1, 2 } }, - { "GeForce GTX 750", { 32, 16, 1, 1 } }, - { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } }, - { "GeForce GTX 980", { 8, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 32, 8, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, - { "Tesla K20m", { 32, 8, 1, 1 } }, - { "Tesla K40m", { 16, 8, 1, 2 } }, - { "default", { 32, 8, 1, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 32, 8, 1, 1 } }, + { "GeForce GTX 670", { 16, 16, 2, 1 } }, + { "GeForce GTX 680", { 32, 32, 1, 2 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 32, 8, 1, 1 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 1 } }, + { "Tesla K20m", { 32, 8, 1, 1 } }, + { "Tesla K40m", { 16, 8, 1, 2 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 16, 1, 1 } }, + { "GeForce GTX 750 Ti", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 8, 16, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 8, 1, 1 } }, + { "GeForce GTX 1080", { 32, 32, 2, 1 } }, + { "TITAN X (Pascal)", { 8, 32, 4, 1 } }, + { "default", { 32, 32, 2, 1 } }, + } }, + { "default", { + { "default", { 32, 8, 1, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, + { "default", { + { "default", { 32, 8, 1, 1 } }, + } }, } }, } diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 43c5a8e0..384a09f0 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -14,63 +14,114 @@ const DatabaseEntry PadComplexDouble = { "Pad", Precision::kComplexDouble, {"PAD_DIMX", "PAD_DIMY", "PAD_WPTX", "PAD_WPTY"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } }, - { "Ellesmere", { 8, 16, 1, 2 } }, - { "Fiji", { 32, 8, 2, 1 } }, - { "Hawaii", { 32, 8, 1, 1 } }, - { "Oland", { 8, 16, 2, 1 } }, - { "Pitcairn", { 16, 8, 1, 1 } }, - { "Tahiti", { 8, 16, 1, 1 } }, - { "Tonga", { 8, 16, 1, 1 } }, - { "default", { 8, 16, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 16, 1, 2 } }, + { "default", { 8, 16, 1, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 8, 2, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 16, 8, 1, 1 } }, + { "default", { 32, 8, 2, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "Oland", { + { "Oland", { 8, 16, 2, 1 } }, + { "default", { 8, 16, 2, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 16, 1, 1 } }, + { "default", { 8, 16, 1, 1 } }, + } }, + { "default", { + { "default", { 8, 16, 1, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 8, 4, 1 } }, - { "default", { 16, 8, 4, 1 } }, + { "default", { + { "Mali-T628", { 16, 8, 4, 1 } }, + { "default", { 16, 8, 4, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, - { "default", { 32, 8, 2, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 16, 4, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 2, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 8, 2, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 16, 32, 4, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32, 2, 2 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 2, 1 } }, + { "default", { 32, 8, 2, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, - { "default", { 32, 8, 4, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 4, 1 } }, + { "default", { 32, 8, 4, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 8, 8, 1, 1 } }, - { "GeForce GTX 1070", { 8, 8, 2, 2 } }, - { "GeForce GTX 1080", { 8, 8, 1, 1 } }, - { "GeForce GTX 480", { 16, 8, 1, 1 } }, - { "GeForce GTX 670", { 32, 8, 1, 1 } }, - { "GeForce GTX 680", { 8, 8, 1, 1 } }, - { "GeForce GTX 750", { 8, 8, 1, 1 } }, - { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, - { "GeForce GTX 980", { 16, 16, 1, 1 } }, - { "GeForce GTX TITAN", { 8, 32, 1, 2 } }, - { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } }, - { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, - { "TITAN X (Pascal)", { 8, 16, 1, 1 } }, - { "Tesla K20m", { 8, 8, 1, 2 } }, - { "Tesla K40m", { 8, 8, 1, 1 } }, - { "default", { 16, 8, 1, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 8, 8, 1, 1 } }, + { "GeForce GTX 670", { 32, 8, 1, 1 } }, + { "GeForce GTX 680", { 8, 8, 1, 1 } }, + { "default", { 32, 8, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 8, 32, 1, 2 } }, + { "GeForce GTX TITAN Black", { 16, 8, 1, 4 } }, + { "Tesla K20m", { 8, 8, 1, 2 } }, + { "Tesla K40m", { 8, 8, 1, 1 } }, + { "default", { 16, 32, 1, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 8, 8, 1, 1 } }, + { "GeForce GTX 750 Ti", { 16, 32, 1, 1 } }, + { "default", { 16, 32, 1, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 16, 1, 1 } }, + { "GeForce GTX TITAN X", { 16, 8, 1, 1 } }, + { "default", { 16, 8, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 8, 2, 2 } }, + { "GeForce GTX 1080", { 8, 8, 1, 1 } }, + { "TITAN X (Pascal)", { 8, 16, 1, 1 } }, + { "default", { 16, 32, 2, 1 } }, + } }, + { "default", { + { "default", { 16, 8, 1, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 8, 1, 1 } }, + { "default", { + { "default", { 32, 8, 1, 1 } }, + } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp index e815ced0..d47dfe86 100644 --- a/src/database/kernels/padtranspose/padtranspose_16.hpp +++ b/src/database/kernels/padtranspose/padtranspose_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry PadtransposeHalf = { "Padtranspose", Precision::kHalf, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 0, 16, 4 } }, - { "default", { 0, 16, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "default", { + { "default", { 0, 16, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } }, - { "default", { 0, 8, 1 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 8, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 8, 2 } }, + { "default", { 0, 8, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 8 } }, - { "default", { 0, 8, 8 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 0, 8, 8 } }, + { "default", { 0, 8, 8 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 0, 8, 1 } }, + { "default", { + { "default", { 0, 8, 1 } }, + } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index ca04b01e..8d40c65c 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -14,83 +14,144 @@ const DatabaseEntry PadtransposeSingle = { "Padtranspose", Precision::kSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "ATI Radeon HD 6750M", { 1, 16, 1 } }, - { "Ellesmere", { 1, 8, 4 } }, - { "Fiji", { 0, 16, 2 } }, - { "Hawaii", { 1, 16, 4 } }, - { "Oland", { 0, 16, 4 } }, - { "Pitcairn", { 0, 16, 4 } }, - { "Tahiti", { 0, 16, 4 } }, - { "Tonga", { 0, 16, 2 } }, - { "Turks", { 1, 16, 1 } }, - { "default", { 0, 16, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 8, 4 } }, + { "default", { 1, 8, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 0, 16, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 16, 4 } }, + { "default", { 1, 16, 4 } }, + } }, + { "Oland", { + { "Oland", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "default", { + { "default", { 0, 16, 4 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 8, 2 } }, - { "default", { 0, 8, 2 } }, + { "default", { + { "Mali-T628", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } }, - { "default", { 0, 8, 8 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 0, 16, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 32, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 32, 1 } }, + { "default", { 0, 8, 8 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } }, - { "Iris", { 1, 16, 2 } }, - { "Iris Pro", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 16, 2 } }, + { "Iris", { 1, 16, 2 } }, + { "Iris Pro", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } }, - { "default", { 0, 16, 2 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 32, 2 } }, - { "GeForce GTX 1070", { 0, 16, 1 } }, - { "GeForce GTX 1080", { 1, 16, 2 } }, - { "GeForce GTX 480", { 1, 16, 2 } }, - { "GeForce GTX 670", { 1, 32, 2 } }, - { "GeForce GTX 680", { 1, 16, 2 } }, - { "GeForce GTX 750", { 1, 32, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 2 } }, - { "GeForce GTX TITAN Black", { 1, 32, 2 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 16, 2 } }, - { "Tesla K20m", { 1, 16, 2 } }, - { "Tesla K40m", { 1, 32, 2 } }, - { "default", { 1, 32, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 32, 2 } }, + { "GeForce GTX 670", { 1, 32, 2 } }, + { "GeForce GTX 680", { 1, 16, 2 } }, + { "default", { 1, 32, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 16, 2 } }, + { "GeForce GTX TITAN Black", { 1, 32, 2 } }, + { "Tesla K20m", { 1, 16, 2 } }, + { "Tesla K40m", { 1, 32, 2 } }, + { "default", { 1, 16, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 32, 2 } }, + { "GeForce GTX 750 Ti", { 1, 32, 2 } }, + { "default", { 1, 32, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 0, 16, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "default", { 1, 32, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 0, 16, 1 } }, + { "GeForce GTX 1080", { 1, 16, 2 } }, + { "TITAN X (Pascal)", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } }, + { "default", { + { "default", { 1, 32, 2 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 2 } }, - { "default", { 0, 8, 2 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 16, 2 } }, + { "default", { + { "default", { 1, 16, 2 } }, + } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index bc9425da..889f5154 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -14,83 +14,144 @@ const DatabaseEntry PadtransposeComplexSingle = { "Padtranspose", Precision::kComplexSingle, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "ATI Radeon HD 6750M", { 1, 16, 1 } }, - { "Ellesmere", { 0, 8, 4 } }, - { "Fiji", { 1, 16, 2 } }, - { "Hawaii", { 0, 16, 2 } }, - { "Oland", { 0, 8, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 0, 16, 2 } }, - { "Tonga", { 0, 16, 2 } }, - { "Turks", { 0, 16, 4 } }, - { "default", { 0, 8, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 16, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, + { "Oland", { + { "Oland", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "default", { + { "default", { 0, 8, 4 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, + { "default", { + { "Mali-T628", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } }, - { "default", { 0, 8, 8 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 8, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 0, 8, 4 } }, + { "default", { 0, 8, 8 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } }, - { "Iris", { 0, 16, 2 } }, - { "Iris Pro", { 1, 16, 2 } }, - { "default", { 1, 16, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 1, 16, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 0, 16, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 16, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 0, 16, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 0, 16, 4 } }, + { "Iris", { 0, 16, 2 } }, + { "Iris Pro", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 0, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 16, 1 } }, - { "GeForce GTX 750", { 1, 16, 2 } }, - { "GeForce GTX 750 Ti", { 1, 16, 1 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 8, 1 } }, - { "Tesla K20m", { 0, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 16, 1 } }, + { "GeForce GTX 670", { 1, 16, 1 } }, + { "GeForce GTX 680", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 16, 1 } }, + { "GeForce GTX TITAN Black", { 0, 16, 1 } }, + { "Tesla K20m", { 0, 16, 1 } }, + { "Tesla K40m", { 1, 16, 1 } }, + { "default", { 0, 16, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 16, 2 } }, + { "GeForce GTX 750 Ti", { 1, 16, 1 } }, + { "default", { 1, 16, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 0, 16, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "default", { 1, 32, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 16, 1 } }, + { "GeForce GTX 1080", { 0, 8, 1 } }, + { "TITAN X (Pascal)", { 1, 8, 1 } }, + { "default", { 1, 8, 1 } }, + } }, + { "default", { + { "default", { 1, 16, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 0, 8, 4 } }, - { "default", { 0, 8, 4 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 8, 2 } }, + { "default", { + { "default", { 1, 8, 2 } }, + } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index bdfe9788..bfe40758 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -14,63 +14,114 @@ const DatabaseEntry PadtransposeDouble = { "Padtranspose", Precision::kDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, - { "Ellesmere", { 0, 16, 4 } }, - { "Fiji", { 0, 16, 2 } }, - { "Hawaii", { 0, 16, 2 } }, - { "Oland", { 0, 16, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 1, 16, 2 } }, - { "Tonga", { 0, 8, 2 } }, - { "default", { 0, 16, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 0, 16, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, + { "Oland", { + { "Oland", { 0, 16, 4 } }, + { "default", { 0, 16, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 16, 2 } }, + { "default", { 1, 16, 2 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } }, + { "default", { + { "default", { 0, 16, 4 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 16, 2 } }, - { "default", { 0, 16, 2 } }, + { "default", { + { "Mali-T628", { 0, 16, 2 } }, + { "default", { 0, 16, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } }, - { "default", { 1, 8, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 0, 64, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 32, 1 } }, + { "default", { 1, 8, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, - { "default", { 0, 16, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, + { "default", { 0, 16, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 0, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 16, 1 } }, - { "GeForce GTX 750", { 1, 16, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 1, 32, 1 } }, - { "GeForce GTX TITAN", { 0, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 0, 8, 1 } }, - { "Tesla K20m", { 0, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 16, 1 } }, + { "GeForce GTX 670", { 1, 16, 1 } }, + { "GeForce GTX 680", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 0, 16, 1 } }, + { "GeForce GTX TITAN Black", { 0, 16, 1 } }, + { "Tesla K20m", { 0, 16, 1 } }, + { "Tesla K40m", { 1, 16, 1 } }, + { "default", { 0, 16, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 16, 2 } }, + { "GeForce GTX 750 Ti", { 1, 32, 2 } }, + { "default", { 1, 32, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 32, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "default", { 1, 32, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 16, 1 } }, + { "GeForce GTX 1080", { 0, 8, 1 } }, + { "TITAN X (Pascal)", { 0, 8, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "default", { + { "default", { 1, 16, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 16, 2 } }, + { "default", { + { "default", { 1, 16, 2 } }, + } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index c839ab2c..e201864a 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -14,63 +14,114 @@ const DatabaseEntry PadtransposeComplexDouble = { "Padtranspose", Precision::kComplexDouble, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } }, - { "Ellesmere", { 0, 8, 4 } }, - { "Fiji", { 0, 8, 2 } }, - { "Hawaii", { 0, 8, 4 } }, - { "Oland", { 0, 8, 4 } }, - { "Pitcairn", { 0, 8, 4 } }, - { "Tahiti", { 0, 8, 2 } }, - { "Tonga", { 0, 8, 2 } }, - { "default", { 0, 8, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 0, 8, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Oland", { + { "Oland", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 0, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 0, 8, 2 } }, + { "default", { 0, 8, 2 } }, + } }, + { "default", { + { "default", { 0, 8, 4 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 0, 8, 1 } }, - { "default", { 0, 8, 1 } }, + { "default", { + { "Mali-T628", { 0, 8, 1 } }, + { "default", { 0, 8, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } }, - { "default", { 0, 8, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 0, 8, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 8, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 8, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 0, 8, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 8, 4 } }, + { "default", { 0, 8, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, - { "default", { 0, 16, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 0, 16, 1 } }, + { "default", { 0, 16, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 16, 1 } }, - { "GeForce GTX 1070", { 1, 16, 1 } }, - { "GeForce GTX 1080", { 1, 8, 1 } }, - { "GeForce GTX 480", { 1, 16, 1 } }, - { "GeForce GTX 670", { 1, 16, 1 } }, - { "GeForce GTX 680", { 1, 32, 1 } }, - { "GeForce GTX 750", { 1, 16, 1 } }, - { "GeForce GTX 750 Ti", { 1, 8, 2 } }, - { "GeForce GTX 980", { 0, 16, 1 } }, - { "GeForce GTX TITAN", { 1, 16, 1 } }, - { "GeForce GTX TITAN Black", { 0, 16, 1 } }, - { "GeForce GTX TITAN X", { 1, 32, 1 } }, - { "TITAN X (Pascal)", { 1, 8, 1 } }, - { "Tesla K20m", { 1, 16, 1 } }, - { "Tesla K40m", { 1, 16, 1 } }, - { "default", { 1, 16, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 16, 1 } }, + { "GeForce GTX 670", { 1, 16, 1 } }, + { "GeForce GTX 680", { 1, 32, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 16, 1 } }, + { "GeForce GTX TITAN Black", { 0, 16, 1 } }, + { "Tesla K20m", { 1, 16, 1 } }, + { "Tesla K40m", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 16, 1 } }, + { "GeForce GTX 750 Ti", { 1, 8, 2 } }, + { "default", { 1, 16, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 0, 16, 1 } }, + { "GeForce GTX TITAN X", { 1, 32, 1 } }, + { "default", { 0, 16, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 16, 1 } }, + { "GeForce GTX 1080", { 1, 8, 1 } }, + { "TITAN X (Pascal)", { 1, 8, 1 } }, + { "default", { 1, 8, 1 } }, + } }, + { "default", { + { "default", { 1, 16, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 0, 8, 2 } }, + { "default", { + { "default", { 0, 8, 2 } }, + } }, } }, } diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp index 016788dc..9ba21ef2 100644 --- a/src/database/kernels/transpose/transpose_16.hpp +++ b/src/database/kernels/transpose/transpose_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry TransposeHalf = { "Transpose", Precision::kHalf, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 4, 0, 1, 8 } }, - { "default", { 4, 0, 1, 8 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 4, 0, 1, 8 } }, + { "default", { 4, 0, 1, 8 } }, + } }, + { "default", { + { "default", { 4, 0, 1, 8 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } }, - { "default", { 8, 1, 0, 8 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 1, 1, 8 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 0, 4 } }, + { "default", { 8, 1, 0, 8 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } }, - { "default", { 8, 0, 0, 4 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 8, 0, 0, 4 } }, + { "default", { 8, 0, 0, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 0, 1, 8 } }, + { "default", { + { "default", { 8, 0, 1, 8 } }, + } }, } }, } diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index abbe8e19..902b7ecd 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -14,84 +14,145 @@ const DatabaseEntry TransposeSingle = { "Transpose", Precision::kSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } }, - { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } }, - { "Ellesmere", { 16, 0, 1, 4 } }, - { "Fiji", { 16, 0, 1, 2 } }, - { "Hawaii", { 4, 0, 1, 8 } }, - { "Oland", { 8, 0, 1, 4 } }, - { "Pitcairn", { 16, 0, 1, 1 } }, - { "Tahiti", { 4, 0, 1, 4 } }, - { "Tonga", { 8, 1, 1, 2 } }, - { "Turks", { 8, 0, 1, 2 } }, - { "default", { 8, 0, 1, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 0, 1, 4 } }, + { "default", { 16, 0, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 0, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 8 } }, + { "default", { 8, 0, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 4, 0, 1, 8 } }, + { "default", { 4, 0, 1, 8 } }, + } }, + { "Oland", { + { "Oland", { 8, 0, 1, 4 } }, + { "default", { 8, 0, 1, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 4, 0, 1, 4 } }, + { "default", { 4, 0, 1, 4 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 1, 1, 2 } }, + { "default", { 8, 1, 1, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 8, 0, 1, 2 } }, + { "default", { 8, 0, 1, 2 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 8, 0, 1, 2 } }, + { "default", { 8, 0, 1, 2 } }, + } }, + { "default", { + { "default", { 8, 0, 1, 2 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 1, 4 } }, - { "default", { 8, 0, 1, 4 } }, + { "default", { + { "Mali-T628", { 8, 0, 1, 4 } }, + { "default", { 8, 0, 1, 4 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, - { "default", { 4, 0, 0, 8 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 16 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 8 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 0, 0, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 16 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, + { "default", { 4, 0, 0, 8 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } }, - { "Iris", { 8, 1, 0, 4 } }, - { "Iris Pro", { 16, 1, 0, 4 } }, - { "default", { 16, 0, 0, 4 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 16, 0, 1, 4 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 0, 0, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 0, 0, 4 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 8, 0, 1, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 0, 1, 2 } }, + { "Iris", { 8, 1, 0, 4 } }, + { "Iris Pro", { 16, 1, 0, 4 } }, + { "default", { 16, 0, 0, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } }, - { "default", { 16, 1, 1, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 16, 1, 1, 1 } }, + { "default", { 16, 1, 1, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 1, 1, 2 } }, - { "GeForce GT 650M", { 8, 1, 0, 4 } }, - { "GeForce GTX 1070", { 8, 0, 1, 4 } }, - { "GeForce GTX 1080", { 4, 0, 0, 4 } }, - { "GeForce GTX 480", { 16, 1, 0, 2 } }, - { "GeForce GTX 670", { 16, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 1, 1, 2 } }, - { "GeForce GTX 750", { 4, 0, 0, 8 } }, - { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, - { "GeForce GTX 980", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN", { 8, 1, 0, 4 } }, - { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } }, - { "GeForce GTX TITAN X", { 16, 0, 0, 4 } }, - { "TITAN X (Pascal)", { 8, 0, 0, 4 } }, - { "Tesla K20m", { 8, 0, 0, 4 } }, - { "Tesla K40m", { 8, 1, 0, 4 } }, - { "default", { 8, 1, 0, 4 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 1, 0, 2 } }, + { "default", { 16, 1, 0, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 32, 1, 1, 2 } }, + { "GeForce GT 650M", { 8, 1, 0, 4 } }, + { "GeForce GTX 670", { 16, 1, 1, 2 } }, + { "GeForce GTX 680", { 16, 1, 1, 2 } }, + { "default", { 16, 1, 1, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 8, 1, 0, 4 } }, + { "GeForce GTX TITAN Black", { 8, 1, 0, 4 } }, + { "Tesla K20m", { 8, 0, 0, 4 } }, + { "Tesla K40m", { 8, 1, 0, 4 } }, + { "default", { 8, 1, 0, 4 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 4, 0, 0, 8 } }, + { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, + { "default", { 32, 1, 0, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 0, 0, 1 } }, + { "GeForce GTX TITAN X", { 16, 0, 0, 4 } }, + { "default", { 32, 1, 0, 2 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 0, 1, 4 } }, + { "GeForce GTX 1080", { 4, 0, 0, 4 } }, + { "TITAN X (Pascal)", { 8, 0, 0, 4 } }, + { "default", { 8, 0, 1, 4 } }, + } }, + { "default", { + { "default", { 8, 1, 0, 4 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } }, - { "default", { 8, 1, 1, 4 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 8, 1, 1, 4 } }, + { "default", { 8, 1, 1, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 0, 1, 4 } }, + { "default", { + { "default", { 8, 0, 1, 4 } }, + } }, } }, } diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index f8560206..9f2c1b6d 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -14,77 +14,136 @@ const DatabaseEntry TransposeComplexSingle = { "Transpose", Precision::kComplexSingle, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } }, - { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } }, - { "Ellesmere", { 4, 0, 1, 4 } }, - { "Fiji", { 8, 1, 1, 2 } }, - { "Hawaii", { 16, 0, 1, 1 } }, - { "Oland", { 4, 0, 1, 2 } }, - { "Pitcairn", { 8, 0, 1, 1 } }, - { "Tahiti", { 16, 0, 1, 1 } }, - { "Tonga", { 16, 0, 1, 1 } }, - { "Turks", { 8, 1, 1, 4 } }, - { "default", { 8, 0, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 4, 0, 1, 4 } }, + { "default", { 4, 0, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 8, 1, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 4 } }, + { "default", { 8, 1, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Oland", { + { "Oland", { 4, 0, 1, 2 } }, + { "default", { 4, 0, 1, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 8, 0, 1, 1 } }, + { "default", { 8, 0, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 8, 1, 1, 4 } }, + { "default", { 8, 1, 1, 4 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 16, 1, 1, 1 } }, + { "default", { 16, 1, 1, 1 } }, + } }, + { "default", { + { "default", { 8, 0, 1, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 0, 0, 2 } }, - { "default", { 16, 0, 0, 2 } }, + { "default", { + { "Mali-T628", { 16, 0, 0, 2 } }, + { "default", { 16, 0, 0, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } }, - { "default", { 4, 1, 0, 8 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 0, 0, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 0, 0, 4 } }, + { "default", { 4, 1, 0, 8 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } }, - { "Iris", { 8, 0, 0, 2 } }, - { "Iris Pro", { 16, 1, 0, 2 } }, - { "default", { 16, 1, 0, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 16, 1, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 0, 0, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 8, 0, 0, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 16, 1, 1, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 0, 2 } }, + { "Iris", { 8, 0, 0, 2 } }, + { "Iris Pro", { 16, 1, 0, 2 } }, + { "default", { 16, 1, 0, 2 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 1 } }, - { "GeForce GTX 1070", { 16, 1, 1, 1 } }, - { "GeForce GTX 1080", { 16, 1, 0, 1 } }, - { "GeForce GTX 480", { 16, 1, 0, 1 } }, - { "GeForce GTX 670", { 16, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 1, 1, 1 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, - { "GeForce GTX 980", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 8, 1, 0, 2 } }, - { "Tesla K20m", { 16, 0, 0, 1 } }, - { "Tesla K40m", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 1, 1, 1 } }, + { "GeForce GTX 670", { 16, 1, 1, 1 } }, + { "GeForce GTX 680", { 16, 1, 1, 1 } }, + { "default", { 16, 1, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 0, 0, 1 } }, + { "GeForce GTX TITAN Black", { 16, 1, 0, 1 } }, + { "Tesla K20m", { 16, 0, 0, 1 } }, + { "Tesla K40m", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 16, 1, 0, 1 } }, + { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 1, 0, 1 } }, + { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, + { "default", { 32, 1, 0, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 1, 1, 1 } }, + { "GeForce GTX 1080", { 16, 1, 0, 1 } }, + { "TITAN X (Pascal)", { 8, 1, 0, 2 } }, + { "default", { 16, 1, 0, 1 } }, + } }, + { "default", { + { "default", { 16, 1, 0, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 1, 1, 2 } }, + { "default", { + { "default", { 8, 1, 1, 2 } }, + } }, } }, } diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index ddad3bac..608e1390 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -14,63 +14,114 @@ const DatabaseEntry TransposeDouble = { "Transpose", Precision::kDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } }, - { "Ellesmere", { 4, 0, 1, 4 } }, - { "Fiji", { 8, 1, 1, 2 } }, - { "Hawaii", { 16, 0, 1, 1 } }, - { "Oland", { 8, 1, 1, 2 } }, - { "Pitcairn", { 4, 0, 1, 2 } }, - { "Tahiti", { 4, 1, 1, 4 } }, - { "Tonga", { 4, 0, 1, 4 } }, - { "default", { 4, 0, 1, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 4, 0, 1, 4 } }, + { "default", { 4, 0, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 8, 1, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 4, 0, 1, 4 } }, + { "default", { 8, 1, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Oland", { + { "Oland", { 8, 1, 1, 2 } }, + { "default", { 8, 1, 1, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 4, 0, 1, 2 } }, + { "default", { 4, 0, 1, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 4, 1, 1, 4 } }, + { "default", { 4, 1, 1, 4 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 4, 0, 1, 4 } }, + { "default", { 4, 0, 1, 4 } }, + } }, + { "default", { + { "default", { 4, 0, 1, 4 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 0, 1 } }, - { "default", { 8, 0, 0, 1 } }, + { "default", { + { "Mali-T628", { 8, 0, 0, 1 } }, + { "default", { 8, 0, 0, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, - { "default", { 4, 1, 0, 8 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1, 0, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 0, 16 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 0, 0, 8 } }, + { "default", { 4, 1, 0, 8 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } }, - { "default", { 32, 1, 0, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 1, 0, 1 } }, + { "default", { 32, 1, 0, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 2 } }, - { "GeForce GTX 1070", { 8, 0, 1, 2 } }, - { "GeForce GTX 1080", { 8, 0, 0, 2 } }, - { "GeForce GTX 480", { 8, 1, 0, 2 } }, - { "GeForce GTX 670", { 16, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 1, 1, 2 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, - { "GeForce GTX 980", { 16, 0, 0, 2 } }, - { "GeForce GTX TITAN", { 8, 0, 0, 2 } }, - { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 16, 1, 0, 2 } }, - { "Tesla K20m", { 16, 1, 0, 2 } }, - { "Tesla K40m", { 16, 1, 1, 2 } }, - { "default", { 16, 1, 1, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 8, 1, 0, 2 } }, + { "default", { 8, 1, 0, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 1, 1, 2 } }, + { "GeForce GTX 670", { 16, 1, 1, 2 } }, + { "GeForce GTX 680", { 16, 1, 1, 2 } }, + { "default", { 16, 1, 1, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 8, 0, 0, 2 } }, + { "GeForce GTX TITAN Black", { 16, 1, 0, 2 } }, + { "Tesla K20m", { 16, 1, 0, 2 } }, + { "Tesla K40m", { 16, 1, 1, 2 } }, + { "default", { 16, 1, 0, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 16, 1, 0, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1, 0, 2 } }, + { "default", { 32, 1, 0, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 0, 0, 2 } }, + { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, + { "default", { 32, 1, 0, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 0, 1, 2 } }, + { "GeForce GTX 1080", { 8, 0, 0, 2 } }, + { "TITAN X (Pascal)", { 16, 1, 0, 2 } }, + { "default", { 8, 1, 0, 2 } }, + } }, + { "default", { + { "default", { 16, 1, 1, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 16, 1, 1, 2 } }, + { "default", { + { "default", { 16, 1, 1, 2 } }, + } }, } }, } diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index 95c7fbba..baca60dc 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -14,57 +14,106 @@ const DatabaseEntry TransposeComplexDouble = { "Transpose", Precision::kComplexDouble, {"TRA_DIM", "TRA_PAD", "TRA_SHUFFLE", "TRA_WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } }, - { "Ellesmere", { 16, 0, 1, 1 } }, - { "Fiji", { 16, 0, 1, 1 } }, - { "Hawaii", { 4, 0, 1, 2 } }, - { "Oland", { 16, 0, 1, 1 } }, - { "Pitcairn", { 4, 0, 1, 1 } }, - { "Tahiti", { 16, 0, 1, 1 } }, - { "Tonga", { 8, 1, 1, 2 } }, - { "default", { 16, 0, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 0, 1, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 4, 1, 1, 2 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 4, 0, 1, 2 } }, + { "default", { 4, 0, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 4, 0, 1, 1 } }, + { "default", { 4, 0, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 16, 0, 1, 1 } }, + { "default", { 16, 0, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 1, 1, 2 } }, + { "default", { 8, 1, 1, 2 } }, + } }, + { "default", { + { "default", { 16, 0, 1, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 8, 0, 0, 1 } }, - { "default", { 8, 0, 0, 1 } }, + { "default", { + { "Mali-T628", { 8, 0, 0, 1 } }, + { "default", { 8, 0, 0, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } }, - { "default", { 4, 0, 0, 8 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 0, 0, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 1, 0, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 0, 1, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 16, 1, 0, 2 } }, + { "default", { 4, 0, 0, 8 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 1, 1, 1 } }, - { "GeForce GTX 1070", { 8, 0, 0, 1 } }, - { "GeForce GTX 1080", { 8, 0, 0, 1 } }, - { "GeForce GTX 480", { 8, 1, 0, 1 } }, - { "GeForce GTX 670", { 16, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 1, 1, 1 } }, - { "GeForce GTX 750", { 16, 1, 0, 1 } }, - { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, - { "GeForce GTX 980", { 32, 1, 0, 1 } }, - { "GeForce GTX TITAN", { 16, 1, 0, 1 } }, - { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } }, - { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, - { "TITAN X (Pascal)", { 8, 0, 0, 1 } }, - { "Tesla K20m", { 16, 1, 0, 1 } }, - { "Tesla K40m", { 16, 1, 0, 1 } }, - { "default", { 16, 1, 0, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 8, 1, 0, 1 } }, + { "default", { 8, 1, 0, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 1, 1, 1 } }, + { "GeForce GTX 670", { 16, 1, 1, 1 } }, + { "GeForce GTX 680", { 16, 1, 1, 1 } }, + { "default", { 16, 1, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 1, 0, 1 } }, + { "GeForce GTX TITAN Black", { 16, 0, 0, 1 } }, + { "Tesla K20m", { 16, 1, 0, 1 } }, + { "Tesla K40m", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 16, 1, 0, 1 } }, + { "GeForce GTX 750 Ti", { 16, 1, 0, 1 } }, + { "default", { 16, 1, 0, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 32, 1, 0, 1 } }, + { "GeForce GTX TITAN X", { 32, 1, 0, 1 } }, + { "default", { 32, 1, 0, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 0, 0, 1 } }, + { "GeForce GTX 1080", { 8, 0, 0, 1 } }, + { "TITAN X (Pascal)", { 8, 0, 0, 1 } }, + { "default", { 8, 0, 0, 1 } }, + } }, + { "default", { + { "default", { 16, 1, 0, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 16, 1, 1, 1 } }, + { "default", { + { "default", { 16, 1, 1, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp index 08f635f8..07594592 100644 --- a/src/database/kernels/xaxpy/xaxpy_16.hpp +++ b/src/database/kernels/xaxpy/xaxpy_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry XaxpyHalf = { "Xaxpy", Precision::kHalf, {"VW", "WGS", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 4, 128, 4 } }, - { "default", { 4, 128, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 4, 128, 4 } }, + { "default", { 4, 128, 4 } }, + } }, + { "default", { + { "default", { 4, 128, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } }, - { "default", { 8, 64, 1 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 64, 1 } }, + { "default", { 8, 64, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 8, 64, 1 } }, - { "default", { 8, 64, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 8, 64, 1 } }, + { "default", { 8, 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 64, 1 } }, + { "default", { + { "default", { 8, 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index e5530a15..7da10ab5 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -14,84 +14,145 @@ const DatabaseEntry XaxpySingle = { "Xaxpy", Precision::kSingle, {"VW", "WGS", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "ATI Radeon HD 6750M", { 1, 256, 2 } }, - { "Ellesmere", { 1, 64, 4 } }, - { "Fiji", { 4, 64, 1 } }, - { "Hawaii", { 2, 64, 2 } }, - { "Oland", { 1, 128, 1 } }, - { "Pitcairn", { 2, 128, 1 } }, - { "Tahiti", { 2, 64, 1 } }, - { "Tonga", { 1, 256, 8 } }, - { "Turks", { 2, 256, 1 } }, - { "default", { 2, 256, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 64, 4 } }, + { "default", { 1, 64, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 4, 64, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, + { "default", { 4, 64, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 2, 64, 2 } }, + { "default", { 2, 64, 2 } }, + } }, + { "Oland", { + { "Oland", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 2, 128, 1 } }, + { "default", { 2, 128, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 2, 64, 1 } }, + { "default", { 2, 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 1, 256, 8 } }, + { "default", { 1, 256, 8 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 2, 256, 1 } }, + { "default", { 2, 256, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 1, 256, 2 } }, + { "default", { 1, 256, 2 } }, + } }, + { "default", { + { "default", { 2, 256, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 4, 256, 1 } }, - { "default", { 4, 256, 1 } }, + { "default", { + { "Mali-T628", { 4, 256, 1 } }, + { "default", { 4, 256, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } }, - { "default", { 8, 512, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 8, 512, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 512, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 256, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 1024, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 128, 1 } }, + { "default", { 8, 512, 1 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 128, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } }, - { "Iris", { 1, 64, 1 } }, - { "Iris Pro", { 1, 128, 2 } }, - { "default", { 4, 256, 1 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 1, 128, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 256, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 512, 1 } }, + { "Iris", { 1, 64, 1 } }, + { "Iris Pro", { 1, 128, 2 } }, + { "default", { 4, 256, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } }, - { "default", { 2, 1024, 2 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 2, 1024, 2 } }, + { "default", { 2, 1024, 2 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 2, 64, 1 } }, - { "GeForce GT 650M", { 2, 1024, 1 } }, - { "GeForce GTX 1070", { 1, 64, 4 } }, - { "GeForce GTX 1080", { 1, 256, 1 } }, - { "GeForce GTX 480", { 2, 128, 1 } }, - { "GeForce GTX 670", { 2, 64, 1 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 1, 64, 1 } }, - { "GeForce GTX 750 Ti", { 2, 64, 1 } }, - { "GeForce GTX 980", { 1, 1024, 1 } }, - { "GeForce GTX TITAN", { 4, 256, 1 } }, - { "GeForce GTX TITAN Black", { 4, 128, 4 } }, - { "GeForce GTX TITAN X", { 1, 64, 1 } }, - { "TITAN X (Pascal)", { 4, 128, 1 } }, - { "Tesla K20m", { 4, 128, 1 } }, - { "Tesla K40m", { 4, 128, 1 } }, - { "default", { 4, 1024, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 2, 128, 1 } }, + { "default", { 2, 128, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 2, 64, 1 } }, + { "GeForce GT 650M", { 2, 1024, 1 } }, + { "GeForce GTX 670", { 2, 64, 1 } }, + { "GeForce GTX 680", { 1, 128, 1 } }, + { "default", { 2, 1024, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 4, 256, 1 } }, + { "GeForce GTX TITAN Black", { 4, 128, 4 } }, + { "Tesla K20m", { 4, 128, 1 } }, + { "Tesla K40m", { 4, 128, 1 } }, + { "default", { 4, 256, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 64, 1 } }, + { "GeForce GTX 750 Ti", { 2, 64, 1 } }, + { "default", { 2, 64, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 1024, 1 } }, + { "GeForce GTX TITAN X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 64, 4 } }, + { "GeForce GTX 1080", { 1, 256, 1 } }, + { "TITAN X (Pascal)", { 4, 128, 1 } }, + { "default", { 1, 512, 2 } }, + } }, + { "default", { + { "default", { 4, 1024, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 4, 128, 2 } }, - { "default", { 4, 128, 2 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 4, 128, 2 } }, + { "default", { 4, 128, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 4, 64, 1 } }, + { "default", { + { "default", { 4, 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index f0a2d117..2ef56b8e 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -14,83 +14,144 @@ const DatabaseEntry XaxpyComplexSingle = { "Xaxpy", Precision::kComplexSingle, {"VW", "WGS", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } }, - { "ATI Radeon HD 6750M", { 1, 64, 1 } }, - { "Ellesmere", { 2, 256, 1 } }, - { "Fiji", { 1, 128, 2 } }, - { "Hawaii", { 1, 128, 2 } }, - { "Oland", { 1, 128, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 256, 8 } }, - { "Turks", { 2, 256, 1 } }, - { "default", { 1, 128, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 2, 256, 1 } }, + { "default", { 2, 256, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 128, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 2, 64, 8 } }, + { "default", { 2, 64, 8 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 128, 2 } }, + { "default", { 1, 128, 2 } }, + } }, + { "Oland", { + { "Oland", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 1, 256, 8 } }, + { "default", { 1, 256, 8 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 2, 256, 1 } }, + { "default", { 2, 256, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "default", { + { "default", { 1, 128, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, + { "default", { + { "Mali-T628", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } }, - { "default", { 8, 1024, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 256, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 1024, 2 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 2, 1024, 1 } }, + { "default", { 8, 1024, 1 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 4, 64, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } }, - { "Iris", { 2, 128, 1 } }, - { "Iris Pro", { 1, 256, 8 } }, - { "default", { 4, 64, 1 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 4, 64, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 1, 64, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 1 } }, + { "Iris", { 2, 128, 1 } }, + { "Iris Pro", { 1, 256, 8 } }, + { "default", { 4, 64, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, - { "default", { 1, 1024, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, + { "default", { 1, 1024, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 512, 1 } }, - { "GeForce GTX 1070", { 1, 64, 2 } }, - { "GeForce GTX 1080", { 2, 64, 1 } }, - { "GeForce GTX 480", { 1, 256, 1 } }, - { "GeForce GTX 670", { 1, 256, 1 } }, - { "GeForce GTX 680", { 1, 256, 1 } }, - { "GeForce GTX 750", { 1, 512, 1 } }, - { "GeForce GTX 750 Ti", { 1, 512, 1 } }, - { "GeForce GTX 980", { 1, 64, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 128, 2 } }, - { "GeForce GTX TITAN X", { 1, 512, 1 } }, - { "TITAN X (Pascal)", { 2, 512, 1 } }, - { "Tesla K20m", { 1, 128, 1 } }, - { "Tesla K40m", { 1, 128, 1 } }, - { "default", { 1, 256, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 512, 1 } }, + { "GeForce GTX 670", { 1, 256, 1 } }, + { "GeForce GTX 680", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 256, 1 } }, + { "GeForce GTX TITAN Black", { 1, 128, 2 } }, + { "Tesla K20m", { 1, 128, 1 } }, + { "Tesla K40m", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 512, 1 } }, + { "GeForce GTX 750 Ti", { 1, 512, 1 } }, + { "default", { 1, 512, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 64, 1 } }, + { "GeForce GTX TITAN X", { 1, 512, 1 } }, + { "default", { 1, 512, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 64, 2 } }, + { "GeForce GTX 1080", { 2, 64, 1 } }, + { "TITAN X (Pascal)", { 2, 512, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "default", { + { "default", { 1, 256, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 128, 1 } }, + { "default", { + { "default", { 1, 128, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index dc9ba62c..f0c1ac9c 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -14,63 +14,114 @@ const DatabaseEntry XaxpyDouble = { "Xaxpy", Precision::kDouble, {"VW", "WGS", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 2, 64, 4 } }, - { "Fiji", { 2, 64, 4 } }, - { "Hawaii", { 1, 64, 2 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 128, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 128, 4 } }, - { "default", { 2, 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 2, 64, 4 } }, + { "default", { 2, 64, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 2, 64, 4 } }, + { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 64, 2 } }, + { "default", { 1, 64, 2 } }, + } }, + { "Oland", { + { "Oland", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 1, 128, 4 } }, + { "default", { 1, 128, 4 } }, + } }, + { "default", { + { "default", { 2, 64, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 2, 128, 2 } }, - { "default", { 2, 128, 2 } }, + { "default", { + { "Mali-T628", { 2, 128, 2 } }, + { "default", { 2, 128, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } }, - { "default", { 8, 64, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 64, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 1024, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 1024, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 64, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 256, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 2048, 1 } }, + { "default", { 8, 64, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } }, - { "default", { 2, 512, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 2, 512, 1 } }, + { "default", { 2, 512, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 64, 1 } }, - { "GeForce GTX 1070", { 1, 64, 8 } }, - { "GeForce GTX 1080", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "GeForce GTX 750", { 1, 128, 1 } }, - { "GeForce GTX 750 Ti", { 1, 256, 2 } }, - { "GeForce GTX 980", { 1, 256, 1 } }, - { "GeForce GTX TITAN", { 2, 1024, 1 } }, - { "GeForce GTX TITAN Black", { 2, 128, 1 } }, - { "GeForce GTX TITAN X", { 1, 512, 1 } }, - { "TITAN X (Pascal)", { 2, 512, 1 } }, - { "Tesla K20m", { 2, 128, 1 } }, - { "Tesla K40m", { 2, 128, 1 } }, - { "default", { 1, 128, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 64, 1 } }, + { "GeForce GTX 670", { 1, 64, 1 } }, + { "GeForce GTX 680", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 2, 1024, 1 } }, + { "GeForce GTX TITAN Black", { 2, 128, 1 } }, + { "Tesla K20m", { 2, 128, 1 } }, + { "Tesla K40m", { 2, 128, 1 } }, + { "default", { 2, 128, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 128, 1 } }, + { "GeForce GTX 750 Ti", { 1, 256, 2 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 256, 1 } }, + { "GeForce GTX TITAN X", { 1, 512, 1 } }, + { "default", { 1, 512, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 64, 8 } }, + { "GeForce GTX 1080", { 1, 128, 1 } }, + { "TITAN X (Pascal)", { 2, 512, 1 } }, + { "default", { 2, 256, 1 } }, + } }, + { "default", { + { "default", { 1, 128, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 2, 256, 1 } }, + { "default", { + { "default", { 2, 256, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index 0b5f5bcf..553b02fe 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -14,63 +14,114 @@ const DatabaseEntry XaxpyComplexDouble = { "Xaxpy", Precision::kComplexDouble, {"VW", "WGS", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "Ellesmere", { 1, 128, 1 } }, - { "Fiji", { 1, 64, 1 } }, - { "Hawaii", { 2, 64, 1 } }, - { "Oland", { 1, 256, 1 } }, - { "Pitcairn", { 1, 128, 1 } }, - { "Tahiti", { 1, 128, 1 } }, - { "Tonga", { 1, 64, 1 } }, - { "default", { 1, 128, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 64, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 2, 64, 1 } }, + { "default", { 2, 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "default", { + { "default", { 1, 128, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 1, 64, 8 } }, - { "default", { 1, 64, 8 } }, + { "default", { + { "Mali-T628", { 1, 64, 8 } }, + { "default", { 1, 64, 8 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } }, - { "default", { 8, 256, 1 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 1024, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 8, 128, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 8, 512, 1 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 1024, 1 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 1, 256, 1 } }, + { "default", { 8, 256, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, - { "default", { 1, 1024, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 1024, 1 } }, + { "default", { 1, 1024, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 64, 1 } }, - { "GeForce GTX 1070", { 1, 64, 2 } }, - { "GeForce GTX 1080", { 1, 256, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 1, 256, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "GeForce GTX 750", { 1, 1024, 1 } }, - { "GeForce GTX 750 Ti", { 1, 64, 2 } }, - { "GeForce GTX 980", { 1, 1024, 1 } }, - { "GeForce GTX TITAN", { 1, 64, 4 } }, - { "GeForce GTX TITAN Black", { 1, 128, 4 } }, - { "GeForce GTX TITAN X", { 1, 1024, 1 } }, - { "TITAN X (Pascal)", { 1, 256, 2 } }, - { "Tesla K20m", { 1, 64, 1 } }, - { "Tesla K40m", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 64, 1 } }, + { "GeForce GTX 670", { 1, 256, 1 } }, + { "GeForce GTX 680", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 64, 4 } }, + { "GeForce GTX TITAN Black", { 1, 128, 4 } }, + { "Tesla K20m", { 1, 64, 1 } }, + { "Tesla K40m", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 1024, 1 } }, + { "GeForce GTX 750 Ti", { 1, 64, 2 } }, + { "default", { 1, 1024, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 1024, 1 } }, + { "GeForce GTX TITAN X", { 1, 1024, 1 } }, + { "default", { 1, 1024, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 64, 2 } }, + { "GeForce GTX 1080", { 1, 256, 1 } }, + { "TITAN X (Pascal)", { 1, 256, 2 } }, + { "default", { 1, 256, 2 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 256, 1 } }, + { "default", { + { "default", { 1, 256, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp index 570604ec..e206671e 100644 --- a/src/database/kernels/xdot/xdot_16.hpp +++ b/src/database/kernels/xdot/xdot_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry XdotHalf = { "Xdot", Precision::kHalf, {"WGS1", "WGS2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 256, 64 } }, - { "default", { 256, 64 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 256, 64 } }, + { "default", { 256, 64 } }, + } }, + { "default", { + { "default", { 256, 64 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } }, - { "default", { 128, 32 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 64 } }, - { "default", { 64, 64 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 64, 64 } }, + { "default", { 64, 64 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 128, 64 } }, + { "default", { + { "default", { 128, 64 } }, + } }, } }, } diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index 8a7bd9d8..189ceee6 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -14,66 +14,120 @@ const DatabaseEntry XdotSingle = { "Xdot", Precision::kSingle, {"WGS1", "WGS2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } }, - { "ATI Radeon HD 6750M", { 256, 32 } }, - { "Ellesmere", { 128, 32 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 128, 32 } }, - { "Tahiti", { 128, 32 } }, - { "Tonga", { 64, 32 } }, - { "Turks", { 128, 64 } }, - { "default", { 256, 32 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 256, 32 } }, + { "AMD Radeon R9 M370X Compute Engine", { 128, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Oland", { + { "Oland", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 64, 32 } }, + { "default", { 64, 32 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 128, 64 } }, + { "default", { 128, 64 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "default", { + { "default", { 256, 32 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } }, - { "default", { 64, 64 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 32 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 128 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 32 } }, + { "default", { 64, 64 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 64, 32 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } }, - { "Iris Pro", { 512, 64 } }, - { "default", { 64, 32 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 64, 32 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 32 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 128 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 32 } }, + { "Iris Pro", { 512, 64 } }, + { "default", { 64, 32 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 32 } }, - { "GeForce GT 650M", { 128, 64 } }, - { "GeForce GTX 1070", { 128, 1024 } }, - { "GeForce GTX 1080", { 512, 64 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 512, 1024 } }, - { "GeForce GTX 680", { 128, 128 } }, - { "GeForce GTX 750", { 128, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 256, 32 } }, - { "GeForce GTX TITAN Black", { 512, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 1024, 32 } }, - { "Tesla K20m", { 1024, 32 } }, - { "default", { 256, 64 } }, + { "SM2.0", { + { "GeForce GTX 480", { 512, 32 } }, + { "default", { 512, 32 } }, + } }, + { "SM3.0", { + { "GRID K520", { 128, 32 } }, + { "GeForce GT 650M", { 128, 64 } }, + { "GeForce GTX 670", { 512, 1024 } }, + { "GeForce GTX 680", { 128, 128 } }, + { "default", { 256, 1024 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 512, 64 } }, + { "Tesla K20m", { 1024, 32 } }, + { "default", { 1024, 64 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 128, 32 } }, + { "GeForce GTX 750 Ti", { 64, 32 } }, + { "default", { 128, 32 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 256, 32 } }, + { "GeForce GTX TITAN X", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 128, 1024 } }, + { "GeForce GTX 1080", { 512, 64 } }, + { "TITAN X (Pascal)", { 1024, 32 } }, + { "default", { 1024, 32 } }, + } }, + { "default", { + { "default", { 256, 64 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 128, 64 } }, - { "default", { 128, 64 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 128, 64 } }, + { "default", { 128, 64 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 128, 32 } }, + { "default", { + { "default", { 128, 32 } }, + } }, } }, } diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index 4950c1f2..a2f8cf73 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -14,65 +14,119 @@ const DatabaseEntry XdotComplexSingle = { "Xdot", Precision::kComplexSingle, {"WGS1", "WGS2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, - { "ATI Radeon HD 6750M", { 256, 256 } }, - { "Ellesmere", { 256, 32 } }, - { "Fiji", { 256, 64 } }, - { "Oland", { 128, 32 } }, - { "Pitcairn", { 256, 32 } }, - { "Tahiti", { 64, 32 } }, - { "Tonga", { 256, 64 } }, - { "Turks", { 128, 32 } }, - { "default", { 256, 32 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 256, 64 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, + { "default", { 256, 64 } }, + } }, + { "Oland", { + { "Oland", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 64, 32 } }, + { "default", { 64, 32 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 256, 64 } }, + { "default", { 256, 64 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 256, 256 } }, + { "default", { 256, 256 } }, + } }, + { "default", { + { "default", { 256, 32 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, - { "default", { 256, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 64 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 256, 32 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } }, - { "Iris Pro", { 32, 32 } }, - { "default", { 32, 32 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 256, 32 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 32 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 32 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 512, 32 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 256 } }, + { "Iris Pro", { 32, 32 } }, + { "default", { 32, 32 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 32 } }, - { "GeForce GTX 1070", { 128, 32 } }, - { "GeForce GTX 1080", { 128, 64 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 256, 32 } }, - { "GeForce GTX 680", { 128, 64 } }, - { "GeForce GTX 750", { 64, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 256, 64 } }, - { "GeForce GTX TITAN Black", { 128, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 256, 32 } }, - { "Tesla K20m", { 512, 32 } }, - { "default", { 512, 64 } }, + { "SM2.0", { + { "GeForce GTX 480", { 512, 32 } }, + { "default", { 512, 32 } }, + } }, + { "SM3.0", { + { "GRID K520", { 64, 32 } }, + { "GeForce GTX 670", { 256, 32 } }, + { "GeForce GTX 680", { 128, 64 } }, + { "default", { 256, 64 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 128, 64 } }, + { "Tesla K20m", { 512, 32 } }, + { "default", { 128, 32 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 64, 32 } }, + { "GeForce GTX 750 Ti", { 64, 32 } }, + { "default", { 64, 32 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 256, 64 } }, + { "GeForce GTX TITAN X", { 256, 32 } }, + { "default", { 256, 64 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 128, 32 } }, + { "GeForce GTX 1080", { 128, 64 } }, + { "TITAN X (Pascal)", { 256, 32 } }, + { "default", { 128, 128 } }, + } }, + { "default", { + { "default", { 512, 64 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 256 } }, - { "default", { 64, 256 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 64, 256 } }, + { "default", { 64, 256 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 256, 32 } }, + { "default", { + { "default", { 256, 32 } }, + } }, } }, } diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index 7fbcb474..f6e5671e 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -14,46 +14,90 @@ const DatabaseEntry XdotDouble = { "Xdot", Precision::kDouble, {"WGS1", "WGS2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } }, - { "Ellesmere", { 128, 64 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 128, 32 } }, - { "Tahiti", { 256, 32 } }, - { "Tonga", { 128, 64 } }, - { "default", { 128, 64 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 128, 64 } }, + { "default", { 128, 64 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 256, 32 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 128 } }, + { "default", { 256, 32 } }, + } }, + { "Oland", { + { "Oland", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 128, 64 } }, + { "default", { 128, 64 } }, + } }, + { "default", { + { "default", { 128, 64 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, - { "default", { 256, 64 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 128 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 64 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 64, 64 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 32 } }, + { "default", { 256, 64 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 32 } }, - { "GeForce GTX 1070", { 128, 512 } }, - { "GeForce GTX 1080", { 128, 128 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 256, 32 } }, - { "GeForce GTX 680", { 128, 64 } }, - { "GeForce GTX 750", { 64, 256 } }, - { "GeForce GTX 750 Ti", { 128, 64 } }, - { "GeForce GTX 980", { 128, 32 } }, - { "GeForce GTX TITAN Black", { 128, 64 } }, - { "GeForce GTX TITAN X", { 256, 32 } }, - { "TITAN X (Pascal)", { 128, 32 } }, - { "Tesla K20m", { 512, 32 } }, - { "default", { 128, 128 } }, + { "SM2.0", { + { "GeForce GTX 480", { 512, 32 } }, + { "default", { 512, 32 } }, + } }, + { "SM3.0", { + { "GRID K520", { 128, 32 } }, + { "GeForce GTX 670", { 256, 32 } }, + { "GeForce GTX 680", { 128, 64 } }, + { "default", { 256, 64 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 128, 64 } }, + { "Tesla K20m", { 512, 32 } }, + { "default", { 128, 64 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 64, 256 } }, + { "GeForce GTX 750 Ti", { 128, 64 } }, + { "default", { 64, 256 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 128, 32 } }, + { "GeForce GTX TITAN X", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 128, 512 } }, + { "GeForce GTX 1080", { 128, 128 } }, + { "TITAN X (Pascal)", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "default", { + { "default", { 128, 128 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 128, 64 } }, + { "default", { + { "default", { 128, 64 } }, + } }, } }, } diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index ea23b329..72d418ab 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -14,46 +14,90 @@ const DatabaseEntry XdotComplexDouble = { "Xdot", Precision::kComplexDouble, {"WGS1", "WGS2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, - { "Ellesmere", { 256, 32 } }, - { "Fiji", { 256, 32 } }, - { "Oland", { 256, 32 } }, - { "Pitcairn", { 256, 32 } }, - { "Tahiti", { 256, 32 } }, - { "Tonga", { 128, 64 } }, - { "default", { 256, 32 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 256, 32 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Oland", { + { "Oland", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 256, 32 } }, + { "default", { 256, 32 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 128, 64 } }, + { "default", { 128, 64 } }, + } }, + { "default", { + { "default", { 256, 32 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } }, - { "default", { 128, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 128 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1024, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 32 } }, + { "default", { 128, 32 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 32 } }, - { "GeForce GTX 1070", { 128, 64 } }, - { "GeForce GTX 1080", { 128, 32 } }, - { "GeForce GTX 480", { 512, 32 } }, - { "GeForce GTX 670", { 512, 128 } }, - { "GeForce GTX 680", { 256, 64 } }, - { "GeForce GTX 750", { 256, 32 } }, - { "GeForce GTX 750 Ti", { 64, 32 } }, - { "GeForce GTX 980", { 64, 32 } }, - { "GeForce GTX TITAN Black", { 128, 32 } }, - { "GeForce GTX TITAN X", { 128, 32 } }, - { "TITAN X (Pascal)", { 128, 64 } }, - { "Tesla K20m", { 128, 32 } }, - { "default", { 128, 64 } }, + { "SM2.0", { + { "GeForce GTX 480", { 512, 32 } }, + { "default", { 512, 32 } }, + } }, + { "SM3.0", { + { "GRID K520", { 64, 32 } }, + { "GeForce GTX 670", { 512, 128 } }, + { "GeForce GTX 680", { 256, 64 } }, + { "default", { 128, 128 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 128, 32 } }, + { "Tesla K20m", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 256, 32 } }, + { "GeForce GTX 750 Ti", { 64, 32 } }, + { "default", { 64, 32 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 64, 32 } }, + { "GeForce GTX TITAN X", { 128, 32 } }, + { "default", { 128, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 128, 64 } }, + { "GeForce GTX 1080", { 128, 32 } }, + { "TITAN X (Pascal)", { 128, 64 } }, + { "default", { 128, 256 } }, + } }, + { "default", { + { "default", { 128, 64 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 256, 32 } }, + { "default", { + { "default", { 256, 32 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp index 53a8ccd9..c9954a9e 100644 --- a/src/database/kernels/xgemm/xgemm_16.hpp +++ b/src/database/kernels/xgemm/xgemm_16.hpp @@ -14,19 +14,28 @@ const DatabaseEntry XgemmHalf = { "Xgemm", Precision::kHalf, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + { "default", { + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { + { "default", { 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index 2c0a63f2..ea713339 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -14,84 +14,145 @@ const DatabaseEntry XgemmSingle = { "Xgemm", Precision::kSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } }, - { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, - { "Ellesmere", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, - { "Fiji", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Hawaii", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, - { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, - { "Pitcairn", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, - { "Tahiti", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, - { "Tonga", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, - { "Turks", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, + { "default", { 32, 2, 8, 8, 16, 16, 16, 64, 1, 1, 0, 0, 1, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 16, 128, 0, 0, 0, 0, 2, 8 } }, + { "default", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, + { "default", { 16, 2, 16, 32, 128, 32, 8, 64, 1, 1, 1, 1, 4, 2 } }, + } }, + { "Oland", { + { "Oland", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, + { "default", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 1, 1, 0, 2, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + { "default", { 16, 2, 16, 8, 32, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, + { "default", { 32, 2, 16, 32, 128, 16, 8, 64, 0, 0, 0, 0, 4, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, + { "default", { 16, 2, 16, 32, 64, 16, 8, 128, 1, 1, 0, 0, 2, 8 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 64, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, + { "default", { 32, 2, 8, 16, 128, 8, 8, 128, 0, 0, 1, 1, 8, 8 } }, + } }, + { "default", { + { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, - { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + { "default", { + { "Mali-T628", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + { "default", { 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 8, 8, 128, 16, 8, 128, 0, 1, 1, 1, 1, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 8, 32, 32, 64, 32, 16, 64, 1, 1, 1, 0, 2, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 8, 128, 16, 8, 64, 0, 0, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 32, 8, 128, 8, 8, 128, 1, 1, 1, 1, 2, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 16, 2, 8, 8, 128, 8, 8, 128, 1, 1, 1, 0, 1, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 16, 16, 64, 32, 32, 64, 0, 1, 1, 0, 1, 2 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, - { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } }, - { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 32, 2, 8, 8, 128, 32, 16, 64, 0, 0, 1, 0, 4, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 32, 8, 8, 8, 64, 32, 16, 64, 1, 1, 1, 1, 4, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 16, 2, 16, 8, 32, 8, 16, 128, 1, 1, 1, 1, 2, 4 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + { "Iris", { 16, 8, 16, 8, 128, 32, 16, 64, 1, 1, 1, 1, 4, 1 } }, + { "Iris Pro", { 16, 2, 16, 8, 64, 32, 32, 128, 1, 1, 1, 0, 4, 4 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, - { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + { "default", { 32, 2, 32, 32, 32, 32, 8, 128, 0, 0, 1, 0, 1, 4 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } }, - { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } }, - { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, - { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, - { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, - { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } }, - { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } }, - { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, - { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, - { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, - { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } }, - { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, - { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, - { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, - { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, + { "default", { 16, 2, 16, 8, 64, 32, 16, 64, 1, 1, 1, 1, 2, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 2, 16, 8, 32, 8, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { "GeForce GT 650M", { 32, 2, 8, 8, 32, 32, 32, 64, 1, 1, 0, 0, 4, 2 } }, + { "GeForce GTX 670", { 16, 2, 8, 8, 64, 16, 16, 64, 1, 1, 1, 0, 2, 4 } }, + { "GeForce GTX 680", { 32, 8, 8, 16, 64, 32, 16, 128, 1, 1, 0, 0, 4, 2 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 4 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 8, 32, 16, 64, 8, 8, 64, 1, 1, 1, 0, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 64, 16, 16, 64, 1, 1, 1, 0, 4, 1 } }, + { "Tesla K20m", { 16, 2, 32, 16, 64, 16, 8, 64, 1, 1, 1, 0, 2, 4 } }, + { "Tesla K40m", { 16, 8, 16, 8, 64, 16, 16, 128, 1, 1, 1, 0, 2, 4 } }, + { "default", { 16, 8, 32, 16, 64, 32, 16, 64, 1, 0, 1, 0, 2, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 16, 2, 16, 16, 64, 32, 8, 128, 1, 1, 1, 1, 1, 2 } }, + { "GeForce GTX 750 Ti", { 16, 2, 16, 16, 128, 32, 8, 64, 1, 1, 0, 1, 8, 2 } }, + { "default", { 32, 2, 8, 8, 64, 32, 32, 64, 0, 0, 0, 0, 2, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 2, 16, 16, 64, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, + { "GeForce GTX TITAN X", { 16, 2, 8, 16, 128, 8, 8, 128, 1, 1, 1, 1, 4, 8 } }, + { "default", { 16, 2, 16, 16, 128, 16, 8, 128, 1, 1, 1, 0, 4, 8 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 2, 32, 16, 128, 32, 8, 128, 1, 1, 1, 0, 4, 1 } }, + { "GeForce GTX 1080", { 32, 2, 16, 8, 64, 8, 8, 64, 1, 1, 1, 1, 4, 8 } }, + { "TITAN X (Pascal)", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 1 } }, + } }, + { "default", { + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 2 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + { "default", { + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index 3ba015e2..6f29dda1 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -14,77 +14,136 @@ const DatabaseEntry XgemmComplexSingle = { "Xgemm", Precision::kComplexSingle, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, - { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Ellesmere", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, - { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, - { "Hawaii", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, - { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, - { "Pitcairn", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, - { "Tahiti", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, - { "Tonga", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, - { "Turks", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, + { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 32, 32, 64, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { "default", { 32, 2, 8, 8, 16, 32, 32, 32, 1, 1, 0, 0, 1, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, + { "default", { 32, 2, 32, 8, 32, 8, 16, 32, 1, 0, 1, 0, 1, 1 } }, + } }, + { "Oland", { + { "Oland", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, + { "default", { 32, 2, 16, 8, 32, 32, 32, 128, 1, 0, 0, 1, 2, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, + { "default", { 16, 2, 8, 8, 32, 8, 8, 32, 0, 1, 1, 1, 4, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, + { "default", { 16, 2, 8, 8, 32, 8, 16, 32, 1, 0, 0, 1, 2, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, + { "default", { 16, 2, 32, 8, 64, 16, 32, 64, 1, 1, 1, 0, 2, 1 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, + { "default", { 16, 2, 8, 8, 32, 32, 8, 32, 0, 1, 0, 0, 2, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 32, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + } }, + { "default", { + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 1, 2 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, - { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + { "default", { + { "Mali-T628", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + { "default", { 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 0, 1, 1, 2 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 32, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 16, 64, 8, 16, 64, 0, 1, 0, 0, 4, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 16, 16, 16, 128, 0, 0, 1, 1, 1, 4 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, - { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, - { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, - { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 16, 8, 8, 16, 64, 32, 8, 32, 0, 0, 0, 0, 2, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 16, 8, 8, 8, 32, 16, 16, 64, 1, 0, 0, 0, 4, 4 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 32, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 1, 2, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 2, 16, 16, 64, 16, 16, 64, 0, 0, 0, 0, 4, 2 } }, + { "Iris", { 32, 8, 32, 16, 64, 8, 16, 64, 1, 0, 1, 0, 1, 1 } }, + { "Iris Pro", { 16, 2, 8, 8, 32, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, + { "default", { 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, - { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + { "default", { 32, 2, 32, 32, 32, 32, 16, 128, 1, 0, 0, 0, 1, 4 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } }, - { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, - { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, - { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, - { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, - { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } }, - { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, - { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, - { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } }, - { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } }, - { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, - { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, - { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } }, - { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } }, - { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, + { "default", { 16, 2, 16, 16, 32, 32, 16, 128, 0, 1, 1, 1, 2, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 8, 32, 32, 64, 32, 16, 128, 1, 0, 1, 0, 1, 4 } }, + { "GeForce GTX 670", { 16, 2, 32, 32, 64, 32, 8, 32, 1, 1, 1, 1, 1, 1 } }, + { "GeForce GTX 680", { 16, 2, 32, 16, 64, 32, 32, 128, 1, 0, 0, 0, 2, 2 } }, + { "default", { 16, 2, 32, 16, 64, 32, 16, 128, 1, 0, 0, 0, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 8, 16, 16, 64, 32, 16, 64, 1, 1, 1, 0, 1, 1 } }, + { "GeForce GTX TITAN Black", { 16, 2, 8, 16, 64, 8, 8, 32, 0, 1, 1, 0, 1, 2 } }, + { "Tesla K20m", { 32, 2, 8, 16, 64, 8, 16, 64, 1, 0, 0, 0, 1, 4 } }, + { "Tesla K40m", { 16, 2, 32, 32, 32, 32, 8, 64, 0, 1, 0, 0, 1, 1 } }, + { "default", { 32, 2, 16, 16, 128, 8, 8, 64, 0, 1, 0, 1, 8, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 16, 8, 16, 16, 64, 16, 16, 64, 1, 1, 1, 0, 2, 2 } }, + { "GeForce GTX 750 Ti", { 16, 2, 16, 8, 32, 32, 16, 64, 1, 1, 1, 0, 1, 2 } }, + { "default", { 16, 2, 16, 16, 32, 16, 16, 64, 1, 1, 1, 0, 1, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 32, 8, 32, 32, 64, 16, 16, 64, 1, 1, 1, 0, 2, 1 } }, + { "GeForce GTX TITAN X", { 16, 2, 8, 8, 64, 8, 8, 32, 1, 0, 1, 1, 1, 4 } }, + { "default", { 16, 2, 32, 32, 64, 16, 16, 32, 1, 0, 1, 0, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 2, 16, 16, 128, 16, 16, 64, 1, 1, 1, 1, 2, 4 } }, + { "GeForce GTX 1080", { 16, 2, 32, 16, 64, 32, 8, 64, 1, 1, 0, 0, 1, 2 } }, + { "TITAN X (Pascal)", { 32, 2, 32, 32, 64, 8, 8, 32, 1, 1, 0, 0, 2, 4 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 4 } }, + } }, + { "default", { + { "default", { 32, 2, 8, 8, 16, 32, 32, 64, 1, 1, 0, 0, 1, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } }, + { "default", { + { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 1, 1, 0, 0, 2, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index 45d04e80..625920b7 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -14,63 +14,114 @@ const DatabaseEntry XgemmDouble = { "Xgemm", Precision::kDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } }, - { "Ellesmere", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, - { "Fiji", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - { "Hawaii", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, - { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, - { "Pitcairn", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, - { "Tahiti", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, - { "Tonga", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 1, 1, 0, 0, 2, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 2, 16, 16, 64, 8, 8, 32, 0, 0, 0, 0, 4, 4 } }, + { "default", { 32, 2, 8, 8, 16, 32, 32, 32, 1, 1, 0, 0, 1, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, + { "default", { 16, 8, 32, 8, 128, 8, 8, 32, 0, 1, 0, 0, 1, 4 } }, + } }, + { "Oland", { + { "Oland", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, + { "default", { 16, 2, 8, 16, 64, 16, 8, 16, 0, 0, 1, 1, 1, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, + { "default", { 32, 2, 32, 16, 64, 8, 16, 32, 0, 0, 0, 0, 1, 2 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { "default", { 32, 2, 16, 8, 16, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + } }, + { "default", { + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 1, 1, 0, 0, 2, 2 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, - { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + { "default", { + { "Mali-T628", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + { "default", { 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } }, - { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 128, 16, 16, 128, 1, 1, 1, 1, 2, 8 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 32, 16, 128, 16, 16, 64, 0, 1, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 32, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 16, 8, 128, 8, 8, 64, 1, 0, 0, 1, 2, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 16, 8, 128, 8, 8, 128, 1, 0, 0, 0, 2, 8 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 2, 8, 16, 128, 16, 8, 128, 0, 0, 1, 1, 1, 8 } }, + { "default", { 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 1, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, - { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + { "default", { 32, 8, 8, 16, 16, 16, 16, 128, 0, 0, 1, 0, 1, 4 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } }, - { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, - { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, - { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, - { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } }, - { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } }, - { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, - { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, - { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, - { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } }, - { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, - { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } }, - { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, + { "default", { 16, 2, 8, 16, 32, 32, 8, 64, 1, 1, 1, 0, 1, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 2, 8, 8, 16, 8, 8, 32, 1, 0, 0, 1, 2, 2 } }, + { "GeForce GTX 670", { 32, 8, 16, 32, 128, 16, 8, 32, 0, 1, 1, 0, 1, 1 } }, + { "GeForce GTX 680", { 32, 8, 8, 8, 32, 16, 32, 128, 1, 0, 0, 1, 2, 4 } }, + { "default", { 16, 2, 16, 32, 128, 16, 32, 128, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 8, 16, 8, 32, 16, 32, 128, 1, 1, 1, 1, 2, 2 } }, + { "GeForce GTX TITAN Black", { 16, 2, 16, 8, 16, 16, 8, 16, 1, 1, 1, 0, 1, 1 } }, + { "Tesla K20m", { 16, 2, 32, 8, 32, 16, 16, 64, 1, 0, 0, 0, 1, 1 } }, + { "Tesla K40m", { 32, 2, 16, 8, 64, 16, 32, 128, 1, 0, 1, 1, 2, 4 } }, + { "default", { 16, 2, 16, 8, 16, 16, 16, 128, 1, 0, 0, 0, 1, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 8, 16, 32, 64, 16, 8, 128, 0, 0, 0, 1, 2, 1 } }, + { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 32, 16, 16, 32, 0, 0, 0, 0, 4, 2 } }, + { "default", { 32, 2, 16, 32, 32, 16, 16, 128, 0, 0, 0, 0, 2, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 32, 8, 16, 8, 64, 32, 32, 128, 0, 0, 1, 0, 2, 4 } }, + { "GeForce GTX TITAN X", { 16, 8, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { "default", { 16, 8, 16, 16, 16, 16, 16, 128, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 2, 8, 16, 32, 8, 8, 64, 0, 0, 1, 1, 2, 8 } }, + { "GeForce GTX 1080", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + { "TITAN X (Pascal)", { 32, 2, 32, 32, 32, 16, 16, 32, 0, 0, 0, 0, 1, 2 } }, + { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + } }, + { "default", { + { "default", { 32, 2, 16, 16, 32, 16, 16, 64, 0, 0, 0, 0, 2, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + { "default", { + { "default", { 32, 2, 8, 8, 32, 8, 8, 64, 0, 0, 0, 0, 4, 4 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index 14f47eff..1cddbfae 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -14,62 +14,113 @@ const DatabaseEntry XgemmComplexDouble = { "Xgemm", Precision::kComplexDouble, {"KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } }, - { "Ellesmere", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - { "Fiji", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, - { "Hawaii", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, - { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, - { "Pitcairn", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, - { "Tahiti", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, - { "Tonga", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, - { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 8, 8, 16, 32, 16, 16, 32, 0, 0, 1, 1, 2, 2 } }, + { "default", { 32, 2, 16, 16, 32, 8, 8, 32, 0, 0, 0, 0, 2, 4 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, + { "default", { 16, 2, 16, 16, 16, 16, 16, 32, 1, 0, 0, 0, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + { "default", { 16, 2, 16, 8, 16, 16, 32, 128, 0, 0, 0, 0, 1, 4 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, + { "default", { 32, 2, 16, 8, 32, 8, 32, 32, 0, 1, 1, 0, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, + { "default", { 16, 2, 16, 8, 16, 8, 8, 16, 0, 0, 1, 0, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, + { "default", { 16, 2, 32, 16, 32, 16, 16, 16, 1, 1, 1, 1, 1, 1 } }, + } }, + { "default", { + { "default", { 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, - { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + { "default", { + { "Mali-T628", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + { "default", { 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } }, - { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 16, 2, 32, 8, 64, 16, 8, 128, 0, 1, 0, 1, 2, 1 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 32, 2, 16, 32, 128, 16, 16, 64, 0, 1, 0, 0, 2, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 2, 16, 32, 128, 16, 8, 32, 0, 1, 0, 0, 4, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 32, 2, 8, 8, 128, 8, 16, 128, 0, 0, 0, 1, 1, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 2, 8, 8, 128, 32, 8, 128, 0, 0, 0, 0, 1, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 32, 8, 8, 32, 32, 8, 8, 32, 0, 1, 0, 0, 1, 2 } }, + { "default", { 32, 2, 8, 8, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, - { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { 32, 2, 16, 16, 16, 16, 8, 32, 0, 0, 1, 0, 1, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } }, - { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, - { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, - { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, - { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } }, - { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } }, - { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, - { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, - { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, - { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } }, - { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, - { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } }, - { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } }, - { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { 16, 2, 32, 32, 32, 32, 8, 32, 0, 0, 1, 0, 1, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 32, 8, 16, 16, 16, 8, 16, 64, 1, 0, 1, 1, 1, 1 } }, + { "GeForce GTX 670", { 32, 8, 16, 8, 16, 16, 32, 64, 1, 0, 0, 1, 1, 2 } }, + { "GeForce GTX 680", { 16, 8, 16, 8, 64, 16, 32, 32, 0, 1, 1, 0, 1, 1 } }, + { "default", { 16, 8, 16, 16, 16, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 16, 2, 16, 16, 32, 16, 8, 32, 0, 1, 1, 1, 1, 1 } }, + { "Tesla K20m", { 32, 2, 32, 8, 32, 16, 16, 64, 0, 0, 1, 0, 1, 1 } }, + { "Tesla K40m", { 16, 8, 8, 8, 32, 32, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { 32, 2, 8, 8, 32, 16, 16, 64, 0, 0, 0, 0, 4, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 2, 8, 32, 32, 8, 8, 64, 0, 0, 1, 0, 1, 4 } }, + { "GeForce GTX 750 Ti", { 32, 2, 8, 8, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, + { "default", { 32, 2, 8, 32, 16, 8, 8, 32, 0, 0, 0, 0, 1, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 16, 2, 16, 8, 32, 8, 16, 128, 0, 0, 1, 1, 2, 2 } }, + { "GeForce GTX TITAN X", { 32, 8, 16, 16, 128, 16, 16, 32, 0, 0, 1, 0, 1, 1 } }, + { "default", { 16, 2, 16, 16, 128, 16, 16, 128, 0, 0, 1, 0, 1, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 32, 8, 32, 16, 32, 8, 8, 32, 0, 0, 0, 1, 1, 4 } }, + { "GeForce GTX 1080", { 32, 2, 16, 16, 16, 8, 8, 16, 0, 0, 0, 0, 1, 2 } }, + { "TITAN X (Pascal)", { 32, 2, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 1, 1 } }, + { "default", { 32, 2, 32, 32, 32, 32, 32, 64, 0, 0, 0, 0, 1, 2 } }, + } }, + { "default", { + { "default", { 32, 2, 16, 16, 32, 16, 16, 32, 0, 0, 0, 0, 1, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } }, + { "default", { + { "default", { 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp index ed71285f..e2536bd2 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp @@ -14,19 +14,28 @@ const DatabaseEntry XgemmDirectHalf = { "XgemmDirect", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, - { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, + { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, + } }, + { "default", { + { "default", { 8, 32, 8, 8, 32, 1, 1, 1, 1, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "default", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index e0a991c0..610532f0 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -14,49 +14,88 @@ const DatabaseEntry XgemmDirectSingle = { "XgemmDirect", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } }, - { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } }, - { "Ellesmere", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } }, - { "Fiji", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "Tonga", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } }, - { "Turks", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } }, + { "default", { 2, 8, 8, 32, 32, 1, 1, 2, 1, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "AMD Radeon R9 M370X Compute Engine", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 32 } }, + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } }, + { "default", { 16, 16, 16, 32, 8, 0, 1, 1, 1, 32 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } }, + { "default", { 8, 8, 16, 8, 8, 1, 0, 2, 2, 32 } }, + } }, + { "default", { + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 1, 8, 64 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 16, 16, 8, 8, 8, 0, 0, 2, 4, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 0, 0, 2, 2, 64 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "default", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } }, - { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } }, - { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, - { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } }, - { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, + { "SM3.0", { + { "GeForce GT 650M", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } }, + { "default", { 16, 16, 16, 8, 16, 1, 0, 2, 2, 32 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, + { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1080", { 16, 16, 8, 16, 8, 1, 1, 1, 1, 32 } }, + { "TITAN X (Pascal)", { 8, 32, 8, 8, 16, 1, 1, 1, 1, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + } }, + { "default", { + { "default", { 2, 8, 8, 16, 16, 1, 1, 4, 2, 32 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 1, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + { "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index 05e672ac..efbcf8c7 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -14,41 +14,72 @@ const DatabaseEntry XgemmDirectComplexSingle = { "XgemmDirect", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, - { "Turks", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + } }, + { "default", { + { "default", { 2, 16, 16, 16, 16, 1, 1, 2, 2, 32 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 0, 0, 4, 4, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 16, 16, 8, 8, 1, 1, 1, 4, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 16, 8, 1, 1, 2, 1, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Iris Pro", { 2, 16, 16, 8, 8, 1, 1, 2, 2, 32 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } }, - { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } }, - { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { 2, 8, 8, 16, 16, 1, 1, 1, 1, 16 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750 Ti", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } }, + { "default", { 16, 8, 8, 16, 8, 1, 1, 2, 1, 16 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1080", { 8, 8, 16, 16, 8, 1, 1, 2, 2, 32 } }, + { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 8, 8, 1, 1, 2, 4, 32 } }, + } }, + { "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, + { "default", { + { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index ac740dae..6c35c38c 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -14,33 +14,59 @@ const DatabaseEntry XgemmDirectDouble = { "XgemmDirect", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "Ellesmere", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } }, - { "Fiji", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } }, + { "default", { 8, 16, 16, 8, 16, 1, 1, 2, 1, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 16, 8, 8, 8, 16, 1, 1, 1, 1, 16 } }, + { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 32 } }, + } }, + { "default", { + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 8, 8, 8, 8, 0, 0, 1, 4, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 8, 8, 8, 8, 1, 1, 4, 4, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 4, 2, 32 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, - { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } }, - { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } }, - { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } }, + { "default", { 8, 16, 16, 16, 8, 1, 0, 1, 1, 16 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750 Ti", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 4, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, + { "TITAN X (Pascal)", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 16 } }, + } }, + { "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + { "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index 1352ec66..852e1d30 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -14,33 +14,59 @@ const DatabaseEntry XgemmDirectComplexDouble = { "XgemmDirect", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Ellesmere", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } }, - { "Fiji", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "Tonga", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, - { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } }, + { "default", { 16, 32, 32, 16, 8, 0, 0, 1, 1, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "AMD Radeon R9 M370X Compute Engine", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, + { "default", { + { "default", { 2, 16, 16, 16, 16, 1, 1, 1, 1, 16 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } }, - { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 8, 8, 32, 8, 0, 0, 1, 1, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 16, 8, 8, 0, 0, 2, 1, 32 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 8, 16, 8, 8, 8, 0, 0, 2, 2, 32 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 2, 2, 16 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, - { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, - { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, - { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, - { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "SM3.5", { + { "GeForce GTX TITAN Black", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 1, 8 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750 Ti", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, + { "default", { 2, 32, 32, 8, 8, 1, 1, 1, 1, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1080", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + { "TITAN X (Pascal)", { 2, 16, 16, 8, 8, 1, 1, 1, 2, 16 } }, + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } }, + { "default", { + { "default", { 2, 16, 16, 8, 8, 1, 1, 1, 1, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + { "default", { + { "default", { 2, 8, 8, 8, 8, 1, 1, 1, 2, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv/xgemv_16.hpp b/src/database/kernels/xgemv/xgemv_16.hpp index 1fc86276..28a4e209 100644 --- a/src/database/kernels/xgemv/xgemv_16.hpp +++ b/src/database/kernels/xgemv/xgemv_16.hpp @@ -14,20 +14,29 @@ const DatabaseEntry XgemvHalf = { "Xgemv", Precision::kHalf, {"WGS1", "WPT1"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 256, 1 } }, - { "default", { 256, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "default", { + { "default", { 256, 1 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 256, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, + { "default", { + { "default", { 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp index fd1aa7aa..0045e251 100644 --- a/src/database/kernels/xgemv/xgemv_32.hpp +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -14,77 +14,136 @@ const DatabaseEntry XgemvSingle = { "Xgemv", Precision::kSingle, {"WGS1", "WPT1"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } }, - { "ATI Radeon HD 6750M", { 32, 1 } }, - { "Ellesmere", { 256, 1 } }, - { "Fiji", { 128, 1 } }, - { "Hawaii", { 128, 1 } }, - { "Oland", { 128, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 128, 2 } }, - { "Turks", { 32, 1 } }, - { "default", { 128, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 128, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, + { "Oland", { + { "Oland", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 128, 2 } }, + { "default", { 128, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "default", { + { "default", { 128, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 32, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, + { "default", { 64, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 256, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } }, - { "Iris", { 64, 2 } }, - { "Iris Pro", { 128, 1 } }, - { "default", { 128, 1 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 256, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 64, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 1 } }, + { "Iris", { 64, 2 } }, + { "Iris Pro", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 256, 1 } }, - { "GeForce GT 650M", { 256, 1 } }, - { "GeForce GTX 1070", { 128, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 64, 1 } }, - { "GeForce GTX 680", { 256, 1 } }, - { "GeForce GTX 750", { 256, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX 980", { 128, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 256, 1 } }, - { "GeForce GTX TITAN X", { 256, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "Tesla K20m", { 128, 1 } }, - { "Tesla K40m", { 256, 1 } }, - { "default", { 256, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 256, 1 } }, + { "GeForce GT 650M", { 256, 1 } }, + { "GeForce GTX 670", { 64, 1 } }, + { "GeForce GTX 680", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 256, 1 } }, + { "GeForce GTX TITAN Black", { 256, 1 } }, + { "Tesla K20m", { 128, 1 } }, + { "Tesla K40m", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 256, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 128, 1 } }, + { "GeForce GTX TITAN X", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 128, 1 } }, + { "GeForce GTX 1080", { 32, 1 } }, + { "TITAN X (Pascal)", { 32, 1 } }, + { "default", { 128, 1 } }, + } }, + { "default", { + { "default", { 256, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 128, 1 } }, + { "default", { + { "default", { 128, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp index 442dd97f..9bbb4eb7 100644 --- a/src/database/kernels/xgemv/xgemv_3232.hpp +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -14,66 +14,120 @@ const DatabaseEntry XgemvComplexSingle = { "Xgemv", Precision::kComplexSingle, {"WGS1", "WPT1"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "ATI Radeon HD 6750M", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 32, 1 } }, - { "Hawaii", { 64, 1 } }, - { "Oland", { 64, 1 } }, - { "Pitcairn", { 64, 1 } }, - { "Tahiti", { 64, 1 } }, - { "Tonga", { 32, 1 } }, - { "Turks", { 64, 1 } }, - { "default", { 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "default", { + { "default", { 64, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, + { "default", { 64, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 64, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } }, - { "Iris", { 256, 1 } }, - { "Iris Pro", { 64, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 64, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 64, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1 } }, + { "Iris", { 256, 1 } }, + { "Iris Pro", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 256, 1 } }, - { "GeForce GTX 1070", { 64, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 64, 1 } }, - { "GeForce GTX 680", { 64, 1 } }, - { "GeForce GTX 750", { 128, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 32, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "default", { 64, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 256, 1 } }, + { "GeForce GTX 670", { 64, 1 } }, + { "GeForce GTX 680", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 256, 1 } }, + { "GeForce GTX TITAN Black", { 32, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 128, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 64, 1 } }, + { "GeForce GTX 1080", { 32, 1 } }, + { "TITAN X (Pascal)", { 32, 1 } }, + { "default", { 64, 1 } }, + } }, + { "default", { + { "default", { 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, + { "default", { + { "default", { 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp index 8dd899c3..72a3fd98 100644 --- a/src/database/kernels/xgemv/xgemv_64.hpp +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -14,56 +14,105 @@ const DatabaseEntry XgemvDouble = { "Xgemv", Precision::kDouble, {"WGS1", "WPT1"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 32, 1 } }, - { "Hawaii", { 128, 1 } }, - { "Oland", { 256, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 32, 1 } }, - { "default", { 256, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 32, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, + { "Oland", { + { "Oland", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "default", { + { "default", { 256, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, - { "default", { 64, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 64, 4 } }, + { "default", { 64, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1 } }, - { "GeForce GTX 1070", { 64, 1 } }, - { "GeForce GTX 1080", { 32, 1 } }, - { "GeForce GTX 480", { 256, 1 } }, - { "GeForce GTX 670", { 128, 1 } }, - { "GeForce GTX 680", { 128, 1 } }, - { "GeForce GTX 750", { 64, 1 } }, - { "GeForce GTX 750 Ti", { 32, 1 } }, - { "GeForce GTX 980", { 64, 1 } }, - { "GeForce GTX TITAN", { 256, 1 } }, - { "GeForce GTX TITAN Black", { 32, 1 } }, - { "GeForce GTX TITAN X", { 64, 1 } }, - { "TITAN X (Pascal)", { 32, 1 } }, - { "Tesla K20m", { 256, 1 } }, - { "Tesla K40m", { 256, 1 } }, - { "default", { 128, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 128, 1 } }, + { "GeForce GTX 670", { 128, 1 } }, + { "GeForce GTX 680", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 256, 1 } }, + { "GeForce GTX TITAN Black", { 32, 1 } }, + { "Tesla K20m", { 256, 1 } }, + { "Tesla K40m", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 64, 1 } }, + { "GeForce GTX 750 Ti", { 32, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 64, 1 } }, + { "GeForce GTX TITAN X", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 64, 1 } }, + { "GeForce GTX 1080", { 32, 1 } }, + { "TITAN X (Pascal)", { 32, 1 } }, + { "default", { 128, 1 } }, + } }, + { "default", { + { "default", { 128, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 128, 1 } }, + { "default", { + { "default", { 128, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp index 50dc8ea0..0295e145 100644 --- a/src/database/kernels/xgemv/xgemv_6464.hpp +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -14,44 +14,81 @@ const DatabaseEntry XgemvComplexDouble = { "Xgemv", Precision::kComplexDouble, {"WGS1", "WPT1"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, - { "Ellesmere", { 32, 1 } }, - { "Fiji", { 64, 1 } }, - { "Hawaii", { 64, 1 } }, - { "Oland", { 256, 1 } }, - { "Pitcairn", { 256, 1 } }, - { "Tahiti", { 256, 1 } }, - { "Tonga", { 64, 1 } }, - { "default", { 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 32, 1 } }, + { "default", { 32, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 64, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 256, 1 } }, + { "default", { 256, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "default", { + { "default", { 64, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } }, - { "default", { 64, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 64, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 32, 4 } }, + { "default", { 64, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, - { "default", { 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1 } }, - { "GeForce GTX 480", { 64, 1 } }, - { "GeForce GTX 670", { 128, 1 } }, - { "default", { 128, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 64, 1 } }, + { "default", { 64, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 128, 1 } }, + { "GeForce GTX 670", { 128, 1 } }, + { "default", { 128, 1 } }, + } }, + { "default", { + { "default", { 128, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 64, 1 } }, + { "default", { + { "default", { 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp index 6728e713..42a1e4ab 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_16.hpp @@ -14,20 +14,29 @@ const DatabaseEntry XgemvFastHalf = { "XgemvFast", Precision::kHalf, {"VW2", "WGS2", "WPT2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 1, 32, 1 } }, - { "default", { 1, 32, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 32, 1 } }, + { "default", { 1, 32, 1 } }, + } }, + { "default", { + { "default", { 1, 32, 1 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } }, - { "default", { 1, 16, 1 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 16, 1 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 4 } }, + { "default", { 1, 16, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 16, 1 } }, + { "default", { + { "default", { 1, 16, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp index eae35316..79cdf444 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -14,77 +14,136 @@ const DatabaseEntry XgemvFastSingle = { "XgemvFast", Precision::kSingle, {"VW2", "WGS2", "WPT2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, - { "ATI Radeon HD 6750M", { 2, 64, 2 } }, - { "Ellesmere", { 1, 64, 1 } }, - { "Fiji", { 1, 64, 2 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 16, 4 } }, - { "Turks", { 1, 256, 1 } }, - { "default", { 1, 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 64, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 1, 128, 1 } }, + { "default", { 2, 256, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 1, 16, 4 } }, + { "default", { 1, 16, 4 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 2, 64, 2 } }, + { "default", { 2, 64, 2 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } }, - { "default", { 4, 128, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 32, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 32, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 4 } }, + { "default", { 4, 128, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 1, 256, 1 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } }, - { "Iris", { 1, 128, 2 } }, - { "Iris Pro", { 4, 64, 4 } }, - { "default", { 2, 256, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 1, 256, 1 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 32, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 4 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 64, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 2, 32, 2 } }, + { "Iris", { 1, 128, 2 } }, + { "Iris Pro", { 4, 64, 4 } }, + { "default", { 2, 256, 2 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 2, 256, 2 } }, - { "GeForce GT 650M", { 2, 32, 2 } }, - { "GeForce GTX 1070", { 1, 256, 1 } }, - { "GeForce GTX 1080", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 128, 1 } }, - { "GeForce GTX 670", { 2, 256, 2 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 1, 256, 1 } }, - { "GeForce GTX 750 Ti", { 2, 32, 2 } }, - { "GeForce GTX 980", { 1, 256, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 256, 1 } }, - { "GeForce GTX TITAN X", { 1, 64, 1 } }, - { "TITAN X (Pascal)", { 1, 64, 1 } }, - { "Tesla K20m", { 1, 256, 1 } }, - { "Tesla K40m", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 2, 256, 2 } }, + { "GeForce GT 650M", { 2, 32, 2 } }, + { "GeForce GTX 670", { 2, 256, 2 } }, + { "GeForce GTX 680", { 1, 128, 1 } }, + { "default", { 2, 128, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 256, 1 } }, + { "GeForce GTX TITAN Black", { 1, 256, 1 } }, + { "Tesla K20m", { 1, 256, 1 } }, + { "Tesla K40m", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 1, 256, 1 } }, + { "GeForce GTX 750 Ti", { 2, 32, 2 } }, + { "default", { 2, 64, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 256, 1 } }, + { "GeForce GTX TITAN X", { 1, 64, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 256, 1 } }, + { "GeForce GTX 1080", { 1, 128, 1 } }, + { "TITAN X (Pascal)", { 1, 64, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "default", { + { "default", { 1, 256, 1 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 1, 64, 4 } }, - { "default", { 1, 64, 4 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 1, 64, 4 } }, + { "default", { 1, 64, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp index c66cdc19..6a3b6f20 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp @@ -14,60 +14,108 @@ const DatabaseEntry XgemvFastComplexSingle = { "XgemvFast", Precision::kComplexSingle, {"VW2", "WGS2", "WPT2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } }, - { "ATI Radeon HD 6750M", { 1, 128, 1 } }, - { "Ellesmere", { 1, 64, 1 } }, - { "Fiji", { 1, 16, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 128, 1 } }, - { "Tonga", { 2, 32, 2 } }, - { "Turks", { 1, 16, 1 } }, - { "default", { 1, 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 16, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 2, 256, 2 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 2, 32, 2 } }, + { "default", { 2, 32, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } }, - { "default", { 1, 64, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 1, 128, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 2, 128, 2 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 16, 4 } }, + { "default", { 1, 64, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 2, 128, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } }, - { "Iris", { 1, 64, 1 } }, - { "Iris Pro", { 4, 128, 4 } }, - { "default", { 1, 64, 1 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 2, 128, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 1, 32, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 2, 128, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 1, 32, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 1, 32, 1 } }, + { "Iris", { 1, 64, 1 } }, + { "Iris Pro", { 4, 128, 4 } }, + { "default", { 1, 64, 1 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 256, 1 } }, - { "GeForce GTX 1070", { 1, 64, 1 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "GeForce GTX 680", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 256, 1 } }, + { "GeForce GTX 670", { 1, 64, 1 } }, + { "GeForce GTX 680", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp index 53692530..974d14e7 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -14,56 +14,105 @@ const DatabaseEntry XgemvFastDouble = { "XgemvFast", Precision::kDouble, {"VW2", "WGS2", "WPT2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 1, 128, 1 } }, - { "Fiji", { 1, 32, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 64, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 2, 32, 2 } }, - { "default", { 1, 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 32, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 2, 32, 2 } }, + { "default", { 2, 32, 2 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } }, - { "default", { 1, 64, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 4, 128, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 16, 1 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 4 } }, + { "default", { 1, 64, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 256, 1 } }, - { "GeForce GTX 1070", { 1, 256, 1 } }, - { "GeForce GTX 1080", { 1, 32, 2 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 128, 1 } }, - { "GeForce GTX 680", { 1, 128, 1 } }, - { "GeForce GTX 750", { 2, 256, 2 } }, - { "GeForce GTX 750 Ti", { 1, 32, 2 } }, - { "GeForce GTX 980", { 1, 64, 1 } }, - { "GeForce GTX TITAN", { 1, 256, 1 } }, - { "GeForce GTX TITAN Black", { 1, 256, 1 } }, - { "GeForce GTX TITAN X", { 1, 128, 1 } }, - { "TITAN X (Pascal)", { 1, 32, 1 } }, - { "Tesla K20m", { 1, 128, 1 } }, - { "Tesla K40m", { 1, 256, 1 } }, - { "default", { 1, 256, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 256, 1 } }, + { "GeForce GTX 670", { 1, 128, 1 } }, + { "GeForce GTX 680", { 1, 128, 1 } }, + { "default", { 1, 128, 1 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 256, 1 } }, + { "GeForce GTX TITAN Black", { 1, 256, 1 } }, + { "Tesla K20m", { 1, 128, 1 } }, + { "Tesla K40m", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 2, 256, 2 } }, + { "GeForce GTX 750 Ti", { 1, 32, 2 } }, + { "default", { 2, 256, 2 } }, + } }, + { "SM5.2", { + { "GeForce GTX 980", { 1, 64, 1 } }, + { "GeForce GTX TITAN X", { 1, 128, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 1, 256, 1 } }, + { "GeForce GTX 1080", { 1, 32, 2 } }, + { "TITAN X (Pascal)", { 1, 32, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "default", { + { "default", { 1, 256, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp index fdf3d508..8bd4f7fb 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp @@ -14,44 +14,81 @@ const DatabaseEntry XgemvFastComplexDouble = { "XgemvFast", Precision::kComplexDouble, {"VW2", "WGS2", "WPT2"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, - { "Ellesmere", { 1, 16, 1 } }, - { "Fiji", { 1, 16, 1 } }, - { "Hawaii", { 1, 64, 1 } }, - { "Oland", { 1, 256, 1 } }, - { "Pitcairn", { 1, 64, 1 } }, - { "Tahiti", { 1, 64, 1 } }, - { "Tonga", { 1, 32, 1 } }, - { "default", { 1, 64, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 1, 16, 1 } }, + { "default", { 1, 16, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 1, 16, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 1, 256, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Oland", { + { "Oland", { 1, 256, 1 } }, + { "default", { 1, 256, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 1, 32, 1 } }, + { "default", { 1, 32, 1 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } }, - { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } }, - { "default", { 4, 64, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 2, 64, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 1, 64, 4 } }, + { "Intel(R) Core(TM) i7-3770 CPU @ 3.40GHz", { 4, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 1, 16, 2 } }, + { "default", { 4, 64, 4 } }, + } }, } }, { // Intel accelerators kDeviceTypeAccelerator, "Intel", { - { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "default", { + { "Intel(R) Many Integrated Core Acceleration Card", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 1, 128, 1 } }, - { "GeForce GTX 480", { 1, 64, 1 } }, - { "GeForce GTX 670", { 1, 64, 1 } }, - { "default", { 1, 64, 1 } }, + { "SM2.0", { + { "GeForce GTX 480", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "SM3.0", { + { "GRID K520", { 1, 128, 1 } }, + { "GeForce GTX 670", { 1, 64, 1 } }, + { "default", { 1, 64, 1 } }, + } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 1, 64, 1 } }, + { "default", { + { "default", { 1, 64, 1 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp index 8d516141..15d1b4fc 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_16.hpp @@ -14,19 +14,28 @@ const DatabaseEntry XgemvFastRotHalf = { "XgemvFastRot", Precision::kHalf, {"VW3", "WGS3", "WPT3"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 8, 32, 32 } }, - { "default", { 8, 32, 32 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 32, 32 } }, + { "default", { 8, 32, 32 } }, + } }, + { "default", { + { "default", { 8, 32, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } }, - { "default", { 8, 128, 32 } }, + { "default", { + { "Intel(R) HD Graphics Skylake ULT GT2", { 8, 128, 32 } }, + { "default", { 8, 128, 32 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 128, 32 } }, + { "default", { + { "default", { 8, 128, 32 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp index 71b8b355..66e3eecf 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -14,54 +14,93 @@ const DatabaseEntry XgemvFastRotSingle = { "XgemvFastRot", Precision::kSingle, {"VW3", "WGS3", "WPT3"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } }, - { "ATI Radeon HD 6750M", { 8, 128, 16 } }, - { "Ellesmere", { 8, 32, 32 } }, - { "Fiji", { 4, 32, 16 } }, - { "Tonga", { 8, 128, 32 } }, - { "Turks", { 8, 128, 16 } }, - { "default", { 8, 32, 32 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 32, 32 } }, + { "default", { 8, 32, 32 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 4, 32, 16 } }, + { "AMD Radeon R9 M370X Compute Engine", { 8, 64, 32 } }, + { "default", { 8, 32, 32 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 128, 32 } }, + { "default", { 8, 128, 32 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 8, 128, 16 } }, + { "default", { 8, 128, 16 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 8, 128, 16 } }, + { "default", { 8, 128, 16 } }, + } }, + { "default", { + { "default", { 8, 32, 32 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, - { "default", { 8, 32, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 128, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, + { "default", { 8, 32, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, - { "Iris Pro", { 4, 16, 16 } }, - { "default", { 4, 64, 16 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 8, 64, 32 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 64, 16 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 2, 32, 16 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, + { "Iris Pro", { 4, 16, 16 } }, + { "default", { 4, 64, 16 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GT 650M", { 8, 32, 16 } }, - { "GeForce GTX 1080", { 8, 32, 32 } }, - { "GeForce GTX 750 Ti", { 8, 32, 32 } }, - { "GeForce GTX TITAN", { 1, 16, 16 } }, - { "GeForce GTX TITAN Black", { 4, 128, 16 } }, - { "TITAN X (Pascal)", { 8, 64, 32 } }, - { "default", { 8, 32, 32 } }, + { "SM3.0", { + { "GeForce GT 650M", { 8, 32, 16 } }, + { "default", { 8, 32, 16 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 16, 16 } }, + { "GeForce GTX TITAN Black", { 4, 128, 16 } }, + { "default", { 1, 16, 16 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750 Ti", { 8, 32, 32 } }, + { "default", { 8, 32, 32 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1080", { 8, 32, 32 } }, + { "TITAN X (Pascal)", { 8, 64, 32 } }, + { "default", { 8, 64, 32 } }, + } }, + { "default", { + { "default", { 8, 32, 32 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 4, 64, 16 } }, - { "default", { 4, 64, 16 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 4, 64, 16 } }, + { "default", { 4, 64, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 8, 32, 32 } }, + { "default", { + { "default", { 8, 32, 32 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp index 4fd88fc4..445e7161 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp @@ -14,37 +14,60 @@ const DatabaseEntry XgemvFastRotComplexSingle = { "XgemvFastRot", Precision::kComplexSingle, {"VW3", "WGS3", "WPT3"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } }, - { "ATI Radeon HD 6750M", { 8, 32, 8 } }, - { "Ellesmere", { 2, 32, 16 } }, - { "Fiji", { 4, 32, 32 } }, - { "Tonga", { 4, 32, 32 } }, - { "Turks", { 4, 32, 8 } }, - { "default", { 8, 16, 16 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 2, 32, 16 } }, + { "default", { 2, 32, 16 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 4, 32, 32 } }, + { "AMD Radeon R9 M370X Compute Engine", { 8, 16, 16 } }, + { "default", { 8, 16, 16 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 4, 32, 32 } }, + { "default", { 4, 32, 32 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 4, 32, 8 } }, + { "default", { 4, 32, 8 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 8, 32, 8 } }, + { "default", { 8, 32, 8 } }, + } }, + { "default", { + { "default", { 8, 16, 16 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } }, - { "default", { 4, 32, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 32, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 4, 16, 16 } }, + { "default", { 4, 32, 32 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, - { "Iris Pro", { 4, 16, 16 } }, - { "default", { 2, 32, 8 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 2, 16, 16 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 4, 128, 8 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 4, 32, 8 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 4, 64, 16 } }, + { "Iris Pro", { 4, 16, 16 } }, + { "default", { 2, 32, 8 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, + { "default", { + { "default", { 4, 16, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp index 66299b56..842c5bae 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -14,35 +14,61 @@ const DatabaseEntry XgemvFastRotDouble = { "XgemvFastRot", Precision::kDouble, {"VW3", "WGS3", "WPT3"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } }, - { "Ellesmere", { 4, 16, 16 } }, - { "Fiji", { 4, 32, 32 } }, - { "Tonga", { 4, 16, 16 } }, - { "default", { 4, 16, 16 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 4, 16, 16 } }, + { "default", { 4, 16, 16 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 4, 32, 32 } }, + { "AMD Radeon R9 M370X Compute Engine", { 4, 16, 16 } }, + { "default", { 4, 16, 16 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 4, 16, 16 } }, + { "default", { 4, 16, 16 } }, + } }, + { "default", { + { "default", { 4, 16, 16 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, - { "default", { 8, 32, 32 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 8, 16, 8 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 4, 32, 32 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 8 } }, + { "default", { 8, 32, 32 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GeForce GTX 1080", { 8, 32, 32 } }, - { "GeForce GTX 750 Ti", { 4, 32, 16 } }, - { "GeForce GTX TITAN", { 1, 16, 16 } }, - { "GeForce GTX TITAN Black", { 1, 16, 16 } }, - { "TITAN X (Pascal)", { 8, 32, 32 } }, - { "default", { 4, 32, 16 } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 1, 16, 16 } }, + { "GeForce GTX TITAN Black", { 1, 16, 16 } }, + { "default", { 1, 16, 16 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750 Ti", { 4, 32, 16 } }, + { "default", { 4, 32, 16 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1080", { 8, 32, 32 } }, + { "TITAN X (Pascal)", { 8, 32, 32 } }, + { "default", { 8, 32, 32 } }, + } }, + { "default", { + { "default", { 4, 32, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, + { "default", { + { "default", { 4, 16, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp index bc1964ff..84340b4c 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp @@ -14,25 +14,40 @@ const DatabaseEntry XgemvFastRotComplexDouble = { "XgemvFastRot", Precision::kComplexDouble, {"VW3", "WGS3", "WPT3"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } }, - { "Ellesmere", { 4, 16, 16 } }, - { "Fiji", { 4, 32, 8 } }, - { "Tonga", { 4, 16, 8 } }, - { "default", { 8, 32, 16 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 4, 16, 16 } }, + { "default", { 4, 16, 16 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 4, 32, 8 } }, + { "AMD Radeon R9 M370X Compute Engine", { 4, 32, 16 } }, + { "default", { 4, 32, 8 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 4, 16, 8 } }, + { "default", { 4, 16, 8 } }, + } }, + { "default", { + { "default", { 8, 32, 16 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } }, - { "default", { 8, 16, 16 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 2, 16, 16 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 4, 64, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 2, 16, 16 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 8, 16, 16 } }, + { "default", { 8, 16, 16 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 4, 16, 16 } }, + { "default", { + { "default", { 4, 16, 16 } }, + } }, } }, } diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp index 376716b7..54dc2c3a 100644 --- a/src/database/kernels/xger/xger_16.hpp +++ b/src/database/kernels/xger/xger_16.hpp @@ -14,26 +14,37 @@ const DatabaseEntry XgerHalf = { "Xger", Precision::kHalf, {"WGS1", "WGS2", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "Ellesmere", { 64, 1, 2 } }, - { "default", { 64, 1, 2 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 64, 1, 2 } }, + { "default", { 64, 1, 2 } }, + } }, + { "default", { + { "default", { 64, 1, 2 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } }, - { "default", { 4, 8, 2 } }, + { "default", { + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 1, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 64, 1, 4 } }, + { "default", { 4, 8, 2 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 4, 2 } }, - { "default", { 64, 4, 2 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 64, 4, 2 } }, + { "default", { 64, 4, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 64, 1, 2 } }, + { "default", { + { "default", { 64, 1, 2 } }, + } }, } }, } diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index bc18f20e..9bad0efb 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -14,72 +14,128 @@ const DatabaseEntry XgerSingle = { "Xger", Precision::kSingle, {"WGS1", "WGS2", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } }, - { "ATI Radeon HD 6750M", { 16, 16, 4 } }, - { "Ellesmere", { 64, 4, 2 } }, - { "Fiji", { 256, 1, 1 } }, - { "Hawaii", { 64, 2, 1 } }, - { "Oland", { 32, 4, 2 } }, - { "Pitcairn", { 64, 1, 1 } }, - { "Tahiti", { 256, 1, 1 } }, - { "Tonga", { 256, 1, 2 } }, - { "Turks", { 64, 4, 2 } }, - { "default", { 16, 16, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 64, 4, 2 } }, + { "default", { 64, 4, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 256, 1, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 256, 1, 1 } }, + { "default", { 256, 1, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 64, 2, 1 } }, + { "default", { 64, 2, 1 } }, + } }, + { "Oland", { + { "Oland", { 32, 4, 2 } }, + { "default", { 32, 4, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 64, 1, 1 } }, + { "default", { 64, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 256, 1, 1 } }, + { "default", { 256, 1, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 256, 1, 2 } }, + { "default", { 256, 1, 2 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 64, 4, 2 } }, + { "default", { 64, 4, 2 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 16, 16, 4 } }, + { "default", { 16, 16, 4 } }, + } }, + { "default", { + { "default", { 16, 16, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 4, 4 } }, - { "default", { 64, 4, 4 } }, + { "default", { + { "Mali-T628", { 64, 4, 4 } }, + { "default", { 64, 4, 4 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } }, - { "default", { 128, 8, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 32, 4, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 128, 2, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 16, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 128, 1, 4 } }, + { "default", { 128, 8, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } }, - { "Iris Pro", { 64, 1, 4 } }, - { "default", { 32, 4, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 256, 2, 2 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 128, 1, 2 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 64, 1, 4 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 32, 4, 4 } }, + { "Iris Pro", { 64, 1, 4 } }, + { "default", { 32, 4, 2 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 1, 2 } }, - { "GeForce GT 650M", { 32, 16, 4 } }, - { "GeForce GTX 1070", { 512, 1, 1 } }, - { "GeForce GTX 1080", { 16, 4, 1 } }, - { "GeForce GTX 480", { 256, 1, 4 } }, - { "GeForce GTX 670", { 32, 8, 2 } }, - { "GeForce GTX 680", { 128, 1, 4 } }, - { "GeForce GTX 750", { 64, 16, 4 } }, - { "GeForce GTX 750 Ti", { 64, 1, 2 } }, - { "GeForce GTX TITAN", { 32, 4, 2 } }, - { "GeForce GTX TITAN Black", { 32, 4, 2 } }, - { "TITAN X (Pascal)", { 512, 2, 1 } }, - { "default", { 128, 1, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 256, 1, 4 } }, + { "default", { 256, 1, 4 } }, + } }, + { "SM3.0", { + { "GRID K520", { 128, 1, 2 } }, + { "GeForce GT 650M", { 32, 16, 4 } }, + { "GeForce GTX 670", { 32, 8, 2 } }, + { "GeForce GTX 680", { 128, 1, 4 } }, + { "default", { 128, 1, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 32, 4, 2 } }, + { "GeForce GTX TITAN Black", { 32, 4, 2 } }, + { "default", { 32, 4, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 64, 16, 4 } }, + { "GeForce GTX 750 Ti", { 64, 1, 2 } }, + { "default", { 128, 1, 2 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 512, 1, 1 } }, + { "GeForce GTX 1080", { 16, 4, 1 } }, + { "TITAN X (Pascal)", { 512, 2, 1 } }, + { "default", { 512, 2, 1 } }, + } }, + { "default", { + { "default", { 128, 1, 2 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 128, 1, 2 } }, - { "default", { 128, 1, 2 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 128, 1, 2 } }, + { "default", { 128, 1, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 32, 4, 2 } }, + { "default", { + { "default", { 32, 4, 2 } }, + } }, } }, } diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index d2d1587f..49c3207c 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -14,71 +14,127 @@ const DatabaseEntry XgerComplexSingle = { "Xger", Precision::kComplexSingle, {"WGS1", "WGS2", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } }, - { "ATI Radeon HD 6750M", { 16, 16, 1 } }, - { "Ellesmere", { 16, 8, 2 } }, - { "Fiji", { 128, 2, 1 } }, - { "Hawaii", { 64, 1, 2 } }, - { "Oland", { 4, 8, 1 } }, - { "Pitcairn", { 128, 2, 1 } }, - { "Tahiti", { 64, 2, 1 } }, - { "Tonga", { 64, 1, 1 } }, - { "Turks", { 128, 2, 1 } }, - { "default", { 128, 2, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 16, 8, 2 } }, + { "default", { 16, 8, 2 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 128, 2, 1 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 4, 1 } }, + { "default", { 128, 2, 1 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 64, 1, 2 } }, + { "default", { 64, 1, 2 } }, + } }, + { "Oland", { + { "Oland", { 4, 8, 1 } }, + { "default", { 4, 8, 1 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 128, 2, 1 } }, + { "default", { 128, 2, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 64, 2, 1 } }, + { "default", { 64, 2, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 64, 1, 1 } }, + { "default", { 64, 1, 1 } }, + } }, + { "Turks", { + { "AMD Radeon HD 6770M", { 128, 2, 1 } }, + { "default", { 128, 2, 1 } }, + } }, + { "Vancouver", { + { "ATI Radeon HD 6750M", { 16, 16, 1 } }, + { "default", { 16, 16, 1 } }, + } }, + { "default", { + { "default", { 128, 2, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 128, 1, 1 } }, - { "default", { 128, 1, 1 } }, + { "default", { + { "Mali-T628", { 128, 1, 1 } }, + { "default", { 128, 1, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } }, - { "default", { 256, 2, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 2, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 256, 1, 4 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 2, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 4, 2 } }, + { "default", { 256, 2, 4 } }, + } }, } }, { // Intel GPUs kDeviceTypeGPU, "Intel", { - { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, - { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } }, - { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } }, - { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } }, - { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } }, - { "Iris Pro", { 16, 2, 4 } }, - { "default", { 128, 2, 2 } }, + { "default", { + { "Intel(R) HD Graphics 530", { 32, 1, 2 } }, + { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { 128, 2, 1 } }, + { "Intel(R) HD Graphics Haswell Ultrabook GT2 Mobile", { 512, 1, 1 } }, + { "Intel(R) HD Graphics IvyBridge M GT2", { 256, 1, 2 } }, + { "Intel(R) HD Graphics Skylake ULT GT2", { 16, 1, 1 } }, + { "Iris Pro", { 16, 2, 4 } }, + { "default", { 128, 2, 2 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 64, 4, 2 } }, - { "GeForce GTX 1070", { 16, 64, 2 } }, - { "GeForce GTX 1080", { 32, 2, 1 } }, - { "GeForce GTX 480", { 128, 2, 2 } }, - { "GeForce GTX 670", { 16, 32, 2 } }, - { "GeForce GTX 680", { 32, 4, 2 } }, - { "GeForce GTX 750", { 32, 16, 4 } }, - { "GeForce GTX 750 Ti", { 32, 8, 2 } }, - { "GeForce GTX TITAN", { 16, 16, 2 } }, - { "GeForce GTX TITAN Black", { 16, 16, 2 } }, - { "TITAN X (Pascal)", { 32, 2, 1 } }, - { "default", { 128, 2, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 128, 2, 2 } }, + { "default", { 128, 2, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 64, 4, 2 } }, + { "GeForce GTX 670", { 16, 32, 2 } }, + { "GeForce GTX 680", { 32, 4, 2 } }, + { "default", { 64, 2, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 16, 2 } }, + { "GeForce GTX TITAN Black", { 16, 16, 2 } }, + { "default", { 16, 16, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 32, 16, 4 } }, + { "GeForce GTX 750 Ti", { 32, 8, 2 } }, + { "default", { 32, 16, 4 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 16, 64, 2 } }, + { "GeForce GTX 1080", { 32, 2, 1 } }, + { "TITAN X (Pascal)", { 32, 2, 1 } }, + { "default", { 32, 2, 1 } }, + } }, + { "default", { + { "default", { 128, 2, 2 } }, + } }, } }, { // QUALCOMM GPUs kDeviceTypeGPU, "QUALCOMM", { - { "QUALCOMM Adreno(TM)", { 64, 1, 4 } }, - { "default", { 64, 1, 4 } }, + { "default", { + { "QUALCOMM Adreno(TM)", { 64, 1, 4 } }, + { "default", { 64, 1, 4 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 64, 2, 2 } }, + { "default", { + { "default", { 64, 2, 2 } }, + } }, } }, } diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index 304fcaf4..09b5a6af 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -14,52 +14,98 @@ const DatabaseEntry XgerDouble = { "Xger", Precision::kDouble, {"WGS1", "WGS2", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } }, - { "Ellesmere", { 64, 1, 4 } }, - { "Fiji", { 256, 1, 2 } }, - { "Hawaii", { 32, 4, 2 } }, - { "Oland", { 128, 1, 2 } }, - { "Pitcairn", { 64, 1, 1 } }, - { "Tahiti", { 64, 2, 1 } }, - { "Tonga", { 8, 16, 2 } }, - { "default", { 128, 2, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 64, 1, 4 } }, + { "default", { 64, 1, 4 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 256, 1, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 32, 4, 1 } }, + { "default", { 256, 1, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 32, 4, 2 } }, + { "default", { 32, 4, 2 } }, + } }, + { "Oland", { + { "Oland", { 128, 1, 2 } }, + { "default", { 128, 1, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 64, 1, 1 } }, + { "default", { 64, 1, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 64, 2, 1 } }, + { "default", { 64, 2, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 8, 16, 2 } }, + { "default", { 8, 16, 2 } }, + } }, + { "default", { + { "default", { 128, 2, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 4, 1 } }, - { "default", { 64, 4, 1 } }, + { "default", { + { "Mali-T628", { 64, 4, 1 } }, + { "default", { 64, 4, 1 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } }, - { "default", { 256, 1, 4 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 256, 1, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 16, 1 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 1, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 256, 4, 4 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 512, 8, 2 } }, + { "default", { 256, 1, 4 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 128, 8, 2 } }, - { "GeForce GTX 1070", { 32, 8, 1 } }, - { "GeForce GTX 1080", { 32, 2, 1 } }, - { "GeForce GTX 480", { 32, 4, 2 } }, - { "GeForce GTX 670", { 32, 32, 2 } }, - { "GeForce GTX 680", { 128, 4, 2 } }, - { "GeForce GTX 750", { 256, 2, 2 } }, - { "GeForce GTX 750 Ti", { 32, 16, 1 } }, - { "GeForce GTX TITAN", { 16, 8, 2 } }, - { "GeForce GTX TITAN Black", { 32, 4, 2 } }, - { "TITAN X (Pascal)", { 32, 2, 1 } }, - { "default", { 128, 1, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 32, 4, 2 } }, + { "default", { 32, 4, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 128, 8, 2 } }, + { "GeForce GTX 670", { 32, 32, 2 } }, + { "GeForce GTX 680", { 128, 4, 2 } }, + { "default", { 128, 8, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 16, 8, 2 } }, + { "GeForce GTX TITAN Black", { 32, 4, 2 } }, + { "default", { 32, 4, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 256, 2, 2 } }, + { "GeForce GTX 750 Ti", { 32, 16, 1 } }, + { "default", { 16, 8, 1 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 32, 8, 1 } }, + { "GeForce GTX 1080", { 32, 2, 1 } }, + { "TITAN X (Pascal)", { 32, 2, 1 } }, + { "default", { 32, 2, 1 } }, + } }, + { "default", { + { "default", { 128, 1, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 128, 1, 2 } }, + { "default", { + { "default", { 128, 1, 2 } }, + } }, } }, } diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index dd7e6572..9f7dfe44 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -14,52 +14,98 @@ const DatabaseEntry XgerComplexDouble = { "Xger", Precision::kComplexDouble, {"WGS1", "WGS2", "WPT"}, { { // AMD GPUs kDeviceTypeGPU, "AMD", { - { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } }, - { "Ellesmere", { 8, 16, 1 } }, - { "Fiji", { 64, 4, 2 } }, - { "Hawaii", { 128, 1, 1 } }, - { "Oland", { 16, 16, 2 } }, - { "Pitcairn", { 64, 4, 1 } }, - { "Tahiti", { 32, 4, 1 } }, - { "Tonga", { 16, 4, 1 } }, - { "default", { 32, 4, 1 } }, + { "Ellesmere", { + { "AMD Radeon RX 480", { 8, 16, 1 } }, + { "default", { 8, 16, 1 } }, + } }, + { "Fiji", { + { "AMD Radeon R9 Fury X", { 64, 4, 2 } }, + { "AMD Radeon R9 M370X Compute Engine", { 64, 1, 1 } }, + { "default", { 64, 4, 2 } }, + } }, + { "Hawaii", { + { "AMD Radeon R9 290X", { 128, 1, 1 } }, + { "default", { 128, 1, 1 } }, + } }, + { "Oland", { + { "Oland", { 16, 16, 2 } }, + { "default", { 16, 16, 2 } }, + } }, + { "Pitcairn", { + { "AMD Radeon R9 270X", { 64, 4, 1 } }, + { "default", { 64, 4, 1 } }, + } }, + { "Tahiti", { + { "AMD Radeon HD 7970", { 32, 4, 1 } }, + { "default", { 32, 4, 1 } }, + } }, + { "Tonga", { + { "AMD Radeon R9 380", { 16, 4, 1 } }, + { "default", { 16, 4, 1 } }, + } }, + { "default", { + { "default", { 32, 4, 1 } }, + } }, } }, { // ARM GPUs kDeviceTypeGPU, "ARM", { - { "Mali-T628", { 64, 2, 4 } }, - { "default", { 64, 2, 4 } }, + { "default", { + { "Mali-T628", { 64, 2, 4 } }, + { "default", { 64, 2, 4 } }, + } }, } }, { // Intel CPUs kDeviceTypeCPU, "Intel", { - { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } }, - { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } }, - { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, - { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } }, - { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } }, - { "default", { 256, 2, 2 } }, + { "default", { + { "Intel(R) Core(TM) i7-2670QM CPU @ 2.20GHz", { 128, 4, 4 } }, + { "Intel(R) Core(TM) i5-6200U CPU @ 2.30GHz", { 512, 4, 2 } }, + { "Intel(R) Core(TM) i7 CPU 920 @ 2.67GHz", { 256, 8, 4 } }, + { "Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz", { 512, 2, 2 } }, + { "Intel(R) Core(TM) i7-5930K CPU @ 3.50GHz", { 256, 1, 2 } }, + { "default", { 256, 2, 2 } }, + } }, } }, { // NVIDIA GPUs kDeviceTypeGPU, "NVIDIA", { - { "GRID K520", { 16, 8, 2 } }, - { "GeForce GTX 1070", { 8, 128, 1 } }, - { "GeForce GTX 1080", { 8, 4, 1 } }, - { "GeForce GTX 480", { 64, 2, 2 } }, - { "GeForce GTX 670", { 8, 16, 2 } }, - { "GeForce GTX 680", { 8, 16, 1 } }, - { "GeForce GTX 750", { 8, 32, 4 } }, - { "GeForce GTX 750 Ti", { 32, 8, 2 } }, - { "GeForce GTX TITAN", { 32, 4, 2 } }, - { "GeForce GTX TITAN Black", { 16, 16, 2 } }, - { "TITAN X (Pascal)", { 4, 8, 1 } }, - { "default", { 16, 8, 2 } }, + { "SM2.0", { + { "GeForce GTX 480", { 64, 2, 2 } }, + { "default", { 64, 2, 2 } }, + } }, + { "SM3.0", { + { "GRID K520", { 16, 8, 2 } }, + { "GeForce GTX 670", { 8, 16, 2 } }, + { "GeForce GTX 680", { 8, 16, 1 } }, + { "default", { 16, 8, 2 } }, + } }, + { "SM3.5", { + { "GeForce GTX TITAN", { 32, 4, 2 } }, + { "GeForce GTX TITAN Black", { 16, 16, 2 } }, + { "default", { 32, 4, 2 } }, + } }, + { "SM5.0", { + { "GeForce GTX 750", { 8, 32, 4 } }, + { "GeForce GTX 750 Ti", { 32, 8, 2 } }, + { "default", { 32, 1, 4 } }, + } }, + { "SM6.1", { + { "GeForce GTX 1070", { 8, 128, 1 } }, + { "GeForce GTX 1080", { 8, 4, 1 } }, + { "TITAN X (Pascal)", { 4, 8, 1 } }, + { "default", { 8, 4, 1 } }, + } }, + { "default", { + { "default", { 16, 8, 2 } }, + } }, } }, { // Default kDeviceTypeAll, "default", { - { "default", { 64, 2, 2 } }, + { "default", { + { "default", { 64, 2, 2 } }, + } }, } }, } diff --git a/src/database/kernels/xtrsv.hpp b/src/database/kernels/xtrsv.hpp index 7c22aa57..ffd15d3e 100644 --- a/src/database/kernels/xtrsv.hpp +++ b/src/database/kernels/xtrsv.hpp @@ -19,7 +19,7 @@ const DatabaseEntry XtrsvHalf = { "Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { "default", { 32 } } } }, } }, } @@ -31,7 +31,7 @@ const DatabaseEntry XtrsvSingle = { "Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { "default", { 32 } } } }, } }, } @@ -43,7 +43,7 @@ const DatabaseEntry XtrsvComplexSingle = { "Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { "default", { 32 } } } }, } }, } @@ -55,7 +55,7 @@ const DatabaseEntry XtrsvDouble = { "Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { "default", { 32 } } } }, } }, } @@ -67,7 +67,7 @@ const DatabaseEntry XtrsvComplexDouble = { "Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, { { // Default kDeviceTypeAll, "default", { - { "default", { 32 } }, + { "default", { { "default", { 32 } } } }, } }, } diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp index d912a377..1ba3ab03 100644 --- a/src/utilities/utilities.hpp +++ b/src/utilities/utilities.hpp @@ -123,6 +123,14 @@ constexpr auto kBufScalar = "Scalar"; // ================================================================================================= +inline void log_debug(const std::string &log_string) { + #ifdef VERBOSE + printf("[DEBUG] %s\n", log_string.c_str()); + #endif +} + +// ================================================================================================= + // Converts a regular or complex type to it's base type (e.g. float2 to float) template struct BaseType { using Type = T; }; template <> struct BaseType { using Type = float; };