mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-08-22 21:12:26 +02:00
Merge branch 'database_rewrite' into development
This commit is contained in:
commit
9fb7a0efe1
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -2,5 +2,6 @@ build
|
||||||
stash
|
stash
|
||||||
.*
|
.*
|
||||||
*.pyc
|
*.pyc
|
||||||
*.db
|
database.json
|
||||||
|
database_best.json
|
||||||
cl.hpp
|
cl.hpp
|
|
@ -11,8 +11,6 @@ import os.path
|
||||||
import glob
|
import glob
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
import database.io as io
|
import database.io as io
|
||||||
import database.db as db
|
import database.db as db
|
||||||
import database.clblast as clblast
|
import database.clblast as clblast
|
||||||
|
@ -20,15 +18,15 @@ import database.bests as bests
|
||||||
import database.defaults as defaults
|
import database.defaults as defaults
|
||||||
|
|
||||||
# Server storing a copy of the database
|
# Server storing a copy of the database
|
||||||
DATABASE_SERVER_URL = "http://www.cedricnugteren.nl/tuning/clblast.db"
|
DATABASE_SERVER_URL = "http://www.cedricnugteren.nl/tuning/clblast.json"
|
||||||
|
|
||||||
# OpenCL vendor names and their short name
|
# OpenCL vendor names and their short name
|
||||||
VENDOR_TRANSLATION_TABLE = {"device_vendor": {
|
VENDOR_TRANSLATION_TABLE = {
|
||||||
"GenuineIntel": "Intel",
|
"GenuineIntel": "Intel",
|
||||||
"Intel(R) Corporation": "Intel",
|
"Intel(R) Corporation": "Intel",
|
||||||
"Advanced Micro Devices, Inc.": "AMD",
|
"Advanced Micro Devices, Inc.": "AMD",
|
||||||
"NVIDIA Corporation": "NVIDIA",
|
"NVIDIA Corporation": "NVIDIA",
|
||||||
}}
|
}
|
||||||
|
|
||||||
|
|
||||||
def main(argv):
|
def main(argv):
|
||||||
|
@ -41,7 +39,8 @@ def main(argv):
|
||||||
cl_args = parser.parse_args(argv)
|
cl_args = parser.parse_args(argv)
|
||||||
|
|
||||||
# Parses the path arguments
|
# Parses the path arguments
|
||||||
database_filename = os.path.join(cl_args.clblast_root, "scripts", "database", "database.db")
|
database_filename = os.path.join(cl_args.clblast_root, "scripts", "database", "database.json")
|
||||||
|
database_best_filename = os.path.join(cl_args.clblast_root, "scripts", "database", "database_best.json")
|
||||||
json_files = os.path.join(cl_args.source_folder, "*.json")
|
json_files = os.path.join(cl_args.source_folder, "*.json")
|
||||||
cpp_database_path = os.path.join(cl_args.clblast_root, "src", "database", "kernels")
|
cpp_database_path = os.path.join(cl_args.clblast_root, "src", "database", "kernels")
|
||||||
|
|
||||||
|
@ -52,11 +51,6 @@ def main(argv):
|
||||||
if len(glob.glob(json_files)) < 1:
|
if len(glob.glob(json_files)) < 1:
|
||||||
print("[database] The path '" + cl_args.source_folder + "' does not contain any JSON files")
|
print("[database] The path '" + cl_args.source_folder + "' does not contain any JSON files")
|
||||||
|
|
||||||
# Pandas options
|
|
||||||
pd.set_option('display.width', 1000)
|
|
||||||
if cl_args.verbose:
|
|
||||||
print("[database] Using pandas version " + pd.__version__)
|
|
||||||
|
|
||||||
# Downloads the database if a local copy is not present
|
# Downloads the database if a local copy is not present
|
||||||
if not os.path.isfile(database_filename):
|
if not os.path.isfile(database_filename):
|
||||||
io.download_database(database_filename, DATABASE_SERVER_URL)
|
io.download_database(database_filename, DATABASE_SERVER_URL)
|
||||||
|
@ -68,30 +62,22 @@ def main(argv):
|
||||||
for file_json in glob.glob(json_files):
|
for file_json in glob.glob(json_files):
|
||||||
|
|
||||||
# Loads the newly imported data
|
# Loads the newly imported data
|
||||||
sys.stdout.write("[database] Processing '"+file_json+"' ") # No newline printed
|
sys.stdout.write("[database] Processing '" + file_json + "' ") # No newline printed
|
||||||
imported_data = io.load_json_to_pandas(file_json)
|
imported_data = io.load_tuning_results(file_json)
|
||||||
|
|
||||||
# Fixes the problem that some vendors use multiple different names
|
# Fixes the problem that some vendors use multiple different names
|
||||||
imported_data = db.find_and_replace(imported_data, VENDOR_TRANSLATION_TABLE)
|
for target in VENDOR_TRANSLATION_TABLE:
|
||||||
|
if imported_data["device_vendor"] == target:
|
||||||
|
imported_data["device_vendor"] = VENDOR_TRANSLATION_TABLE[target]
|
||||||
|
|
||||||
# Adds the new data to the database
|
# Adds the new data to the database
|
||||||
old_size = len(database.index)
|
old_size = db.length(database)
|
||||||
database = db.concatenate_database(database, imported_data)
|
database = db.add_section(database, imported_data)
|
||||||
database = database.drop_duplicates()
|
new_size = db.length(database)
|
||||||
new_size = len(database.index)
|
|
||||||
print("with " + str(new_size - old_size) + " new items") # Newline printed here
|
print("with " + str(new_size - old_size) + " new items") # Newline printed here
|
||||||
|
|
||||||
# Stores the modified database back to disk
|
# Stores the modified database back to disk
|
||||||
if len(glob.glob(json_files)) >= 1:
|
if len(glob.glob(json_files)) >= 1:
|
||||||
database = db.remove_duplicates(database)
|
|
||||||
io.save_database(database, database_filename)
|
|
||||||
|
|
||||||
# Optional: update the database here. Default is disabled, code below is just an example
|
|
||||||
if False: # TODO: Use command-line arguments to enable updates in a flexible way
|
|
||||||
database = db.update_database(database,
|
|
||||||
((database["kernel"] == "CopyMatrixFast") &
|
|
||||||
(database["precision"] == "3232")),
|
|
||||||
"arg_alpha", "2+0.5i")
|
|
||||||
io.save_database(database, database_filename)
|
io.save_database(database, database_filename)
|
||||||
|
|
||||||
# Retrieves the best performing results
|
# Retrieves the best performing results
|
||||||
|
@ -101,7 +87,11 @@ def main(argv):
|
||||||
# Determines the defaults for other vendors and per vendor
|
# Determines the defaults for other vendors and per vendor
|
||||||
print("[database] Calculating the default values...")
|
print("[database] Calculating the default values...")
|
||||||
database_defaults = defaults.calculate_defaults(database, cl_args.verbose)
|
database_defaults = defaults.calculate_defaults(database, cl_args.verbose)
|
||||||
database_best_results = db.concatenate_database(database_best_results, database_defaults)
|
database_best_results["sections"].extend(database_defaults["sections"])
|
||||||
|
|
||||||
|
# Optionally outputs the database to disk
|
||||||
|
if cl_args.verbose:
|
||||||
|
io.save_database(database_best_results, database_best_filename)
|
||||||
|
|
||||||
# Outputs the database as a C++ database
|
# Outputs the database as a C++ database
|
||||||
print("[database] Producing a C++ database in '" + cpp_database_path + "'...")
|
print("[database] Producing a C++ database in '" + cpp_database_path + "'...")
|
||||||
|
|
|
@ -5,39 +5,54 @@
|
||||||
# Author(s):
|
# Author(s):
|
||||||
# Cedric Nugteren <www.cedricnugteren.nl>
|
# Cedric Nugteren <www.cedricnugteren.nl>
|
||||||
|
|
||||||
import pandas as pd
|
import sys
|
||||||
import clblast
|
|
||||||
|
|
||||||
|
|
||||||
def get_best_results(df):
|
def get_best_results(database):
|
||||||
"""Retrieves the results with the lowests execution times"""
|
"""Retrieves the results with the lowest execution times"""
|
||||||
database_bests = pd.DataFrame()
|
sections_best = []
|
||||||
database_entries = df.groupby(clblast.ATTRIBUTES + ["kernel"])
|
for section in database["sections"]:
|
||||||
for name, database_entry in database_entries:
|
section_best = {}
|
||||||
best_time = database_entry["time"].min()
|
|
||||||
best_parameters = database_entry[database_entry["time"] == best_time].iloc[0]
|
# Stores all the section's meta data
|
||||||
database_bests = database_bests.append(best_parameters, ignore_index=True)
|
for attribute in section.keys():
|
||||||
return database_bests
|
if attribute != "results":
|
||||||
|
section_best[attribute] = section[attribute]
|
||||||
|
|
||||||
|
# Find the best result
|
||||||
|
parameters_best = None
|
||||||
|
time_best = sys.float_info.max
|
||||||
|
for result in section["results"]:
|
||||||
|
if result["time"] < time_best:
|
||||||
|
time_best = result["time"]
|
||||||
|
parameters_best = result["parameters"]
|
||||||
|
|
||||||
|
# Stores the best result
|
||||||
|
section_best["results"] = [{"time": time_best, "parameters": parameters_best}]
|
||||||
|
sections_best.append(section_best)
|
||||||
|
|
||||||
|
return {"sections": sections_best}
|
||||||
|
|
||||||
|
|
||||||
def get_relative_bests(df, parameter_column_names, name, verbose=False):
|
def get_relative_bests(name, common_results, common_parameters, verbose=False):
|
||||||
"""Retrieves the relative best execution time over different devices"""
|
"""Retrieves the parameters with the relative best execution time over different devices"""
|
||||||
|
|
||||||
|
# Helper function
|
||||||
|
def argmax(iterable):
|
||||||
|
return max(enumerate(iterable), key=lambda x: x[1])[0]
|
||||||
|
|
||||||
# Computes the sum of the execution times over the different devices
|
# Computes the sum of the execution times over the different devices
|
||||||
def sum_performance(x):
|
performance_sums = []
|
||||||
x["group_performance"] = x["relative_performance"].sum()
|
for parameters in common_parameters:
|
||||||
return x
|
performance_sum = sum([r["relative_performance"] for r in common_results if r["parameters"] == parameters])
|
||||||
df = df.groupby(parameter_column_names).apply(sum_performance)
|
performance_sums.append(performance_sum)
|
||||||
|
|
||||||
# Retrieves the entries with the highest performance
|
# Retrieves the entry with the highest performance
|
||||||
best_performance = df["group_performance"].max()
|
best_index = argmax(performance_sums)
|
||||||
df_bests = df[df["group_performance"] == best_performance]
|
best_performance = performance_sums[best_index]
|
||||||
|
best_parameters = common_parameters[best_index]
|
||||||
# Retrieves one example only (the parameters are the same anyway)
|
|
||||||
df_bests = df_bests.drop_duplicates(["group_performance"])
|
|
||||||
|
|
||||||
# Completed, report and return the results
|
# Completed, report and return the results
|
||||||
if verbose:
|
if verbose:
|
||||||
print("[database] " + str(name) + " with performance " + str(best_performance) + " " + str(df_bests.shape))
|
print("[database] " + str(name) + " with performance " + str(best_performance))
|
||||||
assert len(df_bests) == 1
|
return best_parameters
|
||||||
return df_bests
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ DEVICE_ATTRIBUTES = ["device", "device_core_clock", "device_compute_units"]
|
||||||
KERNEL_ATTRIBUTES = ["precision", "kernel_family"]
|
KERNEL_ATTRIBUTES = ["precision", "kernel_family"]
|
||||||
ARGUMENT_ATTRIBUTES = ["arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"]
|
ARGUMENT_ATTRIBUTES = ["arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"]
|
||||||
ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ARGUMENT_ATTRIBUTES
|
ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ARGUMENT_ATTRIBUTES
|
||||||
|
GROUP_ATTRIBUTES = DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ["kernel"] + ARGUMENT_ATTRIBUTES
|
||||||
|
|
||||||
|
|
||||||
def precision_to_string(precision):
|
def precision_to_string(precision):
|
||||||
|
@ -81,42 +82,63 @@ def print_cpp_database(database, output_dir):
|
||||||
"""Outputs the database as C++ code"""
|
"""Outputs the database as C++ code"""
|
||||||
|
|
||||||
# Iterates over the kernel families
|
# Iterates over the kernel families
|
||||||
for family_name, family_database in database.groupby(["kernel_family"]):
|
kernel_families = sorted(set([s["kernel_family"] for s in database["sections"]]))
|
||||||
family_database = family_database.dropna(axis=1, how='all')
|
for family_name in kernel_families:
|
||||||
|
family_database = [s for s in database["sections"] if s["kernel_family"] == family_name]
|
||||||
|
|
||||||
# Opens a new file for each kernel family
|
# Opens a new file for each kernel family
|
||||||
full_path = os.path.join(output_dir, family_name+'.hpp')
|
full_path = os.path.join(output_dir, family_name + ".hpp")
|
||||||
with open(full_path, 'w+') as f:
|
with open(full_path, 'w+') as f:
|
||||||
f.write(get_cpp_header(family_name))
|
f.write(get_cpp_header(family_name))
|
||||||
|
|
||||||
# Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
|
# Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
|
||||||
for precision, precision_database in family_database.groupby(["precision"]):
|
precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
|
||||||
|
for precision in precisions:
|
||||||
|
precision_database = [s for s in family_database if s["precision"] == precision]
|
||||||
f.write(get_cpp_precision(family_name, precision))
|
f.write(get_cpp_precision(family_name, precision))
|
||||||
|
|
||||||
# Loops over a combination of device vendors and device types (e.g. AMD GPU)
|
# In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but
|
||||||
for vendor, vendor_database in precision_database.groupby(["device_vendor"]):
|
# with the defaults only
|
||||||
for device_type, device_type_database in vendor_database.groupby(["device_type"]):
|
if len(precision_database) == 0:
|
||||||
|
print("[database] No results found for %s:%s, retrieving defaults from %s:32" %
|
||||||
|
(family_name, precision, family_name))
|
||||||
|
precision_database = [s for s in family_database if s["precision"] == "32"
|
||||||
|
and s["device_vendor"] == VENDOR_DEFAULT
|
||||||
|
and s["device_type"] == DEVICE_TYPE_DEFAULT
|
||||||
|
and s["device"] == DEVICE_NAME_DEFAULT]
|
||||||
|
|
||||||
|
# Loops over device vendors (e.g. AMD)
|
||||||
|
device_vendors = sorted(set([s["device_vendor"] for s in precision_database]))
|
||||||
|
for vendor in device_vendors:
|
||||||
|
vendor_database = [s for s in precision_database if s["device_vendor"] == vendor]
|
||||||
|
|
||||||
|
# Loops over device types (e.g. GPU)
|
||||||
|
device_types = sorted(set([s["device_type"] for s in vendor_database]))
|
||||||
|
for device_type in device_types:
|
||||||
|
type_database = [s for s in vendor_database if s["device_type"] == device_type]
|
||||||
f.write(get_cpp_device_vendor(vendor, device_type))
|
f.write(get_cpp_device_vendor(vendor, device_type))
|
||||||
|
|
||||||
# Loops over every device of this vendor-type combination
|
# Loops over every device of this vendor-type combination
|
||||||
for device_name, device_database in device_type_database.groupby(["device"]):
|
devices = sorted(set([s["device"] for s in type_database]))
|
||||||
|
for device_name in devices:
|
||||||
|
device_database = [s for s in type_database if s["device"] == device_name]
|
||||||
device_name_quoted = "\"%s\"," % device_name
|
device_name_quoted = "\"%s\"," % device_name
|
||||||
device_name_cpp = " { %-50s { " % device_name_quoted
|
device_name_cpp = " { %-50s { " % device_name_quoted
|
||||||
f.write(device_name_cpp)
|
f.write(device_name_cpp)
|
||||||
|
|
||||||
# Collects the parameters for this entry
|
# Collects the parameters for this entry
|
||||||
parameters = []
|
parameters = []
|
||||||
for kernel, kernel_database in device_database.groupby(["kernel"]):
|
kernels = sorted(set([s["kernel"] for s in device_database]))
|
||||||
kernel_database = kernel_database.dropna(axis=1)
|
for kernel in kernels:
|
||||||
|
kernel_database = [s for s in device_database if s["kernel"] == kernel]
|
||||||
|
|
||||||
# Only consider the actual parameters, not the precision
|
assert len(kernel_database) == 1
|
||||||
def is_parameter(column):
|
results = kernel_database[0]["results"]
|
||||||
return column.startswith('parameters.') and column != "parameters.PRECISION"
|
|
||||||
column_names = [col for col in list(kernel_database) if is_parameter(col)]
|
|
||||||
|
|
||||||
for p in column_names:
|
assert len(results) == 1
|
||||||
parameter_name = p.replace("parameters.", "")
|
new_parameters = results[0]["parameters"]
|
||||||
parameter_value = int(kernel_database[p].iloc[0])
|
for parameter_name in sorted(new_parameters):
|
||||||
|
parameter_value = new_parameters[parameter_name]
|
||||||
parameters.append("{\"" + parameter_name + "\"," + str(parameter_value) + "}")
|
parameters.append("{\"" + parameter_name + "\"," + str(parameter_value) + "}")
|
||||||
|
|
||||||
# Prints the entry
|
# Prints the entry
|
||||||
|
|
|
@ -5,66 +5,60 @@
|
||||||
# Author(s):
|
# Author(s):
|
||||||
# Cedric Nugteren <www.cedricnugteren.nl>
|
# Cedric Nugteren <www.cedricnugteren.nl>
|
||||||
|
|
||||||
import pandas as pd
|
import clblast
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
def get_entries_by_field(database, field, value):
|
def length(database):
|
||||||
"""Retrieves entries from the database with a specific value for a given field"""
|
"""Computes the total number of tuning entries"""
|
||||||
return database[database[field] == value]
|
num_tuning_entries = 0
|
||||||
|
for section in database["sections"]:
|
||||||
|
num_tuning_entries += len(section["results"])
|
||||||
|
return num_tuning_entries
|
||||||
|
|
||||||
|
|
||||||
def concatenate_database(database1, database2):
|
def add_section(database, new_section):
|
||||||
"""Concatenates two databases row-wise and returns the result"""
|
"""Adds a new section to the database"""
|
||||||
return pd.concat([database1, database2])
|
for old_section in database["sections"]:
|
||||||
|
|
||||||
|
# Verify whether the sections match
|
||||||
|
equal = True
|
||||||
|
for attribute in new_section.keys():
|
||||||
|
if attribute != "results":
|
||||||
|
if attribute not in old_section or new_section[attribute] != old_section[attribute]:
|
||||||
|
equal = False
|
||||||
|
break
|
||||||
|
|
||||||
def find_and_replace(database, dictionary):
|
# They match: append the new section's results to the corresponding entry in the database and return
|
||||||
"""Finds and replaces entries in a database based on a dictionary. Example:
|
if equal:
|
||||||
dictionary = { "key_to_edit": { find1: replace1, find2: replace2 } }"""
|
old_section["results"] = combine_results(old_section["results"], new_section["results"])
|
||||||
return database.replace(dictionary)
|
return database
|
||||||
|
|
||||||
|
# No match found: append the whole new section to the database
|
||||||
def remove_entries_by_key_value(database, key, value):
|
database["sections"].append(new_section)
|
||||||
"""Removes entries in the database which have a specific value for a given key"""
|
|
||||||
return database[database[key] != value]
|
|
||||||
|
|
||||||
|
|
||||||
def remove_entries_by_device(database, device_name):
|
|
||||||
"""Shorthand for the above, specifically removes entries for a given device"""
|
|
||||||
return remove_entries_by_key_value(database, "device", device_name)
|
|
||||||
|
|
||||||
|
|
||||||
def remove_entries_by_kernel_family(database, kernel_family_name):
|
|
||||||
"""Shorthand for the above, specifically removes entries for a given kernel family"""
|
|
||||||
return remove_entries_by_key_value(database, "kernel_family", kernel_family_name)
|
|
||||||
|
|
||||||
|
|
||||||
def update_database(database, condition, field, value):
|
|
||||||
"""Updates the database by writing a specific value to a given field, given certain conditions"""
|
|
||||||
database.loc[condition, field] = value
|
|
||||||
return database
|
return database
|
||||||
|
|
||||||
|
|
||||||
def remove_duplicates(database):
|
def combine_results(old_results, new_results):
|
||||||
"""Removes duplicates from the database based on all but the 'time' column"""
|
"""Adds new results to the results JSON list"""
|
||||||
|
for new_result in new_results:
|
||||||
|
old_results = combine_result(old_results, new_result)
|
||||||
|
return old_results
|
||||||
|
|
||||||
# First remove 100% duplicate entries
|
|
||||||
database = database.drop_duplicates()
|
|
||||||
|
|
||||||
# Replace NaNs with -1 first (needed for groupby)
|
def combine_result(old_results, new_result):
|
||||||
database = database.replace(np.nan, -1)
|
"""Adds a new result to the results JSON list; filters for duplicate entries and saves the best performing one"""
|
||||||
|
|
||||||
# In case multiple runs for the exact same configuration were made: take just the best performing one into account
|
# Loops over all existing results to test for already existing entries with these parameters
|
||||||
other_column_names = list(database.columns.values)
|
for old_result in old_results:
|
||||||
other_column_names.remove("time")
|
|
||||||
database_by_time = database.groupby(other_column_names,)
|
|
||||||
num_removals = len(database) - len(database_by_time)
|
|
||||||
if num_removals > 0:
|
|
||||||
print("[database] Removing %d entries: keeping only those with the lowest execution time" % num_removals)
|
|
||||||
print("[database] Note: this might take a while")
|
|
||||||
database = database_by_time.apply(lambda x: x[x["time"] == x["time"].min()])
|
|
||||||
|
|
||||||
# Re-replace the NaN values
|
# Verify whether the results match
|
||||||
database = database.replace(-1, np.nan)
|
equal = new_result["parameters"] == old_result["parameters"]
|
||||||
return database
|
|
||||||
|
# They match: keep only the one with the minimum execution time
|
||||||
|
if equal:
|
||||||
|
old_result["time"] = min(old_result["time"], new_result["time"])
|
||||||
|
return old_results
|
||||||
|
|
||||||
|
# No match found: append a new result
|
||||||
|
old_results.append(new_result)
|
||||||
|
return old_results
|
||||||
|
|
|
@ -5,102 +5,176 @@
|
||||||
# Author(s):
|
# Author(s):
|
||||||
# Cedric Nugteren <www.cedricnugteren.nl>
|
# Cedric Nugteren <www.cedricnugteren.nl>
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
import clblast
|
import clblast
|
||||||
import bests
|
import bests
|
||||||
|
|
||||||
|
|
||||||
def set_default_device(database_entry):
|
def set_default_device(section):
|
||||||
"""Sets the device name and parameters to some default values"""
|
"""Sets the device name and parameters to some default values"""
|
||||||
database_entry["device"] = clblast.DEVICE_NAME_DEFAULT
|
section["device"] = clblast.DEVICE_NAME_DEFAULT
|
||||||
database_entry["device_compute_units"] = 0
|
section["device_compute_units"] = 0
|
||||||
database_entry["device_core_clock"] = 0
|
section["device_core_clock"] = 0
|
||||||
return database_entry
|
return section
|
||||||
|
|
||||||
|
|
||||||
def set_default_time(database_entry):
|
def set_identifiers(database, group_by_attributes, identifier_name):
|
||||||
"""Sets the execution time to some default value"""
|
"""Sets a group-identifier based on a given set of attributes. Modifies the database but also returns a list of
|
||||||
database_entry["time"] = 0.0
|
unique identifiers."""
|
||||||
return database_entry
|
identifiers = []
|
||||||
|
for section in database["sections"]:
|
||||||
|
identifier = []
|
||||||
|
for attribute in group_by_attributes:
|
||||||
|
if attribute in section:
|
||||||
|
identifier.append(section[attribute])
|
||||||
|
section[identifier_name] = ";".join(identifier)
|
||||||
|
identifiers.append(section[identifier_name])
|
||||||
|
return sorted(set(identifiers))
|
||||||
|
|
||||||
|
|
||||||
def calculate_defaults(database, verbose, calculate_common_best=True):
|
def remove_identifiers(database, identifier_name):
|
||||||
"""Sets defaults for devices of the same type/vendor. An option determines how to compute the defaults."""
|
"""Removes an identifier from all sections in the database"""
|
||||||
database_defaults = pd.DataFrame()
|
for section in database["sections"]:
|
||||||
|
section.pop(identifier_name, None)
|
||||||
|
|
||||||
# Defaults per combination of device vendors and device types (e.g. AMD GPU)
|
|
||||||
database_type_vendor = database.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] +
|
|
||||||
clblast.ARGUMENT_ATTRIBUTES)
|
|
||||||
for group_name, database_group in database_type_vendor:
|
|
||||||
if calculate_common_best:
|
|
||||||
default_values = get_common_best(database_group, group_name, verbose)
|
|
||||||
else:
|
|
||||||
default_values = get_smallest_best(database_group)
|
|
||||||
default_values = set_default_device(default_values)
|
|
||||||
default_values = set_default_time(default_values)
|
|
||||||
database_defaults = database_defaults.append(default_values, ignore_index=True)
|
|
||||||
|
|
||||||
# Checks for mis-matched arguments
|
def get_groups_by_identifier(database, group_identifiers, identifier_name):
|
||||||
groups = database_defaults.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"])
|
"""Returns a list of (group, group_identifier) tuples based on a previously made grouping"""
|
||||||
for group_name, database_group in groups:
|
groups = []
|
||||||
if len(database_group) != 1:
|
for group_identifier in group_identifiers:
|
||||||
print("[WARNING] Entries for a single kernel with multiple argument values: " + str(group_name))
|
|
||||||
|
|
||||||
# Defaults over all device types and vendors
|
# Get all sections in this group
|
||||||
groups = database.groupby(clblast.KERNEL_ATTRIBUTES + ["kernel"] + clblast.ARGUMENT_ATTRIBUTES)
|
group = []
|
||||||
for group_name, database_group in groups:
|
for section in database["sections"]:
|
||||||
if calculate_common_best:
|
if section[identifier_name] == group_identifier:
|
||||||
default_values = get_common_best(database_group, group_name, verbose)
|
group.append(section)
|
||||||
else:
|
|
||||||
default_values = get_smallest_best(database_group)
|
groups.append((group, group_identifier))
|
||||||
default_values["device_vendor"] = clblast.VENDOR_DEFAULT
|
return groups
|
||||||
default_values["device_type"] = clblast.DEVICE_TYPE_DEFAULT
|
|
||||||
default_values = set_default_device(default_values)
|
|
||||||
default_values = set_default_time(default_values)
|
def calculate_defaults(database, verbose):
|
||||||
database_defaults = database_defaults.append(default_values, ignore_index=True)
|
"""Sets defaults for devices of the same type/vendor"""
|
||||||
|
|
||||||
|
# Groups the database by kernel, vendor and device type (e.g. AMD GPU)
|
||||||
|
group_identifiers = set_identifiers(database, clblast.GROUP_ATTRIBUTES, "group_identifier")
|
||||||
|
groups = get_groups_by_identifier(database, group_identifiers, "group_identifier")
|
||||||
|
|
||||||
|
# Loops over all groups
|
||||||
|
default_sections = {"sections": []}
|
||||||
|
for group, group_identifier in groups:
|
||||||
|
|
||||||
|
# Computes the best parameters
|
||||||
|
default_parameters = get_common_best_parameters(group, group_identifier, verbose)
|
||||||
|
|
||||||
|
# Stores all the section's data
|
||||||
|
assert len(group) > 0
|
||||||
|
default_section = {}
|
||||||
|
for attribute in group[0].keys():
|
||||||
|
if attribute != "results" and attribute != "group_identifier":
|
||||||
|
default_section[attribute] = group[0][attribute]
|
||||||
|
default_section = set_default_device(default_section)
|
||||||
|
default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
|
||||||
|
default_sections["sections"].append(default_section)
|
||||||
|
|
||||||
|
# Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! This is to check for
|
||||||
|
# mis-matched arguments.
|
||||||
|
attributes = clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"]
|
||||||
|
group_identifiers = set_identifiers(default_sections, attributes, "temp_identifier")
|
||||||
|
groups = get_groups_by_identifier(default_sections, group_identifiers, "temp_identifier")
|
||||||
|
for group, group_identifier in groups:
|
||||||
|
if len(group) != 1:
|
||||||
|
print("[ERROR] Entries for a single kernel with multiple argument values: " + str(group_identifier))
|
||||||
|
assert len(group) == 1
|
||||||
|
remove_identifiers(default_sections, "temp_identifier")
|
||||||
|
|
||||||
|
# Groups the database by kernel only
|
||||||
|
group_identifiers = set_identifiers(database, clblast.KERNEL_ATTRIBUTES + ["kernel"], "group_identifier")
|
||||||
|
groups = get_groups_by_identifier(database, group_identifiers, "group_identifier")
|
||||||
|
|
||||||
|
# Loops over all groups
|
||||||
|
for group, group_identifier in groups:
|
||||||
|
|
||||||
|
# Computes the best parameters
|
||||||
|
default_parameters = get_common_best_parameters(group, group_identifier, verbose)
|
||||||
|
|
||||||
|
# Stores all the section's data
|
||||||
|
assert len(group) > 0
|
||||||
|
default_section = {}
|
||||||
|
for attribute in group[0].keys():
|
||||||
|
if attribute != "results" and attribute != "group_identifier":
|
||||||
|
default_section[attribute] = group[0][attribute]
|
||||||
|
default_section = set_default_device(default_section)
|
||||||
|
default_section["device_vendor"] = clblast.VENDOR_DEFAULT
|
||||||
|
default_section["device_type"] = clblast.DEVICE_TYPE_DEFAULT
|
||||||
|
default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
|
||||||
|
default_sections["sections"].append(default_section)
|
||||||
|
|
||||||
# Database with both types of defaults only
|
# Database with both types of defaults only
|
||||||
return database_defaults
|
return default_sections
|
||||||
|
|
||||||
|
|
||||||
def get_smallest_best(database):
|
def get_smallest_best_parameters(group):
|
||||||
"""Sets defaults based on the smallest values of all known entries. The average might be better for performance but
|
"""Sets defaults based on the smallest values of all known entries. The average might be better for performance but
|
||||||
some parameters might not be supported on other devices."""
|
some parameters might not be supported on other devices."""
|
||||||
database_best_results = bests.get_best_results(database)
|
|
||||||
return database_best_results.min(axis=0)
|
# Requires at least one section in this group
|
||||||
|
assert len(group) > 0
|
||||||
|
|
||||||
|
# Find the smallest values of the parameters
|
||||||
|
min_parameters = {}
|
||||||
|
for section in group:
|
||||||
|
assert len(section["results"]) > 0
|
||||||
|
minimum_time = min([result["time"] for result in section["results"]])
|
||||||
|
for result in section["results"]:
|
||||||
|
if result["time"] == minimum_time:
|
||||||
|
for parameter in result["parameters"]:
|
||||||
|
if parameter in min_parameters:
|
||||||
|
min_parameters[parameter] = min(min_parameters[parameter], result["parameters"][parameter])
|
||||||
|
else:
|
||||||
|
min_parameters[parameter] = result["parameters"][parameter]
|
||||||
|
|
||||||
|
return min_parameters
|
||||||
|
|
||||||
|
|
||||||
def get_common_best(database, group_name, verbose):
|
def get_common_best_parameters(group, group_identifier, verbose):
|
||||||
"""Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case
|
"""Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case
|
||||||
not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve
|
not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve
|
||||||
the smallest best execution time"""
|
the smallest best execution time"""
|
||||||
|
|
||||||
# Counts the number of devices in this group
|
# Counts the number of devices in this group
|
||||||
num_devices = len(database.groupby(clblast.DEVICE_ATTRIBUTES))
|
num_devices = len(group)
|
||||||
|
assert num_devices > 0
|
||||||
# Removes columns without any values
|
|
||||||
database = database.dropna(axis=1, how='all')
|
|
||||||
|
|
||||||
# Inserts the relative execution times into the database
|
# Inserts the relative execution times into the database
|
||||||
def relative_performance(x):
|
for section in group:
|
||||||
x["relative_performance"] = x["time"].min() / x["time"]
|
assert len(section["results"]) > 0
|
||||||
return x
|
minimum_time = min([result["time"] for result in section["results"]])
|
||||||
database = database.groupby(clblast.ATTRIBUTES + ["kernel"]).apply(relative_performance)
|
for result in section["results"]:
|
||||||
|
result["relative_performance"] = minimum_time / result["time"]
|
||||||
|
|
||||||
# Retrieves the parameter names for this kernel
|
# Determine which parameters are available for all devices
|
||||||
all_column_names = list(database.columns.values)
|
common_parameters = [result["parameters"] for result in group[0]["results"]] # Parameters of the first section
|
||||||
parameter_column_names = [c for c in all_column_names if "parameters." in c]
|
for i in range(1, num_devices):
|
||||||
|
section_parameters = [result["parameters"] for result in group[i]["results"]]
|
||||||
# Removes entries which are not available for all devices
|
common_parameters = [p for p in section_parameters if p in common_parameters] # Intersection of the parameters
|
||||||
database_by_parameters = database.groupby(parameter_column_names)
|
|
||||||
database_common = database_by_parameters.filter(lambda x: len(x) == num_devices)
|
|
||||||
|
|
||||||
# Fall back to another method in case there are no shared entries at all across devices
|
# Fall back to another method in case there are no shared entries at all across devices
|
||||||
if len(database_common) == 0:
|
if len(common_parameters) == 0:
|
||||||
if verbose:
|
if verbose:
|
||||||
print("[database] No common kernels for: " + str(group_name) + " with devices: %d " % num_devices)
|
print("[database] No common kernels for: " + str(group_identifier) + " with devices: %d " % num_devices)
|
||||||
return get_smallest_best(database)
|
smallest_best_parameters = get_smallest_best_parameters(group)
|
||||||
|
if verbose:
|
||||||
|
print("[database] " + str(group_identifier))
|
||||||
|
return smallest_best_parameters
|
||||||
|
|
||||||
|
# Removes entries with parameters which are not common
|
||||||
|
common_results = []
|
||||||
|
for section in group:
|
||||||
|
for result in section["results"]:
|
||||||
|
if result["parameters"] in common_parameters:
|
||||||
|
common_results.append(result)
|
||||||
|
|
||||||
# Retrieves the entries with the highest relative performance
|
# Retrieves the entries with the highest relative performance
|
||||||
return bests.get_relative_bests(database_common, parameter_column_names, group_name, verbose)
|
relative_best_parameters = bests.get_relative_bests(group_identifier, common_results, common_parameters, verbose)
|
||||||
|
return relative_best_parameters
|
||||||
|
|
|
@ -13,46 +13,48 @@ try:
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib2 import urlopen # Python 2
|
from urllib2 import urlopen # Python 2
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
import clblast
|
|
||||||
|
|
||||||
|
|
||||||
def download_database(filename, database_url):
|
def download_database(filename, database_url):
|
||||||
"""Downloads a database and saves it to disk"""
|
"""Downloads a database and saves it to disk"""
|
||||||
print("[database] Downloading database from '" + database_url + "'...")
|
print("[database] Downloading database from '" + database_url + "'...")
|
||||||
database = urlopen(database_url)
|
database = urlopen(database_url)
|
||||||
with open(filename, 'wb') as f:
|
with open(filename, "wb") as f:
|
||||||
f.write(database.read())
|
f.write(database.read())
|
||||||
|
|
||||||
|
|
||||||
def load_database(filename):
|
def load_database(filename):
|
||||||
"""Loads a database from disk"""
|
"""Loads a database from disk"""
|
||||||
print("[database] Loading database from '" + filename + "'")
|
print("[database] Loading database from '" + filename + "'")
|
||||||
return pd.read_pickle(filename)
|
with open(filename) as f:
|
||||||
|
return json.load(f)
|
||||||
|
|
||||||
|
|
||||||
def save_database(database, filename):
|
def save_database(database, filename):
|
||||||
"""Saves a database to disk"""
|
"""Saves a database to disk"""
|
||||||
print("[database] Saving database to '" + filename + "'")
|
print("[database] Saving database to '" + filename + "'")
|
||||||
database.to_pickle(filename)
|
with open(filename, "wb") as f:
|
||||||
|
json.dump(database, f, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def load_json_to_pandas(filename):
|
def load_tuning_results(filename):
|
||||||
"""Loads JSON data from file and converts it to a pandas database"""
|
"""Loads JSON data from file and pre-processes it"""
|
||||||
with open(filename) as f:
|
with open(filename) as f:
|
||||||
json_data = json.load(f)
|
json_data = json.load(f)
|
||||||
|
|
||||||
# Gathers all results and stores them in a new database
|
# Removes the numbering following the kernel family name
|
||||||
json_database = pd.DataFrame(json_data)
|
json_data["kernel_family"] = re.sub(r'_\d+', '', json_data["kernel_family"])
|
||||||
new_database = pd.io.json.json_normalize(json_database["results"])
|
|
||||||
|
|
||||||
# Sets the common attributes to each entry in the results
|
# Adds the kernel name to the section instead of to the individual results
|
||||||
for attribute in clblast.ATTRIBUTES:
|
assert len(json_data["results"]) > 0
|
||||||
if attribute == "kernel_family":
|
json_data["kernel"] = json_data["results"][0]["kernel"]
|
||||||
new_database[attribute] = re.sub(r'_\d+', '', json_data[attribute])
|
for result in json_data["results"]:
|
||||||
elif attribute in json_data:
|
assert json_data["kernel"] == result["kernel"]
|
||||||
new_database[attribute] = json_data[attribute]
|
result.pop("kernel", None)
|
||||||
else:
|
|
||||||
new_database[attribute] = 0 # For example a parameters that was not used by this kernel
|
# Removes the 'PRECISION' parameter from the individual results: it is redundant
|
||||||
return new_database
|
for result in json_data["results"]:
|
||||||
|
assert json_data["precision"] == str(result["parameters"]["PRECISION"])
|
||||||
|
result["parameters"].pop("PRECISION", None)
|
||||||
|
|
||||||
|
# All done
|
||||||
|
return json_data
|
||||||
|
|
|
@ -35,9 +35,9 @@ const std::vector<Database::DatabaseEntry> Database::database = {
|
||||||
XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
|
XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
|
||||||
XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
|
XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
|
||||||
XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble,
|
XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble,
|
||||||
/* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
|
XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
|
||||||
XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
|
XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
|
||||||
/* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
|
XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
|
||||||
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
|
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
|
||||||
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
|
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
|
||||||
TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,
|
TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,
|
||||||
|
|
|
@ -72,9 +72,9 @@ class Database {
|
||||||
static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
|
static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
|
||||||
static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
|
static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
|
||||||
static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble;
|
static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble;
|
||||||
static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
|
static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
|
||||||
static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
|
static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
|
||||||
static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
|
static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
|
||||||
static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
|
static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
|
||||||
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
|
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
|
||||||
static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
|
static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;
|
||||||
|
|
|
@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XaxpyHalf = {
|
||||||
kDeviceTypeGPU, "Intel", {
|
kDeviceTypeGPU, "Intel", {
|
||||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||||
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
|
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
|
||||||
{ "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
|
{ "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{ // Default
|
{ // Default
|
||||||
kDeviceTypeAll, "default", {
|
kDeviceTypeAll, "default", {
|
||||||
{ "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
|
{ "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,11 +38,10 @@ const Database::DatabaseEntry Database::XdotSingle = {
|
||||||
{ // AMD GPUs
|
{ // AMD GPUs
|
||||||
kDeviceTypeGPU, "AMD", {
|
kDeviceTypeGPU, "AMD", {
|
||||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } },
|
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
|
|
||||||
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
|
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
{ "Tahiti", { {"WGS1",128}, {"WGS2",32} } },
|
{ "Tahiti", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
{ "default", { {"WGS1",256}, {"WGS2",32} } },
|
{ "default", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{ // Intel CPUs
|
{ // Intel CPUs
|
||||||
|
@ -90,11 +89,10 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
|
||||||
{ // AMD GPUs
|
{ // AMD GPUs
|
||||||
kDeviceTypeGPU, "AMD", {
|
kDeviceTypeGPU, "AMD", {
|
||||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
|
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
|
||||||
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
|
|
||||||
{ "Oland", { {"WGS1",128}, {"WGS2",32} } },
|
{ "Oland", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
{ "Tahiti", { {"WGS1",64}, {"WGS2",32} } },
|
{ "Tahiti", { {"WGS1",64}, {"WGS2",32} } },
|
||||||
{ "default", { {"WGS1",256}, {"WGS2",32} } },
|
{ "default", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{ // Intel CPUs
|
{ // Intel CPUs
|
||||||
|
@ -142,7 +140,6 @@ const Database::DatabaseEntry Database::XdotDouble = {
|
||||||
{ // AMD GPUs
|
{ // AMD GPUs
|
||||||
kDeviceTypeGPU, "AMD", {
|
kDeviceTypeGPU, "AMD", {
|
||||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } },
|
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } },
|
||||||
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
|
|
||||||
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
|
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
|
||||||
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
|
@ -184,7 +181,6 @@ const Database::DatabaseEntry Database::XdotComplexDouble = {
|
||||||
{ // AMD GPUs
|
{ // AMD GPUs
|
||||||
kDeviceTypeGPU, "AMD", {
|
kDeviceTypeGPU, "AMD", {
|
||||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
|
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
|
||||||
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
|
|
||||||
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
|
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
|
||||||
|
|
|
@ -14,6 +14,18 @@
|
||||||
namespace clblast {
|
namespace clblast {
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
const Database::DatabaseEntry Database::XgemmHalf = {
|
||||||
|
"Xgemm", Precision::kHalf, {
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
const Database::DatabaseEntry Database::XgemmSingle = {
|
const Database::DatabaseEntry Database::XgemmSingle = {
|
||||||
"Xgemm", Precision::kSingle, {
|
"Xgemm", Precision::kSingle, {
|
||||||
{ // AMD GPUs
|
{ // AMD GPUs
|
||||||
|
@ -76,7 +88,7 @@ const Database::DatabaseEntry Database::XgemmSingle = {
|
||||||
},
|
},
|
||||||
{ // Default
|
{ // Default
|
||||||
kDeviceTypeAll, "default", {
|
kDeviceTypeAll, "default", {
|
||||||
{ "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
{ "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XgemvHalf = {
|
||||||
kDeviceTypeGPU, "Intel", {
|
kDeviceTypeGPU, "Intel", {
|
||||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
|
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
|
||||||
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } },
|
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } },
|
||||||
{ "default", { {"WGS1",128}, {"WPT1",1} } },
|
{ "default", { {"WGS1",64}, {"WPT1",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{ // Default
|
{ // Default
|
||||||
kDeviceTypeAll, "default", {
|
kDeviceTypeAll, "default", {
|
||||||
{ "default", { {"WGS1",128}, {"WPT1",1} } },
|
{ "default", { {"WGS1",64}, {"WPT1",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XgemvFastHalf = {
|
||||||
kDeviceTypeGPU, "Intel", {
|
kDeviceTypeGPU, "Intel", {
|
||||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
|
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
|
||||||
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
|
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
|
||||||
{ "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
|
{ "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{ // Default
|
{ // Default
|
||||||
kDeviceTypeAll, "default", {
|
kDeviceTypeAll, "default", {
|
||||||
{ "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
|
{ "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,18 @@
|
||||||
namespace clblast {
|
namespace clblast {
|
||||||
// =================================================================================================
|
// =================================================================================================
|
||||||
|
|
||||||
|
const Database::DatabaseEntry Database::XgemvFastRotHalf = {
|
||||||
|
"XgemvFastRot", Precision::kHalf, {
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
const Database::DatabaseEntry Database::XgemvFastRotSingle = {
|
const Database::DatabaseEntry Database::XgemvFastRotSingle = {
|
||||||
"XgemvFastRot", Precision::kSingle, {
|
"XgemvFastRot", Precision::kSingle, {
|
||||||
{ // AMD GPUs
|
{ // AMD GPUs
|
||||||
|
|
Loading…
Reference in a new issue