Merge branch 'database_rewrite' into development

This commit is contained in:
Cedric Nugteren 2016-09-12 20:16:18 +02:00
commit 9fb7a0efe1
15 changed files with 342 additions and 224 deletions

3
.gitignore vendored
View file

@ -2,5 +2,6 @@ build
stash stash
.* .*
*.pyc *.pyc
*.db database.json
database_best.json
cl.hpp cl.hpp

View file

@ -11,8 +11,6 @@ import os.path
import glob import glob
import argparse import argparse
import pandas as pd
import database.io as io import database.io as io
import database.db as db import database.db as db
import database.clblast as clblast import database.clblast as clblast
@ -20,15 +18,15 @@ import database.bests as bests
import database.defaults as defaults import database.defaults as defaults
# Server storing a copy of the database # Server storing a copy of the database
DATABASE_SERVER_URL = "http://www.cedricnugteren.nl/tuning/clblast.db" DATABASE_SERVER_URL = "http://www.cedricnugteren.nl/tuning/clblast.json"
# OpenCL vendor names and their short name # OpenCL vendor names and their short name
VENDOR_TRANSLATION_TABLE = {"device_vendor": { VENDOR_TRANSLATION_TABLE = {
"GenuineIntel": "Intel", "GenuineIntel": "Intel",
"Intel(R) Corporation": "Intel", "Intel(R) Corporation": "Intel",
"Advanced Micro Devices, Inc.": "AMD", "Advanced Micro Devices, Inc.": "AMD",
"NVIDIA Corporation": "NVIDIA", "NVIDIA Corporation": "NVIDIA",
}} }
def main(argv): def main(argv):
@ -41,7 +39,8 @@ def main(argv):
cl_args = parser.parse_args(argv) cl_args = parser.parse_args(argv)
# Parses the path arguments # Parses the path arguments
database_filename = os.path.join(cl_args.clblast_root, "scripts", "database", "database.db") database_filename = os.path.join(cl_args.clblast_root, "scripts", "database", "database.json")
database_best_filename = os.path.join(cl_args.clblast_root, "scripts", "database", "database_best.json")
json_files = os.path.join(cl_args.source_folder, "*.json") json_files = os.path.join(cl_args.source_folder, "*.json")
cpp_database_path = os.path.join(cl_args.clblast_root, "src", "database", "kernels") cpp_database_path = os.path.join(cl_args.clblast_root, "src", "database", "kernels")
@ -52,11 +51,6 @@ def main(argv):
if len(glob.glob(json_files)) < 1: if len(glob.glob(json_files)) < 1:
print("[database] The path '" + cl_args.source_folder + "' does not contain any JSON files") print("[database] The path '" + cl_args.source_folder + "' does not contain any JSON files")
# Pandas options
pd.set_option('display.width', 1000)
if cl_args.verbose:
print("[database] Using pandas version " + pd.__version__)
# Downloads the database if a local copy is not present # Downloads the database if a local copy is not present
if not os.path.isfile(database_filename): if not os.path.isfile(database_filename):
io.download_database(database_filename, DATABASE_SERVER_URL) io.download_database(database_filename, DATABASE_SERVER_URL)
@ -68,30 +62,22 @@ def main(argv):
for file_json in glob.glob(json_files): for file_json in glob.glob(json_files):
# Loads the newly imported data # Loads the newly imported data
sys.stdout.write("[database] Processing '"+file_json+"' ") # No newline printed sys.stdout.write("[database] Processing '" + file_json + "' ") # No newline printed
imported_data = io.load_json_to_pandas(file_json) imported_data = io.load_tuning_results(file_json)
# Fixes the problem that some vendors use multiple different names # Fixes the problem that some vendors use multiple different names
imported_data = db.find_and_replace(imported_data, VENDOR_TRANSLATION_TABLE) for target in VENDOR_TRANSLATION_TABLE:
if imported_data["device_vendor"] == target:
imported_data["device_vendor"] = VENDOR_TRANSLATION_TABLE[target]
# Adds the new data to the database # Adds the new data to the database
old_size = len(database.index) old_size = db.length(database)
database = db.concatenate_database(database, imported_data) database = db.add_section(database, imported_data)
database = database.drop_duplicates() new_size = db.length(database)
new_size = len(database.index)
print("with " + str(new_size - old_size) + " new items") # Newline printed here print("with " + str(new_size - old_size) + " new items") # Newline printed here
# Stores the modified database back to disk # Stores the modified database back to disk
if len(glob.glob(json_files)) >= 1: if len(glob.glob(json_files)) >= 1:
database = db.remove_duplicates(database)
io.save_database(database, database_filename)
# Optional: update the database here. Default is disabled, code below is just an example
if False: # TODO: Use command-line arguments to enable updates in a flexible way
database = db.update_database(database,
((database["kernel"] == "CopyMatrixFast") &
(database["precision"] == "3232")),
"arg_alpha", "2+0.5i")
io.save_database(database, database_filename) io.save_database(database, database_filename)
# Retrieves the best performing results # Retrieves the best performing results
@ -101,7 +87,11 @@ def main(argv):
# Determines the defaults for other vendors and per vendor # Determines the defaults for other vendors and per vendor
print("[database] Calculating the default values...") print("[database] Calculating the default values...")
database_defaults = defaults.calculate_defaults(database, cl_args.verbose) database_defaults = defaults.calculate_defaults(database, cl_args.verbose)
database_best_results = db.concatenate_database(database_best_results, database_defaults) database_best_results["sections"].extend(database_defaults["sections"])
# Optionally outputs the database to disk
if cl_args.verbose:
io.save_database(database_best_results, database_best_filename)
# Outputs the database as a C++ database # Outputs the database as a C++ database
print("[database] Producing a C++ database in '" + cpp_database_path + "'...") print("[database] Producing a C++ database in '" + cpp_database_path + "'...")

View file

@ -5,39 +5,54 @@
# Author(s): # Author(s):
# Cedric Nugteren <www.cedricnugteren.nl> # Cedric Nugteren <www.cedricnugteren.nl>
import pandas as pd import sys
import clblast
def get_best_results(df): def get_best_results(database):
"""Retrieves the results with the lowests execution times""" """Retrieves the results with the lowest execution times"""
database_bests = pd.DataFrame() sections_best = []
database_entries = df.groupby(clblast.ATTRIBUTES + ["kernel"]) for section in database["sections"]:
for name, database_entry in database_entries: section_best = {}
best_time = database_entry["time"].min()
best_parameters = database_entry[database_entry["time"] == best_time].iloc[0] # Stores all the section's meta data
database_bests = database_bests.append(best_parameters, ignore_index=True) for attribute in section.keys():
return database_bests if attribute != "results":
section_best[attribute] = section[attribute]
# Find the best result
parameters_best = None
time_best = sys.float_info.max
for result in section["results"]:
if result["time"] < time_best:
time_best = result["time"]
parameters_best = result["parameters"]
# Stores the best result
section_best["results"] = [{"time": time_best, "parameters": parameters_best}]
sections_best.append(section_best)
return {"sections": sections_best}
def get_relative_bests(df, parameter_column_names, name, verbose=False): def get_relative_bests(name, common_results, common_parameters, verbose=False):
"""Retrieves the relative best execution time over different devices""" """Retrieves the parameters with the relative best execution time over different devices"""
# Helper function
def argmax(iterable):
return max(enumerate(iterable), key=lambda x: x[1])[0]
# Computes the sum of the execution times over the different devices # Computes the sum of the execution times over the different devices
def sum_performance(x): performance_sums = []
x["group_performance"] = x["relative_performance"].sum() for parameters in common_parameters:
return x performance_sum = sum([r["relative_performance"] for r in common_results if r["parameters"] == parameters])
df = df.groupby(parameter_column_names).apply(sum_performance) performance_sums.append(performance_sum)
# Retrieves the entries with the highest performance # Retrieves the entry with the highest performance
best_performance = df["group_performance"].max() best_index = argmax(performance_sums)
df_bests = df[df["group_performance"] == best_performance] best_performance = performance_sums[best_index]
best_parameters = common_parameters[best_index]
# Retrieves one example only (the parameters are the same anyway)
df_bests = df_bests.drop_duplicates(["group_performance"])
# Completed, report and return the results # Completed, report and return the results
if verbose: if verbose:
print("[database] " + str(name) + " with performance " + str(best_performance) + " " + str(df_bests.shape)) print("[database] " + str(name) + " with performance " + str(best_performance))
assert len(df_bests) == 1 return best_parameters
return df_bests

View file

@ -18,6 +18,7 @@ DEVICE_ATTRIBUTES = ["device", "device_core_clock", "device_compute_units"]
KERNEL_ATTRIBUTES = ["precision", "kernel_family"] KERNEL_ATTRIBUTES = ["precision", "kernel_family"]
ARGUMENT_ATTRIBUTES = ["arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"] ARGUMENT_ATTRIBUTES = ["arg_m", "arg_n", "arg_k", "arg_alpha", "arg_beta"]
ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ARGUMENT_ATTRIBUTES ATTRIBUTES = DEVICE_ATTRIBUTES + DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ARGUMENT_ATTRIBUTES
GROUP_ATTRIBUTES = DEVICE_TYPE_ATTRIBUTES + KERNEL_ATTRIBUTES + ["kernel"] + ARGUMENT_ATTRIBUTES
def precision_to_string(precision): def precision_to_string(precision):
@ -81,42 +82,63 @@ def print_cpp_database(database, output_dir):
"""Outputs the database as C++ code""" """Outputs the database as C++ code"""
# Iterates over the kernel families # Iterates over the kernel families
for family_name, family_database in database.groupby(["kernel_family"]): kernel_families = sorted(set([s["kernel_family"] for s in database["sections"]]))
family_database = family_database.dropna(axis=1, how='all') for family_name in kernel_families:
family_database = [s for s in database["sections"] if s["kernel_family"] == family_name]
# Opens a new file for each kernel family # Opens a new file for each kernel family
full_path = os.path.join(output_dir, family_name+'.hpp') full_path = os.path.join(output_dir, family_name + ".hpp")
with open(full_path, 'w+') as f: with open(full_path, 'w+') as f:
f.write(get_cpp_header(family_name)) f.write(get_cpp_header(family_name))
# Loops over the different precision (e.g. 16, 32, 3232, 64, 6464) # Loops over the different precision (e.g. 16, 32, 3232, 64, 6464)
for precision, precision_database in family_database.groupby(["precision"]): precisions = sorted(set([s["precision"] for s in database["sections"]])) # Based on full database
for precision in precisions:
precision_database = [s for s in family_database if s["precision"] == precision]
f.write(get_cpp_precision(family_name, precision)) f.write(get_cpp_precision(family_name, precision))
# Loops over a combination of device vendors and device types (e.g. AMD GPU) # In case there is nothing found at all (e.g. 16-bit): continue as if this was a precision of 32 but
for vendor, vendor_database in precision_database.groupby(["device_vendor"]): # with the defaults only
for device_type, device_type_database in vendor_database.groupby(["device_type"]): if len(precision_database) == 0:
print("[database] No results found for %s:%s, retrieving defaults from %s:32" %
(family_name, precision, family_name))
precision_database = [s for s in family_database if s["precision"] == "32"
and s["device_vendor"] == VENDOR_DEFAULT
and s["device_type"] == DEVICE_TYPE_DEFAULT
and s["device"] == DEVICE_NAME_DEFAULT]
# Loops over device vendors (e.g. AMD)
device_vendors = sorted(set([s["device_vendor"] for s in precision_database]))
for vendor in device_vendors:
vendor_database = [s for s in precision_database if s["device_vendor"] == vendor]
# Loops over device types (e.g. GPU)
device_types = sorted(set([s["device_type"] for s in vendor_database]))
for device_type in device_types:
type_database = [s for s in vendor_database if s["device_type"] == device_type]
f.write(get_cpp_device_vendor(vendor, device_type)) f.write(get_cpp_device_vendor(vendor, device_type))
# Loops over every device of this vendor-type combination # Loops over every device of this vendor-type combination
for device_name, device_database in device_type_database.groupby(["device"]): devices = sorted(set([s["device"] for s in type_database]))
for device_name in devices:
device_database = [s for s in type_database if s["device"] == device_name]
device_name_quoted = "\"%s\"," % device_name device_name_quoted = "\"%s\"," % device_name
device_name_cpp = " { %-50s { " % device_name_quoted device_name_cpp = " { %-50s { " % device_name_quoted
f.write(device_name_cpp) f.write(device_name_cpp)
# Collects the parameters for this entry # Collects the parameters for this entry
parameters = [] parameters = []
for kernel, kernel_database in device_database.groupby(["kernel"]): kernels = sorted(set([s["kernel"] for s in device_database]))
kernel_database = kernel_database.dropna(axis=1) for kernel in kernels:
kernel_database = [s for s in device_database if s["kernel"] == kernel]
# Only consider the actual parameters, not the precision assert len(kernel_database) == 1
def is_parameter(column): results = kernel_database[0]["results"]
return column.startswith('parameters.') and column != "parameters.PRECISION"
column_names = [col for col in list(kernel_database) if is_parameter(col)]
for p in column_names: assert len(results) == 1
parameter_name = p.replace("parameters.", "") new_parameters = results[0]["parameters"]
parameter_value = int(kernel_database[p].iloc[0]) for parameter_name in sorted(new_parameters):
parameter_value = new_parameters[parameter_name]
parameters.append("{\"" + parameter_name + "\"," + str(parameter_value) + "}") parameters.append("{\"" + parameter_name + "\"," + str(parameter_value) + "}")
# Prints the entry # Prints the entry

View file

@ -5,66 +5,60 @@
# Author(s): # Author(s):
# Cedric Nugteren <www.cedricnugteren.nl> # Cedric Nugteren <www.cedricnugteren.nl>
import pandas as pd import clblast
import numpy as np
def get_entries_by_field(database, field, value): def length(database):
"""Retrieves entries from the database with a specific value for a given field""" """Computes the total number of tuning entries"""
return database[database[field] == value] num_tuning_entries = 0
for section in database["sections"]:
num_tuning_entries += len(section["results"])
return num_tuning_entries
def concatenate_database(database1, database2): def add_section(database, new_section):
"""Concatenates two databases row-wise and returns the result""" """Adds a new section to the database"""
return pd.concat([database1, database2]) for old_section in database["sections"]:
# Verify whether the sections match
equal = True
for attribute in new_section.keys():
if attribute != "results":
if attribute not in old_section or new_section[attribute] != old_section[attribute]:
equal = False
break
def find_and_replace(database, dictionary): # They match: append the new section's results to the corresponding entry in the database and return
"""Finds and replaces entries in a database based on a dictionary. Example: if equal:
dictionary = { "key_to_edit": { find1: replace1, find2, replace2 } }""" old_section["results"] = combine_results(old_section["results"], new_section["results"])
return database.replace(dictionary) return database
# No match found: append the whole new section to the database
def remove_entries_by_key_value(database, key, value): database["sections"].append(new_section)
"""Removes entries in the databased which have a specific value for a given key"""
return database[database[key] != value]
def remove_entries_by_device(database, device_name):
"""Shorthand for the above, specifically removes entries for a given device"""
return remove_entries_by_key_value(database, "device", device_name)
def remove_entries_by_kernel_family(database, kernel_family_name):
"""Shorthand for the above, specifically removes entries for a given kernel family"""
return remove_entries_by_key_value(database, "kernel_family", kernel_family_name)
def update_database(database, condition, field, value):
"""Updates the database by writing a specific value to a given field, given certain conditions"""
database.loc[condition, field] = value
return database return database
def remove_duplicates(database): def combine_results(old_results, new_results):
"""Removes duplicates from the database based on all but the 'time' column""" """Adds new results to the results JSON list"""
for new_result in new_results:
old_results = combine_result(old_results, new_result)
return old_results
# First remove 100% duplicate entries
database = database.drop_duplicates()
# Replace NaNs with -1 first (needed for groupby) def combine_result(old_results, new_result):
database = database.replace(np.nan, -1) """Adds a new result to the results JSON list; filters for duplicate entries and saves the best performing one"""
# In case multiple runs for the exact same configuration where made: take just the best performing one into account # Loops over all existing results to test for already existing entries with these parameters
other_column_names = list(database.columns.values) for old_result in old_results:
other_column_names.remove("time")
database_by_time = database.groupby(other_column_names,)
num_removals = len(database) - len(database_by_time)
if num_removals > 0:
print("[database] Removing %d entries: keeping only those with the lowest execution time" % num_removals)
print("[database] Note: this might take a while")
database = database_by_time.apply(lambda x: x[x["time"] == x["time"].min()])
# Re-replace the NaN values # Verify whether the results match
database = database.replace(-1, np.nan) equal = new_result["parameters"] == old_result["parameters"]
return database
# They match: keep only the one with the minimum execution time
if equal:
old_result["time"] = min(old_result["time"], new_result["time"])
return old_results
# No match found: append a new result
old_results.append(new_result)
return old_results

View file

@ -5,102 +5,176 @@
# Author(s): # Author(s):
# Cedric Nugteren <www.cedricnugteren.nl> # Cedric Nugteren <www.cedricnugteren.nl>
import pandas as pd
import clblast import clblast
import bests import bests
def set_default_device(database_entry): def set_default_device(section):
"""Sets the device name and parameters to some default values""" """Sets the device name and parameters to some default values"""
database_entry["device"] = clblast.DEVICE_NAME_DEFAULT section["device"] = clblast.DEVICE_NAME_DEFAULT
database_entry["device_compute_units"] = 0 section["device_compute_units"] = 0
database_entry["device_core_clock"] = 0 section["device_core_clock"] = 0
return database_entry return section
def set_default_time(database_entry): def set_identifiers(database, group_by_attributes, identifier_name):
"""Sets the execution time to some default value""" """Sets a group-identifier based on a given set of attributes. Modifies the database but also returns a list of
database_entry["time"] = 0.0 unique identifiers."""
return database_entry identifiers = []
for section in database["sections"]:
identifier = []
for attribute in group_by_attributes:
if attribute in section:
identifier.append(section[attribute])
section[identifier_name] = ";".join(identifier)
identifiers.append(section[identifier_name])
return sorted(set(identifiers))
def calculate_defaults(database, verbose, calculate_common_best=True): def remove_identifiers(database, identifier_name):
"""Sets defaults for devices of the same type/vendor. An option determines how to compute the defaults.""" """Removes an identifier from all sections in the database"""
database_defaults = pd.DataFrame() for section in database["sections"]:
section.pop(identifier_name, None)
# Defaults per combination of device vendors and device types (e.g. AMD GPU)
database_type_vendor = database.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"] +
clblast.ARGUMENT_ATTRIBUTES)
for group_name, database_group in database_type_vendor:
if calculate_common_best:
default_values = get_common_best(database_group, group_name, verbose)
else:
default_values = get_smallest_best(database_group)
default_values = set_default_device(default_values)
default_values = set_default_time(default_values)
database_defaults = database_defaults.append(default_values, ignore_index=True)
# Checks for mis-matched arguments def get_groups_by_identifier(database, group_identifiers, identifier_name):
groups = database_defaults.groupby(clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"]) """Returns a list of (group, group_identifier) tuples based a previously made grouping"""
for group_name, database_group in groups: groups = []
if len(database_group) != 1: for group_identifier in group_identifiers:
print("[WARNING] Entries for a single kernel with multiple argument values: " + str(group_name))
# Defaults over all device types and vendors # Get all sections in this group
groups = database.groupby(clblast.KERNEL_ATTRIBUTES + ["kernel"] + clblast.ARGUMENT_ATTRIBUTES) group = []
for group_name, database_group in groups: for section in database["sections"]:
if calculate_common_best: if section[identifier_name] == group_identifier:
default_values = get_common_best(database_group, group_name, verbose) group.append(section)
else:
default_values = get_smallest_best(database_group) groups.append((group, group_identifier))
default_values["device_vendor"] = clblast.VENDOR_DEFAULT return groups
default_values["device_type"] = clblast.DEVICE_TYPE_DEFAULT
default_values = set_default_device(default_values)
default_values = set_default_time(default_values) def calculate_defaults(database, verbose):
database_defaults = database_defaults.append(default_values, ignore_index=True) """Sets defaults for devices of the same type/vendor"""
# Groups the database by kernel, vendor and device type (e.g. AMD GPU)
group_identifiers = set_identifiers(database, clblast.GROUP_ATTRIBUTES, "group_identifier")
groups = get_groups_by_identifier(database, group_identifiers, "group_identifier")
# Loops over all groups
default_sections = {"sections": []}
for group, group_identifier in groups:
# Computes the best parameters
default_parameters = get_common_best_parameters(group, group_identifier, verbose)
# Stores all the section's data
assert len(group) > 0
default_section = {}
for attribute in group[0].keys():
if attribute != "results" and attribute != "group_identifier":
default_section[attribute] = group[0][attribute]
default_section = set_default_device(default_section)
default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
default_sections["sections"].append(default_section)
# Groups the database by kernel, vendor and device type (e.g. AMD GPU) - but not by arguments! This is to check for
# mis-matched arguments.
attributes = clblast.DEVICE_TYPE_ATTRIBUTES + clblast.KERNEL_ATTRIBUTES + ["kernel"]
group_identifiers = set_identifiers(default_sections, attributes, "temp_identifier")
groups = get_groups_by_identifier(default_sections, group_identifiers, "temp_identifier")
for group, group_identifier in groups:
if len(group) != 1:
print("[ERROR] Entries for a single kernel with multiple argument values: " + str(group_identifier))
assert len(group) == 1
remove_identifiers(default_sections, "temp_identifier")
# Groups the database by kernel only
group_identifiers = set_identifiers(database, clblast.KERNEL_ATTRIBUTES + ["kernel"], "group_identifier")
groups = get_groups_by_identifier(database, group_identifiers, "group_identifier")
# Loops over all groups
for group, group_identifier in groups:
# Computes the best parameters
default_parameters = get_common_best_parameters(group, group_identifier, verbose)
# Stores all the section's data
assert len(group) > 0
default_section = {}
for attribute in group[0].keys():
if attribute != "results" and attribute != "group_identifier":
default_section[attribute] = group[0][attribute]
default_section = set_default_device(default_section)
default_section["device_vendor"] = clblast.VENDOR_DEFAULT
default_section["device_type"] = clblast.DEVICE_TYPE_DEFAULT
default_section["results"] = [{"time": 0.0, "parameters": default_parameters}]
default_sections["sections"].append(default_section)
# Database with both types of defaults only # Database with both types of defaults only
return database_defaults return default_sections
def get_smallest_best(database): def get_smallest_best_parameters(group):
"""Sets defaults based on the smallest values of all known entries. The average might be better for performance but """Sets defaults based on the smallest values of all known entries. The average might be better for performance but
some parameters might not be supported on other devices.""" some parameters might not be supported on other devices."""
database_best_results = bests.get_best_results(database)
return database_best_results.min(axis=0) # Counts the number of devices in this group
assert len(group) > 0
# Find the smallest values of the parameters
min_parameters = {}
for section in group:
assert len(section["results"]) > 0
minimum_time = min([result["time"] for result in section["results"]])
for result in section["results"]:
if result["time"] == minimum_time:
for parameter in result["parameters"]:
if parameter in min_parameters:
min_parameters[parameter] = min(min_parameters[parameter], result["parameters"][parameter])
else:
min_parameters[parameter] = result["parameters"][parameter]
return min_parameters
def get_common_best(database, group_name, verbose): def get_common_best_parameters(group, group_identifier, verbose):
"""Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case """Sets defaults based on the best values of entries supported by all devices. This might cause a problem in case
not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve not every device was tuned with the same parameters. In that case it falls back to the above method to retrieve
the smallest best execution time""" the smallest best execution time"""
# Counts the number of devices in this group # Counts the number of devices in this group
num_devices = len(database.groupby(clblast.DEVICE_ATTRIBUTES)) num_devices = len(group)
assert num_devices > 0
# Removes columns without any values
database = database.dropna(axis=1, how='all')
# Inserts the relative execution times into the database # Inserts the relative execution times into the database
def relative_performance(x): for section in group:
x["relative_performance"] = x["time"].min() / x["time"] assert len(section["results"]) > 0
return x minimum_time = min([result["time"] for result in section["results"]])
database = database.groupby(clblast.ATTRIBUTES + ["kernel"]).apply(relative_performance) for result in section["results"]:
result["relative_performance"] = minimum_time / result["time"]
# Retrieves the parameter names for this kernel # Determine which parameters are available for all devices
all_column_names = list(database.columns.values) common_parameters = [result["parameters"] for result in group[0]["results"]] # Parameters of the first section
parameter_column_names = [c for c in all_column_names if "parameters." in c] for i in range(1, num_devices):
section_parameters = [result["parameters"] for result in group[i]["results"]]
# Removes entries which are not available for all devices common_parameters = [p for p in section_parameters if p in common_parameters] # Intersection of the parameters
database_by_parameters = database.groupby(parameter_column_names)
database_common = database_by_parameters.filter(lambda x: len(x) == num_devices)
# Fall back to another method in case there are no shared entries at all across devices # Fall back to another method in case there are no shared entries at all across devices
if len(database_common) == 0: if len(common_parameters) == 0:
if verbose: if verbose:
print("[database] No common kernels for: " + str(group_name) + " with devices: %d " % num_devices) print("[database] No common kernels for: " + str(group_identifier) + " with devices: %d " % num_devices)
return get_smallest_best(database) smallest_best_parameters = get_smallest_best_parameters(group)
if verbose:
print("[database] " + str(group_identifier))
return smallest_best_parameters
# Removes entries with parameters which are not common
common_results = []
for section in group:
for result in section["results"]:
if result["parameters"] in common_parameters:
common_results.append(result)
# Retrieves the entries with the highest relative performance # Retrieves the entries with the highest relative performance
return bests.get_relative_bests(database_common, parameter_column_names, group_name, verbose) relative_best_parameters = bests.get_relative_bests(group_identifier, common_results, common_parameters, verbose)
return relative_best_parameters

View file

@ -13,46 +13,48 @@ try:
except ImportError: except ImportError:
from urllib2 import urlopen # Python 2 from urllib2 import urlopen # Python 2
import pandas as pd
import clblast
def download_database(filename, database_url): def download_database(filename, database_url):
"""Downloads a database and saves it to disk""" """Downloads a database and saves it to disk"""
print("[database] Downloading database from '" + database_url + "'...") print("[database] Downloading database from '" + database_url + "'...")
database = urlopen(database_url) database = urlopen(database_url)
with open(filename, 'wb') as f: with open(filename, "wb") as f:
f.write(database.read()) f.write(database.read())
def load_database(filename): def load_database(filename):
"""Loads a database from disk""" """Loads a database from disk"""
print("[database] Loading database from '" + filename + "'") print("[database] Loading database from '" + filename + "'")
return pd.read_pickle(filename) with open(filename) as f:
return json.load(f)
def save_database(database, filename): def save_database(database, filename):
"""Saves a database to disk""" """Saves a database to disk"""
print("[database] Saving database to '" + filename + "'") print("[database] Saving database to '" + filename + "'")
database.to_pickle(filename) with open(filename, "wb") as f:
json.dump(database, f, sort_keys=True, indent=4)
def load_json_to_pandas(filename): def load_tuning_results(filename):
"""Loads JSON data from file and converts it to a pandas database""" """Loads JSON data from file and pre-processes it"""
with open(filename) as f: with open(filename) as f:
json_data = json.load(f) json_data = json.load(f)
# Gathers all results and stores them in a new database # Removes the numbering following the kernel family name
json_database = pd.DataFrame(json_data) json_data["kernel_family"] = re.sub(r'_\d+', '', json_data["kernel_family"])
new_database = pd.io.json.json_normalize(json_database["results"])
# Sets the common attributes to each entry in the results # Adds the kernel name to the section instead of to the individual results
for attribute in clblast.ATTRIBUTES: assert len(json_data["results"]) > 0
if attribute == "kernel_family": json_data["kernel"] = json_data["results"][0]["kernel"]
new_database[attribute] = re.sub(r'_\d+', '', json_data[attribute]) for result in json_data["results"]:
elif attribute in json_data: assert json_data["kernel"] == result["kernel"]
new_database[attribute] = json_data[attribute] result.pop("kernel", None)
else:
new_database[attribute] = 0 # For example a parameters that was not used by this kernel # Removes the 'PRECISION' parameter from the individual results: it is redundant
return new_database for result in json_data["results"]:
assert json_data["precision"] == str(result["parameters"]["PRECISION"])
result["parameters"].pop("PRECISION", None)
# All done
return json_data

View file

@ -35,9 +35,9 @@ const std::vector<Database::DatabaseEntry> Database::database = {
XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble, XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble,
XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble, XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble,
XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble, XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble,
/* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble, XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble,
XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble, XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble,
/* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble, XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble,
CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble, CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble,
PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble, PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble,
TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble, TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble,

View file

@ -72,9 +72,9 @@ class Database {
static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble; static const DatabaseEntry XdotHalf, XdotSingle, XdotDouble, XdotComplexSingle, XdotComplexDouble;
static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble; static const DatabaseEntry XgemvHalf, XgemvSingle, XgemvDouble, XgemvComplexSingle, XgemvComplexDouble;
static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble; static const DatabaseEntry XgemvFastHalf, XgemvFastSingle, XgemvFastDouble, XgemvFastComplexSingle, XgemvFastComplexDouble;
static const DatabaseEntry /* XgemvFastRotHalf, */ XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble; static const DatabaseEntry XgemvFastRotHalf, XgemvFastRotSingle, XgemvFastRotDouble, XgemvFastRotComplexSingle, XgemvFastRotComplexDouble;
static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble; static const DatabaseEntry XgerHalf, XgerSingle, XgerDouble, XgerComplexSingle, XgerComplexDouble;
static const DatabaseEntry /* XgemmHalf, */ XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble; static const DatabaseEntry XgemmHalf, XgemmSingle, XgemmDouble, XgemmComplexSingle, XgemmComplexDouble;
static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble; static const DatabaseEntry CopyHalf, CopySingle, CopyDouble, CopyComplexSingle, CopyComplexDouble;
static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble; static const DatabaseEntry PadHalf, PadSingle, PadDouble, PadComplexSingle, PadComplexDouble;
static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble; static const DatabaseEntry TransposeHalf, TransposeSingle, TransposeDouble, TransposeComplexSingle, TransposeComplexDouble;

View file

@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XaxpyHalf = {
kDeviceTypeGPU, "Intel", { kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW",4}, {"WGS",512}, {"WPT",8} } },
{ "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
} }
}, },
{ // Default { // Default
kDeviceTypeAll, "default", { kDeviceTypeAll, "default", {
{ "default", { {"VW",4}, {"WGS",512}, {"WPT",8} } }, { "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
} }
}, },
} }

View file

@ -38,11 +38,10 @@ const Database::DatabaseEntry Database::XdotSingle = {
{ // AMD GPUs { // AMD GPUs
kDeviceTypeGPU, "AMD", { kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } }, { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } },
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",128}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",128}, {"WGS2",32} } },
{ "default", { {"WGS1",256}, {"WGS2",32} } }, { "default", { {"WGS1",128}, {"WGS2",32} } },
} }
}, },
{ // Intel CPUs { // Intel CPUs
@ -90,11 +89,10 @@ const Database::DatabaseEntry Database::XdotComplexSingle = {
{ // AMD GPUs { // AMD GPUs
kDeviceTypeGPU, "AMD", { kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } }, { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",128}, {"WGS2",32} } }, { "Oland", { {"WGS1",128}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",64}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",64}, {"WGS2",32} } },
{ "default", { {"WGS1",256}, {"WGS2",32} } }, { "default", { {"WGS1",128}, {"WGS2",32} } },
} }
}, },
{ // Intel CPUs { // Intel CPUs
@ -142,7 +140,6 @@ const Database::DatabaseEntry Database::XdotDouble = {
{ // AMD GPUs { // AMD GPUs
kDeviceTypeGPU, "AMD", { kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } }, { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } },
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
@ -184,7 +181,6 @@ const Database::DatabaseEntry Database::XdotComplexDouble = {
{ // AMD GPUs { // AMD GPUs
kDeviceTypeGPU, "AMD", { kDeviceTypeGPU, "AMD", {
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } }, { "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
{ "Hawaii", { {"WGS1",256}, {"WGS2",32} } },
{ "Oland", { {"WGS1",256}, {"WGS2",32} } }, { "Oland", { {"WGS1",256}, {"WGS2",32} } },
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } }, { "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } }, { "Tahiti", { {"WGS1",256}, {"WGS2",32} } },

View file

@ -14,6 +14,18 @@
namespace clblast { namespace clblast {
// ================================================================================================= // =================================================================================================
// Fallback entry for half-precision Xgemm: no per-device tuning results exist
// yet, so a single generic default parameter set applies to all device types.
const Database::DatabaseEntry Database::XgemmHalf = {
"Xgemm", Precision::kHalf, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
}
},
}
};
// =================================================================================================
const Database::DatabaseEntry Database::XgemmSingle = { const Database::DatabaseEntry Database::XgemmSingle = {
"Xgemm", Precision::kSingle, { "Xgemm", Precision::kSingle, {
{ // AMD GPUs { // AMD GPUs
@ -76,7 +88,7 @@ const Database::DatabaseEntry Database::XgemmSingle = {
}, },
{ // Default { // Default
kDeviceTypeAll, "default", { kDeviceTypeAll, "default", {
{ "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } }, { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
} }
}, },
} }

View file

@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XgemvHalf = {
kDeviceTypeGPU, "Intel", { kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"WGS1",128}, {"WPT1",1} } },
{ "default", { {"WGS1",128}, {"WPT1",1} } }, { "default", { {"WGS1",64}, {"WPT1",1} } },
} }
}, },
{ // Default { // Default
kDeviceTypeAll, "default", { kDeviceTypeAll, "default", {
{ "default", { {"WGS1",128}, {"WPT1",1} } }, { "default", { {"WGS1",64}, {"WPT1",1} } },
} }
}, },
} }

View file

@ -20,12 +20,12 @@ const Database::DatabaseEntry Database::XgemvFastHalf = {
kDeviceTypeGPU, "Intel", { kDeviceTypeGPU, "Intel", {
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } }, { "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, { "Intel(R) HD Graphics Skylake ULT GT2", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } },
{ "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
} }
}, },
{ // Default { // Default
kDeviceTypeAll, "default", { kDeviceTypeAll, "default", {
{ "default", { {"VW2",2}, {"WGS2",128}, {"WPT2",2} } }, { "default", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
} }
}, },
} }

View file

@ -14,6 +14,18 @@
namespace clblast { namespace clblast {
// ================================================================================================= // =================================================================================================
// Fallback entry for half-precision XgemvFastRot: no per-device tuning results
// exist yet, so a single generic default parameter set applies to all device types.
const Database::DatabaseEntry Database::XgemvFastRotHalf = {
"XgemvFastRot", Precision::kHalf, {
{ // Default
kDeviceTypeAll, "default", {
{ "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
}
},
}
};
// =================================================================================================
const Database::DatabaseEntry Database::XgemvFastRotSingle = { const Database::DatabaseEntry Database::XgemvFastRotSingle = {
"XgemvFastRot", Precision::kSingle, { "XgemvFastRot", Precision::kSingle, {
{ // AMD GPUs { // AMD GPUs