diff --git a/CHANGELOG b/CHANGELOG index 8fce1969..852b734f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,6 +8,7 @@ Development version (next release) - Fixed a performance issue (caused by fp16 support) by optimizing alpha/beta parameter passing to kernels - Added an option (-warm_up) to do a warm-up run before timing in the performance clients - Improved performance significantly of rotated GEMV computations +- Improved performance of unseen/un-tuned devices by a better default tuning parameter selection - Various minor fixes and enhancements - Added tuned parameters for various devices (see README) diff --git a/scripts/database/database/bests.py b/scripts/database/database/bests.py index edb81733..e6239258 100644 --- a/scripts/database/database/bests.py +++ b/scripts/database/database/bests.py @@ -18,3 +18,26 @@ def get_best_results(df): best_parameters = database_entry[database_entry["time"] == best_time].iloc[0] database_bests = database_bests.append(best_parameters, ignore_index=True) return database_bests + + +def get_relative_bests(df, parameter_column_names, name, verbose=False): + """Retrieves the relative best execution time over different devices""" + + # Computes the sum of the relative performance over the different devices + def sum_performance(x): + x["group_performance"] = x["relative_performance"].sum() + return x + df = df.groupby(parameter_column_names).apply(sum_performance) + + # Retrieves the entries with the highest performance + best_performance = df["group_performance"].max() + df_bests = df[df["group_performance"] == best_performance] + + # Retrieves one example only (the parameters are the same anyway) + df_bests = df_bests.drop_duplicates(["group_performance"]) + + # Completed, report and return the results + if verbose: + print("[database] " + str(name) + " with performance " + str(best_performance) + " " + str(df_bests.shape)) + assert len(df_bests) == 1 + return df_bests diff --git a/scripts/database/database/defaults.py 
b/scripts/database/database/defaults.py index 985f24bd..3428d9a9 100644 --- a/scripts/database/database/defaults.py +++ b/scripts/database/database/defaults.py @@ -97,25 +97,9 @@ def get_common_best(database, group_name, verbose): # Fall back to another method in case there are no shared entries at all across devices if len(database_common) == 0: - # print("[database] Skipping: " + str(group_name) + " with devices: %d %d " % (num_devices, len(database))) + if verbose: + print("[database] No common kernels for: " + str(group_name) + " with devices: %d " % num_devices) return get_smallest_best(database) - # Computes the sum of the execution times over the different devices - def sum_performance(x): - x["group_performance"] = x["relative_performance"].sum() - return x - database_common = database_common.groupby(parameter_column_names).apply(sum_performance) - - # Retrieves the entries with the highest performance - best_performance = database_common["group_performance"].max() - database_bests = database_common[database_common["group_performance"] == best_performance] - - # Retrieves one example only (the parameters are the same anyway) - database_bests = database_bests.drop_duplicates(["group_performance"]) - - # Completed, report and return the results - if verbose: - print("[database] " + str(group_name) + " with performance " + str(best_performance) + " with devices: " + - str(num_devices) + " " + str(database_bests.shape)) - assert len(database_bests) == 1 - return database_bests + # Retrieves the entries with the highest relative performance + return bests.get_relative_bests(database_common, parameter_column_names, group_name, verbose)