mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-04 21:36:57 +02:00
Updated the changelog; refactored the database-get-bests code a bit
This commit is contained in:
parent
7d5631b7e4
commit
00979faab4
|
@ -8,6 +8,7 @@ Development version (next release)
|
|||
- Fixed a performance issue (caused by fp16 support) by optimizing alpha/beta parameter passing to kernels
|
||||
- Added an option (-warm_up) to do a warm-up run before timing in the performance clients
|
||||
- Significantly improved performance of rotated GEMV computations
|
||||
- Improved performance of unseen/un-tuned devices by a better default tuning parameter selection
|
||||
- Various minor fixes and enhancements
|
||||
- Added tuned parameters for various devices (see README)
|
||||
|
||||
|
|
|
@ -18,3 +18,26 @@ def get_best_results(df):
|
|||
best_parameters = database_entry[database_entry["time"] == best_time].iloc[0]
|
||||
database_bests = database_bests.append(best_parameters, ignore_index=True)
|
||||
return database_bests
|
||||
|
||||
|
||||
def get_relative_bests(df, parameter_column_names, name, verbose=False):
    """Retrieves the entry whose parameter combination has the highest summed relative performance.

    Rows of `df` are grouped by `parameter_column_names`; the values in the "relative_performance"
    column are summed per group and stored in a new "group_performance" column. The first row
    carrying the highest total is returned.

    Args:
        df: pandas DataFrame with a "relative_performance" column and all the columns listed in
            `parameter_column_names`. It is not modified.
        parameter_column_names: column name(s) identifying one parameter combination.
        name: label used only in the verbose report and in the assertion message.
        verbose: when true, prints the best total and the shape of the result.

    Returns:
        A single-row DataFrame (original row index preserved) with the extra "group_performance"
        column.

    Raises:
        AssertionError: if the selection does not end up with exactly one row.
    """

    # Computes the sum of the relative performance per parameter combination. A transform is used
    # rather than groupby-apply with a mutating function: it returns a series aligned with the
    # original rows, so the index and all columns (including the grouping ones) are kept as-is.
    group_performance = df.groupby(parameter_column_names)["relative_performance"].transform("sum")
    df = df.assign(group_performance=group_performance)

    # Retrieves the entries with the highest performance
    best_performance = df["group_performance"].max()
    df_bests = df[df["group_performance"] == best_performance]

    # Retrieves one example only: all remaining rows share the same total, so de-duplicating on
    # that column keeps just the first of them (also when distinct combinations tie)
    df_bests = df_bests.drop_duplicates(["group_performance"])

    # Completed, report and return the results
    if verbose:
        print("[database] " + str(name) + " with performance " + str(best_performance) + " " + str(df_bests.shape))
    assert len(df_bests) == 1, "expected exactly one best entry for " + str(name)
    return df_bests
|
||||
|
|
|
@ -97,25 +97,9 @@ def get_common_best(database, group_name, verbose):
|
|||
|
||||
# Fall back to another method in case there are no shared entries at all across devices
|
||||
if len(database_common) == 0:
|
||||
# print("[database] Skipping: " + str(group_name) + " with devices: %d %d " % (num_devices, len(database)))
|
||||
if verbose:
|
||||
print("[database] No common kernels for: " + str(group_name) + " with devices: %d " % num_devices)
|
||||
return get_smallest_best(database)
|
||||
|
||||
# Computes the sum of the execution times over the different devices
|
||||
def sum_performance(x):
|
||||
x["group_performance"] = x["relative_performance"].sum()
|
||||
return x
|
||||
database_common = database_common.groupby(parameter_column_names).apply(sum_performance)
|
||||
|
||||
# Retrieves the entries with the highest performance
|
||||
best_performance = database_common["group_performance"].max()
|
||||
database_bests = database_common[database_common["group_performance"] == best_performance]
|
||||
|
||||
# Retrieves one example only (the parameters are the same anyway)
|
||||
database_bests = database_bests.drop_duplicates(["group_performance"])
|
||||
|
||||
# Completed, report and return the results
|
||||
if verbose:
|
||||
print("[database] " + str(group_name) + " with performance " + str(best_performance) + " with devices: " +
|
||||
str(num_devices) + " " + str(database_bests.shape))
|
||||
assert len(database_bests) == 1
|
||||
return database_bests
|
||||
# Retrieves the entries with the highest relative performance
|
||||
return bests.get_relative_bests(database_common, parameter_column_names, group_name, verbose)
|
||||
|
|
Loading…
Reference in a new issue