From 2df9f21ab8564cf7e55641400c0ab049df798f95 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sat, 23 Sep 2017 18:06:43 +0200 Subject: [PATCH] Added extra benchmarks to verify new database caching keys performance --- CHANGELOG | 1 + test/diagnostics.cpp | 3 +++ 2 files changed, 4 insertions(+) diff --git a/CHANGELOG b/CHANGELOG index 32c0be3a..f93e736d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,7 @@ Development (next version) - The tuning database now has a dictionary to translate vendor/device names to a common set - The tuners can now distinguish between different AMD GPU board names of the same architecture - The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian') +- Improved performance for small problems on NVIDIA hardware by caching the device name - Further improved compilation time of database.cpp - Added a small diagnostics helper executable - Various minor fixes and enhancements diff --git a/test/diagnostics.cpp b/test/diagnostics.cpp index 6872ed6f..99b936f8 100644 --- a/test/diagnostics.cpp +++ b/test/diagnostics.cpp @@ -83,6 +83,9 @@ void OpenCLDiagnostics(int argc, char *argv[]) { printf("* queue.GetContext() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetContext();} )); printf("* queue.GetDevice() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetDevice();} )); printf("* device.Name() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Name();} )); + printf("* device.Vendor() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Vendor();} )); + printf("* device.Version() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Version();} )); + printf("* device.Platform() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Platform();} )); printf("* Buffer(context, 1024) %.4lf ms\n", TimeFunction(kNumRuns, [&](){Buffer(context, 1024);} )); printf("\n");