Added extra benchmarks to verify the performance of the new database caching keys

This commit is contained in:
Cedric Nugteren 2017-09-23 18:06:43 +02:00
parent 890281f3e8
commit 2df9f21ab8
2 changed files with 4 additions and 0 deletions

View file

@@ -4,6 +4,7 @@ Development (next version)
- The tuning database now has a dictionary to translate vendor/device names to a common set
- The tuners can now distinguish between different AMD GPU board names of the same architecture
- The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian')
- Improved performance for small problems on NVIDIA hardware by caching the device name
- Further improved compilation time of database.cpp
- Added a small diagnostics helper executable
- Various minor fixes and enhancements

View file

@@ -83,6 +83,9 @@ void OpenCLDiagnostics(int argc, char *argv[]) {
printf("* queue.GetContext() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetContext();} ));
printf("* queue.GetDevice() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetDevice();} ));
printf("* device.Name() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Name();} ));
printf("* device.Vendor() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Vendor();} ));
printf("* device.Version() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Version();} ));
printf("* device.Platform() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Platform();} ));
printf("* Buffer<float>(context, 1024) %.4lf ms\n", TimeFunction(kNumRuns, [&](){Buffer<float>(context, 1024);} ));
printf("\n");