mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-04 21:36:57 +02:00
Added extra benchmarks to verify the performance of the new database caching keys
This commit is contained in:
parent
890281f3e8
commit
2df9f21ab8
|
@ -4,6 +4,7 @@ Development (next version)
|
|||
- The tuning database now has a dictionary to translate vendor/device names to a common set
|
||||
- The tuners can now distinguish between different AMD GPU board names of the same architecture
|
||||
- The tuners can now use particle-swarm optimisation to search more efficiently (thanks to 'mcian')
|
||||
- Improved performance for small problems on NVIDIA hardware by caching the device name
|
||||
- Further improved compilation time of database.cpp
|
||||
- Added a small diagnostics helper executable
|
||||
- Various minor fixes and enhancements
|
||||
|
|
|
@ -83,6 +83,9 @@ void OpenCLDiagnostics(int argc, char *argv[]) {
|
|||
printf("* queue.GetContext() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetContext();} ));
|
||||
printf("* queue.GetDevice() %.4lf ms\n", TimeFunction(kNumRuns, [&](){queue.GetDevice();} ));
|
||||
printf("* device.Name() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Name();} ));
|
||||
printf("* device.Vendor() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Vendor();} ));
|
||||
printf("* device.Version() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Version();} ));
|
||||
printf("* device.Platform() %.4lf ms\n", TimeFunction(kNumRuns, [&](){device.Platform();} ));
|
||||
printf("* Buffer<float>(context, 1024) %.4lf ms\n", TimeFunction(kNumRuns, [&](){Buffer<float>(context, 1024);} ));
|
||||
|
||||
printf("\n");
|
||||
|
|
Loading…
Reference in a new issue