Merge branch 'development' into triangular_solvers
commit
ea6790665d
|
@ -5,6 +5,7 @@ Development version (next release)
|
|||
- Fixed a bug when using offsets in the direct version of the GEMM kernels
|
||||
- Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs
|
||||
- Tests now also exit with an error code when OpenCL errors or compilation errors occur
|
||||
- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
|
||||
- Various minor fixes and enhancements
|
||||
- Added tuned parameters for various devices (see README)
|
||||
- Added level-2 routines:
|
||||
|
|
|
@ -415,6 +415,18 @@ if(TESTS)
|
|||
add_test(clblast_test_${ROUTINE} clblast_test_${ROUTINE})
|
||||
endforeach()
|
||||
|
||||
# Miscellaneous tests: for every name in MISC_TESTS this builds an executable clblast_test_<name>
# from the ${TESTS_COMMON} sources plus test/correctness/misc/<name>.cpp, links it against clblast,
# the reference libraries and OpenCL, and registers it as a test under the same name.
|
||||
set(MISC_TESTS override_parameters)
|
||||
foreach(MISC_TEST ${MISC_TESTS})
|
||||
add_executable(clblast_test_${MISC_TEST} ${TESTS_COMMON}
|
||||
test/correctness/misc/${MISC_TEST}.cpp)
|
||||
target_link_libraries(clblast_test_${MISC_TEST} clblast ${REF_LIBRARIES} ${OPENCL_LIBRARIES})
|
||||
target_include_directories(clblast_test_${MISC_TEST} PUBLIC
|
||||
$<TARGET_PROPERTY:clblast,INTERFACE_INCLUDE_DIRECTORIES>
|
||||
${clblast_SOURCE_DIR} ${REF_INCLUDES})
|
||||
add_test(clblast_test_${MISC_TEST} clblast_test_${MISC_TEST})
|
||||
endforeach()
|
||||
|
||||
# Adds 'alltests' target: runs all tests
|
||||
set(ALLTESTS )
|
||||
set(ALLTESTSDEPENDS )
|
||||
|
|
|
@ -119,10 +119,12 @@ The CLBlast library will be tuned in the future for the most commonly used OpenC
|
|||
- GeForce GTX TITAN
|
||||
- GeForce GTX TITAN Black
|
||||
- GeForce GTX TITAN X
|
||||
- TITAN X (Pascal)
|
||||
- Tesla K20m
|
||||
- Tesla K40m
|
||||
* AMD GPUs:
|
||||
- AMD Radeon R9 M370X Compute Engine
|
||||
- Ellesmere
|
||||
- Hawaii
|
||||
- Oland
|
||||
- Pitcairn
|
||||
|
@ -155,7 +157,7 @@ Note that CLBlast's tuners are based on the [CLTune auto-tuning library](https:/
|
|||
|
||||
Compiling with `-DTUNERS=ON` will generate a number of tuners, each named `clblast_tuner_xxxxx`, in which `xxxxx` corresponds to a `.opencl` kernel file as found in `src/kernels`. These kernels correspond to routines (e.g. `xgemm`) or to common pre-processing or post-processing kernels (`copy` and `transpose`). Running such a tuner will test a number of parameter-value combinations on your device and report which one gave the best performance. Running `make alltuners` runs all tuners for all precisions in one go. You can set the default device and platform for `alltuners` by setting the `CLBLAST_DEVICE` and `CLBLAST_PLATFORM` environment variables.
|
||||
|
||||
The tuners output a JSON-file with the results. The best results need to be added to `src/database/kernels/xxxxx.hpp` in the appropriate section. However, this can be done automatically based on the JSON-data using a Python script in `scripts/database/database.py`. If you want the found parameters to be included in future releases of CLBlast, please attach the JSON files to the corresponding issue on GitHub or [email the main author](http://www.cedricnugteren.nl).
|
||||
The tuners output a JSON-file with the results. The best results need to be added to `src/database/kernels/xxxxx.hpp` in the appropriate section. However, this can be done automatically based on the JSON-data using a Python (2.7 or 3.x) script in `scripts/database/database.py`. If you want the found parameters to be included in future releases of CLBlast, please attach the JSON files to the corresponding issue on GitHub or [email the main author](http://www.cedricnugteren.nl).
|
||||
|
||||
In summary, tuning the entire library for your device can be done as follows (starting from the root of the CLBlast folder):
|
||||
|
||||
|
@ -167,6 +169,8 @@ In summary, tuning the entire library for your device can be done as follows (st
|
|||
python ../scripts/database/database.py . ..
|
||||
make
|
||||
|
||||
Alternatively, you can also supply your tuning parameters programmatically through the CLBlast API. This is especially useful if you tune for specific non-standard arguments (e.g. a rectangular or a very small matrix). To do so, you can call the `OverrideParameters` function, which will set new parameters for a specific kernel. At the next call of the target routine, CLBlast will compile a new binary and use it together with the new parameters from then on, until `OverrideParameters` is called again. See the [API documentation](doc/clblast.md#overrideparameters-override-tuning-parameters-auxiliary-function) for more details.
|
||||
|
||||
|
||||
Compiling the correctness tests (optional)
|
||||
-------------
|
||||
|
|
|
@ -2903,3 +2903,66 @@ Requirements for OMATCOPY:
|
|||
|
||||
|
||||
|
||||
ClearCache: Resets the cache of compiled binaries (auxiliary function)
|
||||
-------------
|
||||
|
||||
CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on for the same device. This cache can be cleared to free up system memory or it can be useful in case of debugging.
|
||||
|
||||
C++ API:
|
||||
```
|
||||
StatusCode ClearCache()
|
||||
```
|
||||
|
||||
C API:
|
||||
```
|
||||
CLBlastStatusCode CLBlastClearCache()
|
||||
```
|
||||
|
||||
|
||||
|
||||
FillCache: Populates the cache of compiled binaries for a specific device (auxiliary function)
|
||||
-------------
|
||||
|
||||
CLBlast stores binaries of compiled kernels into a cache in case the same kernel is used later on for the same device. This cache is automatically populated whenever a new binary is created. Thus, the first run of a specific kernel could take extra time. For debugging or performance evaluation purposes, it might be useful to populate the cache upfront. This function populates the cache for all kernels in CLBlast for all precisions, but for a specific device only.
|
||||
|
||||
C++ API:
|
||||
```
|
||||
StatusCode FillCache(const cl_device_id device)
|
||||
```
|
||||
|
||||
C API:
|
||||
```
|
||||
CLBlastStatusCode CLBlastFillCache(const cl_device_id device)
|
||||
```
|
||||
|
||||
Arguments to FillCache:
|
||||
|
||||
* `const cl_device_id device`: The OpenCL device to fill the cache for.
|
||||
|
||||
|
||||
|
||||
OverrideParameters: Override tuning parameters (auxiliary function)
|
||||
-------------
|
||||
|
||||
This function overrides tuning parameters for a specific device-precision-kernel combination. The next time the target routine is called it will be re-compiled and use the new parameters. All further times (until `OverrideParameters` is called again) it will load the kernel from the cache and thus continue to use the new parameters. Note that the first call of the target routine after `OverrideParameters` may be noticeably slower due to the re-compilation of the kernel.
|
||||
|
||||
C++ API:
|
||||
```
|
||||
StatusCode OverrideParameters(const cl_device_id device, const std::string &kernel_name,
|
||||
const Precision precision,
|
||||
const std::unordered_map<std::string,size_t> &parameters)
|
||||
```
|
||||
|
||||
C API:
|
||||
```
|
||||
CLBlastStatusCode CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name,
|
||||
const CLBlastPrecision precision, const size_t num_parameters,
|
||||
const char** parameters_names, const size_t* parameters_values)
|
||||
```
|
||||
|
||||
Arguments to OverrideParameters (C++ version):
|
||||
|
||||
* `const cl_device_id device`: The OpenCL device to set the new parameters for.
|
||||
* `const std::string &kernel_name`: The target kernel name. This has to be one of the existing CLBlast kernels (Xaxpy, Xdot, Xgemv, XgemvFast, XgemvFastRot, Xger, Copy, Pad, Transpose, Padtranspose, Xgemm, or XgemmDirect). If this argument is incorrect, this function will return with the `clblast::kInvalidOverrideKernel` status-code.
|
||||
* `const Precision precision`: The CLBlast precision enum to set the new parameters for.
|
||||
* `const std::unordered_map<std::string,size_t> &parameters`: An unordered map of strings to integers. This has to contain all the tuning parameters for a specific kernel as reported by the included tuners (e.g. `{ {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",4}, {"COPY_WPT",8} }` for the `Copy` kernel). If this argument is incorrect, this function will return with the `clblast::kMissingOverrideParameter` status-code.
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
#define CLBLAST_CLBLAST_H_
|
||||
|
||||
#include <cstdlib> // For size_t
|
||||
#include <string> // For OverrideParameters function
|
||||
#include <unordered_map> // For OverrideParameters function
|
||||
|
||||
// Includes the normal OpenCL C header
|
||||
#if defined(__APPLE__) || defined(__MACOSX)
|
||||
|
@ -95,6 +97,8 @@ enum class StatusCode {
|
|||
kInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small
|
||||
|
||||
// Custom additional status codes for CLBlast
|
||||
kInvalidOverrideKernel = -2048, // Trying to override parameters for an invalid kernel
|
||||
kMissingOverrideParameter = -2047, // Missing override parameter(s) for the target kernel
|
||||
kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
|
||||
kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
|
||||
kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
|
||||
|
@ -617,6 +621,14 @@ StatusCode PUBLIC_API FillCache(const cl_device_id device);
|
|||
|
||||
// =================================================================================================
|
||||
|
||||
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
||||
// the target routine is called it will re-compile and use the new parameters from then on.
|
||||
StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::string &kernel_name,
|
||||
const Precision precision,
|
||||
const std::unordered_map<std::string,size_t> ¶meters);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
} // namespace clblast
|
||||
|
||||
// CLBLAST_CLBLAST_H_
|
||||
|
|
|
@ -96,6 +96,8 @@ typedef enum CLBlastStatusCode_ {
|
|||
CLBlastInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small
|
||||
|
||||
// Custom additional status codes for CLBlast
|
||||
CLBlastInvalidOverrideKernel = -2048, // Trying to override parameters for an invalid kernel
|
||||
CLBlastMissingOverrideParameter = -2047, // Missing override parameter(s) for the target kernel
|
||||
CLBlastInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
|
||||
CLBlastNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
|
||||
CLBlastNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
|
||||
|
@ -117,6 +119,11 @@ typedef enum CLBlastDiagonal_ { CLBlastDiagonalNonUnit = 131,
|
|||
CLBlastDiagonalUnit = 132 } CLBlastDiagonal;
|
||||
typedef enum CLBlastSide_ { CLBlastSideLeft = 141, CLBlastSideRight = 142 } CLBlastSide;
|
||||
|
||||
// Precision enum (values in bits). The complex variants use 3232 and 6464, presumably the bit-width
// of the real and imaginary parts written side by side. NOTE(review): the C API implementation
// converts these values to clblast::Precision with a plain static_cast, so the numeric values
// presumably have to stay in sync with that enum -- confirm against the C++ header.
|
||||
typedef enum CLBlastPrecision_ { CLBlastPrecisionHalf = 16, CLBlastPrecisionSingle = 32,
|
||||
CLBlastPrecisionDouble = 64, CLBlastPrecisionComplexSingle = 3232,
|
||||
CLBlastPrecisionComplexDouble = 6464 } CLBlastPrecision;
|
||||
|
||||
// =================================================================================================
|
||||
// BLAS level-1 (vector-vector) routines
|
||||
// =================================================================================================
|
||||
|
@ -1332,6 +1339,14 @@ CLBlastStatusCode PUBLIC_API CLBlastFillCache(const cl_device_id device);
|
|||
|
||||
// =================================================================================================
|
||||
|
||||
// Overrides tuning parameters for a specific device-precision-kernel combination. The next time
|
||||
// the target routine is called it will re-compile and use the new parameters from then on.
// 'parameters_names' and 'parameters_values' are parallel arrays holding 'num_parameters' entries
// each: entry i supplies the name and the new value of one tuning parameter.
|
||||
CLBlastStatusCode PUBLIC_API CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name,
|
||||
const CLBlastPrecision precision, const size_t num_parameters,
|
||||
const char** parameters_names, const size_t* parameters_values);
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
|
@ -41,8 +41,10 @@ FILES = [
|
|||
"/include/clblast_netlib_c.h",
|
||||
"/src/clblast_netlib_c.cpp",
|
||||
]
|
||||
HEADER_LINES = [117, 75, 118, 22, 29, 41, 65, 32]
|
||||
FOOTER_LINES = [17, 95, 19, 18, 6, 6, 9, 2]
|
||||
HEADER_LINES = [121, 75, 125, 23, 29, 41, 65, 32]
|
||||
FOOTER_LINES = [25, 139, 27, 38, 6, 6, 9, 2]
|
||||
HEADER_LINES_DOC = 0
|
||||
FOOTER_LINES_DOC = 63
|
||||
|
||||
# Different possibilities for requirements
|
||||
ald_m = "The value of `a_ld` must be at least `m`."
|
||||
|
@ -233,11 +235,20 @@ def main(argv):
|
|||
f.write(cpp.performance_test(routine, level_string))
|
||||
f.write(cpp.FOOTER)
|
||||
|
||||
# Outputs the API documentation
|
||||
# API documentation
|
||||
filename = cl_args.clblast_root + "/doc/clblast.md"
|
||||
|
||||
# Stores the header and the footer of the original documentation file
|
||||
with open(filename) as f:
|
||||
original = f.readlines()
|
||||
file_header = original[:HEADER_LINES_DOC]
|
||||
file_footer = original[-FOOTER_LINES_DOC:]
|
||||
|
||||
# Outputs the API documentation
|
||||
with open(filename, "w") as f:
|
||||
|
||||
# Outputs the header
|
||||
f.write("".join(file_header))
|
||||
doc_header = doc.header()
|
||||
f.write(doc_header)
|
||||
|
||||
|
@ -248,5 +259,8 @@ def main(argv):
|
|||
doc_routine = doc.generate(routine)
|
||||
f.write(doc_routine)
|
||||
|
||||
# Outputs the footer
|
||||
f.write("".join(file_footer))
|
||||
|
||||
if __name__ == '__main__':
|
||||
main(sys.argv[1:])
|
||||
|
|
|
@ -64,6 +64,37 @@ void Cache<Key, Value>::Store(Key &&key, Value &&value) {
|
|||
#endif
|
||||
}
|
||||
|
||||
template <typename Key, typename Value>
|
||||
void Cache<Key, Value>::Remove(const Key &key) {
|
||||
std::lock_guard<std::mutex> lock(cache_mutex_);
|
||||
#if __cplusplus >= 201402L
|
||||
cache_.erase(key);
|
||||
#else
|
||||
auto it = cache_.begin();
|
||||
while (it != cache_.end()) {
|
||||
if ((*it).first == key) {
|
||||
it = cache_.erase(it);
|
||||
}
|
||||
else ++it;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename Key, typename Value>
|
||||
template <int I1, int I2>
|
||||
void Cache<Key, Value>::RemoveBySubset(const Key &key) {
|
||||
std::lock_guard<std::mutex> lock(cache_mutex_);
|
||||
auto it = cache_.begin();
|
||||
while (it != cache_.end()) {
|
||||
const auto current_key = (*it).first;
|
||||
if ((std::get<I1>(key) == std::get<I1>(current_key)) &&
|
||||
(std::get<I2>(key) == std::get<I2>(current_key))) {
|
||||
it = cache_.erase(it);
|
||||
}
|
||||
else ++it;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Key, typename Value>
|
||||
void Cache<Key, Value>::Invalidate() {
|
||||
std::lock_guard<std::mutex> lock(cache_mutex_);
|
||||
|
@ -88,6 +119,7 @@ template std::string BinaryCache::Get(const BinaryKeyRef &, bool *) const;
|
|||
|
||||
template class Cache<ProgramKey, Program>;
|
||||
template Program ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
||||
template void ProgramCache::RemoveBySubset<1, 2>(const ProgramKey &); // precision and routine name
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
|
|
|
@ -42,6 +42,10 @@ public:
|
|||
void Store(Key &&key, Value &&value);
|
||||
void Invalidate();
|
||||
|
||||
// Removes all entries with a given key
|
||||
void Remove(const Key &key);
|
||||
// Removes all entries whose key equals 'key' on the tuple elements at indices I1 and I2 only
template <int I1, int I2> void RemoveBySubset(const Key &key); // currently supports 2 indices
|
||||
|
||||
static Cache<Key, Value> &Instance();
|
||||
|
||||
private:
|
||||
|
@ -72,7 +76,6 @@ typedef Cache<BinaryKey, std::string> BinaryCache;
|
|||
extern template class Cache<BinaryKey, std::string>;
|
||||
extern template std::string BinaryCache::Get(const BinaryKeyRef &, bool *) const;
|
||||
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// The key struct for the cache of compiled OpenCL programs (context-dependent)
|
||||
|
@ -90,9 +93,9 @@ extern template Program ProgramCache::Get(const ProgramKeyRef &, bool *) const;
|
|||
class Database;
|
||||
|
||||
// The key struct for the cache of database maps.
|
||||
// Order of fields: precision, device_name, routines (smaller fields first)
|
||||
typedef std::tuple<Precision, std::string, std::vector<std::string>> DatabaseKey;
|
||||
typedef std::tuple<const Precision &, const std::string &, const std::vector<std::string> &> DatabaseKeyRef;
|
||||
// Order of fields: precision, device_name, kernel_name (smaller fields first)
|
||||
typedef std::tuple<Precision, std::string, std::string> DatabaseKey;
|
||||
typedef std::tuple<const Precision &, const std::string &, const std::string &> DatabaseKeyRef;
|
||||
|
||||
typedef Cache<DatabaseKey, Database> DatabaseCache;
|
||||
|
||||
|
|
|
@ -2265,5 +2265,48 @@ StatusCode FillCache(const cl_device_id device) {
|
|||
return StatusCode::kSuccess;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Overrides the tuning parameters for this device-precision-kernel combination
|
||||
StatusCode OverrideParameters(const cl_device_id device, const std::string &kernel_name,
|
||||
const Precision precision,
|
||||
const std::unordered_map<std::string,size_t> ¶meters) {
|
||||
try {
|
||||
|
||||
// Retrieves the device name
|
||||
const auto device_cpp = Device(device);
|
||||
const auto device_name = device_cpp.Name();
|
||||
|
||||
// Retrieves the current database values to verify whether the new ones are complete
|
||||
auto in_cache = false;
|
||||
const auto current_database = DatabaseCache::Instance().Get(DatabaseKeyRef{ precision, device_name, kernel_name }, &in_cache);
|
||||
if (!in_cache) { return StatusCode::kInvalidOverrideKernel; }
|
||||
for (const auto ¤t_param : current_database.GetParameterNames()) {
|
||||
if (parameters.find(current_param) == parameters.end()) {
|
||||
return StatusCode::kMissingOverrideParameter;
|
||||
}
|
||||
}
|
||||
|
||||
// Clears the existing program & binary cache for routines with the target kernel
|
||||
const auto routine_names = Routine::routines_by_kernel.at(kernel_name);
|
||||
for (const auto &routine_name : routine_names) {
|
||||
ProgramCache::Instance().RemoveBySubset<1, 2>(ProgramKey{nullptr, precision, routine_name});
|
||||
BinaryCache::Instance().Remove(BinaryKey{precision, routine_name, device_name});
|
||||
}
|
||||
|
||||
// Creates a small custom database based on the provided parameters
|
||||
const auto database_device = Database::DatabaseDevice{"default", parameters};
|
||||
const auto database_vendor = Database::DatabaseVendor{database::kDeviceTypeAll, "default", {database_device}};
|
||||
const auto database_entry = Database::DatabaseEntry{kernel_name, precision, {database_vendor}};
|
||||
const auto database = Database(device_cpp, kernel_name, precision, {&database_entry});
|
||||
|
||||
// Removes the old database entry and stores the new one in the cache
|
||||
DatabaseCache::Instance().Remove(DatabaseKey{ precision, device_name, kernel_name });
|
||||
DatabaseCache::Instance().Store(DatabaseKey{ precision, device_name, kernel_name }, Database(database));
|
||||
|
||||
} catch (...) { return DispatchException(); }
|
||||
return StatusCode::kSuccess;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace clblast
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
// =================================================================================================
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "utilities/utilities.hpp"
|
||||
#include "clblast_c.h"
|
||||
|
@ -3463,3 +3464,23 @@ CLBlastStatusCode CLBlastFillCache(const cl_device_id device) {
|
|||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Overrides the tuning parameters for this device-precision-kernel combination
|
||||
CLBlastStatusCode PUBLIC_API CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name,
|
||||
const CLBlastPrecision precision, const size_t num_parameters,
|
||||
const char** parameters_names, const size_t* parameters_values) {
|
||||
try {
|
||||
const auto kernel_name_cpp = std::string(kernel_name);
|
||||
const auto precision_cpp = static_cast<clblast::Precision>(precision);
|
||||
auto parameters = std::unordered_map<std::string, size_t>();
|
||||
for (auto i = size_t{0}; i < num_parameters; ++i) {
|
||||
const auto parameter_name = std::string(parameters_names[i]);
|
||||
const auto parameter_value = parameters_values[i];
|
||||
parameters[parameter_name] = parameter_value;
|
||||
}
|
||||
const auto status = clblast::OverrideParameters(device, kernel_name_cpp, precision_cpp, parameters);
|
||||
return static_cast<CLBlastStatusCode>(status);
|
||||
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -67,7 +67,7 @@ const std::unordered_map<std::string, std::string> Database::kVendorNames{
|
|||
|
||||
// Constructor, computing device properties and populating the parameter-vector from the database.
|
||||
// This takes an optional overlay database in case of custom tuning or custom kernels.
|
||||
Database::Database(const Device &device, const std::vector<std::string> &kernels,
|
||||
Database::Database(const Device &device, const std::string &kernel_name,
|
||||
const Precision precision, const std::vector<const DatabaseEntry*> &overlay):
|
||||
parameters_(std::make_shared<Parameters>()) {
|
||||
|
||||
|
@ -83,20 +83,17 @@ Database::Database(const Device &device, const std::vector<std::string> &kernels
|
|||
}
|
||||
}
|
||||
|
||||
// Iterates over all kernels to include, and retrieves the parameters for each of them
|
||||
for (auto &kernel: kernels) {
|
||||
auto search_result = ParametersPtr{};
|
||||
|
||||
for (auto &db: { database, overlay}) {
|
||||
search_result = Search(kernel, device_type, device_vendor, device_name, precision, db);
|
||||
if (search_result) {
|
||||
parameters_->insert(search_result->begin(), search_result->end());
|
||||
break;
|
||||
}
|
||||
// Searches potentially multiple databases
|
||||
auto search_result = ParametersPtr{};
|
||||
for (auto &db: { overlay, database}) {
|
||||
search_result = Search(kernel_name, device_type, device_vendor, device_name, precision, db);
|
||||
if (search_result) {
|
||||
parameters_->insert(search_result->begin(), search_result->end());
|
||||
break;
|
||||
}
|
||||
|
||||
if (!search_result) { throw RuntimeErrorCode(StatusCode::kDatabaseError); }
|
||||
}
|
||||
|
||||
if (!search_result) { throw RuntimeErrorCode(StatusCode::kDatabaseError); }
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
@ -110,6 +107,15 @@ std::string Database::GetDefines() const {
|
|||
return defines;
|
||||
}
|
||||
|
||||
// Retrieves the names of all the parameters
|
||||
std::vector<std::string> Database::GetParameterNames() const {
|
||||
auto parameter_names = std::vector<std::string>();
|
||||
for (auto ¶meter: *parameters_) {
|
||||
parameter_names.push_back(parameter.first);
|
||||
}
|
||||
return parameter_names;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Searches a particular database for the right kernel and precision
|
||||
|
|
|
@ -75,15 +75,19 @@ class Database {
|
|||
Database() = default;
|
||||
|
||||
// The constructor with a user-provided database overlay (potentially an empty vector)
|
||||
explicit Database(const Device &device, const std::vector<std::string> &routines,
|
||||
explicit Database(const Device &device, const std::string &kernel_name,
|
||||
const Precision precision, const std::vector<const DatabaseEntry*> &overlay);
|
||||
|
||||
// Accessor of values by key
|
||||
size_t operator[](const std::string key) const { return parameters_->find(key)->second; }
|
||||
size_t operator[](const std::string &key) const { return parameters_->find(key)->second; }
|
||||
bool exists(const std::string &key) const { return (parameters_->count(key) == 1); }
|
||||
|
||||
// Obtain a list of OpenCL pre-processor defines based on the parameters
|
||||
std::string GetDefines() const;
|
||||
|
||||
// Retrieves the names of all the parameters
|
||||
std::vector<std::string> GetParameterNames() const;
|
||||
|
||||
private:
|
||||
// Search method for a specified database, returning pointer (possibly a nullptr)
|
||||
ParametersPtr Search(const std::string &this_kernel, const std::string &this_type,
|
||||
|
@ -95,6 +99,31 @@ class Database {
|
|||
std::shared_ptr<Parameters> parameters_;
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Multiple databases together in a map
|
||||
class Databases {
|
||||
public:
|
||||
|
||||
explicit Databases(const std::vector<std::string> &kernel_names): kernel_names_(kernel_names) { }
|
||||
|
||||
// Database accessor
|
||||
Database& operator()(const std::string &kernel_name) { return databases_[kernel_name]; }
|
||||
|
||||
// Retrieves a parameter from the database
|
||||
size_t operator[](const std::string &key) const {
|
||||
for (const auto &kernel_name : kernel_names_) {
|
||||
const auto &kernel_db = databases_.find(kernel_name)->second;
|
||||
if (kernel_db.exists(key)) { return kernel_db[key]; }
|
||||
}
|
||||
throw RuntimeErrorCode(StatusCode::kDatabaseError);
|
||||
}
|
||||
|
||||
private:
|
||||
const std::vector<std::string> kernel_names_;
|
||||
std::unordered_map<std::string, Database> databases_;
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace clblast
|
||||
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry CopyHalf = {
|
||||
"Copy", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
|
||||
{ "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",8}, {"COPY_WPT",4} } },
|
||||
|
@ -26,7 +32,7 @@ const Database::DatabaseEntry CopyHalf = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",4}, {"COPY_WPT",8} } },
|
||||
{ "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -39,13 +45,14 @@ const Database::DatabaseEntry CopySingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
|
||||
{ "Ellesmere", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",8} } },
|
||||
{ "Hawaii", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
|
||||
{ "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
|
||||
{ "Tahiti", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "Tonga", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
|
||||
{ "Turks", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -96,6 +103,7 @@ const Database::DatabaseEntry CopySingle = {
|
|||
{ "GeForce GTX TITAN", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",4} } },
|
||||
{ "GeForce GTX TITAN Black", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",4}, {"COPY_WPT",8} } },
|
||||
{ "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "TITAN X (Pascal)", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",4}, {"COPY_WPT",1} } },
|
||||
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
|
||||
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",4}, {"COPY_WPT",1} } },
|
||||
|
@ -103,7 +111,7 @@ const Database::DatabaseEntry CopySingle = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",4} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -116,13 +124,14 @@ const Database::DatabaseEntry CopyComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Ellesmere", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",4} } },
|
||||
{ "Hawaii", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Pitcairn", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "Tonga", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "Turks", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",4}, {"COPY_WPT",1} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
|
||||
{ "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // Intel CPUs
|
||||
|
@ -165,14 +174,15 @@ const Database::DatabaseEntry CopyComplexSingle = {
|
|||
{ "GeForce GTX 980", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
|
||||
{ "Tesla K40m", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -185,12 +195,13 @@ const Database::DatabaseEntry CopyDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Ellesmere", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
|
||||
{ "Hawaii", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "Oland", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } },
|
||||
{ "Pitcairn", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Tahiti", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",1} } },
|
||||
{ "Tonga", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",4} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",4} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -229,14 +240,15 @@ const Database::DatabaseEntry CopyDouble = {
|
|||
{ "GeForce GTX TITAN", { {"COPY_DIMX",16}, {"COPY_DIMY",32}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } },
|
||||
{ "GeForce GTX TITAN X", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
|
||||
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
|
||||
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",16}, {"COPY_VW",2}, {"COPY_WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",2} } },
|
||||
{ "default", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",1} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -249,6 +261,7 @@ const Database::DatabaseEntry CopyComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Ellesmere", { {"COPY_DIMX",8}, {"COPY_DIMY",32}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "Hawaii", { {"COPY_DIMX",32}, {"COPY_DIMY",8}, {"COPY_VW",2}, {"COPY_WPT",8} } },
|
||||
{ "Oland", { {"COPY_DIMX",8}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Pitcairn", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
|
@ -293,6 +306,7 @@ const Database::DatabaseEntry CopyComplexDouble = {
|
|||
{ "GeForce GTX TITAN", { {"COPY_DIMX",16}, {"COPY_DIMY",16}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"COPY_DIMX",16}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"COPY_DIMX",32}, {"COPY_DIMY",32}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "Tesla K20m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",2} } },
|
||||
{ "Tesla K40m", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
{ "default", { {"COPY_DIMX",8}, {"COPY_DIMY",8}, {"COPY_VW",1}, {"COPY_WPT",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry PadHalf = {
|
||||
"Pad", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
|
||||
|
@ -39,6 +45,7 @@ const Database::DatabaseEntry PadSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Ellesmere", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } },
|
||||
{ "Hawaii", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
|
||||
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Pitcairn", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
|
@ -96,6 +103,7 @@ const Database::DatabaseEntry PadSingle = {
|
|||
{ "GeForce GTX TITAN", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "Tesla K40m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",2} } },
|
||||
|
@ -116,6 +124,7 @@ const Database::DatabaseEntry PadComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Ellesmere", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",4} } },
|
||||
{ "Hawaii", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
|
@ -173,6 +182,7 @@ const Database::DatabaseEntry PadComplexSingle = {
|
|||
{ "GeForce GTX TITAN", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
|
@ -180,7 +190,7 @@ const Database::DatabaseEntry PadComplexSingle = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -193,12 +203,13 @@ const Database::DatabaseEntry PadDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Ellesmere", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "Hawaii", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Pitcairn", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Tahiti", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Tonga", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",4}, {"PAD_WPTY",1} } },
|
||||
{ "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "default", { {"PAD_DIMX",16}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -237,6 +248,7 @@ const Database::DatabaseEntry PadDouble = {
|
|||
{ "GeForce GTX TITAN", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Tesla K20m", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Tesla K40m", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "default", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
|
@ -257,6 +269,7 @@ const Database::DatabaseEntry PadComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Ellesmere", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Hawaii", { {"PAD_DIMX",32}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "Oland", { {"PAD_DIMX",8}, {"PAD_DIMY",16}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "Pitcairn", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
|
@ -301,6 +314,7 @@ const Database::DatabaseEntry PadComplexDouble = {
|
|||
{ "GeForce GTX TITAN", { {"PAD_DIMX",8}, {"PAD_DIMY",32}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",4} } },
|
||||
{ "GeForce GTX TITAN X", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",2}, {"PAD_WPTY",1} } },
|
||||
{ "Tesla K20m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",2} } },
|
||||
{ "Tesla K40m", { {"PAD_DIMX",8}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
{ "default", { {"PAD_DIMX",16}, {"PAD_DIMY",8}, {"PAD_WPTX",1}, {"PAD_WPTY",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry PadtransposeHalf = {
|
||||
"Padtranspose", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
|
||||
|
@ -39,6 +45,7 @@ const Database::DatabaseEntry PadtransposeSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Ellesmere", { {"PADTRA_PAD",1}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Hawaii", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
|
@ -96,6 +103,7 @@ const Database::DatabaseEntry PadtransposeSingle = {
|
|||
{ "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
|
||||
{ "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
|
||||
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
|
||||
{ "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",2} } },
|
||||
|
@ -116,6 +124,7 @@ const Database::DatabaseEntry PadtransposeComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Ellesmere", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Hawaii", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
|
||||
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
|
@ -173,6 +182,7 @@ const Database::DatabaseEntry PadtransposeComplexSingle = {
|
|||
{ "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
|
@ -193,6 +203,7 @@ const Database::DatabaseEntry PadtransposeDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Ellesmere", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Hawaii", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
|
||||
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",4} } },
|
||||
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
|
@ -237,6 +248,7 @@ const Database::DatabaseEntry PadtransposeDouble = {
|
|||
{ "GeForce GTX TITAN", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",2} } },
|
||||
{ "Tesla K20m", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
|
@ -244,7 +256,7 @@ const Database::DatabaseEntry PadtransposeDouble = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",2} } },
|
||||
{ "default", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -257,6 +269,7 @@ const Database::DatabaseEntry PadtransposeComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Ellesmere", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Hawaii", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Oland", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
{ "Pitcairn", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",4} } },
|
||||
|
@ -301,6 +314,7 @@ const Database::DatabaseEntry PadtransposeComplexDouble = {
|
|||
{ "GeForce GTX TITAN", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"PADTRA_PAD",0}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"PADTRA_PAD",1}, {"PADTRA_TILE",32}, {"PADTRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"PADTRA_PAD",0}, {"PADTRA_TILE",8}, {"PADTRA_WPT",1} } },
|
||||
{ "Tesla K20m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "Tesla K40m", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
{ "default", { {"PADTRA_PAD",1}, {"PADTRA_TILE",16}, {"PADTRA_WPT",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry TransposeHalf = {
|
||||
"Transpose", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
|
||||
{ "default", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
|
||||
|
@ -26,7 +32,7 @@ const Database::DatabaseEntry TransposeHalf = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",8} } },
|
||||
{ "default", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -39,13 +45,14 @@ const Database::DatabaseEntry TransposeSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
|
||||
{ "Ellesmere", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Hawaii", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",8} } },
|
||||
{ "Oland", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Pitcairn", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
{ "Tahiti", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Tonga", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "Turks", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "default", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -96,6 +103,7 @@ const Database::DatabaseEntry TransposeSingle = {
|
|||
{ "GeForce GTX TITAN", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
|
||||
{ "GeForce GTX TITAN Black", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
|
||||
{ "GeForce GTX TITAN X", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
|
||||
{ "TITAN X (Pascal)", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Tesla K20m", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
|
||||
{ "Tesla K40m", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
|
||||
{ "default", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",4} } },
|
||||
|
@ -116,6 +124,7 @@ const Database::DatabaseEntry TransposeComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Ellesmere", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Hawaii", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
{ "Oland", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "Pitcairn", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
|
@ -167,6 +176,7 @@ const Database::DatabaseEntry TransposeComplexSingle = {
|
|||
{ "GeForce GTX TITAN", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
|
@ -187,6 +197,7 @@ const Database::DatabaseEntry TransposeDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Ellesmere", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",4} } },
|
||||
{ "Hawaii", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
{ "Oland", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
|
@ -231,6 +242,7 @@ const Database::DatabaseEntry TransposeDouble = {
|
|||
{ "GeForce GTX TITAN", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"TRA_DIM",8}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
|
||||
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",2} } },
|
||||
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
|
@ -251,6 +263,7 @@ const Database::DatabaseEntry TransposeComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"TRA_DIM",4}, {"TRA_PAD",1}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "Ellesmere", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
{ "Hawaii", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",2} } },
|
||||
{ "Oland", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
{ "Pitcairn", { {"TRA_DIM",4}, {"TRA_PAD",0}, {"TRA_SHUFFLE",1}, {"TRA_WPT",1} } },
|
||||
|
@ -289,6 +302,7 @@ const Database::DatabaseEntry TransposeComplexDouble = {
|
|||
{ "GeForce GTX TITAN", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"TRA_DIM",16}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"TRA_DIM",32}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"TRA_DIM",8}, {"TRA_PAD",0}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "Tesla K20m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "Tesla K40m", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
{ "default", { {"TRA_DIM",16}, {"TRA_PAD",1}, {"TRA_SHUFFLE",0}, {"TRA_WPT",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XaxpyHalf = {
|
||||
"Xaxpy", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"VW",4}, {"WGS",128}, {"WPT",4} } },
|
||||
{ "default", { {"VW",4}, {"WGS",128}, {"WPT",4} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
|
@ -26,7 +32,7 @@ const Database::DatabaseEntry XaxpyHalf = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"VW",8}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "default", { {"VW",8}, {"WGS",256}, {"WPT",4} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -39,13 +45,14 @@ const Database::DatabaseEntry XaxpySingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"VW",1}, {"WGS",64}, {"WPT",4} } },
|
||||
{ "Hawaii", { {"VW",2}, {"WGS",64}, {"WPT",2} } },
|
||||
{ "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Pitcairn", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Tahiti", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "Tonga", { {"VW",1}, {"WGS",256}, {"WPT",8} } },
|
||||
{ "Turks", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
|
||||
{ "default", { {"VW",2}, {"WGS",64}, {"WPT",2} } },
|
||||
{ "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -96,6 +103,7 @@ const Database::DatabaseEntry XaxpySingle = {
|
|||
{ "GeForce GTX TITAN", { {"VW",4}, {"WGS",256}, {"WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW",4}, {"WGS",128}, {"WPT",4} } },
|
||||
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Tesla K20m", { {"VW",4}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Tesla K40m", { {"VW",4}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "default", { {"VW",4}, {"WGS",256}, {"WPT",1} } },
|
||||
|
@ -116,6 +124,7 @@ const Database::DatabaseEntry XaxpyComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW",2}, {"WGS",64}, {"WPT",8} } },
|
||||
{ "Ellesmere", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
|
||||
{ "Hawaii", { {"VW",1}, {"WGS",128}, {"WPT",2} } },
|
||||
{ "Oland", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Pitcairn", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
|
@ -173,6 +182,7 @@ const Database::DatabaseEntry XaxpyComplexSingle = {
|
|||
{ "GeForce GTX TITAN", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW",1}, {"WGS",128}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "Tesla K20m", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Tesla K40m", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "default", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
|
||||
|
@ -193,6 +203,7 @@ const Database::DatabaseEntry XaxpyDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"VW",2}, {"WGS",64}, {"WPT",4} } },
|
||||
{ "Hawaii", { {"VW",1}, {"WGS",64}, {"WPT",2} } },
|
||||
{ "Oland", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "Pitcairn", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
|
@ -237,6 +248,7 @@ const Database::DatabaseEntry XaxpyDouble = {
|
|||
{ "GeForce GTX TITAN", { {"VW",2}, {"WGS",1024}, {"WPT",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",512}, {"WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"VW",2}, {"WGS",256}, {"WPT",4} } },
|
||||
{ "Tesla K20m", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Tesla K40m", { {"VW",2}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "default", { {"VW",2}, {"WGS",1024}, {"WPT",1} } },
|
||||
|
@ -244,7 +256,7 @@ const Database::DatabaseEntry XaxpyDouble = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"VW",1}, {"WGS",128}, {"WPT",2} } },
|
||||
{ "default", { {"VW",2}, {"WGS",256}, {"WPT",1} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -257,6 +269,7 @@ const Database::DatabaseEntry XaxpyComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Hawaii", { {"VW",2}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "Oland", { {"VW",1}, {"WGS",256}, {"WPT",1} } },
|
||||
{ "Pitcairn", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
|
@ -301,6 +314,7 @@ const Database::DatabaseEntry XaxpyComplexDouble = {
|
|||
{ "GeForce GTX TITAN", { {"VW",1}, {"WGS",64}, {"WPT",4} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW",1}, {"WGS",128}, {"WPT",4} } },
|
||||
{ "GeForce GTX TITAN X", { {"VW",1}, {"WGS",1024}, {"WPT",1} } },
|
||||
{ "TITAN X (Pascal)", { {"VW",1}, {"WGS",128}, {"WPT",1} } },
|
||||
{ "Tesla K20m", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "Tesla K40m", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
{ "default", { {"VW",1}, {"WGS",64}, {"WPT",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XdotHalf = {
|
||||
"Xdot", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"WGS1",256}, {"WGS2",64} } },
|
||||
{ "default", { {"WGS1",256}, {"WGS2",64} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",32} } },
|
||||
|
@ -39,12 +45,13 @@ const Database::DatabaseEntry XdotSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "Ellesmere", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "Tahiti", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "Tonga", { {"WGS1",64}, {"WGS2",32} } },
|
||||
{ "Turks", { {"WGS1",128}, {"WGS2",64} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",64} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",32} } },
|
||||
}
|
||||
},
|
||||
{ // Intel CPUs
|
||||
|
@ -79,8 +86,9 @@ const Database::DatabaseEntry XdotSingle = {
|
|||
{ "GeForce GTX 980", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",512}, {"WGS2",64} } },
|
||||
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",256}, {"WGS2",512} } },
|
||||
{ "Tesla K20m", { {"WGS1",1024}, {"WGS2",32} } },
|
||||
{ "default", { {"WGS1",256}, {"WGS2",64} } },
|
||||
{ "default", { {"WGS1",256}, {"WGS2",512} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
|
@ -98,6 +106,7 @@ const Database::DatabaseEntry XdotComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
|
||||
{ "Ellesmere", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Oland", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Tahiti", { {"WGS1",64}, {"WGS2",32} } },
|
||||
|
@ -138,6 +147,7 @@ const Database::DatabaseEntry XdotComplexSingle = {
|
|||
{ "GeForce GTX 980", { {"WGS1",256}, {"WGS2",64} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",128}, {"WGS2",64} } },
|
||||
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } },
|
||||
{ "default", { {"WGS1",512}, {"WGS2",64} } },
|
||||
}
|
||||
|
@ -157,6 +167,7 @@ const Database::DatabaseEntry XdotDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",128} } },
|
||||
{ "Ellesmere", { {"WGS1",128}, {"WGS2",64} } },
|
||||
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Pitcairn", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
|
||||
|
@ -185,6 +196,7 @@ const Database::DatabaseEntry XdotDouble = {
|
|||
{ "GeForce GTX 980", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",128}, {"WGS2",64} } },
|
||||
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Tesla K20m", { {"WGS1",512}, {"WGS2",32} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",128} } },
|
||||
}
|
||||
|
@ -204,6 +216,7 @@ const Database::DatabaseEntry XdotComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",32} } },
|
||||
{ "Ellesmere", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Oland", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Pitcairn", { {"WGS1",256}, {"WGS2",32} } },
|
||||
{ "Tahiti", { {"WGS1",256}, {"WGS2",32} } },
|
||||
|
@ -232,6 +245,7 @@ const Database::DatabaseEntry XdotComplexDouble = {
|
|||
{ "GeForce GTX 980", { {"WGS1",64}, {"WGS2",32} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "GeForce GTX TITAN X", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",128}, {"WGS2",512} } },
|
||||
{ "Tesla K20m", { {"WGS1",128}, {"WGS2",32} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",64} } },
|
||||
}
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XgemmHalf = {
|
||||
"Xgemm", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
|
||||
|
@ -38,6 +44,7 @@ const Database::DatabaseEntry XgemmSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",8} } },
|
||||
{ "Ellesmere", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
|
||||
{ "Hawaii", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
|
||||
{ "Oland", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
|
||||
{ "Pitcairn", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
|
@ -95,9 +102,10 @@ const Database::DatabaseEntry XgemmSingle = {
|
|||
{ "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",8} } },
|
||||
{ "TITAN X (Pascal)", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",2} } },
|
||||
{ "Tesla K20m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
|
||||
{ "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",2} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
|
@ -115,6 +123,7 @@ const Database::DatabaseEntry XgemmComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",8} } },
|
||||
{ "Ellesmere", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
|
||||
{ "Hawaii", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Oland", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",1}, {"VWM",2}, {"VWN",4} } },
|
||||
{ "Pitcairn", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
|
||||
|
@ -172,14 +181,15 @@ const Database::DatabaseEntry XgemmComplexSingle = {
|
|||
{ "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
|
||||
{ "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",4} } },
|
||||
{ "TITAN X (Pascal)", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Tesla K20m", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
|
||||
{ "Tesla K40m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",1} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -192,6 +202,7 @@ const Database::DatabaseEntry XgemmDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",4} } },
|
||||
{ "Ellesmere", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "Hawaii", { {"KWG",16}, {"KWI",8}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",128}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
|
||||
{ "Oland", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Pitcairn", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
|
||||
|
@ -236,6 +247,7 @@ const Database::DatabaseEntry XgemmDouble = {
|
|||
{ "GeForce GTX TITAN", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",16}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"KWG",16}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "TITAN X (Pascal)", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",4}, {"VWN",1} } },
|
||||
{ "Tesla K20m", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Tesla K40m", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",64}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",1}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",4} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",4} } },
|
||||
|
@ -256,12 +268,13 @@ const Database::DatabaseEntry XgemmComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "Ellesmere", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Hawaii", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",1}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
|
||||
{ "Oland", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",32}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",4} } },
|
||||
{ "Pitcairn", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",32}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Tahiti", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Tonga", { {"KWG",16}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",1}, {"SB",1}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -299,14 +312,15 @@ const Database::DatabaseEntry XgemmComplexDouble = {
|
|||
{ "GeForce GTX 980", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",1}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"KWG",32}, {"KWI",8}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "TITAN X (Pascal)", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",16}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Tesla K20m", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "Tesla K40m", { {"KWG",16}, {"KWI",8}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",32}, {"NDIMC",16}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",32}, {"MDIMC",32}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",32}, {"NDIMC",32}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",1} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",2} } },
|
||||
{ "default", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",16}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",2} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XgemmDirectHalf = {
|
||||
"XgemmDirect", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"KWID",8}, {"MDIMAD",32}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",32}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "default", { {"KWID",8}, {"MDIMAD",32}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",32}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",8} } },
|
||||
|
@ -25,7 +31,7 @@ const Database::DatabaseEntry XgemmDirectHalf = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",8} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -38,9 +44,10 @@ const Database::DatabaseEntry XgemmDirectSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } },
|
||||
{ "Ellesmere", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",32}, {"NDIMCD",32}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "Tonga", { {"KWID",16}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",32}, {"NDIMCD",8}, {"PADA",0}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "Turks", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } },
|
||||
}
|
||||
},
|
||||
{ // Intel CPUs
|
||||
|
@ -62,12 +69,13 @@ const Database::DatabaseEntry XgemmDirectSingle = {
|
|||
{ "GeForce GTX 1080", { {"KWID",16}, {"MDIMAD",16}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",2}, {"WGD",32} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",2}, {"WGD",32} } },
|
||||
{ "TITAN X (Pascal)", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",2}, {"WGD",32} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",2}, {"WGD",32} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",4}, {"VWND",4}, {"WGD",32} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -104,12 +112,13 @@ const Database::DatabaseEntry XgemmDirectComplexSingle = {
|
|||
{ "GeForce GTX 1080", { {"KWID",8}, {"MDIMAD",8}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } },
|
||||
{ "GeForce GTX 750 Ti", { {"KWID",16}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "TITAN X (Pascal)", { {"KWID",16}, {"MDIMAD",16}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -122,6 +131,7 @@ const Database::DatabaseEntry XgemmDirectDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "Ellesmere", { {"KWID",8}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "Tonga", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
}
|
||||
|
@ -138,12 +148,13 @@ const Database::DatabaseEntry XgemmDirectDouble = {
|
|||
{ "GeForce GTX 1080", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } },
|
||||
{ "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",4}, {"WGD",32} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWID",8}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "TITAN X (Pascal)", { {"KWID",8}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",32} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",2}, {"VWND",2}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -156,6 +167,7 @@ const Database::DatabaseEntry XgemmDirectComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "Ellesmere", { {"KWID",16}, {"MDIMAD",32}, {"MDIMCD",32}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",0}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "Tonga", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",16}, {"NDIMCD",16}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
}
|
||||
|
@ -172,12 +184,13 @@ const Database::DatabaseEntry XgemmDirectComplexDouble = {
|
|||
{ "GeForce GTX 1080", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "GeForce GTX 750 Ti", { {"KWID",2}, {"MDIMAD",32}, {"MDIMCD",32}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",32} } },
|
||||
{ "GeForce GTX TITAN Black", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",8} } },
|
||||
{ "TITAN X (Pascal)", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",16}, {"NDIMCD",8}, {"PADA",1}, {"PADB",0}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",16}, {"MDIMCD",16}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",1}, {"WGD",16} } },
|
||||
{ "default", { {"KWID",2}, {"MDIMAD",8}, {"MDIMCD",8}, {"NDIMBD",8}, {"NDIMCD",8}, {"PADA",1}, {"PADB",1}, {"VWMD",1}, {"VWND",2}, {"WGD",16} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XgemvHalf = {
|
||||
"Xgemv", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "default", { {"WGS1",256}, {"WPT1",1} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",64}, {"WPT1",1} } },
|
||||
|
@ -39,6 +45,7 @@ const Database::DatabaseEntry XgemvSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",128}, {"WPT1",1} } },
|
||||
{ "Ellesmere", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "Hawaii", { {"WGS1",128}, {"WPT1",1} } },
|
||||
{ "Oland", { {"WGS1",128}, {"WPT1",1} } },
|
||||
{ "Pitcairn", { {"WGS1",256}, {"WPT1",1} } },
|
||||
|
@ -89,9 +96,10 @@ const Database::DatabaseEntry XgemvSingle = {
|
|||
{ "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "Tesla K20m", { {"WGS1",128}, {"WPT1",1} } },
|
||||
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "default", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "default", { {"WGS1",128}, {"WPT1",1} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
|
@ -109,6 +117,7 @@ const Database::DatabaseEntry XgemvComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "Ellesmere", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "Hawaii", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "Oland", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "Pitcairn", { {"WGS1",64}, {"WPT1",1} } },
|
||||
|
@ -157,6 +166,7 @@ const Database::DatabaseEntry XgemvComplexSingle = {
|
|||
{ "GeForce GTX 750 Ti", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "default", { {"WGS1",64}, {"WPT1",1} } },
|
||||
}
|
||||
},
|
||||
|
@ -175,6 +185,7 @@ const Database::DatabaseEntry XgemvDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "Ellesmere", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "Hawaii", { {"WGS1",128}, {"WPT1",1} } },
|
||||
{ "Oland", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "Pitcairn", { {"WGS1",256}, {"WPT1",1} } },
|
||||
|
@ -212,6 +223,7 @@ const Database::DatabaseEntry XgemvDouble = {
|
|||
{ "GeForce GTX TITAN", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "Tesla K20m", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "Tesla K40m", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "default", { {"WGS1",128}, {"WPT1",1} } },
|
||||
|
@ -232,6 +244,7 @@ const Database::DatabaseEntry XgemvComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "Ellesmere", { {"WGS1",32}, {"WPT1",1} } },
|
||||
{ "Hawaii", { {"WGS1",64}, {"WPT1",1} } },
|
||||
{ "Oland", { {"WGS1",256}, {"WPT1",1} } },
|
||||
{ "Pitcairn", { {"WGS1",256}, {"WPT1",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XgemvFastHalf = {
|
||||
"XgemvFast", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"VW2",1}, {"WGS2",32}, {"WPT2",1} } },
|
||||
{ "default", { {"VW2",1}, {"WGS2",32}, {"WPT2",1} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
|
||||
|
@ -39,6 +45,7 @@ const Database::DatabaseEntry XgemvFastSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
|
||||
{ "Ellesmere", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Hawaii", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Oland", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Pitcairn", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
|
@ -89,6 +96,7 @@ const Database::DatabaseEntry XgemvFastSingle = {
|
|||
{ "GeForce GTX TITAN", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "TITAN X (Pascal)", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
|
||||
{ "Tesla K20m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
|
@ -109,6 +117,7 @@ const Database::DatabaseEntry XgemvFastComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW2",2}, {"WGS2",256}, {"WPT2",2} } },
|
||||
{ "Ellesmere", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Hawaii", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Oland", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Pitcairn", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
|
@ -170,6 +179,7 @@ const Database::DatabaseEntry XgemvFastDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "Ellesmere", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
|
||||
{ "Hawaii", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Oland", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Pitcairn", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
|
@ -207,6 +217,7 @@ const Database::DatabaseEntry XgemvFastDouble = {
|
|||
{ "GeForce GTX TITAN", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "GeForce GTX TITAN X", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
|
||||
{ "TITAN X (Pascal)", { {"VW2",1}, {"WGS2",32}, {"WPT2",1} } },
|
||||
{ "Tesla K20m", { {"VW2",1}, {"WGS2",128}, {"WPT2",1} } },
|
||||
{ "Tesla K40m", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "default", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
|
@ -227,6 +238,7 @@ const Database::DatabaseEntry XgemvFastComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "Ellesmere", { {"VW2",1}, {"WGS2",16}, {"WPT2",1} } },
|
||||
{ "Hawaii", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
{ "Oland", { {"VW2",1}, {"WGS2",256}, {"WPT2",1} } },
|
||||
{ "Pitcairn", { {"VW2",1}, {"WGS2",64}, {"WPT2",1} } },
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XgemvFastRotHalf = {
|
||||
"XgemvFastRot", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
{ "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics Skylake ULT GT2", { {"VW3",8}, {"WGS3",128}, {"WPT3",32} } },
|
||||
|
@ -38,6 +44,7 @@ const Database::DatabaseEntry XgemvFastRotSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW3",8}, {"WGS3",64}, {"WPT3",32} } },
|
||||
{ "Ellesmere", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
{ "Tonga", { {"VW3",8}, {"WGS3",128}, {"WPT3",32} } },
|
||||
{ "Turks", { {"VW3",8}, {"WGS3",128}, {"WPT3",16} } },
|
||||
{ "default", { {"VW3",8}, {"WGS3",128}, {"WPT3",32} } },
|
||||
|
@ -67,6 +74,7 @@ const Database::DatabaseEntry XgemvFastRotSingle = {
|
|||
{ "GeForce GTX 750 Ti", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
{ "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW3",4}, {"WGS3",128}, {"WPT3",16} } },
|
||||
{ "TITAN X (Pascal)", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
{ "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
}
|
||||
},
|
||||
|
@ -85,6 +93,7 @@ const Database::DatabaseEntry XgemvFastRotComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW3",8}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "Ellesmere", { {"VW3",2}, {"WGS3",32}, {"WPT3",16} } },
|
||||
{ "Tonga", { {"VW3",4}, {"WGS3",32}, {"WPT3",32} } },
|
||||
{ "Turks", { {"VW3",4}, {"WGS3",32}, {"WPT3",8} } },
|
||||
{ "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",32} } },
|
||||
|
@ -123,6 +132,7 @@ const Database::DatabaseEntry XgemvFastRotDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "Ellesmere", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "Tonga", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "default", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
|
||||
}
|
||||
|
@ -141,6 +151,7 @@ const Database::DatabaseEntry XgemvFastRotDouble = {
|
|||
{ "GeForce GTX 750 Ti", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
|
||||
{ "GeForce GTX TITAN", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "GeForce GTX TITAN Black", { {"VW3",1}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "TITAN X (Pascal)", { {"VW3",8}, {"WGS3",32}, {"WPT3",16} } },
|
||||
{ "default", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
|
||||
}
|
||||
},
|
||||
|
@ -159,6 +170,7 @@ const Database::DatabaseEntry XgemvFastRotComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"VW3",4}, {"WGS3",32}, {"WPT3",16} } },
|
||||
{ "Ellesmere", { {"VW3",4}, {"WGS3",16}, {"WPT3",16} } },
|
||||
{ "Tonga", { {"VW3",4}, {"WGS3",16}, {"WPT3",8} } },
|
||||
{ "default", { {"VW3",8}, {"WGS3",32}, {"WPT3",16} } },
|
||||
}
|
||||
|
|
|
@ -17,6 +17,12 @@ namespace database {
|
|||
|
||||
const Database::DatabaseEntry XgerHalf = {
|
||||
"Xger", Precision::kHalf, {
|
||||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "Ellesmere", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "Intel(R) HD Graphics 5500 BroadWell U-Processor GT2", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
|
||||
|
@ -26,7 +32,7 @@ const Database::DatabaseEntry XgerHalf = {
|
|||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"WGS1",4}, {"WGS2",8}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -39,13 +45,14 @@ const Database::DatabaseEntry XgerSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"WGS1",64}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "Hawaii", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "Oland", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Tahiti", { {"WGS1",256}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Tonga", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "Turks", { {"WGS1",64}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",32}, {"WGS2",8}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -86,12 +93,13 @@ const Database::DatabaseEntry XgerSingle = {
|
|||
{ "GeForce GTX 750 Ti", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",128}, {"WGS2",4}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",256}, {"WGS2",1}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -104,13 +112,14 @@ const Database::DatabaseEntry XgerComplexSingle = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
|
||||
{ "Hawaii", { {"WGS1",64}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "Oland", { {"WGS1",4}, {"WGS2",8}, {"WPT",1} } },
|
||||
{ "Pitcairn", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "Tonga", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Turks", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -151,7 +160,8 @@ const Database::DatabaseEntry XgerComplexSingle = {
|
|||
{ "GeForce GTX 750 Ti", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",32}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
|
@ -169,12 +179,13 @@ const Database::DatabaseEntry XgerDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",32}, {"WGS2",4}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"WGS1",64}, {"WGS2",1}, {"WPT",4} } },
|
||||
{ "Hawaii", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "Oland", { {"WGS1",128}, {"WGS2",1}, {"WPT",2} } },
|
||||
{ "Pitcairn", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Tahiti", { {"WGS1",64}, {"WGS2",2}, {"WPT",1} } },
|
||||
{ "Tonga", { {"WGS1",8}, {"WGS2",16}, {"WPT",2} } },
|
||||
{ "default", { {"WGS1",32}, {"WGS2",8}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",128}, {"WGS2",2}, {"WPT",1} } },
|
||||
}
|
||||
},
|
||||
{ // ARM GPUs
|
||||
|
@ -204,6 +215,7 @@ const Database::DatabaseEntry XgerDouble = {
|
|||
{ "GeForce GTX 750 Ti", { {"WGS1",32}, {"WGS2",16}, {"WPT",1} } },
|
||||
{ "GeForce GTX TITAN", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",32}, {"WGS2",16}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",64}, {"WGS2",2}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
|
@ -222,6 +234,7 @@ const Database::DatabaseEntry XgerComplexDouble = {
|
|||
{ // AMD GPUs
|
||||
kDeviceTypeGPU, "AMD", {
|
||||
{ "AMD Radeon R9 M370X Compute Engine", { {"WGS1",64}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Ellesmere", { {"WGS1",8}, {"WGS2",16}, {"WPT",1} } },
|
||||
{ "Hawaii", { {"WGS1",128}, {"WGS2",1}, {"WPT",1} } },
|
||||
{ "Oland", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
|
||||
{ "Pitcairn", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
|
||||
|
@ -257,6 +270,7 @@ const Database::DatabaseEntry XgerComplexDouble = {
|
|||
{ "GeForce GTX 750 Ti", { {"WGS1",32}, {"WGS2",8}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN", { {"WGS1",32}, {"WGS2",4}, {"WPT",2} } },
|
||||
{ "GeForce GTX TITAN Black", { {"WGS1",16}, {"WGS2",16}, {"WPT",2} } },
|
||||
{ "TITAN X (Pascal)", { {"WGS1",64}, {"WGS2",4}, {"WPT",1} } },
|
||||
{ "default", { {"WGS1",16}, {"WGS2",8}, {"WPT",2} } },
|
||||
}
|
||||
},
|
||||
|
|
|
@ -21,36 +21,66 @@
|
|||
namespace clblast {
|
||||
// =================================================================================================
|
||||
|
||||
// For each kernel this map contains a list of routines it is used in
|
||||
const std::vector<std::string> Routine::routines_axpy = {"AXPY", "COPY", "SCAL", "SWAP"};
|
||||
const std::vector<std::string> Routine::routines_dot = {"AMAX", "ASUM", "DOT", "DOTC", "DOTU", "MAX", "MIN", "NRM2", "SUM"};
|
||||
const std::vector<std::string> Routine::routines_ger = {"GER", "GERC", "GERU", "HER", "HER2", "HPR", "HPR2", "SPR", "SPR2", "SYR", "SYR2"};
|
||||
// Level-2 routines associated with the Xgemv kernel family (the look-up table in this file maps
// "Xgemv", "XgemvFast", "XgemvFastRot" and "Xtrsv" to this list). The entries are BLAS routine names
// and are presumably matched by exact string comparison, so the spelling matters: the triangular
// banded matrix-vector routine is "TBMV".
const std::vector<std::string> Routine::routines_gemv = {"GBMV", "GEMV", "HBMV", "HEMV", "HPMV", "SBMV", "SPMV", "SYMV", "TBMV", "TPMV", "TRMV", "TRSV"};
|
||||
const std::vector<std::string> Routine::routines_gemm = {"GEMM", "HEMM", "SYMM", "TRMM"};
|
||||
const std::vector<std::string> Routine::routines_gemm_syrk = {"GEMM", "HEMM", "HER2K", "HERK", "SYMM", "SYR2K", "SYRK", "TRMM", "TRSM"};
|
||||
const std::vector<std::string> Routine::routines_trsm = {"TRSM"};
|
||||
const std::unordered_map<std::string, const std::vector<std::string>> Routine::routines_by_kernel = {
|
||||
{"Xaxpy", routines_axpy},
|
||||
{"Xdot", routines_dot},
|
||||
{"Xgemv", routines_gemv},
|
||||
{"XgemvFast", routines_gemv},
|
||||
{"XgemvFastRot", routines_gemv},
|
||||
{"Xtrsv", routines_gemv},
|
||||
{"Xger", routines_ger},
|
||||
{"Copy", routines_gemm_syrk},
|
||||
{"Pad", routines_gemm_syrk},
|
||||
{"Transpose", routines_gemm_syrk},
|
||||
{"Padtranspose", routines_gemm_syrk},
|
||||
{"Xgemm", routines_gemm_syrk},
|
||||
{"XgemmDirect", routines_gemm},
|
||||
{"KernelSelection", routines_gemm},
|
||||
{"Invert", routines_trsm},
|
||||
};
|
||||
// =================================================================================================
|
||||
|
||||
// The constructor does all heavy work, errors are returned as exceptions
|
||||
Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
|
||||
const std::vector<std::string> &routines, const Precision precision,
|
||||
const std::vector<std::string> &kernel_names, const Precision precision,
|
||||
const std::vector<const Database::DatabaseEntry*> &userDatabase,
|
||||
std::initializer_list<const char *> source):
|
||||
precision_(precision),
|
||||
routine_name_(name),
|
||||
kernel_names_(kernel_names),
|
||||
queue_(queue),
|
||||
event_(event),
|
||||
context_(queue_.GetContext()),
|
||||
device_(queue_.GetDevice()),
|
||||
device_name_(device_.Name()) {
|
||||
device_name_(device_.Name()),
|
||||
db_(kernel_names) {
|
||||
|
||||
InitDatabase(routines, userDatabase);
|
||||
InitDatabase(userDatabase);
|
||||
InitProgram(source);
|
||||
}
|
||||
|
||||
void Routine::InitDatabase(const std::vector<std::string> &routines,
|
||||
const std::vector<const Database::DatabaseEntry*> &userDatabase) {
|
||||
void Routine::InitDatabase(const std::vector<const Database::DatabaseEntry*> &userDatabase) {
|
||||
for (const auto &kernel_name : kernel_names_) {
|
||||
|
||||
// Queries the cache to see whether or not the kernel parameter database is already there
|
||||
bool has_db;
|
||||
db_ = DatabaseCache::Instance().Get(DatabaseKeyRef{ precision_, device_name_, routines },
|
||||
&has_db);
|
||||
if (has_db) { return; }
|
||||
// Queries the cache to see whether or not the kernel parameter database is already there
|
||||
bool has_db;
|
||||
db_(kernel_name) = DatabaseCache::Instance().Get(DatabaseKeyRef{ precision_, device_name_, kernel_name },
|
||||
&has_db);
|
||||
if (has_db) { continue; }
|
||||
|
||||
// Builds the parameter database for this device and routine set and stores it in the cache
|
||||
db_ = Database(device_, routines, precision_, userDatabase);
|
||||
DatabaseCache::Instance().Store(DatabaseKey{ precision_, device_name_, routines },
|
||||
Database{ db_ });
|
||||
// Builds the parameter database for this device and routine set and stores it in the cache
|
||||
db_(kernel_name) = Database(device_, kernel_name, precision_, userDatabase);
|
||||
DatabaseCache::Instance().Store(DatabaseKey{ precision_, device_name_, kernel_name },
|
||||
Database{ db_(kernel_name) });
|
||||
}
|
||||
}
|
||||
|
||||
void Routine::InitProgram(std::initializer_list<const char *> source) {
|
||||
|
@ -96,7 +126,10 @@ void Routine::InitProgram(std::initializer_list<const char *> source) {
|
|||
}
|
||||
|
||||
// Collects the parameters for this device in the form of defines, and adds the precision
|
||||
auto source_string = db_.GetDefines();
|
||||
auto source_string = std::string{""};
|
||||
for (const auto &kernel_name : kernel_names_) {
|
||||
source_string += db_(kernel_name).GetDefines();
|
||||
}
|
||||
source_string += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
|
||||
|
||||
// Adds the name of the routine as a define
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "utilities/utilities.hpp"
|
||||
#include "cache.hpp"
|
||||
|
@ -42,22 +43,32 @@ class Routine {
|
|||
const std::vector<const Database::DatabaseEntry*> &userDatabase,
|
||||
std::initializer_list<const char *> source);
|
||||
|
||||
// List of kernel-routine look-ups
|
||||
static const std::vector<std::string> routines_axpy;
|
||||
static const std::vector<std::string> routines_dot;
|
||||
static const std::vector<std::string> routines_ger;
|
||||
static const std::vector<std::string> routines_gemv;
|
||||
static const std::vector<std::string> routines_gemm;
|
||||
static const std::vector<std::string> routines_gemm_syrk;
|
||||
static const std::vector<std::string> routines_trsm;
|
||||
static const std::unordered_map<std::string, const std::vector<std::string>> routines_by_kernel;
|
||||
|
||||
private:
|
||||
|
||||
// Initializes program_, fetching cached program or building one
|
||||
void InitProgram(std::initializer_list<const char *> source);
|
||||
|
||||
// Initializes db_, fetching cached database or building one
|
||||
void InitDatabase(const std::vector<std::string> &routines,
|
||||
const std::vector<const Database::DatabaseEntry*> &userDatabase);
|
||||
void InitDatabase(const std::vector<const Database::DatabaseEntry*> &userDatabase);
|
||||
|
||||
protected:
|
||||
|
||||
// Non-static variable for the precision
|
||||
const Precision precision_;
|
||||
|
||||
// The routine's name
|
||||
// The routine's name and the corresponding kernels
|
||||
const std::string routine_name_;
|
||||
const std::vector<std::string> kernel_names_;
|
||||
|
||||
// The OpenCL objects, accessible only from derived classes
|
||||
Queue queue_;
|
||||
|
@ -72,7 +83,7 @@ class Routine {
|
|||
Program program_;
|
||||
|
||||
// Connection to the database for all the device-specific parameters
|
||||
Database db_;
|
||||
Databases db_;
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
|
|
@ -36,7 +36,7 @@ void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
|
|||
// Sets all elements of a matrix to a constant value
|
||||
template <typename T>
|
||||
void FillMatrix(Queue &queue, const Device &device,
|
||||
const Program &program, const Database &,
|
||||
const Program &program, const Databases &,
|
||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||
const size_t m, const size_t n, const size_t ld, const size_t offset,
|
||||
const Buffer<T> &dest,
|
||||
|
@ -56,7 +56,7 @@ void FillMatrix(Queue &queue, const Device &device,
|
|||
// Sets all elements of a vector to a constant value
|
||||
template <typename T>
|
||||
void FillVector(Queue &queue, const Device &device,
|
||||
const Program &program, const Database &,
|
||||
const Program &program, const Databases &,
|
||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||
const size_t n, const size_t inc, const size_t offset,
|
||||
const Buffer<T> &dest,
|
||||
|
@ -78,7 +78,7 @@ void FillVector(Queue &queue, const Device &device,
|
|||
// to write to symmetric and triangular matrices through optional arguments.
|
||||
template <typename T>
|
||||
void PadCopyTransposeMatrix(Queue &queue, const Device &device,
|
||||
const Database &db,
|
||||
const Databases &db,
|
||||
EventPointer event, const std::vector<Event> &waitForEvents,
|
||||
const size_t src_one, const size_t src_two,
|
||||
const size_t src_ld, const size_t src_offset,
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace clblast {
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
|
||||
Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot", "Xtrsv"}, PrecisionValue<T>(), {}, {
|
||||
Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "Xtrsv"}, PrecisionValue<T>(), {}, {
|
||||
#include "../../kernels/level2/xgemv.opencl"
|
||||
#include "../../kernels/level2/xgemv_fast.opencl"
|
||||
#include "../../kernels/level2/xtrsv.opencl"
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This file contains the tests for the OverrideParameters function
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
#include "utilities/utilities.hpp"
|
||||
#include "test/routines/level3/xgemm.hpp"
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
namespace clblast {
|
||||
// =================================================================================================
|
||||
|
||||
template <typename T>
|
||||
size_t RunOverrideTests(int argc, char *argv[], const bool silent, const std::string &routine_name) {
|
||||
auto arguments = RetrieveCommandLineArguments(argc, argv);
|
||||
auto errors = size_t{0};
|
||||
auto passed = size_t{0};
|
||||
auto example_routine = TestXgemm<T>();
|
||||
constexpr auto kSeed = 42; // fixed seed for reproducibility
|
||||
|
||||
// Determines the test settings
|
||||
const auto kernel_name = std::string{"Xgemm"};
|
||||
const auto precision = PrecisionValue<T>();
|
||||
const auto valid_settings = std::vector<std::unordered_map<std::string,size_t>>{
|
||||
{ {"KWG",16}, {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",16}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",16}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} },
|
||||
{ {"KWG",32}, {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",32}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",32}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} },
|
||||
};
|
||||
const auto invalid_settings = std::vector<std::unordered_map<std::string,size_t>>{
|
||||
{ {"KWI",2}, {"MDIMA",4}, {"MDIMC",4}, {"MWG",16}, {"NDIMB",4}, {"NDIMC",4}, {"NWG",16}, {"SA",0} },
|
||||
};
|
||||
|
||||
// Retrieves the arguments
|
||||
auto help = std::string{"Options given/available:\n"};
|
||||
const auto platform_id = GetArgument(arguments, help, kArgPlatform, ConvertArgument(std::getenv("CLBLAST_PLATFORM"), size_t{0}));
|
||||
const auto device_id = GetArgument(arguments, help, kArgDevice, ConvertArgument(std::getenv("CLBLAST_DEVICE"), size_t{0}));
|
||||
auto args = Arguments<T>{};
|
||||
args.m = GetArgument(arguments, help, kArgM, size_t{256});
|
||||
args.n = GetArgument(arguments, help, kArgN, size_t{256});
|
||||
args.k = GetArgument(arguments, help, kArgK, size_t{256});
|
||||
args.a_ld = GetArgument(arguments, help, kArgALeadDim, args.k);
|
||||
args.b_ld = GetArgument(arguments, help, kArgBLeadDim, args.n);
|
||||
args.c_ld = GetArgument(arguments, help, kArgCLeadDim, args.n);
|
||||
args.a_offset = GetArgument(arguments, help, kArgAOffset, size_t{0});
|
||||
args.b_offset = GetArgument(arguments, help, kArgBOffset, size_t{0});
|
||||
args.c_offset = GetArgument(arguments, help, kArgCOffset, size_t{0});
|
||||
args.layout = GetArgument(arguments, help, kArgLayout, Layout::kRowMajor);
|
||||
args.a_transpose = GetArgument(arguments, help, kArgATransp, Transpose::kNo);
|
||||
args.b_transpose = GetArgument(arguments, help, kArgBTransp, Transpose::kNo);
|
||||
args.alpha = GetArgument(arguments, help, kArgAlpha, GetScalar<T>());
|
||||
args.beta = GetArgument(arguments, help, kArgBeta, GetScalar<T>());
|
||||
|
||||
// Prints the help message (command-line arguments)
|
||||
if (!silent) { fprintf(stdout, "\n* %s\n", help.c_str()); }
|
||||
|
||||
// Initializes OpenCL
|
||||
const auto platform = Platform(platform_id);
|
||||
const auto device = Device(platform, device_id);
|
||||
const auto context = Context(device);
|
||||
auto queue = Queue(context, device);
|
||||
|
||||
// Populate host matrices with some example data
|
||||
auto host_a = std::vector<T>(args.m * args.k);
|
||||
auto host_b = std::vector<T>(args.n * args.k);
|
||||
auto host_c = std::vector<T>(args.m * args.n);
|
||||
PopulateVector(host_a, kSeed);
|
||||
PopulateVector(host_b, kSeed);
|
||||
PopulateVector(host_c, kSeed);
|
||||
|
||||
// Copy the matrices to the device
|
||||
auto device_a = Buffer<T>(context, host_a.size());
|
||||
auto device_b = Buffer<T>(context, host_b.size());
|
||||
auto device_c = Buffer<T>(context, host_c.size());
|
||||
device_a.Write(queue, host_a.size(), host_a);
|
||||
device_b.Write(queue, host_b.size(), host_b);
|
||||
device_c.Write(queue, host_c.size(), host_c);
|
||||
auto dummy = Buffer<T>(context, 1);
|
||||
auto buffers = Buffers<T>{dummy, dummy, device_a, device_b, device_c, dummy, dummy};
|
||||
|
||||
// Loops over the valid combinations: run before and run afterwards
|
||||
fprintf(stdout, "* Testing OverrideParameters for '%s'\n", routine_name.c_str());
|
||||
for (const auto &override_setting : valid_settings) {
|
||||
const auto status_before = example_routine.RunRoutine(args, buffers, queue);
|
||||
if (status_before != StatusCode::kSuccess) { errors++; continue; }
|
||||
|
||||
// Overrides the parameters
|
||||
const auto status = OverrideParameters(device(), kernel_name, precision, override_setting);
|
||||
if (status != StatusCode::kSuccess) { errors++; continue; } // error shouldn't occur
|
||||
|
||||
const auto status_after = example_routine.RunRoutine(args, buffers, queue);
|
||||
if (status_after != StatusCode::kSuccess) { errors++; continue; }
|
||||
passed++;
|
||||
}
|
||||
|
||||
// Loops over the invalid combinations: run before and run afterwards
|
||||
for (const auto &override_setting : invalid_settings) {
|
||||
const auto status_before = example_routine.RunRoutine(args, buffers, queue);
|
||||
if (status_before != StatusCode::kSuccess) { errors++; continue; }
|
||||
|
||||
// Overrides the parameters
|
||||
const auto status = OverrideParameters(device(), kernel_name, precision, override_setting);
|
||||
if (status == StatusCode::kSuccess) { errors++; continue; } // error should occur
|
||||
|
||||
const auto status_after = example_routine.RunRoutine(args, buffers, queue);
|
||||
if (status_after != StatusCode::kSuccess) { errors++; continue; }
|
||||
passed++;
|
||||
}
|
||||
|
||||
// Prints and returns the statistics
|
||||
fprintf(stdout, " %zu test(s) passed\n", passed);
|
||||
fprintf(stdout, " %zu test(s) failed\n", errors);
|
||||
fprintf(stdout, "\n");
|
||||
return errors;
|
||||
}
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace clblast
|
||||
|
||||
// Shortcuts to the clblast namespace
|
||||
using float2 = clblast::float2;
|
||||
using double2 = clblast::double2;
|
||||
|
||||
// Main function (not within the clblast namespace)
|
||||
// Entry point (outside of the clblast namespace): runs the override tests in single-precision real
// and single-precision complex, only the first run prints the command-line options. The exit code
// is 1 when any case failed and 0 otherwise.
int main(int argc, char *argv[]) {
  size_t failures = 0;
  failures += clblast::RunOverrideTests<float>(argc, argv, false, "SGEMM");
  failures += clblast::RunOverrideTests<float2>(argc, argv, true, "CGEMM");
  return (failures > 0) ? 1 : 0;
}
|
||||
|
||||
// =================================================================================================
|
Loading…
Reference in New Issue