mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-04 21:36:57 +02:00
Using mad() instruction for AMD devices like clBLAS does
This commit is contained in:
parent
3bb1b5fa6e
commit
0157d6d4ea
|
@ -82,8 +82,11 @@ R"(
|
|||
|
||||
// =================================================================================================
|
||||
|
||||
// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction
|
||||
#define USE_CL_MAD 0
|
||||
// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction by default. For specific
|
||||
// devices, this is enabled (see src/routine.cc).
|
||||
#ifndef USE_CL_MAD
|
||||
#define USE_CL_MAD 0
|
||||
#endif
|
||||
|
||||
// Sets a variable to zero
|
||||
#if PRECISION == 3232 || PRECISION == 6464
|
||||
|
|
|
@ -68,6 +68,14 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
|
|||
// Collects the parameters for this device in the form of defines, and adds the precision
|
||||
auto defines = db_.GetDefines();
|
||||
defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
|
||||
|
||||
// For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
|
||||
// performance, but might result in reduced accuracy.
|
||||
if (device_.Vendor() == "AMD") {
|
||||
defines += "#define USE_CL_MAD 1\n";
|
||||
}
|
||||
|
||||
// Combines everything together into a single source string
|
||||
auto source_string = defines + common_header + routine_source;
|
||||
|
||||
// Compiles the kernel
|
||||
|
|
Loading…
Reference in a new issue