Using mad() instruction for AMD devices like clBLAS does

This commit is contained in:
CNugteren 2015-07-16 22:42:02 +02:00
parent 3bb1b5fa6e
commit 0157d6d4ea
2 changed files with 13 additions and 2 deletions

View file

@ -82,8 +82,11 @@ R"(
// =================================================================================================
// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction
#define USE_CL_MAD 0
// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction by default. For specific
// devices, this is enabled (see src/routine.cc).
#ifndef USE_CL_MAD
#define USE_CL_MAD 0
#endif
// Sets a variable to zero
#if PRECISION == 3232 || PRECISION == 6464

View file

@ -68,6 +68,14 @@ StatusCode Routine::SetUp(const std::string &routine_source) {
// Collects the parameters for this device in the form of defines, and adds the precision
auto defines = db_.GetDefines();
defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
// For specific devices, use the non-IEEE754 compliant OpenCL mad() instruction. This can improve
// performance, but might result in reduced accuracy.
if (device_.Vendor() == "AMD") {
defines += "#define USE_CL_MAD 1\n";
}
// Combines everything together into a single source string
auto source_string = defines + common_header + routine_source;
// Compiles the kernel