Added a define to enable subgroup shuffling if supported by the device

pull/277/head
Cedric Nugteren 2018-04-24 20:41:15 +02:00
parent 5d46a3193e
commit 2b1e0295e6
2 changed files with 6 additions and 0 deletions

View File

@ -57,6 +57,11 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
header_string += "#define GLOBAL_MEM_FENCE 1\n";
}
// For Intel GPUs with subgroup support, use subgroup shuffling.
if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
}
// Optionally adds a translation header from OpenCL kernels to CUDA kernels
#ifdef CUDA_API
header_string +=

View File

@ -47,6 +47,7 @@ using double2 = std::complex<double>;
// Khronos OpenCL extensions: extension-name strings for the AMD and NVIDIA device-attribute
// queries and for Intel subgroups
const std::string kKhronosAttributesAMD = "cl_amd_device_attribute_query";
const std::string kKhronosAttributesNVIDIA = "cl_nv_device_attribute_query";
const std::string kKhronosIntelSubgroups = "cl_intel_subgroups";
// Caught an unknown error
constexpr auto kUnknownError = -999;