Added a define to enable subgroup shuffling if supported by the device
parent
5d46a3193e
commit
2b1e0295e6
|
@ -57,6 +57,11 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
|
|||
header_string += "#define GLOBAL_MEM_FENCE 1\n";
|
||||
}
|
||||
|
||||
// For Intel GPUs with subgroup support, use subgroup shuffling.
|
||||
if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups)) {
|
||||
header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
|
||||
}
|
||||
|
||||
// Optionally adds a translation header from OpenCL kernels to CUDA kernels
|
||||
#ifdef CUDA_API
|
||||
header_string +=
|
||||
|
|
|
@ -47,6 +47,7 @@ using double2 = std::complex<double>;
|
|||
// Khronos OpenCL extensions
|
||||
const std::string kKhronosAttributesAMD = "cl_amd_device_attribute_query";
|
||||
const std::string kKhronosAttributesNVIDIA = "cl_nv_device_attribute_query";
|
||||
const std::string kKhronosIntelSubgroups = "cl_intel_subgroups"; // Queried via HasExtension() to enable USE_SUBGROUP_SHUFFLING
|
||||
|
||||
// Caught an unknown error
|
||||
constexpr auto kUnknownError = -999;
|
||||
|
|
Loading…
Reference in New Issue