Merge pull request #310 from CNugteren/CLBlast-307-netlib-api-static-opencl-vars
Netlib API with optional static OpenCL variables (pull/312/head)
commit
dd1fa7cc81
|
@ -1,6 +1,7 @@
|
|||
|
||||
Development (next version)
|
||||
- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah')
|
||||
- Added an option to compile the Netlib API with static OpenCL device and context (-DNETLIB_PERSISTENT_OPENCL=ON)
|
||||
- The tuners now check beforehand for invalid local thread sizes and skip those completely
|
||||
- Fixed an issue with conjugate transpose not being executed in certain cases for, among others, XOMATCOPY
|
||||
- Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel
|
||||
|
|
|
@ -32,9 +32,19 @@ option(SAMPLES "Enable compilation of the examples" OFF)
|
|||
option(TUNERS "Enable compilation of the tuners" ON)
|
||||
option(CLIENTS "Enable compilation of the clients to test and compare performance" OFF)
|
||||
option(TESTS "Enable compilation of the correctness tests" OFF)
|
||||
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
|
||||
option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF)
|
||||
|
||||
# The optional Netlib API for CLBlast
|
||||
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
|
||||
option(NETLIB_PERSISTENT_OPENCL "Makes OpenCL device and context in the CBLAS Netlib API static" OFF)
|
||||
if(NETLIB)
|
||||
message("-- Building the Netlib API of CLBlast")
|
||||
if(NETLIB_PERSISTENT_OPENCL)
|
||||
message(" ^^ while using static variables for OpenCL device and context")
|
||||
add_definitions(-DNETLIB_PERSISTENT_OPENCL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Workarounds for bugs
|
||||
option(AMD_SI_EMPTY_KERNEL_WORKAROUND "Enables workaround for bug in AMD Southern Island GPUs" OFF)
|
||||
if(AMD_SI_EMPTY_KERNEL_WORKAROUND)
|
||||
|
|
|
@ -49,7 +49,7 @@ FILES = [
|
|||
"/src/clblast_cuda.cpp",
|
||||
"/src/pyclblast/src/pyclblast.pyx"
|
||||
]
|
||||
HEADER_LINES = [123, 21, 127, 24, 29, 45, 29, 65, 32, 95, 21, 290]
|
||||
HEADER_LINES = [123, 21, 127, 24, 29, 45, 29, 65, 40, 95, 21, 290]
|
||||
FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37]
|
||||
HEADER_LINES_DOC = 0
|
||||
FOOTER_LINES_DOC = 232
|
||||
|
|
|
@ -145,8 +145,8 @@ def clblast_netlib_c_cc(routine):
|
|||
result += routine.routine_header_netlib(flavour, 9, "") + " {" + NL
|
||||
|
||||
# Initialize OpenCL
|
||||
result += " auto device = get_device();" + NL
|
||||
result += " auto context = clblast::Context(device);" + NL
|
||||
result += " OPTIONAL_STATIC auto device = get_device();" + NL
|
||||
result += " OPTIONAL_STATIC auto context = clblast::Context(device);" + NL
|
||||
result += " auto queue = clblast::Queue(context, device);" + NL
|
||||
|
||||
# Set alpha and beta
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue