Merge pull request #310 from CNugteren/CLBlast-307-netlib-api-static-opencl-vars

Netlib API with optional static OpenCL variables
pull/312/head
Cedric Nugteren 2018-08-09 21:37:47 +02:00 committed by GitHub
commit dd1fa7cc81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 355 additions and 336 deletions

View File

@@ -1,6 +1,7 @@
Development (next version)
- Added support for shuffle instructions for NVIDIA GPUs (thanks to 'tyler-utah')
- Added an option to compile the Netlib API with static OpenCL device and context (-DNETLIB_PERSISTENT_OPENCL=ON)
- The tuners now check beforehand for invalid local thread sizes and skip those completely
- Fixed an issue with conjugate transpose not being executed in certain cases for, among others, XOMATCOPY
- Fixed an issue with AMD GPUs and the new GEMMK == 1 kernel

View File

@@ -32,9 +32,19 @@ option(SAMPLES "Enable compilation of the examples" OFF)
option(TUNERS "Enable compilation of the tuners" ON)
option(CLIENTS "Enable compilation of the clients to test and compare performance" OFF)
option(TESTS "Enable compilation of the correctness tests" OFF)
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF)
# The optional Netlib API for CLBlast
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
option(NETLIB_PERSISTENT_OPENCL "Makes OpenCL device and context in the CBLAS Netlib API static" OFF)
if(NETLIB)
message("-- Building the Netlib API of CLBlast")
if(NETLIB_PERSISTENT_OPENCL)
message(" ^^ while using static variables for OpenCL device and context")
add_definitions(-DNETLIB_PERSISTENT_OPENCL)
endif()
endif()
# Workarounds for bugs
option(AMD_SI_EMPTY_KERNEL_WORKAROUND "Enables workaround for bug in AMD Southern Island GPUs" OFF)
if(AMD_SI_EMPTY_KERNEL_WORKAROUND)

View File

@@ -49,7 +49,7 @@ FILES = [
"/src/clblast_cuda.cpp",
"/src/pyclblast/src/pyclblast.pyx"
]
HEADER_LINES = [123, 21, 127, 24, 29, 45, 29, 65, 32, 95, 21, 290]
HEADER_LINES = [123, 21, 127, 24, 29, 45, 29, 65, 40, 95, 21, 290]
FOOTER_LINES = [98, 57, 112, 275, 6, 6, 6, 9, 2, 41, 56, 37]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 232

View File

@@ -145,8 +145,8 @@ def clblast_netlib_c_cc(routine):
result += routine.routine_header_netlib(flavour, 9, "") + " {" + NL
# Initialize OpenCL
result += " auto device = get_device();" + NL
result += " auto context = clblast::Context(device);" + NL
result += " OPTIONAL_STATIC auto device = get_device();" + NL
result += " OPTIONAL_STATIC auto context = clblast::Context(device);" + NL
result += " auto queue = clblast::Queue(context, device);" + NL
# Set alpha and beta

File diff suppressed because it is too large Load Diff