Added a first version of a cuBLAS wrapper (WIP)
parent
af9a521042
commit
674ff96fdf
|
@ -38,11 +38,12 @@ FILES = [
|
|||
"/src/clblast_c.cpp",
|
||||
"/test/wrapper_clblas.hpp",
|
||||
"/test/wrapper_cblas.hpp",
|
||||
"/test/wrapper_cublas.hpp",
|
||||
"/include/clblast_netlib_c.h",
|
||||
"/src/clblast_netlib_c.cpp",
|
||||
]
|
||||
HEADER_LINES = [123, 76, 126, 23, 29, 41, 65, 32]
|
||||
FOOTER_LINES = [25, 138, 27, 38, 6, 6, 9, 2]
|
||||
HEADER_LINES = [123, 76, 126, 23, 29, 41, 29, 65, 32]
|
||||
FOOTER_LINES = [25, 138, 27, 38, 6, 6, 6, 9, 2]
|
||||
HEADER_LINES_DOC = 0
|
||||
FOOTER_LINES_DOC = 63
|
||||
|
||||
|
@ -194,7 +195,7 @@ def main(argv):
|
|||
# Re-writes the body of the file
|
||||
with open(library_root + FILES[i], "w") as f:
|
||||
body = ""
|
||||
levels = [1, 2, 3] if (i == 4 or i == 5) else [1, 2, 3, 4]
|
||||
levels = [1, 2, 3] if (i == 4 or i == 5 or i == 6) else [1, 2, 3, 4]
|
||||
for level in levels:
|
||||
body += cpp.LEVEL_SEPARATORS[level - 1] + "\n"
|
||||
for routine in ROUTINES[level - 1]:
|
||||
|
@ -211,9 +212,11 @@ def main(argv):
|
|||
if i == 5:
|
||||
body += cpp.wrapper_cblas(routine)
|
||||
if i == 6:
|
||||
body += cpp.wrapper_cublas(routine)
|
||||
if i == 7:
|
||||
if not routine.batched:
|
||||
body += cpp.clblast_netlib_c_h(routine)
|
||||
if i == 7:
|
||||
if i == 8:
|
||||
if not routine.batched:
|
||||
body += cpp.clblast_netlib_c_cc(routine)
|
||||
f.write("".join(file_header))
|
||||
|
|
|
@ -56,6 +56,19 @@ def option_to_cblas(x):
|
|||
}[x]
|
||||
|
||||
|
||||
def option_to_cublas(x):
    """Translates an option name to the matching cuBLAS data-type.

    Raises a KeyError when the option name is not one of the known options.
    """
    cublas_types = {
        'layout': "cublas_has_no_layout",
        'a_transpose': "cublasOperation_t",
        'b_transpose': "cublasOperation_t",
        'ab_transpose': "cublasOperation_t",
        'side': "cublasSideMode_t",
        'triangle': "cublasFillMode_t",
        'diagonal': "cublasDiagType_t",
    }
    return cublas_types[x]
|
||||
|
||||
|
||||
def option_to_documentation(x):
|
||||
"""Translates an option name to a documentation string"""
|
||||
return {
|
||||
|
|
|
@ -290,6 +290,52 @@ def wrapper_cblas(routine):
|
|||
return result
|
||||
|
||||
|
||||
def wrapper_cublas(routine):
    """Generates the wrapper to the reference cuBLAS routines (for performance/correctness testing).

    Returns the generated C++ source as a single string, or an empty string when the routine has no
    tests. One wrapper function is emitted per flavour of the routine; for routines without scalars a
    templated forward declaration is emitted first.
    """
    if not routine.has_tests:
        return ""

    result = NL + "// Forwards the cuBLAS calls for %s" % routine.short_names_tested() + NL
    if routine.no_scalars():
        result += routine.routine_header_wrapper_cublas(routine.template, True, 23) + ";" + NL
    for flavour in routine.flavours:
        result += routine.routine_header_wrapper_cublas(flavour, False, 23) + " {" + NL

        # There is a version available in cuBLAS
        if flavour.precision_name in ["S", "D", "C", "Z"]:
            indent = " " * (24 + routine.length())
            arguments = routine.arguments_wrapper_cublas(flavour)
            result += " cublasHandle_t handle;" + NL
            # The handle must be created before it is passed to any cuBLAS routine (and before it
            # is destroyed below); bail out of the generated wrapper if creation fails.
            result += " const auto create_status = cublasCreate(&handle);" + NL
            result += " if (create_status != CUBLAS_STATUS_SUCCESS) { return create_status; }" + NL
            result += " auto status = cublas" + flavour.name + routine.name + "(handle, "
            result += ("," + NL + indent).join(arguments) + ");" + NL
            result += " cublasDestroy(handle);" + NL
            result += " return status;"

        # There is no cuBLAS version for this precision (half-precision): report it as unsupported
        else:
            result += " return CUBLAS_STATUS_NOT_SUPPORTED;"

        # Complete
        result += NL + "}" + NL
    return result
|
||||
|
||||
|
||||
def performance_test(routine, level_string):
|
||||
"""Generates the body of a performance test for a specific routine"""
|
||||
result = ""
|
||||
|
|
|
@ -257,7 +257,7 @@ class Routine:
|
|||
return []
|
||||
|
||||
def buffer_def_wrapper_cl(self, name, flavour):
|
||||
"""As above but with data-types"""
|
||||
"""As above but for OpenCL"""
|
||||
prefix = "const " if name in self.inputs else ""
|
||||
if name in self.inputs or name in self.outputs:
|
||||
a = [prefix + "Buffer<" + flavour.buffer_type + ">& " + name + "_buffer"]
|
||||
|
@ -266,6 +266,16 @@ class Routine:
|
|||
return [", ".join(a + b + c)]
|
||||
return []
|
||||
|
||||
def buffer_def_wrapper_cuda(self, name, flavour):
    """Retrieves the definition of a buffer for the CUDA wrapper: a plain pointer, an offset, and
    (unless the buffer has no leading dimension/increment) its ld/inc argument.

    Returns a single-element list holding the comma-separated definition, or an empty list when
    the name is neither an input nor an output of this routine.
    """
    is_input = name in self.inputs
    if not (is_input or name in self.outputs):
        return []
    constness = "const " if is_input else ""
    definitions = [
        constness + flavour.buffer_type + "* " + name + "_buffer",
        "const size_t " + name + "_offset",
    ]
    if name not in self.buffers_without_ld_inc():
        definitions.append("const size_t " + name + "_" + self.postfix(name))
    return [", ".join(definitions)]
|
||||
|
||||
def buffer_def_vector(self, name, flavour):
|
||||
"""As above but as vectors"""
|
||||
prefix = "const " if name in self.inputs else ""
|
||||
|
@ -329,6 +339,18 @@ class Routine:
|
|||
return [", ".join(a + c)]
|
||||
return []
|
||||
|
||||
def buffer_wrapper_cublas(self, name):
    """Retrieves how a buffer is passed to a cuBLAS routine from within the cuBLAS wrapper.

    The buffer is passed as the address of its element at the given offset. Vectors "x" and "y"
    are followed by their increment cast to int; matrices "a", "b" and "c" are followed by their
    leading dimension as-is. Returns an empty list for names that are not buffers of this routine.
    """
    if name not in self.inputs and name not in self.outputs:
        return []
    pieces = ["&" + name + "_buffer[" + name + "_offset]"]
    if name in ["x", "y"]:
        pieces.append("static_cast<int>(" + name + "_" + self.postfix(name) + ")")
    elif name in ["a", "b", "c"]:
        pieces.append(name + "_" + self.postfix(name))
    return [", ".join(pieces)]
|
||||
|
||||
def buffer_type(self, name):
|
||||
"""As above, but only data-types"""
|
||||
prefix = "const " if (name in self.inputs) else ""
|
||||
|
@ -399,6 +421,16 @@ class Routine:
|
|||
return [name]
|
||||
return []
|
||||
|
||||
def scalar_use_wrapper_by_ref(self, name, flavour):
    """Retrieves the use of a scalar in the cuBLAS wrapper.

    "alpha" and "beta" are passed by address (prefixed with "&") using the flavour's alpha/beta
    expression; any other scalar of this routine is passed by its plain name. Returns an empty
    list when the name is not a scalar of this routine.
    """
    if name not in self.scalars:
        return []
    if name == "alpha":
        return ["&" + flavour.use_alpha_opencl()]
    if name == "beta":
        return ["&" + flavour.use_beta_opencl()]
    return [name]
|
||||
|
||||
def scalar_use_wrapper_cblas(self, name, flavour):
|
||||
"""As above, but for the CBLAS wrapper"""
|
||||
if name in self.scalars:
|
||||
|
@ -465,6 +497,12 @@ class Routine:
|
|||
return [", ".join([s for s in self.sizes])]
|
||||
return []
|
||||
|
||||
def sizes_list_as_int(self):
    """Retrieves the sizes (m, n, k) as one comma-separated string, each cast to int.

    Returns a single-element list, or an empty list when the routine has no sizes.
    """
    if not self.sizes:
        return []
    casts = []
    for size in self.sizes:
        casts.append("static_cast<int>(" + size + ")")
    return [", ".join(casts)]
|
||||
|
||||
def sizes_def(self):
|
||||
"""Retrieves the definition of the sizes (m,n,k)"""
|
||||
if self.sizes:
|
||||
|
@ -531,6 +569,13 @@ class Routine:
|
|||
return [", ".join(definitions)]
|
||||
return []
|
||||
|
||||
def options_def_wrapper_cublas(self):
    """Retrieves the definitions of the options, typed with the cuBLAS data-types.

    Returns a single-element list holding the comma-separated "const <type> <option>" definitions,
    or an empty list when the routine has no options.
    """
    if not self.options:
        return []
    typed_options = []
    for option in self.options:
        typed_options.append("const " + convert.option_to_cublas(option) + " " + option)
    return [", ".join(typed_options)]
|
||||
|
||||
def options_type(self):
|
||||
"""Retrieves the types of the options (layout, transpose, side, etc.)"""
|
||||
if self.options:
|
||||
|
@ -615,7 +660,7 @@ class Routine:
|
|||
|
||||
def arguments_wrapper_cblas(self, flavour):
|
||||
"""As above, but for the CBLAS wrapper"""
|
||||
return (self.options_list() + self.sizes_list() +
|
||||
return (self.options_list() + self.sizes_list_as_int() +
|
||||
self.scalar_use_wrapper_cblas("alpha", flavour) +
|
||||
list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.buffers_first()])) +
|
||||
self.scalar_use_wrapper_cblas("beta", flavour) +
|
||||
|
@ -623,6 +668,17 @@ class Routine:
|
|||
list(chain(*[self.buffer_wrapper_cblas(b, flavour) for b in self.scalar_buffers_second()])) +
|
||||
list(chain(*[self.scalar_use_wrapper_cblas(s, flavour) for s in self.other_scalars()])))
|
||||
|
||||
def arguments_wrapper_cublas(self, flavour):
    """Retrieves the combined list of arguments used to call a cuBLAS routine from the wrapper.

    The order is: options, sizes (cast to int), first scalar buffers, alpha, first buffers, beta,
    second buffers, second scalar buffers, and finally the remaining scalars.
    """
    def buffer_uses(names):
        # Flattens the per-buffer argument lists into a single list
        return list(chain.from_iterable(self.buffer_wrapper_cublas(b) for b in names))

    def scalar_uses(names):
        # Flattens the per-scalar argument lists into a single list
        return list(chain.from_iterable(self.scalar_use_wrapper_by_ref(s, flavour) for s in names))

    groups = [
        self.options_list(),
        self.sizes_list_as_int(),
        buffer_uses(self.scalar_buffers_first()),
        self.scalar_use_wrapper_by_ref("alpha", flavour),
        buffer_uses(self.buffers_first()),
        self.scalar_use_wrapper_by_ref("beta", flavour),
        buffer_uses(self.buffers_second()),
        buffer_uses(self.scalar_buffers_second()),
        scalar_uses(self.other_scalars()),
    ]
    return list(chain.from_iterable(groups))
|
||||
|
||||
def arguments_def(self, flavour):
|
||||
"""Retrieves a combination of all the argument definitions"""
|
||||
return (self.options_def() + self.sizes_def() +
|
||||
|
@ -683,6 +739,17 @@ class Routine:
|
|||
list(chain(*[self.buffer_def_vector(b, flavour) for b in self.scalar_buffers_second()])) +
|
||||
list(chain(*[self.scalar_def_plain(s, flavour) for s in self.other_scalars()])))
|
||||
|
||||
def arguments_def_wrapper_cublas(self, flavour):
    """Retrieves the combined list of argument definitions of the cuBLAS wrapper.

    The order is: options (cuBLAS data-types), sizes, first scalar buffers, alpha, first buffers,
    beta, second buffers, second scalar buffers, and finally the remaining scalars. Buffers use
    the CUDA wrapper definitions and scalars use their plain data-types.
    """
    def buffer_defs(names):
        # Flattens the per-buffer definition lists into a single list
        return [d for b in names for d in self.buffer_def_wrapper_cuda(b, flavour)]

    groups = [
        self.options_def_wrapper_cublas(),
        self.sizes_def(),
        buffer_defs(self.scalar_buffers_first()),
        self.scalar_def_plain("alpha", flavour),
        buffer_defs(self.buffers_first()),
        self.scalar_def_plain("beta", flavour),
        buffer_defs(self.buffers_second()),
        buffer_defs(self.scalar_buffers_second()),
        [d for s in self.other_scalars() for d in self.scalar_def_plain(s, flavour)],
    ]
    return [definition for group in groups for definition in group]
|
||||
|
||||
def arguments_type(self, flavour):
|
||||
"""Retrieves a combination of all the argument types"""
|
||||
return (self.options_type() + self.sizes_type() +
|
||||
|
@ -781,3 +848,17 @@ class Routine:
|
|||
result = "void cblasX" + self.name + "("
|
||||
result += (",\n" + indent).join([a for a in self.arguments_def_wrapper_cblas(flavour)]) + ")"
|
||||
return result
|
||||
|
||||
def routine_header_wrapper_cublas(self, flavour, def_only, spaces):
    """Retrieves the C++ function header of the cuBLAS wrapper for this routine.

    Routines without scalars get a "template <...>" line: with the flavour's name when only the
    definition is requested (def_only), and empty otherwise, in which case the function name
    carries the flavour's template as an explicit "<...>" suffix. Continuation lines of the
    argument list are indented by 'spaces' plus the lengths of the routine name and that suffix.
    """
    templated = self.no_scalars()
    suffix = ""
    if templated and not def_only:
        suffix = "<" + flavour.template + ">"
    separator = ",\n" + " " * (spaces + self.length() + len(suffix))

    pieces = []
    if templated:
        pieces.append("template <")
        if def_only:
            pieces.append(flavour.name)
        pieces.append(">\n")
    pieces.append("cublasStatus_t cublasX" + self.name + suffix + "(")
    pieces.append(separator.join(self.arguments_def_wrapper_cublas(flavour)))
    pieces.append(")")
    return "".join(pieces)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue