tidying up pyclblast override_parameters api, and added example

This commit is contained in:
kodonell 2018-03-26 08:51:55 +13:00
parent c6056da0c8
commit 58e70c56f1
2 changed files with 29 additions and 16 deletions

View file

@ -10,6 +10,7 @@ import numpy as np
import pyopencl as cl import pyopencl as cl
from pyopencl.array import Array from pyopencl.array import Array
import pyclblast import pyclblast
from datetime import datetime
# Settings for this sample # Settings for this sample
dtype = 'float32' dtype = 'float32'
@ -19,7 +20,7 @@ ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx) queue = cl.CommandQueue(ctx)
print("# Setting up Numpy arrays") print("# Setting up Numpy arrays")
m, n, k = 2, 3, 4 m, n, k = 128, 256, 512
a = np.random.rand(m, k).astype(dtype=dtype) a = np.random.rand(m, k).astype(dtype=dtype)
b = np.random.rand(k, n).astype(dtype=dtype) b = np.random.rand(k, n).astype(dtype=dtype)
c = np.random.rand(m, n).astype(dtype=dtype) c = np.random.rand(m, n).astype(dtype=dtype)
@ -34,5 +35,17 @@ clc.set(c)
print("# Example level-3 operation: GEMM") print("# Example level-3 operation: GEMM")
pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n) pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
print("# Matrix C result: %s" % clc.get()) print("# PyCLBlast matrix result is correct?:", np.allclose(clc.get(), np.dot(a, b)))
print("# Expected result: %s" % (np.dot(a, b)))
# Benchmark GEMM while overriding the Xgemm tuning parameters, sweeping MWG
# over 1, 4, 16, 64, 256 and reporting the achieved throughput for each value.
print("# GFLOPS when tuned with different values of MWG:")
params = { "KWG": 32, "KWI": 2, "MDIMA": 8, "MDIMC": 8, "MWG": 64, "NDIMB": 8, "NDIMC": 8, "NWG": 64, "SA": 0, "SB": 0, "STRM": 0, "STRN": 0, "VWM": 4, "VWN": 1 }
warmup_runs = 10   # untimed runs after each override, before the clock starts
timed_runs = 90    # runs that are actually measured
flops_per_gemm = 2 * m * n * k
for mwg in (1, 4, 16, 64, 256):
    params["MWG"] = mwg
    pyclblast.override_parameters(ctx.devices[0], 'Xgemm', 32, params)
    for _ in range(warmup_runs):
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
    # Drain the queue so pending warm-up work is not attributed to the timed runs.
    queue.finish()
    t0 = datetime.now()
    for _ in range(timed_runs):
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
    # Wait for all enqueued GEMMs to complete before stopping the clock.
    queue.finish()
    # Divide by the number of runs that were timed (not warm-up + timed).
    seconds_per_gemm = (datetime.now() - t0).total_seconds() / timed_runs
    print("#\tMWG = %-3d : %4d" % (mwg, int(flops_per_gemm / seconds_per_gemm / 1e9)))

View file

@ -15,6 +15,8 @@ import numpy as np
import pyopencl as cl import pyopencl as cl
from pyopencl.array import Array from pyopencl.array import Array
from libcpp cimport bool from libcpp cimport bool
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.string cimport strdup
#################################################################################################### ####################################################################################################
# CLBlast and OpenCL data-types # CLBlast and OpenCL data-types
@ -2086,38 +2088,36 @@ def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_t
# Overrides the parameters # Overrides the parameters
#################################################################################################### ####################################################################################################
from libc.stdlib cimport malloc, free
from libc.string cimport strdup
cdef extern from "clblast_c.h": cdef extern from "clblast_c.h":
ctypedef struct _cl_device_id: ctypedef struct _cl_device_id:
pass pass
ctypedef _cl_device_id* cl_device_id ctypedef _cl_device_id* cl_device_id
CLBlastStatusCode CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name, const CLBlastPrecision precision, const size_t num_parameters, const char** parameters_names, const size_t* parameters_values) CLBlastStatusCode CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name, const CLBlastPrecision precision, const size_t num_parameters, const char** parameters_names, const size_t* parameters_values)
def override_parameters(device, kernel_name, precision, parameters):
    """
    Override the current parameters for the given kernel, on this device, with this precision.

    device: object whose `int_ptr` attribute holds the raw OpenCL device handle
    kernel_name: kernel name as a string; it is ASCII-encoded before the call (e.g. 'Xgemm')
    precision: value passed through as the CLBlastPrecision argument (e.g. 16, 32, 64, 3232, 6464)
    parameters: dict mapping parameter names (ASCII strings) to non-negative integer values

    Raises MemoryError if the temporary name/value arrays cannot be allocated, and
    RuntimeError if CLBlastOverrideParameters does not return CLBlastSuccess.
    """

    cdef cl_device_id device_id = <cl_device_id><size_t>device.int_ptr
    cdef size_t n = len(parameters)
    cdef size_t i
    cdef bytes encoded_name
    cdef bytes encoded_kernel_name
    cdef list encoded_names
    cdef list values
    cdef const char **parameter_names = NULL
    cdef size_t *parameter_values = NULL

    # Encode everything up front. The bytes objects stay referenced by
    # `encoded_names` for the whole call, so the C array can point straight at
    # their buffers; no strdup copies are made that would need freeing.
    encoded_kernel_name = kernel_name.encode('ascii')
    encoded_names = [name.encode('ascii') for name in parameters]
    values = list(parameters.values())

    try:
        # read the parameters dictionary into names/values arrays, for use in CLBlastOverrideParameters
        parameter_names = <const char**> PyMem_Malloc(n * sizeof(char*))
        parameter_values = <size_t*> PyMem_Malloc(n * sizeof(size_t))
        # Either allocation failing is fatal; checking both avoids a NULL dereference below.
        if parameter_names == NULL or parameter_values == NULL:
            raise MemoryError()
        for i in range(n):
            encoded_name = encoded_names[i]
            parameter_names[i] = encoded_name
            parameter_values[i] = values[i]

        # call the underlying API
        err = CLBlastOverrideParameters(device_id, encoded_kernel_name, precision, n, parameter_names, parameter_values)
    finally:
        # tidy up on every path, including errors; PyMem_Free(NULL) is a no-op
        PyMem_Free(parameter_names)
        PyMem_Free(parameter_values)

    if err != CLBlastSuccess:
        raise RuntimeError("PyCLBlast: 'OverrideParameters' failed: %s" % get_status_message(err))
#################################################################################################### ####################################################################################################