mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-04 21:36:57 +02:00
Tidy up the pyclblast override_parameters API and add an example
This commit is contained in:
parent
c6056da0c8
commit
58e70c56f1
|
@ -10,6 +10,7 @@ import numpy as np
|
||||||
import pyopencl as cl
|
import pyopencl as cl
|
||||||
from pyopencl.array import Array
|
from pyopencl.array import Array
|
||||||
import pyclblast
|
import pyclblast
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
# Settings for this sample
|
# Settings for this sample
|
||||||
dtype = 'float32'
|
dtype = 'float32'
|
||||||
|
@ -19,7 +20,7 @@ ctx = cl.create_some_context()
|
||||||
queue = cl.CommandQueue(ctx)
|
queue = cl.CommandQueue(ctx)
|
||||||
|
|
||||||
print("# Setting up Numpy arrays")
|
print("# Setting up Numpy arrays")
|
||||||
m, n, k = 2, 3, 4
|
m, n, k = 128, 256, 512
|
||||||
a = np.random.rand(m, k).astype(dtype=dtype)
|
a = np.random.rand(m, k).astype(dtype=dtype)
|
||||||
b = np.random.rand(k, n).astype(dtype=dtype)
|
b = np.random.rand(k, n).astype(dtype=dtype)
|
||||||
c = np.random.rand(m, n).astype(dtype=dtype)
|
c = np.random.rand(m, n).astype(dtype=dtype)
|
||||||
|
@ -34,5 +35,17 @@ clc.set(c)
|
||||||
|
|
||||||
print("# Example level-3 operation: GEMM")
|
print("# Example level-3 operation: GEMM")
|
||||||
pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
|
pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
|
||||||
print("# Matrix C result: %s" % clc.get())
|
print("# PyCLBlast matrix result is correct?:", np.allclose(clc.get(), np.dot(a, b)))
|
||||||
print("# Expected result: %s" % (np.dot(a, b)))
|
|
||||||
|
print("# GFLOPS when tuned with different values of MWG:")

# Baseline Xgemm tuning parameters; only MWG is varied in the loop below.
params = {
    "KWG": 32, "KWI": 2, "MDIMA": 8, "MDIMC": 8, "MWG": 64, "NDIMB": 8, "NDIMC": 8,
    "NWG": 64, "SA": 0, "SB": 0, "STRM": 0, "STRN": 0, "VWM": 4, "VWN": 1,
}

warmup_runs = 10  # untimed calls made right after the parameters are overridden
timed_runs = 90   # calls that are actually measured
flops_per_gemm = 2 * m * n * k  # one multiply and one add per (m, n, k) triple

mwg = 1
while mwg <= 256:
    params["MWG"] = mwg
    pyclblast.override_parameters(ctx.devices[0], 'Xgemm', 32, params)

    for _ in range(warmup_runs):
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
    # Drain the queue so pending warm-up work is not charged to the timed runs.
    # NOTE(review): assumes pyclblast.gemm may return before the kernel has completed.
    queue.finish()

    t0 = datetime.now()
    for _ in range(timed_runs):
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
    # Wait for the timed work to finish before reading the clock.
    queue.finish()
    # Clamp to the clock resolution so a zero reading cannot cause a division by zero.
    elapsed = max((datetime.now() - t0).total_seconds(), 1e-6)

    # Average over the number of runs that were timed; giga means 1e9, not 2**30.
    gflops = flops_per_gemm * timed_runs / elapsed / 1e9
    print("#\tMWG = %-3d : %4d" % (mwg, int(gflops)))
    mwg *= 4
|
||||||
|
|
|
@ -15,6 +15,8 @@ import numpy as np
|
||||||
import pyopencl as cl
|
import pyopencl as cl
|
||||||
from pyopencl.array import Array
|
from pyopencl.array import Array
|
||||||
from libcpp cimport bool
|
from libcpp cimport bool
|
||||||
|
from cpython.mem cimport PyMem_Malloc, PyMem_Free
|
||||||
|
from libc.string cimport strdup
|
||||||
|
|
||||||
####################################################################################################
|
####################################################################################################
|
||||||
# CLBlast and OpenCL data-types
|
# CLBlast and OpenCL data-types
|
||||||
|
@ -2086,38 +2088,36 @@ def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_t
|
||||||
# Overrides the parameters
|
# Overrides the parameters
|
||||||
####################################################################################################
|
####################################################################################################
|
||||||
|
|
||||||
from libc.stdlib cimport malloc, free
|
|
||||||
from libc.string cimport strdup
|
|
||||||
|
|
||||||
cdef extern from "clblast_c.h":
    # Opaque OpenCL device handle, declared so the call below can be typed.
    ctypedef struct _cl_device_id:
        pass
    ctypedef _cl_device_id* cl_device_id

    CLBlastStatusCode CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name, const CLBlastPrecision precision, const size_t num_parameters, const char** parameters_names, const size_t* parameters_values)

def override_parameters(device, kernel_name, precision, parameters):
    """
    Override the current parameters for the given kernel, on this device, with this precision.

    device:      object exposing `int_ptr`, the raw device handle as an integer
                 (e.g. an entry of a PyOpenCL context's `devices` list)
    kernel_name: kernel family name as an ASCII-encodable string (e.g. 'Xgemm')
    precision:   one of 16, 32, 64, 3232, 6464
    parameters:  dictionary mapping ASCII parameter names to non-negative integer values

    Raises MemoryError if the temporary arrays cannot be allocated, and
    RuntimeError if CLBlast rejects the override.
    """

    cdef cl_device_id device_id = <cl_device_id><size_t>device.int_ptr

    # read the parameters dictionary into names/values arrays, for use in CLBlastOverrideParameters
    cdef size_t n = len(parameters)
    cdef const char **parameter_names = <const char**> PyMem_Malloc(n * sizeof(char*))
    cdef size_t *parameter_values = <size_t*> PyMem_Malloc(n * sizeof(size_t))

    try:
        # Either allocation may fail independently; PyMem_Free(NULL) in the
        # `finally` clause is a no-op, so a partial failure is cleaned up too.
        if parameter_names == NULL or parameter_values == NULL:
            raise MemoryError()

        # Keep the encoded names alive in a list for the duration of the call:
        # the C array only borrows pointers into these bytes objects, so no
        # strdup copies are made and nothing has to be freed per name.
        kernel_name_bytes = kernel_name.encode('ascii')
        encoded_names = [name.encode('ascii') for name in parameters]
        for i, (name_bytes, value) in enumerate(zip(encoded_names, parameters.values())):
            parameter_names[i] = name_bytes
            parameter_values[i] = value

        # call the underlying API
        err = CLBlastOverrideParameters(device_id, kernel_name_bytes, precision, n, parameter_names, parameter_values)
        if err != CLBlastSuccess:
            raise RuntimeError("PyCLBlast: 'OverrideParameters' failed: %s" % get_status_message(err))
    finally:
        # tidy up, on success and on every error path
        PyMem_Free(parameter_names)
        PyMem_Free(parameter_values)
|
||||||
|
|
||||||
####################################################################################################
|
####################################################################################################
|
||||||
|
|
Loading…
Reference in a new issue