Merge pull request #255 from kodonnell/py_override

Adding override parameters to pyclblast
pull/282/head
Cedric Nugteren 2018-03-30 10:28:00 +02:00 committed by GitHub
commit 4de220a7a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 3 deletions

View File

@ -49,8 +49,8 @@ FILES = [
"/src/clblast_cuda.cpp",
"/src/pyclblast/src/pyclblast.pyx"
]
# Values before this change; they are superseded by the two assignments that follow.
HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 288]
FOOTER_LINES = [98, 56, 112, 275, 6, 6, 6, 9, 2, 41, 55, 1]
# Values after this change: only the last entry of each list differs (288 -> 290 and 1 -> 37).
# NOTE(review): presumably one entry per path in FILES, so the last entry would belong to
# "/src/pyclblast/src/pyclblast.pyx", the last path listed - confirm against the full file.
HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 290]
FOOTER_LINES = [98, 56, 112, 275, 6, 6, 6, 9, 2, 41, 55, 37]
# NOTE(review): presumably the header/footer counterparts for a documentation file - confirm.
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 232

View File

@ -0,0 +1,42 @@
#!/usr/bin/env python
# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
# This file follows the PEP8 Python style guide and uses a max-width of 100 characters per line.
#
# Author(s):
# Cedric Nugteren <www.cedricnugteren.nl>
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyclblast
from datetime import datetime
if __name__ == "__main__":
    # Create an OpenCL context through pyopencl, plus a command queue on that context.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Host-side data for a small single-precision GEMM: A is (m x k), B is (k x n), C is (m x n).
    m, n, k = 2, 3, 4
    a = np.random.rand(m, k).astype(dtype=np.float32)
    b = np.random.rand(k, n).astype(dtype=np.float32)
    c = np.empty((m, n), np.float32)

    # Allocate one device array per host matrix first, then upload the host contents.
    host_matrices = (a, b, c)
    cla, clb, clc = [Array(queue, mat.shape, mat.dtype) for mat in host_matrices]
    for device_array, mat in zip((cla, clb, clc), host_matrices):
        device_array.set(mat)

    # Full set of 'Xgemm' parameters handed to CLBlast. Only 'MWG' is varied below; every other
    # entry keeps the value given here for all runs.
    params = {
        "KWG": 32, "KWI": 2,
        "MDIMA": 8, "MDIMC": 8, "MWG": 64,
        "NDIMB": 8, "NDIMC": 8, "NWG": 64,
        "SA": 0, "SB": 0,
        "STRM": 0, "STRN": 0,
        "VWM": 4, "VWN": 1,
    }
    device = ctx.devices[0]
    for mwg in (32, 64, 256):
        print("Running sgemm tuned with MWG = %d" % mwg)
        params["MWG"] = mwg
        # The third argument (32) is forwarded as the CLBlast precision.
        pyclblast.override_parameters(device, 'Xgemm', 32, params)
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
        # Compare the device result against numpy's reference product after every override.
        expected = a.dot(b)
        assert np.allclose(clc.get(), expected), "uh-oh, xgemm isn't behaving correctly"

View File

@ -1,3 +1,4 @@
#distutils: language = c++
#cython: binding=True
####################################################################################################
# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0.
@ -13,8 +14,9 @@
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
from libcpp cimport bool
from cpython.mem cimport PyMem_Malloc, PyMem_Free
from libc.string cimport strdup
####################################################################################################
# CLBlast and OpenCL data-types
@ -2083,3 +2085,39 @@ def trsm(queue, m, n, a, b, a_ld, b_ld, alpha = 1.0, right_side = False, lower_t
return cl.Event.from_int_ptr(<size_t>event)
####################################################################################################
# Overrides the parameters
####################################################################################################
# C-level declarations pulled in from the CLBlast C header; used by override_parameters.
cdef extern from "clblast_c.h":
# Struct behind the device handle; its fields are not declared here, only pointers are used.
ctypedef struct _cl_device_id:
pass
ctypedef _cl_device_id* cl_device_id
# Takes the parameters as two arrays of length num_parameters (names and values, matched by
# index) and returns a CLBlastStatusCode.
CLBlastStatusCode CLBlastOverrideParameters(const cl_device_id device, const char* kernel_name, const CLBlastPrecision precision, const size_t num_parameters, const char** parameters_names, const size_t* parameters_values)
def override_parameters(device, kernel_name, precision, parameters):
    """
    Override the current parameters for the given kernel, on this device, with this precision.

    Arguments:
        device: object whose `int_ptr` attribute holds the OpenCL device handle as an integer.
        kernel_name: name of the kernel to override, as an ASCII-encodable string.
        precision: value forwarded to CLBlast as its `CLBlastPrecision` argument.
        parameters: dictionary mapping ASCII-encodable parameter names to non-negative integers.

    Raises:
        MemoryError: if either temporary C array cannot be allocated.
        UnicodeEncodeError: if the kernel name or a parameter name is not ASCII.
        RuntimeError: if CLBlast reports a status other than success.
    """
    cdef cl_device_id device_id = <cl_device_id><size_t>device.int_ptr

    # read the parameters dictionary into names/values arrays, for use in CLBlastOverrideParameters
    cdef size_t n = len(parameters)
    cdef const char **parameter_names = <const char**> PyMem_Malloc(n * sizeof(char*))
    cdef size_t *parameter_values = <size_t*> PyMem_Malloc(n * sizeof(size_t))

    # Owns the encoded names: each C pointer stored in parameter_names points into one of these
    # bytes objects, so they must stay alive until the CLBlast call has returned. Keeping them in
    # a Python list (instead of strdup'ing them) means there is nothing to free by hand.
    encoded_names = []

    try:
        # Either allocation may fail independently, so both pointers have to be checked.
        if parameter_names == NULL or parameter_values == NULL:
            raise MemoryError()

        for i, (k, v) in enumerate(parameters.items()):
            name_bytes = k.encode('ascii')
            encoded_names.append(name_bytes)
            parameter_names[i] = name_bytes
            parameter_values[i] = v

        # call the underlying API
        kernel_name_bytes = kernel_name.encode('ascii')
        err = CLBlastOverrideParameters(device_id, kernel_name_bytes, precision, n, parameter_names, parameter_values)
        if err != CLBlastSuccess:
            raise RuntimeError("PyCLBlast: 'OverrideParameters' failed: %s" % get_status_message(err))
    finally:
        # tidy up on every path, including errors; PyMem_Free accepts NULL and then does nothing
        PyMem_Free(parameter_names)
        PyMem_Free(parameter_values)
####################################################################################################