Merge pull request #10 from CNugteren/test_infrastructure

Re-organized test infrastructure
This commit is contained in:
Cedric Nugteren 2015-06-29 20:45:10 +02:00
commit cbf2eef179
38 changed files with 1736 additions and 2716 deletions

View file

@ -1,5 +1,6 @@
Development version (next release)
- Re-organized test/client infrastructure to avoid code duplication
- Added level-3 routines:
* SSYRK/DSYRK/CSYRK/ZSYRK
* SSYR2K/DSYR2K/CSYR2K/ZSYR2K

View file

@ -95,12 +95,10 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS})
# Sets the supported routines and the used kernels. New routines and kernels should be added here.
set(KERNELS copy pad transpose padtranspose xaxpy xgemv xgemm)
set(SAMPLE_PROGRAMS sgemm)
set(ROUTINES_XY xaxpy)
set(ROUTINES_AXY xgemv)
set(ROUTINES_ABC xgemm xsymm xsyr2k)
set(ROUTINES_AB )
set(ROUTINES_AC xsyrk)
set(ROUTINES ${ROUTINES_XY} ${ROUTINES_AXY} ${ROUTINES_ABC} ${ROUTINES_AB} ${ROUTINES_AC})
set(ROUTINES
xaxpy
xgemv
xgemm xsymm xsyrk xsyr2k)
# ==================================================================================================
@ -170,45 +168,14 @@ if(TESTS)
include_directories(${clblast_SOURCE_DIR}/test ${clBLAS_SOURCE_DIR})
# Creates the common correctness-tests objects (requires CMake 2.8.8)
add_library(test_correctness_common OBJECT test/correctness/tester.cc)
add_library(test_correctness_xy OBJECT test/correctness/testxy.cc)
add_library(test_correctness_axy OBJECT test/correctness/testaxy.cc)
add_library(test_correctness_abc OBJECT test/correctness/testabc.cc)
add_library(test_correctness_ab OBJECT test/correctness/testab.cc)
add_library(test_correctness_ac OBJECT test/correctness/testac.cc)
add_library(test_correctness_common OBJECT
test/correctness/tester.cc test/correctness/testblas.cc)
# Compiles the correctness-tests
foreach(ROUTINE ${ROUTINES_XY})
add_executable(test_${ROUTINE}
$<TARGET_OBJECTS:test_correctness_common>
$<TARGET_OBJECTS:test_correctness_xy>
test/correctness/routines/${ROUTINE}.cc)
endforeach()
foreach(ROUTINE ${ROUTINES_AXY})
add_executable(test_${ROUTINE}
$<TARGET_OBJECTS:test_correctness_common>
$<TARGET_OBJECTS:test_correctness_axy>
test/correctness/routines/${ROUTINE}.cc)
endforeach()
foreach(ROUTINE ${ROUTINES_ABC})
add_executable(test_${ROUTINE}
$<TARGET_OBJECTS:test_correctness_common>
$<TARGET_OBJECTS:test_correctness_abc>
test/correctness/routines/${ROUTINE}.cc)
endforeach()
foreach(ROUTINE ${ROUTINES_AB})
add_executable(test_${ROUTINE}
$<TARGET_OBJECTS:test_correctness_common>
$<TARGET_OBJECTS:test_correctness_ab>
test/correctness/routines/${ROUTINE}.cc)
endforeach()
foreach(ROUTINE ${ROUTINES_AC})
add_executable(test_${ROUTINE}
$<TARGET_OBJECTS:test_correctness_common>
$<TARGET_OBJECTS:test_correctness_ac>
test/correctness/routines/${ROUTINE}.cc)
endforeach()
foreach(ROUTINE ${ROUTINES})
add_executable(test_${ROUTINE}
$<TARGET_OBJECTS:test_correctness_common>
test/correctness/routines/${ROUTINE}.cc)
target_link_libraries(test_${ROUTINE} clBLAS clblast ${OPENCL_LIBRARIES})
install(TARGETS test_${ROUTINE} DESTINATION bin)
endforeach()
@ -217,7 +184,6 @@ if(TESTS)
add_library(test_performance_common OBJECT test/performance/client.cc)
# Compiles the performance-tests
set(TEST_PERF_COMM )
foreach(ROUTINE ${ROUTINES})
add_executable(client_${ROUTINE} $<TARGET_OBJECTS:test_performance_common>
test/performance/routines/${ROUTINE}.cc)

View file

@ -105,6 +105,11 @@ struct Arguments {
size_t c_offset = 0;
T alpha = T{1.0};
T beta = T{1.0};
size_t x_size = 1;
size_t y_size = 1;
size_t a_size = 1;
size_t b_size = 1;
size_t c_size = 1;
// Tuner-specific arguments
double fraction = 1.0;
// Client-specific arguments
@ -123,6 +128,15 @@ struct Arguments {
bool no_abbrv = false;
};
// Structure containing all possible buffers for test clients. It groups one Buffer per operand
// name used by the routines (vectors x/y and matrices a/b/c); a given routine is expected to touch
// only the subset it needs (e.g. x/y for Xaxpy, a/x/y for Xgemv, a/b/c for Xgemm).
// NOTE(review): how members that a routine does not use are initialized is not visible here --
// confirm against the code that constructs this structure.
struct Buffers {
Buffer x_vec; // Buffer for the 'x' vector operand
Buffer y_vec; // Buffer for the 'y' vector operand
Buffer a_mat; // Buffer for the 'a' matrix operand
Buffer b_mat; // Buffer for the 'b' matrix operand
Buffer c_mat; // Buffer for the 'c' matrix operand
};
// =================================================================================================
// Converts a value (e.g. an integer) to a string. This also covers special cases for CLBlast

View file

@ -1,63 +1,69 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the tests for the Xaxpy routine. It is based on the TestXY class.
// This file implements the tests for the Xaxpy routine.
//
// =================================================================================================
#include "wrapper_clblas.h"
#include "correctness/testxy.h"
#include "correctness/testblas.h"
#include "routines/xaxpy.h"
namespace clblast {
// =================================================================================================
// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison.
// The correctness tester
template <typename T>
void XaxpyTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates the CLBlast lambda
auto clblast_lambda = [](const Arguments<T> &args,
const Buffer &x_vec, const Buffer &y_vec,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
return Axpy(args.n, args.alpha,
x_vec(), args.x_offset, args.x_inc,
y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [](const Arguments<T> &args,
const Buffer &x_vec, const Buffer &y_vec,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXaxpy(args.n, args.alpha,
x_vec(), args.x_offset, args.x_inc,
y_vec(), args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
return static_cast<StatusCode>(status);
};
// Initializes the arguments relevant for this routine
auto args = Arguments<T>{};
const auto options = std::vector<std::string>{kArgN, kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset, kArgAlpha};
void RunTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates a tester
TestXY<T> tester{argc, argv, silent, name, options, clblast_lambda, clblas_lambda};
TestBlas<T> tester{argc, argv, silent, name, TestXaxpy<T>::GetOptions(),
TestXaxpy<T>::RunRoutine, TestXaxpy<T>::RunReference,
TestXaxpy<T>::DownloadResult, TestXaxpy<T>::GetResultIndex,
TestXaxpy<T>::ResultID1, TestXaxpy<T>::ResultID2};
// This variable holds the arguments relevant for this routine
auto args = Arguments<T>{};
// Creates the arguments vector for the regular tests
auto regular_test_vector = std::vector<Arguments<T>>{};
for (auto &n: tester.kVectorDims) { args.n = n;
for (auto &x_inc: tester.kIncrements) { args.x_inc = x_inc;
for (auto &x_offset: tester.kOffsets) { args.x_offset = x_offset;
for (auto &y_inc: tester.kIncrements) { args.y_inc = y_inc;
for (auto &y_offset: tester.kOffsets) { args.y_offset = y_offset;
for (auto &alpha: tester.kAlphaValues) { args.alpha = alpha;
args.x_size = TestXaxpy<T>::GetSizeX(args);
args.y_size = TestXaxpy<T>::GetSizeY(args);
if (args.x_size<1 || args.y_size<1) { continue; }
regular_test_vector.push_back(args);
}
}
}
}
}
}
// Creates the arguments vector for the invalid-buffer tests
auto invalid_test_vector = std::vector<Arguments<T>>{};
args.n = tester.kBufferSize;
args.x_inc = args.y_inc = 1;
args.x_offset = args.y_offset = 0;
for (auto &x_size: tester.kVecSizes) { args.x_size = x_size;
for (auto &y_size: tester.kVecSizes) { args.y_size = y_size;
invalid_test_vector.push_back(args);
}
}
// Runs the tests
const auto case_name = "default";
tester.TestRegular(args, case_name);
tester.TestInvalidBufferSizes(args, case_name);
tester.TestRegular(regular_test_vector, case_name);
tester.TestInvalid(invalid_test_vector, case_name);
}
// =================================================================================================
@ -65,10 +71,10 @@ void XaxpyTest(int argc, char *argv[], const bool silent, const std::string &nam
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
clblast::XaxpyTest<float>(argc, argv, false, "SAXPY");
clblast::XaxpyTest<double>(argc, argv, true, "DAXPY");
clblast::XaxpyTest<clblast::float2>(argc, argv, true, "CAXPY");
clblast::XaxpyTest<clblast::double2>(argc, argv, true, "ZAXPY");
clblast::RunTest<float>(argc, argv, false, "SAXPY");
clblast::RunTest<double>(argc, argv, true, "DAXPY");
clblast::RunTest<clblast::float2>(argc, argv, true, "CAXPY");
clblast::RunTest<clblast::double2>(argc, argv, true, "ZAXPY");
return 0;
}

View file

@ -1,83 +1,87 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the tests for the Xgemm routine. It is based on the TestABC class.
// This file implements the tests for the Xgemm routine.
//
// =================================================================================================
#include "wrapper_clblas.h"
#include "correctness/testabc.h"
#include "correctness/testblas.h"
#include "routines/xgemm.h"
namespace clblast {
// =================================================================================================
// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison.
// The correctness tester
template <typename T>
void XgemmTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates the CLBlast lambda
auto clblast_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
return Gemm(args.layout, args.a_transpose, args.b_transpose,
args.m, args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
b_mat(), args.b_offset, args.b_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgemm(static_cast<clblasOrder>(args.layout),
static_cast<clblasTranspose>(args.a_transpose),
static_cast<clblasTranspose>(args.b_transpose),
args.m, args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
b_mat(), args.b_offset, args.b_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
return static_cast<StatusCode>(status);
};
// Initializes the arguments relevant for this routine
auto args = Arguments<T>{};
const auto options = std::vector<std::string>{kArgM, kArgN, kArgK, kArgLayout,
kArgATransp, kArgBTransp,
kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
kArgAOffset, kArgBOffset, kArgCOffset};
void RunTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates a tester
TestABC<T> tester{argc, argv, silent, name, options, clblast_lambda, clblas_lambda};
TestBlas<T> tester{argc, argv, silent, name, TestXgemm<T>::GetOptions(),
TestXgemm<T>::RunRoutine, TestXgemm<T>::RunReference,
TestXgemm<T>::DownloadResult, TestXgemm<T>::GetResultIndex,
TestXgemm<T>::ResultID1, TestXgemm<T>::ResultID2};
// This variable holds the arguments relevant for this routine
auto args = Arguments<T>{};
// Loops over the test-cases from a data-layout point of view
for (auto &layout: tester.kLayouts) {
args.layout = layout;
for (auto &a_transpose: tester.kTransposes) {
args.a_transpose = a_transpose;
for (auto &b_transpose: tester.kTransposes) {
args.b_transpose = b_transpose;
const auto case_name = ToString(layout)+" "+ToString(a_transpose)+" "+ToString(b_transpose);
for (auto &layout: tester.kLayouts) { args.layout = layout;
for (auto &a_transpose: tester.kTransposes) { args.a_transpose = a_transpose;
for (auto &b_transpose: tester.kTransposes) { args.b_transpose = b_transpose;
// Creates the arguments vector for the regular tests
auto regular_test_vector = std::vector<Arguments<T>>{};
for (auto &m: tester.kMatrixDims) { args.m = m;
for (auto &n: tester.kMatrixDims) { args.n = n;
for (auto &k: tester.kMatrixDims) { args.k = k;
for (auto &a_ld: tester.kMatrixDims) { args.a_ld = a_ld;
for (auto &a_offset: tester.kOffsets) { args.a_offset = a_offset;
for (auto &b_ld: tester.kMatrixDims) { args.b_ld = b_ld;
for (auto &b_offset: tester.kOffsets) { args.b_offset = b_offset;
for (auto &c_ld: tester.kMatrixDims) { args.c_ld = c_ld;
for (auto &c_offset: tester.kOffsets) { args.c_offset = c_offset;
for (auto &alpha: tester.kAlphaValues) { args.alpha = alpha;
for (auto &beta: tester.kBetaValues) { args.beta = beta;
args.a_size = TestXgemm<T>::GetSizeA(args);
args.b_size = TestXgemm<T>::GetSizeB(args);
args.c_size = TestXgemm<T>::GetSizeC(args);
if (args.a_size<1 || args.b_size<1 || args.c_size<1) { continue; }
regular_test_vector.push_back(args);
}
}
}
}
}
}
}
}
}
}
}
// Creates the arguments vector for the invalid-buffer tests
auto invalid_test_vector = std::vector<Arguments<T>>{};
args.m = args.n = args.k = tester.kBufferSize;
args.a_ld = args.b_ld = args.c_ld = tester.kBufferSize;
args.a_offset = args.b_offset = args.c_offset = 0;
for (auto &a_size: tester.kMatSizes) { args.a_size = a_size;
for (auto &b_size: tester.kMatSizes) { args.b_size = b_size;
for (auto &c_size: tester.kMatSizes) { args.c_size = c_size;
invalid_test_vector.push_back(args);
}
}
}
// Runs the tests
tester.TestRegular(args, case_name, false);
tester.TestInvalidBufferSizes(args, case_name);
const auto case_name = ToString(layout)+" "+ToString(a_transpose)+" "+ToString(b_transpose);
tester.TestRegular(regular_test_vector, case_name);
tester.TestInvalid(invalid_test_vector, case_name);
}
}
}
@ -88,10 +92,10 @@ void XgemmTest(int argc, char *argv[], const bool silent, const std::string &nam
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
clblast::XgemmTest<float>(argc, argv, false, "SGEMM");
clblast::XgemmTest<double>(argc, argv, true, "DGEMM");
clblast::XgemmTest<clblast::float2>(argc, argv, true, "CGEMM");
clblast::XgemmTest<clblast::double2>(argc, argv, true, "ZGEMM");
clblast::RunTest<float>(argc, argv, false, "SGEMM");
clblast::RunTest<double>(argc, argv, true, "DGEMM");
clblast::RunTest<clblast::float2>(argc, argv, true, "CGEMM");
clblast::RunTest<clblast::double2>(argc, argv, true, "ZGEMM");
return 0;
}

View file

@ -1,74 +1,85 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the tests for the Xgemv routine. It is based on the TestAXY class.
// This file implements the tests for the Xgemv routine.
//
// =================================================================================================
#include "wrapper_clblas.h"
#include "correctness/testaxy.h"
#include "correctness/testblas.h"
#include "routines/xgemv.h"
namespace clblast {
// =================================================================================================
// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison.
// The correctness tester
template <typename T>
void XgemvTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates the CLBlast lambda
auto clblast_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &x_vec, const Buffer &y_vec,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
return Gemv(args.layout, args.a_transpose, args.m, args.n, args.alpha,
a_mat(), args.a_offset, args.a_ld,
x_vec(), args.x_offset, args.x_inc, args.beta,
y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &x_vec, const Buffer &y_vec,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXgemv(static_cast<clblasOrder>(args.layout),
static_cast<clblasTranspose>(args.a_transpose),
args.m, args.n, args.alpha,
a_mat(), args.a_offset, args.a_ld,
x_vec(), args.x_offset, args.x_inc, args.beta,
y_vec(), args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
return static_cast<StatusCode>(status);
};
// Initializes the arguments relevant for this routine
auto args = Arguments<T>{};
const auto options = std::vector<std::string>{kArgM, kArgN, kArgLayout, kArgATransp,
kArgALeadDim, kArgXInc, kArgYInc,
kArgAOffset, kArgXOffset, kArgYOffset};
void RunTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates a tester
TestAXY<T> tester{argc, argv, silent, name, options, clblast_lambda, clblas_lambda};
TestBlas<T> tester{argc, argv, silent, name, TestXgemv<T>::GetOptions(),
TestXgemv<T>::RunRoutine, TestXgemv<T>::RunReference,
TestXgemv<T>::DownloadResult, TestXgemv<T>::GetResultIndex,
TestXgemv<T>::ResultID1, TestXgemv<T>::ResultID2};
// This variable holds the arguments relevant for this routine
auto args = Arguments<T>{};
// Loops over the test-cases from a data-layout point of view
for (auto &layout: tester.kLayouts) {
args.layout = layout;
for (auto &a_transpose: tester.kTransposes) {
args.a_transpose = a_transpose;
const auto case_name = ToString(layout)+" "+ToString(a_transpose);
for (auto &layout: tester.kLayouts) { args.layout = layout;
for (auto &a_transpose: tester.kTransposes) { args.a_transpose = a_transpose;
// Creates the arguments vector for the regular tests
auto regular_test_vector = std::vector<Arguments<T>>{};
for (auto &m: tester.kMatrixVectorDims) { args.m = m;
for (auto &n: tester.kMatrixVectorDims) { args.n = n;
for (auto &a_ld: tester.kMatrixVectorDims) { args.a_ld = a_ld;
for (auto &a_offset: tester.kOffsets) { args.a_offset = a_offset;
for (auto &x_inc: tester.kIncrements) { args.x_inc = x_inc;
for (auto &x_offset: tester.kOffsets) { args.x_offset = x_offset;
for (auto &y_inc: tester.kIncrements) { args.y_inc = y_inc;
for (auto &y_offset: tester.kOffsets) { args.y_offset = y_offset;
for (auto &alpha: tester.kAlphaValues) { args.alpha = alpha;
for (auto &beta: tester.kBetaValues) { args.beta = beta;
args.a_size = TestXgemv<T>::GetSizeA(args);
args.x_size = TestXgemv<T>::GetSizeX(args);
args.y_size = TestXgemv<T>::GetSizeY(args);
if (args.a_size<1 || args.x_size<1 || args.y_size<1) { continue; }
regular_test_vector.push_back(args);
}
}
}
}
}
}
}
}
}
}
// Creates the arguments vector for the invalid-buffer tests
auto invalid_test_vector = std::vector<Arguments<T>>{};
args.m = args.n = tester.kBufferSize;
args.a_ld = tester.kBufferSize;
args.x_inc = args.y_inc = 1;
args.a_offset = args.x_offset = args.y_offset = 0;
for (auto &a_size: tester.kMatSizes) { args.a_size = a_size;
for (auto &x_size: tester.kVecSizes) { args.x_size = x_size;
for (auto &y_size: tester.kVecSizes) { args.y_size = y_size;
invalid_test_vector.push_back(args);
}
}
}
// Runs the tests
tester.TestRegular(args, case_name);
tester.TestInvalidBufferSizes(args, case_name);
const auto case_name = ToString(layout)+" "+ToString(a_transpose);
tester.TestRegular(regular_test_vector, case_name);
tester.TestInvalid(invalid_test_vector, case_name);
}
}
}
@ -78,10 +89,10 @@ void XgemvTest(int argc, char *argv[], const bool silent, const std::string &nam
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
clblast::XgemvTest<float>(argc, argv, false, "SGEMV");
clblast::XgemvTest<double>(argc, argv, true, "DGEMV");
clblast::XgemvTest<clblast::float2>(argc, argv, true, "CGEMV");
clblast::XgemvTest<clblast::double2>(argc, argv, true, "ZGEMV");
clblast::RunTest<float>(argc, argv, false, "SGEMV");
clblast::RunTest<double>(argc, argv, true, "DGEMV");
clblast::RunTest<clblast::float2>(argc, argv, true, "CGEMV");
clblast::RunTest<clblast::double2>(argc, argv, true, "ZGEMV");
return 0;
}

View file

@ -1,83 +1,85 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the tests for the Xsymm routine. It is based on the TestABC class.
// This file implements the tests for the Xsymm routine.
//
// =================================================================================================
#include "wrapper_clblas.h"
#include "correctness/testabc.h"
#include "correctness/testblas.h"
#include "routines/xsymm.h"
namespace clblast {
// =================================================================================================
// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison.
// The correctness tester
template <typename T>
void XsymmTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates the CLBlast lambda
auto clblast_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
return Symm(args.layout, args.side, args.triangle,
args.m, args.n,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
b_mat(), args.b_offset, args.b_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsymm(static_cast<clblasOrder>(args.layout),
static_cast<clblasSide>(args.side),
static_cast<clblasUplo>(args.triangle),
args.m, args.n,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
b_mat(), args.b_offset, args.b_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
return static_cast<StatusCode>(status);
};
// Initializes the arguments relevant for this routine
auto args = Arguments<T>{};
const auto options = std::vector<std::string>{kArgM, kArgN, kArgLayout,
kArgSide, kArgTriangle,
kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
kArgAOffset, kArgBOffset, kArgCOffset};
void RunTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates a tester
TestABC<T> tester{argc, argv, silent, name, options, clblast_lambda, clblas_lambda};
TestBlas<T> tester{argc, argv, silent, name, TestXsymm<T>::GetOptions(),
TestXsymm<T>::RunRoutine, TestXsymm<T>::RunReference,
TestXsymm<T>::DownloadResult, TestXsymm<T>::GetResultIndex,
TestXsymm<T>::ResultID1, TestXsymm<T>::ResultID2};
// This variable holds the arguments relevant for this routine
auto args = Arguments<T>{};
// Loops over the test-cases from a data-layout point of view
for (auto &layout: tester.kLayouts) {
args.layout = layout;
for (auto &side: {Side::kLeft, Side::kRight}) {
args.side = side;
for (auto &triangle: {Triangle::kUpper, Triangle::kLower}) {
args.triangle = triangle;
const auto case_name = ToString(layout)+" "+ToString(side)+" "+ToString(triangle);
for (auto &layout: tester.kLayouts) { args.layout = layout;
for (auto &side: tester.kSides) { args.side = side;
for (auto &triangle: tester.kTriangles) { args.triangle = triangle;
// Creates the arguments vector for the regular tests
auto regular_test_vector = std::vector<Arguments<T>>{};
for (auto &m: tester.kMatrixDims) { args.m = m;
for (auto &n: tester.kMatrixDims) { args.n = n;
for (auto &a_ld: tester.kMatrixDims) { args.a_ld = a_ld;
for (auto &a_offset: tester.kOffsets) { args.a_offset = a_offset;
for (auto &b_ld: tester.kMatrixDims) { args.b_ld = b_ld;
for (auto &b_offset: tester.kOffsets) { args.b_offset = b_offset;
for (auto &c_ld: tester.kMatrixDims) { args.c_ld = c_ld;
for (auto &c_offset: tester.kOffsets) { args.c_offset = c_offset;
for (auto &alpha: tester.kAlphaValues) { args.alpha = alpha;
for (auto &beta: tester.kBetaValues) { args.beta = beta;
args.a_size = TestXsymm<T>::GetSizeA(args);
args.b_size = TestXsymm<T>::GetSizeB(args);
args.c_size = TestXsymm<T>::GetSizeC(args);
if (args.a_size<1 || args.b_size<1 || args.c_size<1) { continue; }
regular_test_vector.push_back(args);
}
}
}
}
}
}
}
}
}
}
// Creates the arguments vector for the invalid-buffer tests
auto invalid_test_vector = std::vector<Arguments<T>>{};
args.m = args.n = tester.kBufferSize;
args.a_ld = args.b_ld = args.c_ld = tester.kBufferSize;
args.a_offset = args.b_offset = args.c_offset = 0;
for (auto &a_size: tester.kMatSizes) { args.a_size = a_size;
for (auto &b_size: tester.kMatSizes) { args.b_size = b_size;
for (auto &c_size: tester.kMatSizes) { args.c_size = c_size;
invalid_test_vector.push_back(args);
}
}
}
// Runs the tests
tester.TestRegular(args, case_name, true);
tester.TestInvalidBufferSizes(args, case_name);
const auto case_name = ToString(layout)+" "+ToString(side)+" "+ToString(triangle);
tester.TestRegular(regular_test_vector, case_name);
tester.TestInvalid(invalid_test_vector, case_name);
}
}
}
@ -88,10 +90,10 @@ void XsymmTest(int argc, char *argv[], const bool silent, const std::string &nam
// Main function (not within the clblast namespace)
int main(int argc, char *argv[]) {
clblast::XsymmTest<float>(argc, argv, false, "SSYMM");
clblast::XsymmTest<double>(argc, argv, true, "DSYMM");
clblast::XsymmTest<clblast::float2>(argc, argv, true, "CSYMM");
clblast::XsymmTest<clblast::double2>(argc, argv, true, "ZSYMM");
clblast::RunTest<float>(argc, argv, false, "SSYMM");
clblast::RunTest<double>(argc, argv, true, "DSYMM");
clblast::RunTest<clblast::float2>(argc, argv, true, "CSYMM");
clblast::RunTest<clblast::double2>(argc, argv, true, "ZSYMM");
return 0;
}

View file

@ -1,84 +1,87 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the tests for the Xsyr2k routine. It is based on the TestABC class.
// This file implements the tests for the Xsyr2k routine.
//
// =================================================================================================
#include "wrapper_clblas.h"
#include "correctness/testabc.h"
#include "correctness/testblas.h"
#include "routines/xsyr2k.h"
namespace clblast {
// =================================================================================================
// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison.
// The correctness tester
template <typename T>
void Xsyr2kTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates the CLBlast lambda
auto clblast_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
return Syr2k(args.layout, args.triangle, args.a_transpose,
args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
b_mat(), args.b_offset, args.b_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyr2k(static_cast<clblasOrder>(args.layout),
static_cast<clblasUplo>(args.triangle),
static_cast<clblasTranspose>(args.a_transpose),
args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
b_mat(), args.b_offset, args.b_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
return static_cast<StatusCode>(status);
};
// Initializes the arguments relevant for this routine
auto args = Arguments<T>{};
const auto options = std::vector<std::string>{kArgN, kArgK, kArgLayout,
kArgTriangle, kArgATransp,
kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
kArgAOffset, kArgBOffset, kArgCOffset};
void RunTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates a tester
TestABC<T> tester{argc, argv, silent, name, options, clblast_lambda, clblas_lambda};
TestBlas<T> tester{argc, argv, silent, name, TestXsyr2k<T>::GetOptions(),
TestXsyr2k<T>::RunRoutine, TestXsyr2k<T>::RunReference,
TestXsyr2k<T>::DownloadResult, TestXsyr2k<T>::GetResultIndex,
TestXsyr2k<T>::ResultID1, TestXsyr2k<T>::ResultID2};
// This variable holds the arguments relevant for this routine
auto args = Arguments<T>{};
// Loops over the test-cases from a data-layout point of view
for (auto &layout: tester.kLayouts) {
args.layout = layout;
for (auto &triangle: {Triangle::kUpper, Triangle::kLower}) {
args.triangle = triangle;
for (auto &ab_transpose: {Transpose::kNo, Transpose::kYes}) { // No conjugate here since it is
args.a_transpose = ab_transpose; // not supported by clBLAS
for (auto &layout: tester.kLayouts) { args.layout = layout;
for (auto &triangle: tester.kTriangles) { args.triangle = triangle;
for (auto &ab_transpose: {Transpose::kNo, Transpose::kYes}) { // No conjugate here since it
args.a_transpose = ab_transpose; // is not supported by clBLAS
args.b_transpose = ab_transpose;
const auto case_name = ToString(layout)+" "+ToString(triangle)+" "+ToString(ab_transpose);
// Creates the arguments vector for the regular tests
auto regular_test_vector = std::vector<Arguments<T>>{};
for (auto &n: tester.kMatrixDims) { args.n = n;
for (auto &k: tester.kMatrixDims) { args.k = k;
for (auto &a_ld: tester.kMatrixDims) { args.a_ld = a_ld;
for (auto &a_offset: tester.kOffsets) { args.a_offset = a_offset;
for (auto &b_ld: tester.kMatrixDims) { args.b_ld = b_ld;
for (auto &b_offset: tester.kOffsets) { args.b_offset = b_offset;
for (auto &c_ld: tester.kMatrixDims) { args.c_ld = c_ld;
for (auto &c_offset: tester.kOffsets) { args.c_offset = c_offset;
for (auto &alpha: tester.kAlphaValues) { args.alpha = alpha;
for (auto &beta: tester.kBetaValues) { args.beta = beta;
args.a_size = TestXsyr2k<T>::GetSizeA(args);
args.b_size = TestXsyr2k<T>::GetSizeB(args);
args.c_size = TestXsyr2k<T>::GetSizeC(args);
if (args.a_size<1 || args.b_size<1 || args.c_size<1) { continue; }
regular_test_vector.push_back(args);
}
}
}
}
}
}
}
}
}
}
// Creates the arguments vector for the invalid-buffer tests
auto invalid_test_vector = std::vector<Arguments<T>>{};
args.n = args.k = tester.kBufferSize;
args.a_ld = args.b_ld = args.c_ld = tester.kBufferSize;
args.a_offset = args.b_offset = args.c_offset = 0;
for (auto &a_size: tester.kMatSizes) { args.a_size = a_size;
for (auto &b_size: tester.kMatSizes) { args.b_size = b_size;
for (auto &c_size: tester.kMatSizes) { args.c_size = c_size;
invalid_test_vector.push_back(args);
}
}
}
// Runs the tests
tester.TestRegular(args, case_name, true);
tester.TestInvalidBufferSizes(args, case_name);
const auto case_name = ToString(layout)+" "+ToString(triangle)+" "+ToString(ab_transpose);
tester.TestRegular(regular_test_vector, case_name);
tester.TestInvalid(invalid_test_vector, case_name);
}
}
}
@ -89,10 +92,10 @@ void Xsyr2kTest(int argc, char *argv[], const bool silent, const std::string &na
// Main function (not within the clblast namespace). Invokes the SYR2K test entry points for the
// float, double, float2 and double2 data-types and always returns 0. Within each group of calls
// only the float one passes false as the third argument; the others pass true.
int main(int argc, char *argv[]) {
clblast::Xsyr2kTest<float>(argc, argv, false, "SSYR2K");
clblast::Xsyr2kTest<double>(argc, argv, true, "DSYR2K");
clblast::Xsyr2kTest<clblast::float2>(argc, argv, true, "CSYR2K");
clblast::Xsyr2kTest<clblast::double2>(argc, argv, true, "ZSYR2K");
clblast::RunTest<float>(argc, argv, false, "SSYR2K");
clblast::RunTest<double>(argc, argv, true, "DSYR2K");
clblast::RunTest<clblast::float2>(argc, argv, true, "CSYR2K");
clblast::RunTest<clblast::double2>(argc, argv, true, "ZSYR2K");
return 0;
}

View file

@ -1,81 +1,79 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the tests for the Xsyrk routine. It is based on the TestAC class.
// This file implements the tests for the Xsyrk routine.
//
// =================================================================================================
#include "wrapper_clblas.h"
#include "correctness/testac.h"
#include "correctness/testblas.h"
#include "routines/xsyrk.h"
namespace clblast {
// =================================================================================================
// The correctness tester, containing the function calls to CLBlast and to clBLAS for comparison.
// The correctness tester
template <typename T>
void XsyrkTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates the CLBlast lambda
auto clblast_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
return Syrk(args.layout, args.triangle, args.a_transpose,
args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [](const Arguments<T> &args,
const Buffer &a_mat, const Buffer &c_mat,
CommandQueue &queue) -> StatusCode {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyrk(static_cast<clblasOrder>(args.layout),
static_cast<clblasUplo>(args.triangle),
static_cast<clblasTranspose>(args.a_transpose),
args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
return static_cast<StatusCode>(status);
};
// Initializes the arguments relevant for this routine
auto args = Arguments<T>{};
const auto options = std::vector<std::string>{kArgN, kArgK, kArgLayout,
kArgTriangle, kArgATransp,
kArgALeadDim, kArgCLeadDim,
kArgAOffset, kArgCOffset};
void RunTest(int argc, char *argv[], const bool silent, const std::string &name) {
// Creates a tester
TestAC<T> tester{argc, argv, silent, name, options, clblast_lambda, clblas_lambda};
TestBlas<T> tester{argc, argv, silent, name, TestXsyrk<T>::GetOptions(),
TestXsyrk<T>::RunRoutine, TestXsyrk<T>::RunReference,
TestXsyrk<T>::DownloadResult, TestXsyrk<T>::GetResultIndex,
TestXsyrk<T>::ResultID1, TestXsyrk<T>::ResultID2};
// This variable holds the arguments relevant for this routine
auto args = Arguments<T>{};
// Loops over the test-cases from a data-layout point of view
for (auto &layout: tester.kLayouts) {
args.layout = layout;
for (auto &triangle: {Triangle::kUpper, Triangle::kLower}) {
args.triangle = triangle;
for (auto &a_transpose: {Transpose::kNo, Transpose::kYes}) { // No conjugate here since it is
args.a_transpose = a_transpose; // not supported by clBLAS
const auto case_name = ToString(layout)+" "+ToString(triangle)+" "+ToString(a_transpose);
for (auto &layout: tester.kLayouts) { args.layout = layout;
for (auto &triangle: tester.kTriangles) { args.triangle = triangle;
for (auto &a_transpose: {Transpose::kNo, Transpose::kYes}) { // No conjugate here since it
args.a_transpose = a_transpose; // is not supported by clBLAS
// Creates the arguments vector for the regular tests
auto regular_test_vector = std::vector<Arguments<T>>{};
for (auto &n: tester.kMatrixDims) { args.n = n;
for (auto &k: tester.kMatrixDims) { args.k = k;
for (auto &a_ld: tester.kMatrixDims) { args.a_ld = a_ld;
for (auto &a_offset: tester.kOffsets) { args.a_offset = a_offset;
for (auto &c_ld: tester.kMatrixDims) { args.c_ld = c_ld;
for (auto &c_offset: tester.kOffsets) { args.c_offset = c_offset;
for (auto &alpha: tester.kAlphaValues) { args.alpha = alpha;
for (auto &beta: tester.kBetaValues) { args.beta = beta;
args.a_size = TestXsyrk<T>::GetSizeA(args);
args.c_size = TestXsyrk<T>::GetSizeC(args);
if (args.a_size<1 || args.c_size<1) { continue; }
regular_test_vector.push_back(args);
}
}
}
}
}
}
}
}
// Creates the arguments vector for the invalid-buffer tests
auto invalid_test_vector = std::vector<Arguments<T>>{};
args.n = args.k = tester.kBufferSize;
args.a_ld = args.c_ld = tester.kBufferSize;
args.a_offset = args.c_offset = 0;
for (auto &a_size: tester.kMatSizes) { args.a_size = a_size;
for (auto &c_size: tester.kMatSizes) { args.c_size = c_size;
invalid_test_vector.push_back(args);
}
}
// Runs the tests
tester.TestRegular(args, case_name);
tester.TestInvalidBufferSizes(args, case_name);
const auto case_name = ToString(layout)+" "+ToString(triangle)+" "+ToString(a_transpose);
tester.TestRegular(regular_test_vector, case_name);
tester.TestInvalid(invalid_test_vector, case_name);
}
}
}
@ -86,10 +84,10 @@ void XsyrkTest(int argc, char *argv[], const bool silent, const std::string &nam
// Main function (not within the clblast namespace). Invokes the SYRK test entry points for the
// float, double, float2 and double2 data-types and always returns 0. Within each group of calls
// only the float one passes false for the 'silent' parameter; the others pass true.
int main(int argc, char *argv[]) {
clblast::XsyrkTest<float>(argc, argv, false, "SSYRK");
clblast::XsyrkTest<double>(argc, argv, true, "DSYRK");
clblast::XsyrkTest<clblast::float2>(argc, argv, true, "CSYRK");
clblast::XsyrkTest<clblast::double2>(argc, argv, true, "ZSYRK");
clblast::RunTest<float>(argc, argv, false, "SSYRK");
clblast::RunTest<double>(argc, argv, true, "DSYRK");
clblast::RunTest<clblast::float2>(argc, argv, true, "CSYRK");
clblast::RunTest<clblast::double2>(argc, argv, true, "ZSYRK");
return 0;
}

View file

@ -1,192 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the TestAB class (see the header for information about the class).
//
// =================================================================================================
#include <algorithm>
#include "correctness/testab.h"
namespace clblast {
// =================================================================================================
// Constructor: forwards the command-line settings to the Tester base class, stores the two
// routines to compare, and fills the host-side source vectors for matrices A and B.
template <typename T>
TestAB<T>::TestAB(int argc, char *argv[], const bool silent,
                  const std::string &name, const std::vector<std::string> &options,
                  const Routine clblast_lambda, const Routine clblas_lambda):
    Tester<T>{argc, argv, silent, name, options},
    clblast_lambda_(clblast_lambda),
    clblas_lambda_(clblas_lambda) {

  // Both source vectors are sized for the largest dimension, leading dimension and offset that
  // the regular test uses, such that one set of input data serves every test-case.
  const auto largest_dim = *std::max_element(kMatrixDims.begin(), kMatrixDims.end());
  const auto largest_ld = *std::max_element(kMatrixDims.begin(), kMatrixDims.end());
  const auto largest_offset = *std::max_element(kOffsets.begin(), kOffsets.end());
  const auto source_size = largest_dim*largest_ld + largest_offset;

  // Allocates and populates the input data (A first, then B)
  a_source_.resize(source_size);
  b_source_.resize(source_size);
  PopulateVector(a_source_);
  PopulateVector(b_source_);
}
// ===============================================================================================
// Tests the routine for a wide variety of parameters. The caller provides args.layout and
// args.a_transpose; this function overwrites args.m, args.n, args.a_ld, args.a_offset, args.b_ld,
// args.b_offset, args.alpha and args.beta while iterating. For every combination the clBLAS
// reference and the CLBlast routine are run on identical input and their outputs are compared.
template <typename T>
void TestAB<T>::TestRegular(Arguments<T> &args, const std::string &name) {
// Skips the whole test when the precision is reported as unsupported
if (!PrecisionSupported()) { return; }
TestStart("regular behaviour", name);
// Computes whether or not the matrices are transposed. Note that we assume a default of
// column-major and no-transpose. If one of them is different (but not both), then rotated
// is considered true.
auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
(args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
auto b_rotated = (args.layout == Layout::kRowMajor);
// Iterates over the matrix dimensions
for (auto &m: kMatrixDims) {
args.m = m;
for (auto &n: kMatrixDims) {
args.n = n;
// Computes the second dimensions of the matrices taking the rotation into account
// NOTE(review): both branches of a_two yield n, so a_rotated currently has no effect on the
// size of A - confirm whether one of the branches was meant to be m.
auto a_two = (a_rotated) ? n : n;
auto b_two = (b_rotated) ? m : n;
// Iterates over the leading-dimension values and the offsets
for (auto &a_ld: kMatrixDims) {
args.a_ld = a_ld;
for (auto &a_offset: kOffsets) {
args.a_offset = a_offset;
for (auto &b_ld: kMatrixDims) {
args.b_ld = b_ld;
for (auto &b_offset: kOffsets) {
args.b_offset = b_offset;
// Computes the buffer sizes (in elements) and skips combinations with an empty buffer
auto a_size = a_two * a_ld + a_offset;
auto b_size = b_two * b_ld + b_offset;
if (a_size < 1 || b_size < 1) { continue; }
// Creates the OpenCL buffers: the input A plus one output buffer per implementation
// (r_mat for the clBLAS reference, s_mat for CLBlast)
auto a_mat = Buffer(context_, CL_MEM_READ_WRITE, a_size*sizeof(T));
auto r_mat = Buffer(context_, CL_MEM_READ_WRITE, b_size*sizeof(T));
auto s_mat = Buffer(context_, CL_MEM_READ_WRITE, b_size*sizeof(T));
// Iterates over the values for alpha and beta
for (auto &alpha: kAlphaValues) {
args.alpha = alpha;
for (auto &beta: kBetaValues) {
args.beta = beta;
// Runs the reference clBLAS code (buffers are re-uploaded first so that both runs start
// from the same source data)
a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
r_mat.WriteBuffer(queue_, b_size*sizeof(T), b_source_);
auto status1 = clblas_lambda_(args, a_mat, r_mat, queue_);
// Runs the CLBlast code
a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
s_mat.WriteBuffer(queue_, b_size*sizeof(T), b_source_);
auto status2 = clblast_lambda_(args, a_mat, s_mat, queue_);
// If either routine did not succeed, only the two status codes are tested against each
// other and the comparison of the results is skipped
if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) {
TestErrorCodes(status1, status2, args);
continue;
}
// Downloads the results
std::vector<T> r_result(b_size, static_cast<T>(0));
std::vector<T> s_result(b_size, static_cast<T>(0));
r_mat.ReadBuffer(queue_, b_size*sizeof(T), r_result);
s_mat.ReadBuffer(queue_, b_size*sizeof(T), s_result);
// Checks for differences in the m-by-n output region
// NOTE(review): 'index' is not checked against b_size. This appears to rely on both
// routines returning a non-success status whenever b_ld is too small for the layout;
// otherwise the reads below could run past the end of the result vectors - confirm.
auto errors = size_t{0};
for (auto idm=size_t{0}; idm<m; ++idm) {
for (auto idn=size_t{0}; idn<n; ++idn) {
auto index = (args.layout == Layout::kRowMajor) ?
idm*args.b_ld + idn + args.b_offset:
idn*args.b_ld + idm + args.b_offset;
if (!TestSimilarity(r_result[index], s_result[index])) {
errors++;
}
}
}
// Tests the error count (should be zero)
TestErrorCount(errors, m*n, args);
}
}
}
}
}
}
}
}
TestEnd();
}
// =================================================================================================
// Runs both routines on OpenCL buffers that are empty, one element too small, or exactly large
// enough, and checks that they report the same status code. Only the return values are tested:
// no results are downloaded or compared.
template <typename T>
void TestAB<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("invalid buffer sizes", name);

  // Fixed problem description: square matrices without offsets
  args.m = args.n = kBufferSize;
  args.a_ld = args.b_ld = kBufferSize;
  args.a_offset = args.b_offset = 0;

  // The buffer sizes (in elements) to try for each of the matrices
  const auto exact_size = kBufferSize*kBufferSize;
  const std::vector<size_t> sizes_to_try = {0, exact_size-1, exact_size};
  for (auto &a_elements: sizes_to_try) {
    for (auto &b_elements: sizes_to_try) {

      // The plain OpenCL C API is used on purpose: buffer creation is allowed to fail here, so
      // no error checking is performed on the returned handles.
      auto a_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                         a_elements*sizeof(T), nullptr, nullptr));
      auto r_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                         b_elements*sizeof(T), nullptr, nullptr));
      auto s_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                         b_elements*sizeof(T), nullptr, nullptr));

      // Reference (clBLAS) first, then CLBlast; their status codes have to match
      auto reference_status = clblas_lambda_(args, a_mat, r_mat, queue_);
      auto clblast_status = clblast_lambda_(args, a_mat, s_mat, queue_);
      TestErrorCodes(reference_status, clblast_status, args);
    }
  }
  TestEnd();
}
// =================================================================================================
// Compiles the templated class: explicit instantiations for the float, double, float2 and double2
// data-types
template class TestAB<float>;
template class TestAB<double>;
template class TestAB<float2>;
template class TestAB<double2>;
// =================================================================================================
} // namespace clblast

View file

@ -1,85 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file tests any mat-mat (A,B) routine. It contains two types of tests: one testing
// all sorts of input combinations, and one deliberately testing with invalid values.
//
// =================================================================================================
#ifndef CLBLAST_TEST_CORRECTNESS_TESTAB_H_
#define CLBLAST_TEST_CORRECTNESS_TESTAB_H_
#include <vector>
#include <string>
#include "correctness/tester.h"
namespace clblast {
// =================================================================================================
// See comment at top of file for a description of the class. It derives from Tester<T> and holds
// the two routines to compare (CLBlast and clBLAS) plus the host-side source data for A and B.
template <typename T>
class TestAB: public Tester<T> {
public:
// Uses several variables from the Tester class
using Tester<T>::context_;
using Tester<T>::queue_;
using Tester<T>::kLayouts;
using Tester<T>::kTransposes;
// Uses several helper functions from the Tester class
using Tester<T>::TestStart;
using Tester<T>::TestEnd;
using Tester<T>::TestSimilarity;
using Tester<T>::TestErrorCount;
using Tester<T>::TestErrorCodes;
using Tester<T>::GetExampleScalars;
using Tester<T>::GetOffsets;
using Tester<T>::PrecisionSupported;
// Test settings for the regular test. Append to this list in case more tests are required.
// The same list of values is used for the matrix dimensions and for the leading dimensions.
const std::vector<size_t> kMatrixDims = { 7, 64 };
const std::vector<size_t> kOffsets = GetOffsets();
const std::vector<T> kAlphaValues = GetExampleScalars();
const std::vector<T> kBetaValues = GetExampleScalars();
// Test settings for the invalid test: used for the dimensions and the leading dimensions
const size_t kBufferSize = 64;
// Shorthand for a BLAS routine: takes the arguments, the A and B buffers and a command queue,
// and returns a status code
using Routine = std::function<StatusCode(const Arguments<T>&,
const Buffer&, const Buffer&,
CommandQueue&)>;
// Constructor, initializes the base class tester and input data
TestAB(int argc, char *argv[], const bool silent,
const std::string &name, const std::vector<std::string> &options,
const Routine clblast_lambda, const Routine clblas_lambda);
// The test functions. Both take the arguments by non-const reference together with a name for
// the test-case.
void TestRegular(Arguments<T> &args, const std::string &name);
void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name);
private:
// Source data to test with
std::vector<T> a_source_;
std::vector<T> b_source_;
// The routines to test
Routine clblast_lambda_;
Routine clblas_lambda_;
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_CORRECTNESS_TESTAB_H_
#endif

View file

@ -1,218 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the TestABC class (see the header for information about the class).
//
// =================================================================================================
#include <algorithm>
#include "correctness/testabc.h"
namespace clblast {
// =================================================================================================
// Constructor: forwards the command-line settings to the Tester base class, stores the two
// routines to compare, and fills the host-side source vectors for matrices A, B and C.
template <typename T>
TestABC<T>::TestABC(int argc, char *argv[], const bool silent,
                    const std::string &name, const std::vector<std::string> &options,
                    const Routine clblast_lambda, const Routine clblas_lambda):
    Tester<T>{argc, argv, silent, name, options},
    clblast_lambda_(clblast_lambda),
    clblas_lambda_(clblas_lambda) {

  // All source vectors are sized for the largest dimension, leading dimension and offset that
  // the regular test uses, such that one set of input data serves every test-case.
  const auto largest_dim = *std::max_element(kMatrixDims.begin(), kMatrixDims.end());
  const auto largest_ld = *std::max_element(kMatrixDims.begin(), kMatrixDims.end());
  const auto largest_offset = *std::max_element(kOffsets.begin(), kOffsets.end());
  const auto source_size = largest_dim*largest_ld + largest_offset;

  // Allocates and populates the input data (in the order A, B, C)
  a_source_.resize(source_size);
  b_source_.resize(source_size);
  c_source_.resize(source_size);
  PopulateVector(a_source_);
  PopulateVector(b_source_);
  PopulateVector(c_source_);
}
// ===============================================================================================
// Tests the routine for a wide variety of parameters. The caller provides args.layout,
// args.a_transpose and args.b_transpose; this function overwrites args.m, args.n, args.k, the
// leading dimensions and offsets of A, B and C, and args.alpha and args.beta while iterating.
// When 'symmetric' is true, only combinations with m equal to n are tested. For every combination
// the clBLAS reference and the CLBlast routine are run on identical input and their outputs are
// compared.
template <typename T>
void TestABC<T>::TestRegular(Arguments<T> &args, const std::string &name, const bool symmetric) {
// Skips the whole test when the precision is reported as unsupported
if (!PrecisionSupported()) { return; }
TestStart("regular behaviour", name);
// Computes whether or not the matrices are transposed. Note that we assume a default of
// column-major and no-transpose. If one of them is different (but not both), then rotated
// is considered true.
auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
(args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
auto b_rotated = (args.layout == Layout::kColMajor && args.b_transpose != Transpose::kNo) ||
(args.layout == Layout::kRowMajor && args.b_transpose == Transpose::kNo);
auto c_rotated = (args.layout == Layout::kRowMajor);
// Iterates over the matrix dimensions
for (auto &m: kMatrixDims) {
args.m = m;
for (auto &n: kMatrixDims) {
args.n = n;
// In symmetric mode the non-square (m != n) combinations are skipped
if (symmetric && m != n) { continue; }
for (auto &k: kMatrixDims) {
args.k = k;
// Computes the second dimensions of the matrices taking the rotation into account
auto a_two = (a_rotated) ? m : k;
auto b_two = (b_rotated) ? k : n;
auto c_two = (c_rotated) ? m : n;
// Iterates over the leading-dimension values and the offsets
for (auto &a_ld: kMatrixDims) {
args.a_ld = a_ld;
for (auto &a_offset: kOffsets) {
args.a_offset = a_offset;
for (auto &b_ld: kMatrixDims) {
args.b_ld = b_ld;
for (auto &b_offset: kOffsets) {
args.b_offset = b_offset;
for (auto &c_ld: kMatrixDims) {
args.c_ld = c_ld;
for (auto &c_offset: kOffsets) {
args.c_offset = c_offset;
// Computes the buffer sizes (in elements) and skips combinations with an empty buffer
auto a_size = a_two * a_ld + a_offset;
auto b_size = b_two * b_ld + b_offset;
auto c_size = c_two * c_ld + c_offset;
if (a_size < 1 || b_size < 1 || c_size < 1) { continue; }
// Creates the OpenCL buffers: the inputs A and B plus one output buffer per implementation
// (r_mat for the clBLAS reference, s_mat for CLBlast)
auto a_mat = Buffer(context_, CL_MEM_READ_WRITE, a_size*sizeof(T));
auto b_mat = Buffer(context_, CL_MEM_READ_WRITE, b_size*sizeof(T));
auto r_mat = Buffer(context_, CL_MEM_READ_WRITE, c_size*sizeof(T));
auto s_mat = Buffer(context_, CL_MEM_READ_WRITE, c_size*sizeof(T));
// Iterates over the values for alpha and beta
for (auto &alpha: kAlphaValues) {
args.alpha = alpha;
for (auto &beta: kBetaValues) {
args.beta = beta;
// Runs the reference clBLAS code (buffers are re-uploaded first so that both runs start
// from the same source data)
a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
b_mat.WriteBuffer(queue_, b_size*sizeof(T), b_source_);
r_mat.WriteBuffer(queue_, c_size*sizeof(T), c_source_);
auto status1 = clblas_lambda_(args, a_mat, b_mat, r_mat, queue_);
// Runs the CLBlast code
a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
b_mat.WriteBuffer(queue_, b_size*sizeof(T), b_source_);
s_mat.WriteBuffer(queue_, c_size*sizeof(T), c_source_);
auto status2 = clblast_lambda_(args, a_mat, b_mat, s_mat, queue_);
// If either routine did not succeed, only the two status codes are tested against each
// other and the comparison of the results is skipped
if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) {
TestErrorCodes(status1, status2, args);
continue;
}
// Downloads the results
std::vector<T> r_result(c_size, static_cast<T>(0));
std::vector<T> s_result(c_size, static_cast<T>(0));
r_mat.ReadBuffer(queue_, c_size*sizeof(T), r_result);
s_mat.ReadBuffer(queue_, c_size*sizeof(T), s_result);
// Checks for differences in the m-by-n output region
// NOTE(review): 'index' is not checked against c_size. This appears to rely on both
// routines returning a non-success status whenever c_ld is too small for the layout;
// otherwise the reads below could run past the end of the result vectors - confirm.
auto errors = size_t{0};
for (auto idm=size_t{0}; idm<m; ++idm) {
for (auto idn=size_t{0}; idn<n; ++idn) {
auto index = (args.layout == Layout::kRowMajor) ?
idm*args.c_ld + idn + args.c_offset:
idn*args.c_ld + idm + args.c_offset;
if (!TestSimilarity(r_result[index], s_result[index])) {
errors++;
}
}
}
// Tests the error count (should be zero)
TestErrorCount(errors, m*n, args);
}
}
}
}
}
}
}
}
}
}
}
TestEnd();
}
// =================================================================================================
// Runs both routines on OpenCL buffers that are empty, one element too small, or exactly large
// enough, and checks that they report the same status code. Only the return values are tested:
// no results are downloaded or compared.
template <typename T>
void TestABC<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("invalid buffer sizes", name);

  // Fixed problem description: square matrices without offsets
  args.m = args.n = args.k = kBufferSize;
  args.a_ld = args.b_ld = args.c_ld = kBufferSize;
  args.a_offset = args.b_offset = args.c_offset = 0;

  // The buffer sizes (in elements) to try for each of the matrices
  const auto exact_size = kBufferSize*kBufferSize;
  const std::vector<size_t> sizes_to_try = {0, exact_size-1, exact_size};
  for (auto &a_elements: sizes_to_try) {
    for (auto &b_elements: sizes_to_try) {
      for (auto &c_elements: sizes_to_try) {

        // The plain OpenCL C API is used on purpose: buffer creation is allowed to fail here, so
        // no error checking is performed on the returned handles.
        auto a_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                           a_elements*sizeof(T), nullptr, nullptr));
        auto b_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                           b_elements*sizeof(T), nullptr, nullptr));
        auto r_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                           c_elements*sizeof(T), nullptr, nullptr));
        auto s_mat = Buffer(clCreateBuffer(context_(), CL_MEM_READ_WRITE,
                                           c_elements*sizeof(T), nullptr, nullptr));

        // Reference (clBLAS) first, then CLBlast; their status codes have to match
        auto reference_status = clblas_lambda_(args, a_mat, b_mat, r_mat, queue_);
        auto clblast_status = clblast_lambda_(args, a_mat, b_mat, s_mat, queue_);
        TestErrorCodes(reference_status, clblast_status, args);
      }
    }
  }
  TestEnd();
}
// =================================================================================================
// Compiles the templated class: explicit instantiations for the float, double, float2 and double2
// data-types
template class TestABC<float>;
template class TestABC<double>;
template class TestABC<float2>;
template class TestABC<double2>;
// =================================================================================================
} // namespace clblast

View file

@ -1,86 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file tests any mat-mat-mat (A,B,C) routine. It contains two types of tests: one testing
// all sorts of input combinations, and one deliberately testing with invalid values.
//
// =================================================================================================
#ifndef CLBLAST_TEST_CORRECTNESS_TESTABC_H_
#define CLBLAST_TEST_CORRECTNESS_TESTABC_H_
#include <vector>
#include <string>
#include "correctness/tester.h"
namespace clblast {
// =================================================================================================
// See comment at top of file for a description of the class. It derives from Tester<T> and holds
// the two routines to compare (CLBlast and clBLAS) plus the host-side source data for A, B and C.
template <typename T>
class TestABC: public Tester<T> {
public:
// Uses several variables from the Tester class
using Tester<T>::context_;
using Tester<T>::queue_;
using Tester<T>::kLayouts;
using Tester<T>::kTransposes;
// Uses several helper functions from the Tester class
using Tester<T>::TestStart;
using Tester<T>::TestEnd;
using Tester<T>::TestSimilarity;
using Tester<T>::TestErrorCount;
using Tester<T>::TestErrorCodes;
using Tester<T>::GetExampleScalars;
using Tester<T>::GetOffsets;
using Tester<T>::PrecisionSupported;
// Test settings for the regular test. Append to this list in case more tests are required.
// The same list of values is used for the matrix dimensions and for the leading dimensions.
const std::vector<size_t> kMatrixDims = { 7, 64 };
const std::vector<size_t> kOffsets = GetOffsets();
const std::vector<T> kAlphaValues = GetExampleScalars();
const std::vector<T> kBetaValues = GetExampleScalars();
// Test settings for the invalid test: used for the dimensions and the leading dimensions
const size_t kBufferSize = 64;
// Shorthand for a BLAS routine: takes the arguments, the A, B and C buffers and a command queue,
// and returns a status code
using Routine = std::function<StatusCode(const Arguments<T>&,
const Buffer&, const Buffer&, const Buffer&,
CommandQueue&)>;
// Constructor, initializes the base class tester and input data
TestABC(int argc, char *argv[], const bool silent,
const std::string &name, const std::vector<std::string> &options,
const Routine clblast_lambda, const Routine clblas_lambda);
// The test functions. Both take the arguments by non-const reference together with a name for
// the test-case; the regular test additionally takes a flag to request symmetric testing.
void TestRegular(Arguments<T> &args, const std::string &name, const bool symmetric);
void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name);
private:
// Source data to test with
std::vector<T> a_source_;
std::vector<T> b_source_;
std::vector<T> c_source_;
// The routines to test
Routine clblast_lambda_;
Routine clblas_lambda_;
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_CORRECTNESS_TESTABC_H_
#endif

View file

@ -1,191 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the TestAC class (see the header for information about the class).
//
// =================================================================================================
#include <algorithm>
#include "correctness/testac.h"
namespace clblast {
// =================================================================================================
// Constructor: forwards the command-line settings to the Tester base class, stores the two
// routines to compare, and fills the host-side source vectors for matrices A and C.
template <typename T>
TestAC<T>::TestAC(int argc, char *argv[], const bool silent,
                  const std::string &name, const std::vector<std::string> &options,
                  const Routine clblast_lambda, const Routine clblas_lambda):
    Tester<T>{argc, argv, silent, name, options},
    clblast_lambda_(clblast_lambda),
    clblas_lambda_(clblas_lambda) {

  // Both source vectors are sized for the largest dimension, leading dimension and offset that
  // the regular test uses, such that one set of input data serves every test-case.
  const auto largest_dim = *std::max_element(kMatrixDims.begin(), kMatrixDims.end());
  const auto largest_ld = *std::max_element(kMatrixDims.begin(), kMatrixDims.end());
  const auto largest_offset = *std::max_element(kOffsets.begin(), kOffsets.end());
  const auto source_size = largest_dim*largest_ld + largest_offset;

  // Allocates and populates the input data (A first, then C)
  a_source_.resize(source_size);
  c_source_.resize(source_size);
  PopulateVector(a_source_);
  PopulateVector(c_source_);
}
// ===============================================================================================
// Tests the routine for a wide variety of parameters. The caller provides args.layout and
// args.a_transpose; this function overwrites args.n, args.k, args.a_ld, args.a_offset, args.c_ld,
// args.c_offset, args.alpha and args.beta while iterating. For every combination the clBLAS
// reference and the CLBlast routine are run on identical input and their outputs are compared.
template <typename T>
void TestAC<T>::TestRegular(Arguments<T> &args, const std::string &name) {
// Skips the whole test when the precision is reported as unsupported
if (!PrecisionSupported()) { return; }
TestStart("regular behaviour", name);
// Computes whether or not the matrices are transposed. Note that we assume a default of
// column-major and no-transpose. If one of them is different (but not both), then rotated
// is considered true.
auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
(args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
auto c_rotated = (args.layout == Layout::kRowMajor);
// Iterates over the matrix dimensions
for (auto &n: kMatrixDims) {
args.n = n;
for (auto &k: kMatrixDims) {
args.k = k;
// Computes the second dimensions of the matrices taking the rotation into account. Both
// branches for c_two are n: the output region compared below is n-by-n.
auto a_two = (a_rotated) ? n : k;
auto c_two = (c_rotated) ? n : n;
// Iterates over the leading-dimension values and the offsets
for (auto &a_ld: kMatrixDims) {
args.a_ld = a_ld;
for (auto &a_offset: kOffsets) {
args.a_offset = a_offset;
for (auto &c_ld: kMatrixDims) {
args.c_ld = c_ld;
for (auto &c_offset: kOffsets) {
args.c_offset = c_offset;
// Computes the buffer sizes (in elements) and skips combinations with an empty buffer
auto a_size = a_two * a_ld + a_offset;
auto c_size = c_two * c_ld + c_offset;
if (a_size < 1 || c_size < 1) { continue; }
// Creates the OpenCL buffers: the input A plus one output buffer per implementation
// (r_mat for the clBLAS reference, s_mat for CLBlast)
auto a_mat = Buffer(context_, CL_MEM_READ_WRITE, a_size*sizeof(T));
auto r_mat = Buffer(context_, CL_MEM_READ_WRITE, c_size*sizeof(T));
auto s_mat = Buffer(context_, CL_MEM_READ_WRITE, c_size*sizeof(T));
// Iterates over the values for alpha and beta
for (auto &alpha: kAlphaValues) {
args.alpha = alpha;
for (auto &beta: kBetaValues) {
args.beta = beta;
// Runs the reference clBLAS code (buffers are re-uploaded first so that both runs start
// from the same source data)
a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
r_mat.WriteBuffer(queue_, c_size*sizeof(T), c_source_);
auto status1 = clblas_lambda_(args, a_mat, r_mat, queue_);
// Runs the CLBlast code
a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
s_mat.WriteBuffer(queue_, c_size*sizeof(T), c_source_);
auto status2 = clblast_lambda_(args, a_mat, s_mat, queue_);
// If either routine did not succeed, only the two status codes are tested against each
// other and the comparison of the results is skipped
if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) {
TestErrorCodes(status1, status2, args);
continue;
}
// Downloads the results
std::vector<T> r_result(c_size, static_cast<T>(0));
std::vector<T> s_result(c_size, static_cast<T>(0));
r_mat.ReadBuffer(queue_, c_size*sizeof(T), r_result);
s_mat.ReadBuffer(queue_, c_size*sizeof(T), s_result);
// Checks for differences in the n-by-n output region. The index does not depend on the
// layout: both loops cover the full range of n, so the same set of elements is visited
// either way.
// NOTE(review): 'index' is not checked against c_size. This appears to rely on both
// routines returning a non-success status whenever c_ld is smaller than n; otherwise the
// reads below could run past the end of the result vectors - confirm.
auto errors = size_t{0};
for (auto idn0=size_t{0}; idn0<n; ++idn0) {
for (auto idn1=size_t{0}; idn1<n; ++idn1) {
auto index = idn0*args.c_ld + idn1 + args.c_offset;
if (!TestSimilarity(r_result[index], s_result[index])) {
errors++;
}
}
}
// Tests the error count (should be zero)
TestErrorCount(errors, n*n, args);
}
}
}
}
}
}
}
}
TestEnd();
}
// =================================================================================================
// Checks how the routine reacts to OpenCL buffers of questionable size: empty, one element short
// of a full matrix, and exactly a full matrix. Only the status codes returned by the clBLAS
// reference and by CLBlast are compared; no output data is downloaded or inspected.
template <typename T>
void TestAC<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("invalid buffer sizes", name);

  // Uses one fixed problem: all dimensions and leading dimensions are equal, without offsets
  args.a_offset = 0;
  args.c_offset = 0;
  args.a_ld = kBufferSize;
  args.c_ld = kBufferSize;
  args.m = kBufferSize;
  args.n = kBufferSize;
  args.k = kBufferSize;

  // Element counts to try for each of the buffers
  const auto full_matrix = kBufferSize*kBufferSize;
  const std::vector<size_t> element_counts = {0, full_matrix-1, full_matrix};
  for (const auto a_elements: element_counts) {
    for (const auto c_elements: element_counts) {
      const auto a_bytes = a_elements*sizeof(T);
      const auto c_bytes = c_elements*sizeof(T);

      // The raw OpenCL call is used on purpose: creation errors are ignored (both error outputs
      // are null) so that invalid buffers can be handed to the routines.
      auto a_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, a_bytes, nullptr, nullptr);
      auto a_mat = Buffer(a_handle);
      auto r_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, c_bytes, nullptr, nullptr);
      auto r_mat = Buffer(r_handle);
      auto s_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, c_bytes, nullptr, nullptr);
      auto s_mat = Buffer(s_handle);

      // Runs the reference first, then CLBlast, and compares what they returned
      const auto reference_status = clblas_lambda_(args, a_mat, r_mat, queue_);
      const auto routine_status = clblast_lambda_(args, a_mat, s_mat, queue_);
      TestErrorCodes(reference_status, routine_status, args);
    }
  }
  TestEnd();
}
// =================================================================================================
// Compiles the templated class: explicitly instantiates TestAC for each element type the tests are
// run with. The member functions are defined in this source file rather than in the header, so
// these instantiations are what make them available to other translation units.
template class TestAC<float>;
template class TestAC<double>;
template class TestAC<float2>;
template class TestAC<double2>;
// =================================================================================================
} // namespace clblast

View file

@ -1,85 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file tests any mat-mat (A,C) routine. It contains two types of tests: one testing
// all sorts of input combinations, and one deliberately testing with invalid values.
//
// =================================================================================================
#ifndef CLBLAST_TEST_CORRECTNESS_TESTAC_H_
#define CLBLAST_TEST_CORRECTNESS_TESTAC_H_
#include <vector>
#include <string>
#include "correctness/tester.h"
namespace clblast {
// =================================================================================================
// Correctness tester for routines operating on two matrices (A as input, C as input/output). It
// runs a CLBlast routine and a clBLAS reference on the same data and compares either the results
// (regular test) or only the returned status codes (invalid-buffer-size test).
template <typename T>
class TestAC: public Tester<T> {
public:
// Uses several variables from the Tester class
using Tester<T>::context_;
using Tester<T>::queue_;
using Tester<T>::kLayouts;
using Tester<T>::kTransposes;
// Uses several helper functions from the Tester class
using Tester<T>::TestStart;
using Tester<T>::TestEnd;
using Tester<T>::TestSimilarity;
using Tester<T>::TestErrorCount;
using Tester<T>::TestErrorCodes;
using Tester<T>::GetExampleScalars;
using Tester<T>::GetOffsets;
using Tester<T>::PrecisionSupported;
// Test settings for the regular test. Append to this list in case more tests are required.
// Note: these are non-static members with default initializers, so they are initialized in
// declaration order for every tester object.
const std::vector<size_t> kMatrixDims = { 7, 64 };
const std::vector<size_t> kOffsets = GetOffsets();
const std::vector<T> kAlphaValues = GetExampleScalars();
const std::vector<T> kBetaValues = GetExampleScalars();
// Test settings for the invalid test: used for all dimensions and leading dimensions, and to
// derive the buffer sizes that are tried
const size_t kBufferSize = 64;
// Shorthand for a BLAS routine: called with the arguments, the A buffer, the C buffer and the
// command queue; returns the status code of the routine
using Routine = std::function<StatusCode(const Arguments<T>&,
const Buffer&, const Buffer&,
CommandQueue&)>;
// Constructor, initializes the base class tester and input data. The two lambdas wrap the CLBlast
// routine under test and the clBLAS reference routine respectively.
TestAC(int argc, char *argv[], const bool silent,
const std::string &name, const std::vector<std::string> &options,
const Routine clblast_lambda, const Routine clblas_lambda);
// The test functions. Each takes the arguments structure to test with (which is modified while
// iterating over the test settings) and a name that is passed on to TestStart.
void TestRegular(Arguments<T> &args, const std::string &name);
void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name);
private:
// Source data to test with (host-side copies that are uploaded before every routine call)
std::vector<T> a_source_;
std::vector<T> c_source_;
// The routines to test
Routine clblast_lambda_;
Routine clblas_lambda_;
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_CORRECTNESS_TESTAC_H_
#endif

View file

@ -1,213 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the TestAXY class (see the header for information about the class).
//
// =================================================================================================
#include <algorithm>
#include "correctness/testaxy.h"
namespace clblast {
// =================================================================================================
// Constructor: hands the generic arguments to the Tester base class, stores the two routine
// wrappers, and prepares host-side input data that is large enough for every combination of
// dimension, leading dimension, increment and offset used by the regular test.
template <typename T>
TestAXY<T>::TestAXY(int argc, char *argv[], const bool silent,
                    const std::string &name, const std::vector<std::string> &options,
                    const Routine clblast_lambda, const Routine clblas_lambda):
    Tester<T>{argc, argv, silent, name, options},
    clblast_lambda_(clblast_lambda),
    clblas_lambda_(clblas_lambda) {

  // The same list provides both the matrix dimensions and the leading dimensions, so a single
  // maximum covers both
  const auto largest_dim = *std::max_element(kMatrixVectorDims.begin(), kMatrixVectorDims.end());
  const auto largest_inc = *std::max_element(kIncrements.begin(), kIncrements.end());
  const auto largest_offset = *std::max_element(kOffsets.begin(), kOffsets.end());

  // Worst-case number of elements for the matrix and for each of the vectors
  const auto matrix_elements = largest_dim*largest_dim + largest_offset;
  const auto vector_elements = largest_dim*largest_inc + largest_offset;

  // Allocates and fills the input data (matrix first, then x, then y)
  a_source_.resize(matrix_elements);
  x_source_.resize(vector_elements);
  y_source_.resize(vector_elements);
  PopulateVector(a_source_);
  PopulateVector(x_source_);
  PopulateVector(y_source_);
}
// ===============================================================================================
// Tests the routine for a wide variety of parameters. For every combination of matrix dimensions,
// leading dimension, increments, offsets and alpha/beta values, the clBLAS reference and the
// CLBlast routine are run on identical input data. If either returns a non-success status only the
// status codes are compared; otherwise the output vectors are compared element by element.
// The arguments structure is modified in place while iterating.
template <typename T>
void TestAXY<T>::TestRegular(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("regular behaviour", name);

  // Iterates over the dimension for the matrix and vectors
  for (auto &m: kMatrixVectorDims) {
    args.m = m;
    for (auto &n: kMatrixVectorDims) {
      args.n = n;

      // Computes the second dimension of the matrix taking the rotation into account
      auto a_two = (args.layout == Layout::kRowMajor) ? args.m : args.n;

      // Computes the vector sizes in case the matrix is transposed. A conjugate transpose swaps
      // the lengths of the two vectors exactly like a regular transpose does, so every option
      // other than 'no transpose' has to be treated as transposed here.
      auto a_transposed = (args.a_transpose != Transpose::kNo);
      auto m_real = (a_transposed) ? n : m;
      auto n_real = (a_transposed) ? m : n;

      // Iterates over the leading-dimension values and the offsets of the matrix
      for (auto &a_ld: kMatrixVectorDims) {
        args.a_ld = a_ld;
        for (auto &a_offset: kOffsets) {
          args.a_offset = a_offset;

          // Iterates over the increment-values and the offsets of the vectors
          for (auto &x_inc: kIncrements) {
            args.x_inc = x_inc;
            for (auto &x_offset: kOffsets) {
              args.x_offset = x_offset;
              for (auto &y_inc: kIncrements) {
                args.y_inc = y_inc;
                for (auto &y_offset: kOffsets) {
                  args.y_offset = y_offset;

                  // Computes the buffer sizes (in elements) and skips empty buffers
                  auto a_size = a_two * a_ld + a_offset;
                  auto x_size = n_real * x_inc + x_offset;
                  auto y_size = m_real * y_inc + y_offset;
                  if (a_size < 1 || x_size < 1 || y_size < 1) { continue; }

                  // Creates the OpenCL buffers: one output vector for the reference (r) and one
                  // for CLBlast (s)
                  auto a_mat = Buffer(context_, CL_MEM_READ_WRITE, a_size*sizeof(T));
                  auto x_vec = Buffer(context_, CL_MEM_READ_WRITE, x_size*sizeof(T));
                  auto r_vec = Buffer(context_, CL_MEM_READ_WRITE, y_size*sizeof(T));
                  auto s_vec = Buffer(context_, CL_MEM_READ_WRITE, y_size*sizeof(T));

                  // Iterates over the values for alpha and beta
                  for (auto &alpha: kAlphaValues) {
                    args.alpha = alpha;
                    for (auto &beta: kBetaValues) {
                      args.beta = beta;

                      // Runs the reference clBLAS code
                      a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
                      x_vec.WriteBuffer(queue_, x_size*sizeof(T), x_source_);
                      r_vec.WriteBuffer(queue_, y_size*sizeof(T), y_source_);
                      auto status1 = clblas_lambda_(args, a_mat, x_vec, r_vec, queue_);

                      // Runs the CLBlast code (the inputs are uploaded again so both runs start
                      // from the same data)
                      a_mat.WriteBuffer(queue_, a_size*sizeof(T), a_source_);
                      x_vec.WriteBuffer(queue_, x_size*sizeof(T), x_source_);
                      s_vec.WriteBuffer(queue_, y_size*sizeof(T), y_source_);
                      auto status2 = clblast_lambda_(args, a_mat, x_vec, s_vec, queue_);

                      // Tests for equality of the two status codes
                      if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) {
                        TestErrorCodes(status1, status2, args);
                        continue;
                      }

                      // Downloads the results
                      std::vector<T> r_result(y_size, static_cast<T>(0));
                      std::vector<T> s_result(y_size, static_cast<T>(0));
                      r_vec.ReadBuffer(queue_, y_size*sizeof(T), r_result);
                      s_vec.ReadBuffer(queue_, y_size*sizeof(T), s_result);

                      // Checks for differences in the output
                      auto errors = size_t{0};
                      for (auto idm=size_t{0}; idm<m_real; ++idm) {
                        auto index = idm*y_inc + y_offset;
                        if (!TestSimilarity(r_result[index], s_result[index])) {
                          errors++;
                        }
                      }

                      // Tests the error count (should be zero)
                      TestErrorCount(errors, m_real, args);
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
  TestEnd();
}
// =================================================================================================
// Checks how the routine reacts to OpenCL buffers of questionable size (empty, one element short,
// exactly large enough) combined with increments of zero and one. Only the status codes returned
// by the clBLAS reference and by CLBlast are compared; no output data is inspected.
template <typename T>
void TestAXY<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("invalid buffer sizes", name);

  // Uses one fixed problem without offsets
  args.a_offset = 0;
  args.x_offset = 0;
  args.y_offset = 0;
  args.a_ld = kBufferSize;
  args.m = kBufferSize;
  args.n = kBufferSize;

  // Element counts to try for the matrix and for each of the vectors
  const auto full_matrix = kBufferSize*kBufferSize;
  const std::vector<size_t> matrix_counts = {0, full_matrix-1, full_matrix};
  const std::vector<size_t> vector_counts = {0, kBufferSize-1, kBufferSize};

  for (const auto a_elements: matrix_counts) {
    for (const auto x_elements: vector_counts) {
      for (const auto y_elements: vector_counts) {
        const auto a_bytes = a_elements*sizeof(T);
        const auto x_bytes = x_elements*sizeof(T);
        const auto y_bytes = y_elements*sizeof(T);

        // Combines every buffer-size triple with every pair of test increments
        for (const auto x_increment: kInvalidIncrements) {
          args.x_inc = x_increment;
          for (const auto y_increment: kInvalidIncrements) {
            args.y_inc = y_increment;

            // The raw OpenCL call is used on purpose: creation errors are ignored (both error
            // outputs are null) so that invalid buffers can be handed to the routines.
            auto a_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, a_bytes, nullptr, nullptr);
            auto a_mat = Buffer(a_handle);
            auto x_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, x_bytes, nullptr, nullptr);
            auto x_vec = Buffer(x_handle);
            auto r_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, y_bytes, nullptr, nullptr);
            auto r_vec = Buffer(r_handle);
            auto s_handle = clCreateBuffer(context_(), CL_MEM_READ_WRITE, y_bytes, nullptr, nullptr);
            auto s_vec = Buffer(s_handle);

            // Runs the reference first, then CLBlast, and compares what they returned
            const auto reference_status = clblas_lambda_(args, a_mat, x_vec, r_vec, queue_);
            const auto routine_status = clblast_lambda_(args, a_mat, x_vec, s_vec, queue_);
            TestErrorCodes(reference_status, routine_status, args);
          }
        }
      }
    }
  }
  TestEnd();
}
// =================================================================================================
// Compiles the templated class: explicitly instantiates TestAXY for each element type the tests
// are run with. The member functions are defined in this source file rather than in the header, so
// these instantiations are what make them available to other translation units.
template class TestAXY<float>;
template class TestAXY<double>;
template class TestAXY<float2>;
template class TestAXY<double2>;
// =================================================================================================
} // namespace clblast

View file

@ -1,88 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file tests any mat-vec-vec (A,X,Y) routine. It contains two types of tests: one testing
// all sorts of input combinations, and one deliberately testing with invalid values.
//
// =================================================================================================
#ifndef CLBLAST_TEST_CORRECTNESS_TESTAXY_H_
#define CLBLAST_TEST_CORRECTNESS_TESTAXY_H_
#include <vector>
#include <string>
#include "correctness/tester.h"
namespace clblast {
// =================================================================================================
// Correctness tester for routines operating on a matrix and two vectors (A and X as input, Y as
// input/output). It runs a CLBlast routine and a clBLAS reference on the same data and compares
// either the results (regular test) or only the returned status codes (invalid-buffer-size test).
template <typename T>
class TestAXY: public Tester<T> {
public:
// Uses several variables from the Tester class
using Tester<T>::context_;
using Tester<T>::queue_;
using Tester<T>::kLayouts;
using Tester<T>::kTransposes;
// Uses several helper functions from the Tester class
using Tester<T>::TestStart;
using Tester<T>::TestEnd;
using Tester<T>::TestSimilarity;
using Tester<T>::TestErrorCount;
using Tester<T>::TestErrorCodes;
using Tester<T>::GetExampleScalars;
using Tester<T>::GetOffsets;
using Tester<T>::PrecisionSupported;
// Test settings for the regular test. Append to this list in case more tests are required.
// kMatrixVectorDims is used for m, n and the leading dimension of the matrix alike.
// Note: these are non-static members with default initializers, so they are initialized in
// declaration order for every tester object.
const std::vector<size_t> kMatrixVectorDims = { 61, 512 };
const std::vector<size_t> kOffsets = GetOffsets();
const std::vector<size_t> kIncrements = { 1, 2 };
const std::vector<T> kAlphaValues = GetExampleScalars();
const std::vector<T> kBetaValues = GetExampleScalars();
// Test settings for the invalid test: the increments to try, and the size used for the dimensions
// and to derive the buffer sizes that are tried
const std::vector<size_t> kInvalidIncrements = { 0, 1 };
const size_t kBufferSize = 64;
// Shorthand for a BLAS routine: called with the arguments, the A, X and Y buffers and the command
// queue; returns the status code of the routine
using Routine = std::function<StatusCode(const Arguments<T>&,
const Buffer&, const Buffer&, const Buffer&,
CommandQueue&)>;
// Constructor, initializes the base class tester and input data. The two lambdas wrap the CLBlast
// routine under test and the clBLAS reference routine respectively.
TestAXY(int argc, char *argv[], const bool silent,
const std::string &name, const std::vector<std::string> &options,
const Routine clblast_lambda, const Routine clblas_lambda);
// The test functions. Each takes the arguments structure to test with (which is modified while
// iterating over the test settings) and a name that is passed on to TestStart.
void TestRegular(Arguments<T> &args, const std::string &name);
void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name);
private:
// Source data to test with (host-side copies that are uploaded before every routine call)
std::vector<T> a_source_;
std::vector<T> x_source_;
std::vector<T> y_source_;
// The routines to test
Routine clblast_lambda_;
Routine clblas_lambda_;
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_CORRECTNESS_TESTAXY_H_
#endif

View file

@ -0,0 +1,185 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the TestBlas class (see the header for information about the class).
//
// =================================================================================================
#include <algorithm>
#include "correctness/testblas.h"
namespace clblast {
// =================================================================================================
// The transpose-options to test with (data-type dependent). The static member is defined through
// one explicit specialization per element type: float and double list 'no' and 'yes' only, whereas
// float2 and double2 additionally list the conjugate option.
template <> const std::vector<Transpose> TestBlas<float>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> TestBlas<double>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> TestBlas<float2>::kTransposes = {Transpose::kNo, Transpose::kYes, Transpose::kConjugate};
template <> const std::vector<Transpose> TestBlas<double2>::kTransposes = {Transpose::kNo, Transpose::kYes, Transpose::kConjugate};
// =================================================================================================
// Constructor: hands the generic arguments to the Tester base class, stores the routine-specific
// callbacks, and prepares host-side input data for two vectors (x, y) and three matrices (a, b, c).
// The data is sized for the largest dimension, increment/leading dimension and offset found in the
// test-setting lists of this class.
template <typename T>
TestBlas<T>::TestBlas(int argc, char *argv[], const bool silent,
                      const std::string &name, const std::vector<std::string> &options,
                      const Routine run_routine, const Routine run_reference,
                      const ResultGet get_result, const ResultIndex get_index,
                      const ResultIterator get_id1, const ResultIterator get_id2):
    Tester<T>{argc, argv, silent, name, options},
    run_routine_(run_routine),
    run_reference_(run_reference),
    get_result_(get_result),
    get_index_(get_index),
    get_id1_(get_id1),
    get_id2_(get_id2) {

  // Returns the largest entry of one of the test-setting lists
  const auto largest = [](const std::vector<size_t> &values) {
    return *std::max_element(values.begin(), values.end());
  };

  // Vectors appear both in vector-only and in matrix-vector tests; matrices both in matrix-only
  // and in matrix-vector tests. The matrix list doubles as the list of leading dimensions.
  const auto vector_dim = std::max(largest(kVectorDims), largest(kMatrixVectorDims));
  const auto matrix_dim = std::max(largest(kMatrixDims), largest(kMatrixVectorDims));
  const auto largest_offset = largest(kOffsets);
  const auto vector_elements = vector_dim*largest(kIncrements) + largest_offset;
  const auto matrix_elements = matrix_dim*matrix_dim + largest_offset;

  // Allocates the worst-case amount of input data
  for (auto *source: {&x_source_, &y_source_}) {
    source->resize(vector_elements);
  }
  for (auto *source: {&a_source_, &b_source_, &c_source_}) {
    source->resize(matrix_elements);
  }

  // Fills the input data in the order x, y, a, b, c
  for (auto *source: {&x_source_, &y_source_, &a_source_, &b_source_, &c_source_}) {
    PopulateVector(*source);
  }
}
// ===============================================================================================
// Tests the routine for a wide variety of parameters. Every entry of the test vector is run once
// through the clBLAS reference and once through the CLBlast routine, each on its own freshly
// created and freshly filled set of buffers. If either returns a non-success status only the
// status codes are compared; otherwise the results are downloaded through the routine-specific
// callback and compared element by element.
template <typename T>
void TestBlas<T>::TestRegular(std::vector<Arguments<T>> &test_vector, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("regular behaviour", name);

  // Iterates over all the to-be-tested combinations of arguments
  for (auto &args: test_vector) {

    // Runs the reference clBLAS code
    auto x_vec1 = Buffer(context_, CL_MEM_READ_WRITE, args.x_size*sizeof(T));
    auto y_vec1 = Buffer(context_, CL_MEM_READ_WRITE, args.y_size*sizeof(T));
    auto a_mat1 = Buffer(context_, CL_MEM_READ_WRITE, args.a_size*sizeof(T));
    auto b_mat1 = Buffer(context_, CL_MEM_READ_WRITE, args.b_size*sizeof(T));
    auto c_mat1 = Buffer(context_, CL_MEM_READ_WRITE, args.c_size*sizeof(T));
    x_vec1.WriteBuffer(queue_, args.x_size*sizeof(T), x_source_);
    y_vec1.WriteBuffer(queue_, args.y_size*sizeof(T), y_source_);
    a_mat1.WriteBuffer(queue_, args.a_size*sizeof(T), a_source_);
    b_mat1.WriteBuffer(queue_, args.b_size*sizeof(T), b_source_);
    c_mat1.WriteBuffer(queue_, args.c_size*sizeof(T), c_source_);
    auto buffers1 = Buffers{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1};
    auto status1 = run_reference_(args, buffers1, queue_);

    // Runs the CLBlast code
    auto x_vec2 = Buffer(context_, CL_MEM_READ_WRITE, args.x_size*sizeof(T));
    auto y_vec2 = Buffer(context_, CL_MEM_READ_WRITE, args.y_size*sizeof(T));
    auto a_mat2 = Buffer(context_, CL_MEM_READ_WRITE, args.a_size*sizeof(T));
    auto b_mat2 = Buffer(context_, CL_MEM_READ_WRITE, args.b_size*sizeof(T));
    auto c_mat2 = Buffer(context_, CL_MEM_READ_WRITE, args.c_size*sizeof(T));
    x_vec2.WriteBuffer(queue_, args.x_size*sizeof(T), x_source_);
    y_vec2.WriteBuffer(queue_, args.y_size*sizeof(T), y_source_);
    a_mat2.WriteBuffer(queue_, args.a_size*sizeof(T), a_source_);
    b_mat2.WriteBuffer(queue_, args.b_size*sizeof(T), b_source_);
    c_mat2.WriteBuffer(queue_, args.c_size*sizeof(T), c_source_);
    auto buffers2 = Buffers{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2};
    auto status2 = run_routine_(args, buffers2, queue_);

    // Tests for equality of the two status codes
    if (status1 != StatusCode::kSuccess || status2 != StatusCode::kSuccess) {
      TestErrorCodes(status1, status2, args);
      continue;
    }

    // Downloads the results
    auto result1 = get_result_(args, buffers1, queue_);
    auto result2 = get_result_(args, buffers2, queue_);

    // The iteration bounds only depend on the (unchanged) arguments. They are queried once here
    // instead of invoking the type-erased callbacks again on every loop-condition evaluation.
    const auto id1_end = get_id1_(args);
    const auto id2_end = get_id2_(args);

    // Checks for differences in the output
    auto errors = size_t{0};
    for (auto id1=size_t{0}; id1<id1_end; ++id1) {
      for (auto id2=size_t{0}; id2<id2_end; ++id2) {
        auto index = get_index_(args, id1, id2);
        if (!TestSimilarity(result1[index], result2[index])) {
          errors++;
        }
      }
    }

    // Tests the error count (should be zero)
    TestErrorCount(errors, id1_end*id2_end, args);
  }
  TestEnd();
}
// =================================================================================================
// Runs the reference and the CLBlast routine on buffers whose sizes are taken as-is from each entry
// of the test vector, including sizes for which buffer creation may fail. Only the two returned
// status codes are compared; results (if any) are never downloaded.
template <typename T>
void TestBlas<T>::TestInvalid(std::vector<Arguments<T>> &test_vector, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("invalid buffer sizes", name);

  // Creates a raw OpenCL buffer of the given number of elements. The plain C call is used on
  // purpose: creation errors are ignored (both error outputs are null) so that invalid buffers
  // can be handed to the routines.
  const auto create_raw = [this](const size_t num_elements) {
    return clCreateBuffer(context_(), CL_MEM_READ_WRITE, num_elements*sizeof(T), nullptr, nullptr);
  };

  // Iterates over all the to-be-tested combinations of arguments
  for (auto &args: test_vector) {

    // First set of buffers: used by the reference
    auto x_vec1 = Buffer(create_raw(args.x_size));
    auto y_vec1 = Buffer(create_raw(args.y_size));
    auto a_mat1 = Buffer(create_raw(args.a_size));
    auto b_mat1 = Buffer(create_raw(args.b_size));
    auto c_mat1 = Buffer(create_raw(args.c_size));

    // Second set of buffers: used by the routine under test
    auto x_vec2 = Buffer(create_raw(args.x_size));
    auto y_vec2 = Buffer(create_raw(args.y_size));
    auto a_mat2 = Buffer(create_raw(args.a_size));
    auto b_mat2 = Buffer(create_raw(args.b_size));
    auto c_mat2 = Buffer(create_raw(args.c_size));

    // Runs the reference first, then CLBlast, and compares what they returned
    const auto reference_status =
        run_reference_(args, Buffers{x_vec1, y_vec1, a_mat1, b_mat1, c_mat1}, queue_);
    const auto routine_status =
        run_routine_(args, Buffers{x_vec2, y_vec2, a_mat2, b_mat2, c_mat2}, queue_);
    TestErrorCodes(reference_status, routine_status, args);
  }
  TestEnd();
}
// =================================================================================================
// Compiles the templated class: explicitly instantiates TestBlas for each element type the tests
// are run with. The member functions are defined in this source file rather than in the header, so
// these instantiations are what make them available to other translation units.
template class TestBlas<float>;
template class TestBlas<double>;
template class TestBlas<float2>;
template class TestBlas<double2>;
// =================================================================================================
} // namespace clblast

104
test/correctness/testblas.h Normal file
View file

@ -0,0 +1,104 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file tests any CLBlast routine. It contains two types of tests: one testing all sorts of
// input combinations, and one deliberately testing with invalid values.
//
// =================================================================================================
#ifndef CLBLAST_TEST_CORRECTNESS_TESTBLAS_H_
#define CLBLAST_TEST_CORRECTNESS_TESTBLAS_H_
#include <vector>
#include <string>
#include "correctness/tester.h"
namespace clblast {
// =================================================================================================
// Generic correctness tester for any CLBlast routine. The routine-specific parts (how to run the
// routine and the reference, how to download the result, and how to iterate over and index into
// that result) are supplied as callbacks, so one class serves all routines. It offers a regular
// test comparing results and an invalid test comparing only the returned status codes.
template <typename T>
class TestBlas: public Tester<T> {
public:
// Uses several variables from the Tester class
using Tester<T>::context_;
using Tester<T>::queue_;
// Uses several helper functions from the Tester class
using Tester<T>::TestStart;
using Tester<T>::TestEnd;
using Tester<T>::TestSimilarity;
using Tester<T>::TestErrorCount;
using Tester<T>::TestErrorCodes;
using Tester<T>::GetExampleScalars;
using Tester<T>::GetOffsets;
using Tester<T>::PrecisionSupported;
// Test settings for the regular test. Append to these lists in case more tests are required.
// The constructor sizes the source data from the maxima of the dimension, increment and offset
// lists. Presumably the per-routine test programs read these public lists to build their test
// vectors (not visible here).
// Note: these are non-static members with default initializers, so they are initialized in
// declaration order for every tester object.
const std::vector<size_t> kVectorDims = { 7, 93, 4096 };
const std::vector<size_t> kIncrements = { 1, 2, 7 };
const std::vector<size_t> kMatrixDims = { 7, 64 };
const std::vector<size_t> kMatrixVectorDims = { 61, 512 };
const std::vector<size_t> kOffsets = GetOffsets();
const std::vector<T> kAlphaValues = GetExampleScalars();
const std::vector<T> kBetaValues = GetExampleScalars();
// Test settings for the invalid tests. The size lists hold: empty, one element short of
// kBufferSize (squared for matrices), and exactly kBufferSize (squared for matrices).
const std::vector<size_t> kInvalidIncrements = { 0, 1 };
const size_t kBufferSize = 64;
const std::vector<size_t> kMatSizes = {0, kBufferSize*kBufferSize-1, kBufferSize*kBufferSize};
const std::vector<size_t> kVecSizes = {0, kBufferSize - 1, kBufferSize};
// The layout/transpose/triangle options to test with
const std::vector<Layout> kLayouts = {Layout::kRowMajor, Layout::kColMajor};
const std::vector<Triangle> kTriangles = {Triangle::kUpper, Triangle::kLower};
const std::vector<Side> kSides = {Side::kLeft, Side::kRight};
static const std::vector<Transpose> kTransposes; // Data-type dependent, see .cc-file
// Shorthand for the routine-specific functions passed to the tester:
//  - Routine: runs a routine on the given arguments, buffers and queue; returns its status code
//  - ResultGet: returns the routine's result as a host-side vector
//  - ResultIndex: maps the two loop indices onto an index in that result vector
//  - ResultIterator: returns the upper bound of one of the two loops over the result
using Routine = std::function<StatusCode(const Arguments<T>&, const Buffers&, CommandQueue&)>;
using ResultGet = std::function<std::vector<T>(const Arguments<T>&, Buffers&, CommandQueue&)>;
using ResultIndex = std::function<size_t(const Arguments<T>&, const size_t, const size_t)>;
using ResultIterator = std::function<size_t(const Arguments<T>&)>;
// Constructor, initializes the base class tester and input data
TestBlas(int argc, char *argv[], const bool silent,
const std::string &name, const std::vector<std::string> &options,
const Routine run_routine, const Routine run_reference, const ResultGet get_result,
const ResultIndex get_index, const ResultIterator get_id1, const ResultIterator get_id2);
// The test functions. Each takes the list of argument combinations to test and a name that is
// passed on to TestStart.
void TestRegular(std::vector<Arguments<T>> &test_vector, const std::string &name);
void TestInvalid(std::vector<Arguments<T>> &test_vector, const std::string &name);
private:
// Source data to test with (host-side copies that are uploaded before every routine call)
std::vector<T> x_source_;
std::vector<T> y_source_;
std::vector<T> a_source_;
std::vector<T> b_source_;
std::vector<T> c_source_;
// The routine-specific functions passed to the tester
Routine run_routine_;
Routine run_reference_;
ResultGet get_result_;
ResultIndex get_index_;
ResultIterator get_id1_;
ResultIterator get_id2_;
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_CORRECTNESS_TESTBLAS_H_
#endif

View file

@ -21,16 +21,6 @@
namespace clblast {
// =================================================================================================
// The layouts and transpose-options to test with (data-type dependent). The layouts are the same
// for every element type and are defined once for the whole template. The transpose options are
// defined through one explicit specialization per element type: float and double list 'no' and
// 'yes' only, whereas float2 and double2 additionally list the conjugate option.
template <typename T>
const std::vector<Layout> Tester<T>::kLayouts = {Layout::kRowMajor, Layout::kColMajor};
template <> const std::vector<Transpose> Tester<float>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> Tester<double>::kTransposes = {Transpose::kNo, Transpose::kYes};
template <> const std::vector<Transpose> Tester<float2>::kTransposes = {Transpose::kNo, Transpose::kYes, Transpose::kConjugate};
template <> const std::vector<Transpose> Tester<double2>::kTransposes = {Transpose::kNo, Transpose::kYes, Transpose::kConjugate};
// =================================================================================================
// General constructor for all CLBlast testers. It prints out the test header to stdout and sets-up
// the clBLAS library for reference.
template <typename T>

View file

@ -62,10 +62,6 @@ class Tester {
const std::string kSkippedCompilation{kPrintWarning + "\\" + kPrintEnd};
const std::string kUnsupportedPrecision{kPrintWarning + "o" + kPrintEnd};
// The layouts and transpose-options to test with
static const std::vector<Layout> kLayouts;
static const std::vector<Transpose> kTransposes;
// This structure combines the above log-entry with a status code an error percentage
struct ErrorLogEntry {
StatusCode status_expect;

View file

@ -1,176 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the TestXY class (see the header for information about the class).
//
// =================================================================================================
#include <algorithm>
#include "correctness/testxy.h"
namespace clblast {
// =================================================================================================
// Constructor: hands the generic arguments to the Tester base class, stores the two routine
// wrappers, and prepares host-side input data for both vectors that is large enough for every
// combination of dimension, increment and offset used by the regular test.
template <typename T>
TestXY<T>::TestXY(int argc, char *argv[], const bool silent,
                  const std::string &name, const std::vector<std::string> &options,
                  const Routine clblast_lambda, const Routine clblas_lambda):
    Tester<T>{argc, argv, silent, name, options},
    clblast_lambda_(clblast_lambda),
    clblas_lambda_(clblas_lambda) {

  // Worst-case number of elements over all test settings; shared by both vectors
  const auto largest_dim = *std::max_element(kVectorDims.begin(), kVectorDims.end());
  const auto largest_inc = *std::max_element(kIncrements.begin(), kIncrements.end());
  const auto largest_offset = *std::max_element(kOffsets.begin(), kOffsets.end());
  const auto vector_elements = largest_dim*largest_inc + largest_offset;

  // Allocates and fills the input data (x first, then y)
  x_source_.resize(vector_elements);
  y_source_.resize(vector_elements);
  PopulateVector(x_source_);
  PopulateVector(y_source_);
}
// ===============================================================================================
// Runs the routine under test against the clBLAS reference for every combination of the configured
// vector dimensions, increments, offsets and alpha values. If either run returns a non-success
// status only the status codes are compared; otherwise the output vectors are compared element by
// element. The arguments structure is modified in place while iterating.
template <typename T>
void TestXY<T>::TestRegular(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("regular behaviour", name);

  // Loops over the vector length
  for (const auto length: kVectorDims) {
    args.n = length;

    // Loops over the increment and the offset of x, then those of y
    for (const auto x_increment: kIncrements) {
      args.x_inc = x_increment;
      for (const auto x_start: kOffsets) {
        args.x_offset = x_start;
        for (const auto y_increment: kIncrements) {
          args.y_inc = y_increment;
          for (const auto y_start: kOffsets) {
            args.y_offset = y_start;

            // Number of elements per buffer; combinations with an empty buffer are skipped
            const auto x_elements = length * x_increment + x_start;
            const auto y_elements = length * y_increment + y_start;
            const auto both_non_empty = (x_elements >= 1) && (y_elements >= 1);
            if (!both_non_empty) { continue; }
            const auto x_bytes = x_elements*sizeof(T);
            const auto y_bytes = y_elements*sizeof(T);

            // One shared input buffer and one output buffer per implementation
            auto x_vec = Buffer(context_, CL_MEM_READ_WRITE, x_bytes);
            auto reference_vec = Buffer(context_, CL_MEM_READ_WRITE, y_bytes);
            auto routine_vec = Buffer(context_, CL_MEM_READ_WRITE, y_bytes);

            // Loops over the scalar
            for (const auto &scalar: kAlphaValues) {
              args.alpha = scalar;

              // Uploads the input data and runs the clBLAS reference
              x_vec.WriteBuffer(queue_, x_bytes, x_source_);
              reference_vec.WriteBuffer(queue_, y_bytes, y_source_);
              const auto reference_status = clblas_lambda_(args, x_vec, reference_vec, queue_);

              // Uploads the same input data again and runs CLBlast
              x_vec.WriteBuffer(queue_, x_bytes, x_source_);
              routine_vec.WriteBuffer(queue_, y_bytes, y_source_);
              const auto routine_status = clblast_lambda_(args, x_vec, routine_vec, queue_);

              // Unless both runs succeeded, only the status codes are compared
              const auto both_succeeded = (reference_status == StatusCode::kSuccess) &&
                                          (routine_status == StatusCode::kSuccess);
              if (!both_succeeded) {
                TestErrorCodes(reference_status, routine_status, args);
                continue;
              }

              // Copies both outputs back to the host
              auto reference_out = std::vector<T>(y_elements, static_cast<T>(0));
              auto routine_out = std::vector<T>(y_elements, static_cast<T>(0));
              reference_vec.ReadBuffer(queue_, y_bytes, reference_out);
              routine_vec.ReadBuffer(queue_, y_bytes, routine_out);

              // Counts the output elements on which the two implementations disagree
              auto mismatches = size_t{0};
              for (auto element=size_t{0}; element<length; ++element) {
                const auto position = element*y_increment + y_start;
                if (TestSimilarity(reference_out[position], routine_out[position])) { continue; }
                ++mismatches;
              }

              // Reports the mismatch count (should be zero)
              TestErrorCount(mismatches, length, args);
            }
          }
        }
      }
    }
  }
  TestEnd();
}
// =================================================================================================
// Buffer-size test: calls both libraries with OpenCL buffers of zero, one-too-few and
// exactly-enough elements, combined with increments of zero and one. Only the returned status
// codes of the two libraries are compared; the buffer contents are never read back.
template <typename T>
void TestXY<T>::TestInvalidBufferSizes(Arguments<T> &args, const std::string &name) {
  if (!PrecisionSupported()) { return; }
  TestStart("invalid buffer sizes", name);

  // Fixed problem description: a vector of kBufferSize elements without offsets
  args.n = kBufferSize;
  args.x_offset = 0;
  args.y_offset = 0;

  // The buffer sizes (in elements) to try for both the x and the y vector
  const std::vector<size_t> candidate_sizes = {0, kBufferSize - 1, kBufferSize};
  for (const auto &x_elements: candidate_sizes) {
    for (const auto &y_elements: candidate_sizes) {

      // The increments to try for both vectors
      for (const auto &stride_x: kInvalidIncrements) {
        args.x_inc = stride_x;
        for (const auto &stride_y: kInvalidIncrements) {
          args.y_inc = stride_y;

          // The raw OpenCL C API is used here on purpose: creating a buffer may fail (e.g. for a
          // size of zero) and that failure must reach the routines instead of being checked here.
          auto raw_x = clCreateBuffer(context_(), CL_MEM_READ_WRITE, x_elements*sizeof(T),
                                      nullptr, nullptr);
          auto input_x = Buffer(raw_x);
          auto raw_reference = clCreateBuffer(context_(), CL_MEM_READ_WRITE, y_elements*sizeof(T),
                                              nullptr, nullptr);
          auto reference_y = Buffer(raw_reference);
          auto raw_clblast = clCreateBuffer(context_(), CL_MEM_READ_WRITE, y_elements*sizeof(T),
                                            nullptr, nullptr);
          auto clblast_y = Buffer(raw_clblast);

          // Runs the reference first and CLBlast second, then compares their status codes
          const auto reference_status = clblas_lambda_(args, input_x, reference_y, queue_);
          const auto clblast_status = clblast_lambda_(args, input_x, clblast_y, queue_);
          TestErrorCodes(reference_status, clblast_status, args);
        }
      }
    }
  }
  TestEnd();
}
// =================================================================================================
// Compiles the templated class: explicit instantiations are required because the member functions
// are defined in this source file rather than in the header. One instantiation is given per
// tested data-type (float2/double2 are presumably the complex types -- confirm in the utilities).
template class TestXY<float>;
template class TestXY<double>;
template class TestXY<float2>;
template class TestXY<double2>;
// =================================================================================================
} // namespace clblast

View file

@ -1,84 +0,0 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file tests any vector-vector (X,Y) routine. It contains two types of tests: one testing
// all sorts of input combinations, and one deliberately testing with invalid values.
//
// =================================================================================================
#ifndef CLBLAST_TEST_CORRECTNESS_TESTXY_H_
#define CLBLAST_TEST_CORRECTNESS_TESTXY_H_
#include <vector>
#include <string>
#include "correctness/tester.h"
namespace clblast {
// =================================================================================================
// Correctness tester for vector-vector (X,Y) routines (see also the comment at the top of the
// file). The class derives from Tester<T> and is constructed with two callables, the CLBlast
// routine and its clBLAS counterpart, which the test functions run on the same input.
template <typename T>
class TestXY: public Tester<T> {
public:
// Uses several variables from the Tester class. The using-declarations are needed because the
// base class depends on the template parameter, so its members are not found by unqualified
// name lookup.
using Tester<T>::context_;
using Tester<T>::queue_;
// Uses several helper functions from the Tester class
using Tester<T>::TestStart;
using Tester<T>::TestEnd;
using Tester<T>::TestSimilarity;
using Tester<T>::TestErrorCount;
using Tester<T>::TestErrorCodes;
using Tester<T>::GetExampleScalars;
using Tester<T>::GetOffsets;
using Tester<T>::PrecisionSupported;
// Test settings for the regular test. Append to this list in case more tests are required. The
// offsets and the alpha values are obtained from the Tester base class.
const std::vector<size_t> kVectorDims = { 7, 93, 4096 };
const std::vector<size_t> kOffsets = GetOffsets();
const std::vector<size_t> kIncrements = { 1, 2, 7 };
const std::vector<T> kAlphaValues = GetExampleScalars();
// Test settings for the invalid test: the increments to try and the vector size (in elements)
const std::vector<size_t> kInvalidIncrements = { 0, 1 };
const size_t kBufferSize = 512;
// Shorthand for a BLAS routine: takes the arguments, the x and y buffers and a command queue, and
// returns a status code.
// NOTE(review): std::function requires <functional>, which this header does not include
// directly -- presumably it is provided through "correctness/tester.h"; confirm.
using Routine = std::function<StatusCode(const Arguments<T>&,
const Buffer&, const Buffer&,
CommandQueue&)>;
// Constructor, initializes the base class tester and input data
TestXY(int argc, char *argv[], const bool silent,
const std::string &name, const std::vector<std::string> &options,
const Routine clblast_lambda, const Routine clblas_lambda);
// The test functions. Both take the arguments structure (which they modify while iterating over
// the test settings) and a name that is passed on to TestStart.
void TestRegular(Arguments<T> &args, const std::string &name);
void TestInvalidBufferSizes(Arguments<T> &args, const std::string &name);
private:
// Source data to test with (host-side input for the x and y vectors)
std::vector<T> x_source_;
std::vector<T> y_source_;
// The routines to test: the CLBlast routine and the clBLAS reference
Routine clblast_lambda_;
Routine clblas_lambda_;
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_CORRECTNESS_TESTXY_H_
#endif

View file

@ -21,323 +21,36 @@
namespace clblast {
// =================================================================================================
// NOTE(review): this span appears to interleave two versions of the file as rendered by the diff
// view: the free function ClientXY and the constructor of the Client class share the 'template'
// line and the closing brace. Confirm against the actual source before relying on it.
//
// This is the vector-vector variant of the set-up/tear-down client routine. It parses the
// command-line arguments, initializes OpenCL, and then calls 'client_routine' once per step with
// freshly created and filled x and y buffers.
// Constructor
template <typename T>
void ClientXY(int argc, char *argv[], Routine2<T> client_routine,
const std::vector<std::string> &options) {
// Function to determine how to find the default value of the leading dimension of matrix A.
// Note: this is not relevant for this client but given anyway.
auto default_ld_a = [](const Arguments<T> args) { return args.n; };
// Simple command line argument parser with defaults. Nothing is run when help was requested.
auto args = ParseArguments<T>(argc, argv, options, default_ld_a);
if (args.print_help) { return; }
// Prints the header of the output table
PrintTableHeader(args.silent, options);
// Initializes OpenCL and the libraries (clBLAS only when a comparison is requested)
auto platform = Platform(args.platform_id);
auto device = Device(platform, kDeviceType, args.device_id);
auto context = Context(device);
auto queue = CommandQueue(context, device);
if (args.compare_clblas) { clblasSetup(); }
// Iterates over all "num_step" values jumping by "step" each time. The exit check follows the
// routine call, so the body runs at least once, also when "num_steps" is zero.
auto s = size_t{0};
while(true) {
// Computes the data sizes (in elements) from the vector size, the increment and the offset
auto x_size = args.n*args.x_inc + args.x_offset;
auto y_size = args.n*args.y_inc + args.y_offset;
// Populates input host vectors with random data
std::vector<T> x_source(x_size);
std::vector<T> y_source(y_size);
PopulateVector(x_source);
PopulateVector(y_source);
// Creates the vectors on the device and uploads the host data
auto x_buffer = Buffer(context, CL_MEM_READ_WRITE, x_size*sizeof(T));
auto y_buffer = Buffer(context, CL_MEM_READ_WRITE, y_size*sizeof(T));
x_buffer.WriteBuffer(queue, x_size*sizeof(T), x_source);
y_buffer.WriteBuffer(queue, y_size*sizeof(T), y_source);
// Runs the routine-specific code
client_routine(args, x_buffer, y_buffer, queue);
// Makes the jump to the next step: only the vector size grows
++s;
if (s >= args.num_steps) { break; }
args.n += args.step;
}
// Cleans-up and returns
if (args.compare_clblas) { clblasTeardown(); }
// Constructor of the Client class: only stores the routine to run, the reference routine, the
// list of command-line options and the two metric functions (FLOPS and bytes). No other work is
// done here.
Client<T>::Client(const Routine run_routine, const Routine run_reference,
const std::vector<std::string> &options,
const GetMetric get_flops, const GetMetric get_bytes):
run_routine_(run_routine),
run_reference_(run_reference),
options_(options),
get_flops_(get_flops),
get_bytes_(get_bytes) {
}
// Compiles the above function (ClientXY) for each of the four data-types
template void ClientXY<float>(int, char **, Routine2<float>, const std::vector<std::string>&);
template void ClientXY<double>(int, char **, Routine2<double>, const std::vector<std::string>&);
template void ClientXY<float2>(int, char **, Routine2<float2>, const std::vector<std::string>&);
template void ClientXY<double2>(int, char **, Routine2<double2>, const std::vector<std::string>&);
// =================================================================================================
// Set-up/tear-down client for routines working on one matrix (A) and two vectors (X and Y). It
// parses the command-line arguments, initializes OpenCL, and then calls 'client_routine' once per
// step with freshly created and filled buffers, growing the problem size in between.
template <typename T>
void ClientAXY(int argc, char *argv[], Routine3<T> client_routine,
               const std::vector<std::string> &options) {

  // Fallback for the leading dimension of matrix A when it is not given on the command line
  auto lda_fallback = [](const Arguments<T> args) { return args.n; };

  // Parses the command line; nothing is run when only the help message was requested
  auto args = ParseArguments<T>(argc, argv, options, lda_fallback);
  if (args.print_help) { return; }

  // Output table header
  PrintTableHeader(args.silent, options);

  // OpenCL set-up, plus clBLAS when a comparison is requested
  auto platform = Platform(args.platform_id);
  auto device = Device(platform, kDeviceType, args.device_id);
  auto context = Context(device);
  auto queue = CommandQueue(context, device);
  if (args.compare_clblas) { clblasSetup(); }

  // Runs "num_steps" problem sizes (at least one), each "step" larger than the previous
  auto completed = size_t{0};
  for (;;) {

    // The second dimension of A depends on the data-layout
    const auto a_second_dim = (args.layout == Layout::kRowMajor) ? args.m : args.n;

    // A transposed matrix swaps the lengths of the x and y vectors
    const auto transposed = (args.a_transpose != Transpose::kNo);
    const auto x_length = transposed ? args.m : args.n;
    const auto y_length = transposed ? args.n : args.m;

    // Buffer sizes in number of elements
    const auto a_size = a_second_dim * args.a_ld + args.a_offset;
    const auto x_size = x_length*args.x_inc + args.x_offset;
    const auto y_size = y_length*args.y_inc + args.y_offset;

    // Host-side input data, filled with random values (in the order a, x, y)
    auto host_a = std::vector<T>(a_size);
    auto host_x = std::vector<T>(x_size);
    auto host_y = std::vector<T>(y_size);
    PopulateVector(host_a);
    PopulateVector(host_x);
    PopulateVector(host_y);

    // Device-side buffers, initialized with the host data
    auto device_a = Buffer(context, CL_MEM_READ_WRITE, a_size*sizeof(T));
    auto device_x = Buffer(context, CL_MEM_READ_WRITE, x_size*sizeof(T));
    auto device_y = Buffer(context, CL_MEM_READ_WRITE, y_size*sizeof(T));
    device_a.WriteBuffer(queue, a_size*sizeof(T), host_a);
    device_x.WriteBuffer(queue, x_size*sizeof(T), host_x);
    device_y.WriteBuffer(queue, y_size*sizeof(T), host_y);

    // Hands over to the routine-specific code
    client_routine(args, device_a, device_x, device_y, queue);

    // Stops after the last step, otherwise grows the problem
    if (++completed >= args.num_steps) { break; }
    args.m += args.step;
    args.n += args.step;
    args.a_ld += args.step;
  }

  // Tear-down of clBLAS (when it was set up)
  if (args.compare_clblas) { clblasTeardown(); }
}

// Explicit instantiations of the above function for each data-type
template void ClientAXY<float>(int, char **, Routine3<float>, const std::vector<std::string>&);
template void ClientAXY<double>(int, char **, Routine3<double>, const std::vector<std::string>&);
template void ClientAXY<float2>(int, char **, Routine3<float2>, const std::vector<std::string>&);
template void ClientAXY<double2>(int, char **, Routine3<double2>, const std::vector<std::string>&);
// =================================================================================================
// Set-up/tear-down client for routines working on two matrices (A and C). It parses the
// command-line arguments, initializes OpenCL, and then calls 'client_routine' once per step with
// freshly created and filled buffers, growing the problem size in between.
template <typename T>
void ClientAC(int argc, char *argv[], Routine2<T> client_routine,
              const std::vector<std::string> &options) {

  // Fallback for the leading dimension of matrix A when it is not given on the command line
  auto lda_fallback = [](const Arguments<T> args) { return args.k; };

  // Parses the command line; nothing is run when only the help message was requested
  auto args = ParseArguments<T>(argc, argv, options, lda_fallback);
  if (args.print_help) { return; }

  // Output table header
  PrintTableHeader(args.silent, options);

  // OpenCL set-up, plus clBLAS when a comparison is requested
  auto platform = Platform(args.platform_id);
  auto device = Device(platform, kDeviceType, args.device_id);
  auto context = Context(device);
  auto queue = CommandQueue(context, device);
  if (args.compare_clblas) { clblasSetup(); }

  // Matrix A counts as 'rotated' when exactly one of the two deviations from the default
  // (column-major, not transposed) applies: either transposed, or stored row-major.
  const auto col_major_transposed = (args.layout == Layout::kColMajor &&
                                     args.a_transpose != Transpose::kNo);
  const auto row_major_plain = (args.layout == Layout::kRowMajor &&
                                args.a_transpose == Transpose::kNo);
  const auto a_rotated = col_major_transposed || row_major_plain;

  // Runs "num_steps" problem sizes (at least one), each "step" larger than the previous
  auto completed = size_t{0};
  for (;;) {

    // Buffer sizes in number of elements
    const auto a_second_dim = a_rotated ? args.n : args.k;
    const auto a_size = a_second_dim * args.a_ld + args.a_offset;
    const auto c_size = args.n * args.c_ld + args.c_offset;

    // Host-side input data, filled with random values (in the order a, c)
    auto host_a = std::vector<T>(a_size);
    auto host_c = std::vector<T>(c_size);
    PopulateVector(host_a);
    PopulateVector(host_c);

    // Device-side buffers, initialized with the host data
    auto device_a = Buffer(context, CL_MEM_READ_WRITE, a_size*sizeof(T));
    auto device_c = Buffer(context, CL_MEM_READ_WRITE, c_size*sizeof(T));
    device_a.WriteBuffer(queue, a_size*sizeof(T), host_a);
    device_c.WriteBuffer(queue, c_size*sizeof(T), host_c);

    // Hands over to the routine-specific code
    client_routine(args, device_a, device_c, queue);

    // Stops after the last step, otherwise grows the problem
    if (++completed >= args.num_steps) { break; }
    args.n += args.step;
    args.k += args.step;
    args.a_ld += args.step;
    args.c_ld += args.step;
  }

  // Tear-down of clBLAS (when it was set up)
  if (args.compare_clblas) { clblasTeardown(); }
}

// Explicit instantiations of the above function for each data-type
template void ClientAC<float>(int, char **, Routine2<float>, const std::vector<std::string>&);
template void ClientAC<double>(int, char **, Routine2<double>, const std::vector<std::string>&);
template void ClientAC<float2>(int, char **, Routine2<float2>, const std::vector<std::string>&);
template void ClientAC<double2>(int, char **, Routine2<double2>, const std::vector<std::string>&);
// =================================================================================================
// This is the matrix-matrix-matrix variant of the set-up/tear-down client routine.
template <typename T>
void ClientABC(int argc, char *argv[], Routine3<T> client_routine,
const std::vector<std::string> &options, const bool symmetric) {
// Function to determine how to find the default value of the leading dimension of matrix A
auto default_ld_a = [&symmetric](const Arguments<T> args) { return (symmetric) ? args.n : args.m; };
// Simple command line argument parser with defaults
auto args = ParseArguments<T>(argc, argv, options, default_ld_a);
if (args.print_help) { return; }
if (symmetric) { args.m = args.n; }
// Prints the header of the output table
PrintTableHeader(args.silent, options);
// Initializes OpenCL and the libraries
auto platform = Platform(args.platform_id);
auto device = Device(platform, kDeviceType, args.device_id);
auto context = Context(device);
auto queue = CommandQueue(context, device);
if (args.compare_clblas) { clblasSetup(); }
// Computes whether or not the matrices are transposed. Note that we assume a default of
// column-major and no-transpose. If one of them is different (but not both), then rotated
// is considered true.
auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
(args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
auto b_rotated = (args.layout == Layout::kColMajor && args.b_transpose != Transpose::kNo) ||
(args.layout == Layout::kRowMajor && args.b_transpose == Transpose::kNo);
auto c_rotated = (args.layout == Layout::kRowMajor);
// Iterates over all "num_step" values jumping by "step" each time
auto s = size_t{0};
while(true) {
// Computes the data sizes
auto a_two = (a_rotated) ? args.m : args.k;
auto b_two = (b_rotated) ? args.k : args.n;
auto c_two = (c_rotated) ? args.m : args.n;
auto a_size = a_two * args.a_ld + args.a_offset;
auto b_size = b_two * args.b_ld + args.b_offset;
auto c_size = c_two * args.c_ld + args.c_offset;
// Populates input host matrices with random data
std::vector<T> a_source(a_size);
std::vector<T> b_source(b_size);
std::vector<T> c_source(c_size);
PopulateVector(a_source);
PopulateVector(b_source);
PopulateVector(c_source);
// Creates the matrices on the device
auto a_buffer = Buffer(context, CL_MEM_READ_WRITE, a_size*sizeof(T));
auto b_buffer = Buffer(context, CL_MEM_READ_WRITE, b_size*sizeof(T));
auto c_buffer = Buffer(context, CL_MEM_READ_WRITE, c_size*sizeof(T));
a_buffer.WriteBuffer(queue, a_size*sizeof(T), a_source);
b_buffer.WriteBuffer(queue, b_size*sizeof(T), b_source);
c_buffer.WriteBuffer(queue, c_size*sizeof(T), c_source);
// Runs the routine-specific code
client_routine(args, a_buffer, b_buffer, c_buffer, queue);
// Makes the jump to the next step
++s;
if (s >= args.num_steps) { break; }
args.m += args.step;
args.n += args.step;
args.k += args.step;
args.a_ld += args.step;
args.b_ld += args.step;
args.c_ld += args.step;
}
// Cleans-up and returns
if (args.compare_clblas) { clblasTeardown(); }
}
// Compiles the above function
template void ClientABC<float>(int, char **, Routine3<float>, const std::vector<std::string>&, const bool);
template void ClientABC<double>(int, char **, Routine3<double>, const std::vector<std::string>&, const bool);
template void ClientABC<float2>(int, char **, Routine3<float2>, const std::vector<std::string>&, const bool);
template void ClientABC<double2>(int, char **, Routine3<double2>, const std::vector<std::string>&, const bool);
// =================================================================================================
// Parses all arguments available for the CLBlast client testers. Some arguments might not be
// applicable, but are searched for anyway to be able to create one common argument parser. All
// arguments have a default value in case they are not found.
template <typename T>
Arguments<T> ParseArguments(int argc, char *argv[], const std::vector<std::string> &options,
const std::function<size_t(const Arguments<T>)> default_ld_a) {
Arguments<T> Client<T>::ParseArguments(int argc, char *argv[], const GetMetric default_a_ld,
const GetMetric default_b_ld, const GetMetric default_c_ld) {
auto args = Arguments<T>{};
auto help = std::string{"Options given/available:\n"};
// These are the options which are not for every client: they are optional
for (auto &o: options) {
for (auto &o: options_) {
// Data-sizes
if (o == kArgM) { args.m = args.k = GetArgument(argc, argv, help, kArgM, 512UL); }
if (o == kArgN) { args.n = GetArgument(argc, argv, help, kArgN, 512UL); }
if (o == kArgK) { args.k = GetArgument(argc, argv, help, kArgK, 512UL); }
if (o == kArgM) { args.m = GetArgument(argc, argv, help, kArgM, 512UL); }
if (o == kArgN) { args.n = GetArgument(argc, argv, help, kArgN, 512UL); }
if (o == kArgK) { args.k = GetArgument(argc, argv, help, kArgK, 512UL); }
// Data-layouts
if (o == kArgLayout) { args.layout = GetArgument(argc, argv, help, kArgLayout, Layout::kRowMajor); }
@ -353,9 +66,9 @@ Arguments<T> ParseArguments(int argc, char *argv[], const std::vector<std::strin
if (o == kArgYOffset) { args.y_offset = GetArgument(argc, argv, help, kArgYOffset, size_t{0}); }
// Matrix arguments
if (o == kArgALeadDim) { args.a_ld = GetArgument(argc, argv, help, kArgALeadDim, default_ld_a(args)); }
if (o == kArgBLeadDim) { args.b_ld = GetArgument(argc, argv, help, kArgBLeadDim, args.n); }
if (o == kArgCLeadDim) { args.c_ld = GetArgument(argc, argv, help, kArgCLeadDim, args.n); }
if (o == kArgALeadDim) { args.a_ld = GetArgument(argc, argv, help, kArgALeadDim, default_a_ld(args)); }
if (o == kArgBLeadDim) { args.b_ld = GetArgument(argc, argv, help, kArgBLeadDim, default_b_ld(args)); }
if (o == kArgCLeadDim) { args.c_ld = GetArgument(argc, argv, help, kArgCLeadDim, default_c_ld(args)); }
if (o == kArgAOffset) { args.a_offset = GetArgument(argc, argv, help, kArgAOffset, size_t{0}); }
if (o == kArgBOffset) { args.b_offset = GetArgument(argc, argv, help, kArgBOffset, size_t{0}); }
if (o == kArgCOffset) { args.c_offset = GetArgument(argc, argv, help, kArgCOffset, size_t{0}); }
@ -387,16 +100,92 @@ Arguments<T> ParseArguments(int argc, char *argv[], const std::vector<std::strin
// =================================================================================================
// This is the main performance tester. It initializes OpenCL (and clBLAS when a comparison is
// requested) and then, for each of the "num_steps" problem sizes, creates and fills the five
// input/output buffers, times both the CLBlast routine and the reference routine, and prints one
// table row with the results.
//
// args:      the parsed arguments. The sizes (m, n, k) and the leading dimensions are increased
//            by "step" after every iteration, so the caller's structure is modified.
// set_sizes: routine-specific function, expected to fill in the buffer sizes (x_size, y_size,
//            a_size, b_size, c_size) of 'args' for the current problem size.
//
// TimedExecution throws a std::runtime_error when a routine does not return successfully. That
// exception propagates to the caller; clBLAS is torn down on that path as well.
template <typename T>
void Client<T>::PerformanceTest(Arguments<T> &args, const SetMetric set_sizes) {

  // Prints the header of the output table
  PrintTableHeader(args.silent, options_);

  // Initializes OpenCL
  auto platform = Platform(args.platform_id);
  auto device = Device(platform, kDeviceType, args.device_id);
  auto context = Context(device);
  auto queue = CommandQueue(context, device);

  // Initializes clBLAS (if requested). The scope guard pairs the set-up with exactly one
  // tear-down on every way out of this function, including an exception thrown while timing.
  // It is declared after the OpenCL objects, so the tear-down still runs before those are
  // released, as in a plain call at the end of the function.
  struct ClblasSession {
    const bool active;
    explicit ClblasSession(const bool enable): active(enable) { if (active) { clblasSetup(); } }
    ~ClblasSession() { if (active) { clblasTeardown(); } }
    ClblasSession(const ClblasSession&) = delete;
    ClblasSession& operator=(const ClblasSession&) = delete;
  };
  const ClblasSession clblas_session(args.compare_clblas);

  // Iterates over all "num_step" values jumping by "step" each time. The exit check follows the
  // measurements, so the body runs at least once.
  auto s = size_t{0};
  while(true) {

    // Sets the buffer sizes (routine-specific)
    set_sizes(args);

    // Populates input host matrices with random data
    std::vector<T> x_source(args.x_size);
    std::vector<T> y_source(args.y_size);
    std::vector<T> a_source(args.a_size);
    std::vector<T> b_source(args.b_size);
    std::vector<T> c_source(args.c_size);
    PopulateVector(x_source);
    PopulateVector(y_source);
    PopulateVector(a_source);
    PopulateVector(b_source);
    PopulateVector(c_source);

    // Creates the matrices on the device
    auto x_vec = Buffer(context, CL_MEM_READ_WRITE, args.x_size*sizeof(T));
    auto y_vec = Buffer(context, CL_MEM_READ_WRITE, args.y_size*sizeof(T));
    auto a_mat = Buffer(context, CL_MEM_READ_WRITE, args.a_size*sizeof(T));
    auto b_mat = Buffer(context, CL_MEM_READ_WRITE, args.b_size*sizeof(T));
    auto c_mat = Buffer(context, CL_MEM_READ_WRITE, args.c_size*sizeof(T));
    x_vec.WriteBuffer(queue, args.x_size*sizeof(T), x_source);
    y_vec.WriteBuffer(queue, args.y_size*sizeof(T), y_source);
    a_mat.WriteBuffer(queue, args.a_size*sizeof(T), a_source);
    b_mat.WriteBuffer(queue, args.b_size*sizeof(T), b_source);
    c_mat.WriteBuffer(queue, args.c_size*sizeof(T), c_source);
    auto buffers = Buffers{x_vec, y_vec, a_mat, b_mat, c_mat};

    // Runs the routines and collects the timings. Both libraries are timed unconditionally.
    auto ms_clblast = TimedExecution(args.num_runs, args, buffers, queue, run_routine_, "CLBlast");
    auto ms_clblas = TimedExecution(args.num_runs, args, buffers, queue, run_reference_, "clBLAS");

    // Prints the performance of both libraries
    PrintTableRow(args, ms_clblast, ms_clblas);

    // Makes the jump to the next step: all sizes and leading dimensions grow together
    ++s;
    if (s >= args.num_steps) { break; }
    args.m += args.step;
    args.n += args.step;
    args.k += args.step;
    args.a_ld += args.step;
    args.b_ld += args.step;
    args.c_ld += args.step;
  }

  // Clean-up of clBLAS is performed by the destructor of 'clblas_session'
}
// =================================================================================================
// Creates a vector of timing results, filled with execution times of the 'main computation'. The
// timing is performed using the milliseconds chrono functions. The function returns the minimum
// value found in the vector of timing results. The return value is in milliseconds.
double TimedExecution(const size_t num_runs, std::function<void()> main_computation) {
template <typename T>
double Client<T>::TimedExecution(const size_t num_runs, const Arguments<T> &args,
const Buffers &buffers, CommandQueue &queue,
Routine run_blas, const std::string &library_name) {
auto timings = std::vector<double>(num_runs);
for (auto &timing: timings) {
auto start_time = std::chrono::steady_clock::now();
// Executes the main computation
main_computation();
auto status = run_blas(args, buffers, queue);
if (status != StatusCode::kSuccess) {
throw std::runtime_error(library_name+" error: "+ToString(static_cast<int>(status)));
}
// Records and stores the end-time
auto elapsed_time = std::chrono::steady_clock::now() - start_time;
@ -408,7 +197,8 @@ double TimedExecution(const size_t num_runs, std::function<void()> main_computat
// =================================================================================================
// Prints the header of the performance table
void PrintTableHeader(const bool silent, const std::vector<std::string> &args) {
template <typename T>
void Client<T>::PrintTableHeader(const bool silent, const std::vector<std::string> &args) {
if (!silent) {
for (auto i=size_t{0}; i<args.size(); ++i) { fprintf(stdout, "%9s ", ""); }
fprintf(stdout, " | <-- CLBlast --> | <-- clBLAS --> |\n");
@ -419,29 +209,59 @@ void PrintTableHeader(const bool silent, const std::vector<std::string> &args) {
}
// Print a performance-result row
void PrintTableRow(const std::vector<size_t> &args_int, const std::vector<std::string> &args_string,
const bool no_abbrv, const double ms_clblast, const double ms_clblas,
const unsigned long long flops, const unsigned long long bytes) {
template <typename T>
void Client<T>::PrintTableRow(const Arguments<T>& args, const double ms_clblast,
const double ms_clblas) {
// Creates a vector of relevant variables
auto integers = std::vector<size_t>{};
for (auto &o: options_) {
if (o == kArgM) { integers.push_back(args.m); }
if (o == kArgN) { integers.push_back(args.n); }
else if (o == kArgK) { integers.push_back(args.k); }
else if (o == kArgLayout) { integers.push_back(static_cast<size_t>(args.layout)); }
else if (o == kArgSide) { integers.push_back(static_cast<size_t>(args.side)); }
else if (o == kArgTriangle) { integers.push_back(static_cast<size_t>(args.triangle)); }
else if (o == kArgATransp) { integers.push_back(static_cast<size_t>(args.a_transpose)); }
else if (o == kArgBTransp) { integers.push_back(static_cast<size_t>(args.b_transpose)); }
else if (o == kArgXInc) { integers.push_back(args.x_inc); }
else if (o == kArgYInc) { integers.push_back(args.y_inc); }
else if (o == kArgXOffset) { integers.push_back(args.x_offset); }
else if (o == kArgYOffset) { integers.push_back(args.y_offset); }
else if (o == kArgALeadDim) { integers.push_back(args.a_ld); }
else if (o == kArgBLeadDim) { integers.push_back(args.b_ld); }
else if (o == kArgCLeadDim) { integers.push_back(args.c_ld); }
else if (o == kArgAOffset) { integers.push_back(args.a_offset); }
else if (o == kArgBOffset) { integers.push_back(args.b_offset); }
else if (o == kArgCOffset) { integers.push_back(args.c_offset); }
}
auto strings = std::vector<std::string>{};
for (auto &o: options_) {
if (o == kArgAlpha) { strings.push_back(ToString(args.alpha)); }
else if (o == kArgBeta) { strings.push_back(ToString(args.beta)); }
}
// Computes the GFLOPS and GB/s metrics
auto flops = get_flops_(args);
auto bytes = get_bytes_(args);
auto gflops_clblast = (ms_clblast != 0.0) ? (flops*1e-6)/ms_clblast : 0;
auto gflops_clblas = (ms_clblas != 0.0) ? (flops*1e-6)/ms_clblas: 0;
auto gbs_clblast = (ms_clblast != 0.0) ? (bytes*1e-6)/ms_clblast : 0;
auto gbs_clblas = (ms_clblas != 0.0) ? (bytes*1e-6)/ms_clblas: 0;
// Outputs the argument values
for (auto &argument: args_int) {
if (!no_abbrv && argument >= 1024*1024 && IsMultiple(argument, 1024*1024)) {
for (auto &argument: integers) {
if (!args.no_abbrv && argument >= 1024*1024 && IsMultiple(argument, 1024*1024)) {
fprintf(stdout, "%8luM;", argument/(1024*1024));
}
else if (!no_abbrv && argument >= 1024 && IsMultiple(argument, 1024)) {
else if (!args.no_abbrv && argument >= 1024 && IsMultiple(argument, 1024)) {
fprintf(stdout, "%8luK;", argument/1024);
}
else {
fprintf(stdout, "%9lu;", argument);
}
}
for (auto &argument: args_string) {
for (auto &argument: strings) {
fprintf(stdout, "%9s;", argument.c_str());
}
@ -451,5 +271,13 @@ void PrintTableRow(const std::vector<size_t> &args_int, const std::vector<std::s
ms_clblas, gflops_clblas, gbs_clblas);
}
// =================================================================================================
// Compiles the templated class: explicit instantiations of Client for each data-type, as the
// member functions are defined in this source file (float2/double2 are presumably the complex
// types -- confirm in the utilities).
template class Client<float>;
template class Client<double>;
template class Client<float2>;
template class Client<double2>;
// =================================================================================================
} // namespace clblast

View file

@ -7,7 +7,12 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file provides common function declarations to be used with the test clients.
// This class implements the performance-test client. It is generic for all CLBlast routines by
// taking a number of routine-specific functions as arguments, such as how to compute buffer sizes
// or how to get the FLOPS count.
//
// This file also provides the common interface to the performance client (see the 'RunClient'
// function for details).
//
// =================================================================================================
@ -26,64 +31,71 @@
namespace clblast {
// =================================================================================================
// Types of devices to consider
const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL;
// The performance-test client (see also the comment at the top of the file). It is constructed
// with the routine-specific functions: the routine to time, a reference routine to compare with,
// the list of command-line options, and functions computing the FLOPS and bytes metrics.
template <typename T>
class Client {
public:
// Types of devices to consider. Note: this is a non-static const data member, so every Client
// object holds its own copy.
const cl_device_type kDeviceType = CL_DEVICE_TYPE_ALL;
// Shorthand for the routine-specific functions passed to the tester:
//  - Routine:   runs a BLAS routine on a set of buffers and returns its status code
//  - SetMetric: modifies the arguments structure (used to set the buffer sizes)
//  - GetMetric: computes a number from the arguments (FLOPS, bytes, default leading dimensions)
using Routine = std::function<StatusCode(const Arguments<T>&, const Buffers&, CommandQueue&)>;
using SetMetric = std::function<void(Arguments<T>&)>;
using GetMetric = std::function<size_t(const Arguments<T>&)>;
// The constructor: stores the given functions and options
Client(const Routine run_routine, const Routine run_reference,
const std::vector<std::string> &options,
const GetMetric get_flops, const GetMetric get_bytes);
// Parses all command-line arguments, filling in the arguments structure. If no command-line
// argument is given for a particular argument, it is filled in with a default value. The three
// functions supply the default leading dimensions of the matrices A, B and C.
Arguments<T> ParseArguments(int argc, char *argv[], const GetMetric default_a_ld,
const GetMetric default_b_ld, const GetMetric default_c_ld);
// The main client function, setting-up arguments, matrices, OpenCL buffers, etc. After set-up, it
// calls the client routines. The arguments are modified: sizes grow with every step.
void PerformanceTest(Arguments<T> &args, const SetMetric set_sizes);
private:
// Runs a function a given number of times and returns the execution time of the shortest instance
double TimedExecution(const size_t num_runs, const Arguments<T> &args, const Buffers &buffers,
CommandQueue &queue, Routine run_blas, const std::string &library_name);
// Prints the header of a performance-data table
void PrintTableHeader(const bool silent, const std::vector<std::string> &args);
// Prints a row of performance data, including results of two libraries
void PrintTableRow(const Arguments<T>& args, const double ms_clblast, const double ms_clblas);
// The routine-specific functions passed to the tester. All are const: they are set once by the
// constructor.
const Routine run_routine_;
const Routine run_reference_;
const std::vector<std::string> options_;
const GetMetric get_flops_;
const GetMetric get_bytes_;
};
// =================================================================================================
// Shorthand for a BLAS routine with 2 or 3 OpenCL buffers as argument
// Routine2: callable taking the arguments structure, two OpenCL buffers and the command queue.
// It returns nothing, so failures cannot be reported through a return value.
template <typename T>
using Routine2 = std::function<void(const Arguments<T>&,
const Buffer&, const Buffer&,
CommandQueue&)>;
// Routine3: same shape as Routine2, but with three OpenCL buffers.
template <typename T>
using Routine3 = std::function<void(const Arguments<T>&,
const Buffer&, const Buffer&, const Buffer&,
CommandQueue&)>;
// The interface to the performance client. This is a separate function in the header such that it
// is automatically compiled for each routine, templated by the parameter "C".
// Template parameters: "C" is the routine description class, which supplies the static members
// used below (RunRoutine, RunReference, GetOptions, GetFlops, GetBytes, DefaultLDA/LDB/LDC and
// SetSizes); "T" is the data-type passed on to Client<T>.
// NOTE(review): the function body in this listing is interleaved with namespace-scope
// declarations (ClientXY/ClientAXY/ClientAC/ClientABC, ParseArguments, GetPrecision,
// TimedExecution, PrintTableHeader, PrintTableRow). Such declarations cannot legally appear
// inside a function body; they look like the previous revision of this header -- confirm
// against the real file before relying on this text.
template <typename C, typename T>
void RunClient(int argc, char *argv[]) {
// =================================================================================================
// Creates a new client
auto client = Client<T>(C::RunRoutine, C::RunReference, C::GetOptions(),
C::GetFlops, C::GetBytes);
// These are the main client functions, setting-up arguments, matrices, OpenCL buffers, etc. After
// set-up, they call the client routine, passed as argument to this function.
template <typename T>
void ClientXY(int argc, char *argv[], Routine2<T> client_routine,
const std::vector<std::string> &options);
template <typename T>
void ClientAXY(int argc, char *argv[], Routine3<T> client_routine,
const std::vector<std::string> &options);
template <typename T>
void ClientAC(int argc, char *argv[], Routine2<T> client_routine,
const std::vector<std::string> &options);
template <typename T>
void ClientABC(int argc, char *argv[], Routine3<T> client_routine,
const std::vector<std::string> &options, const bool symmetric);
// Simple command line argument parser with defaults
auto args = client.ParseArguments(argc, argv, C::DefaultLDA, C::DefaultLDB, C::DefaultLDC);
// Nothing is benchmarked when the parsed arguments have the print_help flag set
if (args.print_help) { return; }
// =================================================================================================
// Parses all command-line arguments, filling in the arguments structure. If no command-line
// argument is given for a particular argument, it is filled in with a default value.
template <typename T>
Arguments<T> ParseArguments(int argc, char *argv[], const std::vector<std::string> &options,
const std::function<size_t(const Arguments<T>)> default_ld_a);
// Retrieves only the precision command-line argument, since the above function is templated based
// on the precision
Precision GetPrecision(int argc, char *argv[]);
// =================================================================================================
// Runs a function a given number of times and returns the execution time of the shortest instance
double TimedExecution(const size_t num_runs, std::function<void()> main_computation);
// =================================================================================================
// Prints the header of a performance-data table
void PrintTableHeader(const bool silent, const std::vector<std::string> &args);
// Prints a row of performance data, including results of two libraries
void PrintTableRow(const std::vector<size_t> &args_int, const std::vector<std::string> &args_string,
const bool abbreviations, const double ms_clblast, const double ms_clblas,
const unsigned long long flops, const unsigned long long bytes);
// Runs the client
client.PerformanceTest(args, C::SetSizes);
}
// =================================================================================================
} // namespace clblast

View file

@ -7,90 +7,29 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xaxpy command-line interface tester.
// This file implements the Xaxpy command-line interface performance tester.
//
// =================================================================================================
#include <string>
#include <vector>
#include <exception>
#include "wrapper_clblas.h"
#include "performance/client.h"
namespace clblast {
// =================================================================================================
// The client, used for performance testing. It contains the function calls to CLBlast and to other
// libraries to compare against.
template <typename T>
void PerformanceXaxpy(const Arguments<T> &args,
const Buffer &x_vec, const Buffer &y_vec,
CommandQueue &queue) {
// Creates the CLBlast lambda
auto clblast_lambda = [&args, &x_vec, &y_vec, &queue]() {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Axpy(args.n, args.alpha,
x_vec(), args.x_offset, args.x_inc,
y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
clWaitForEvents(1, &event);
if (status != StatusCode::kSuccess) {
throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status)));
}
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [&args, &x_vec, &y_vec, &queue]() {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXaxpy(args.n, args.alpha,
x_vec(), args.x_offset, args.x_inc,
y_vec(), args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
if (status != CL_SUCCESS) {
throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status)));
}
};
// Runs the routines and collect the timings
auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda);
auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda);
// Prints the performance of both libraries
const auto flops = 2 * args.n;
const auto bytes = (3 * args.n) * sizeof(T);
const auto output_ints = std::vector<size_t>{args.n, args.x_inc, args.y_inc,
args.x_offset, args.y_offset};
const auto output_strings = std::vector<std::string>{ToString(args.alpha)};
PrintTableRow(output_ints, output_strings, args.no_abbrv,
ms_clblast, ms_clblas, flops, bytes);
}
#include "routines/xaxpy.h"
// =================================================================================================
// Main function which calls the common client code with the routine-specific function as argument.
// Selects the ClientXY instantiation that matches the precision given on the command line and
// hands it the Xaxpy benchmark together with the list of options this routine understands.
void ClientXaxpy(int argc, char *argv[]) {
  const auto options = std::vector<std::string>{kArgN, kArgXInc, kArgYInc,
                                                kArgXOffset, kArgYOffset, kArgAlpha};
  const auto precision = GetPrecision(argc, argv);
  switch (precision) {
    case Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case Precision::kSingle:
      ClientXY<float>(argc, argv, PerformanceXaxpy<float>, options);
      break;
    case Precision::kDouble:
      ClientXY<double>(argc, argv, PerformanceXaxpy<double>, options);
      break;
    case Precision::kComplexSingle:
      ClientXY<float2>(argc, argv, PerformanceXaxpy<float2>, options);
      break;
    case Precision::kComplexDouble:
      ClientXY<double2>(argc, argv, PerformanceXaxpy<double2>, options);
      break;
  }
}
// =================================================================================================
} // namespace clblast
// Main function (not within the clblast namespace)
// Runs the Xaxpy performance client for the precision requested on the command line. Half
// precision is rejected with a std::runtime_error.
int main(int argc, char *argv[]) {

  // RunClient is the single entry point: the additional call to clblast::ClientXaxpy that used
  // to precede this switch made the benchmark run a second time through the legacy code path.
  switch(clblast::GetPrecision(argc, argv)) {
    case clblast::Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case clblast::Precision::kSingle:
      clblast::RunClient<clblast::TestXaxpy<float>, float>(argc, argv); break;
    case clblast::Precision::kDouble:
      clblast::RunClient<clblast::TestXaxpy<double>, double>(argc, argv); break;
    case clblast::Precision::kComplexSingle:
      clblast::RunClient<clblast::TestXaxpy<clblast::float2>, clblast::float2>(argc, argv); break;
    case clblast::Precision::kComplexDouble:
      clblast::RunClient<clblast::TestXaxpy<clblast::double2>, clblast::double2>(argc, argv); break;
  }
  return 0;
}

View file

@ -7,108 +7,29 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xgemm command-line interface tester.
// This file implements the Xgemm command-line interface performance tester.
//
// =================================================================================================
#include <string>
#include <vector>
#include <exception>
#include "wrapper_clblas.h"
#include "performance/client.h"
namespace clblast {
// =================================================================================================
// The client, used for performance testing. It contains the function calls to CLBlast and to other
// libraries to compare against.
// Times Xgemm for CLBlast and for clBLAS on the same buffers and prints one table row with the
// results. Throws std::runtime_error as soon as either library reports a failure.
template <typename T>
void PerformanceXgemm(const Arguments<T> &args,
                      const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
                      CommandQueue &queue) {

  // Creates the CLBlast lambda
  auto clblast_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = Gemm(args.layout, args.a_transpose, args.b_transpose,
                       args.m, args.n, args.k,
                       args.alpha,
                       a_mat(), args.a_offset, args.a_ld,
                       b_mat(), args.b_offset, args.b_ld,
                       args.beta,
                       c_mat(), args.c_offset, args.c_ld,
                       &queue_plain, &event);

    // The status is checked first: after a failed launch the event may never have been set
    if (status != StatusCode::kSuccess) {
      throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event); // avoids leaking one event per timed run
  };

  // Creates the clBLAS lambda (for comparison)
  auto clblas_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = clblasXgemm(static_cast<clblasOrder>(args.layout),
                              static_cast<clblasTranspose>(args.a_transpose),
                              static_cast<clblasTranspose>(args.b_transpose),
                              args.m, args.n, args.k,
                              args.alpha,
                              a_mat(), args.a_offset, args.a_ld,
                              b_mat(), args.b_offset, args.b_ld,
                              args.beta,
                              c_mat(), args.c_offset, args.c_ld,
                              1, &queue_plain, 0, nullptr, &event);

    // Same ordering as above: only wait on (and release) the event of a successful launch
    if (status != CL_SUCCESS) {
      throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event);
  };

  // Runs the routines and collect the timings
  auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda);
  auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda);

  // Prints the performance of both libraries
  const auto flops = 2 * args.m * args.n * args.k;
  const auto bytes = (args.m*args.k + args.k*args.n + 2*args.m*args.n) * sizeof(T);
  const auto output_ints = std::vector<size_t>{args.m, args.n, args.k,
                                               static_cast<size_t>(args.layout),
                                               static_cast<size_t>(args.a_transpose),
                                               static_cast<size_t>(args.b_transpose),
                                               args.a_ld, args.b_ld, args.c_ld,
                                               args.a_offset, args.b_offset, args.c_offset};
  const auto output_strings = std::vector<std::string>{ToString(args.alpha),
                                                       ToString(args.beta)};
  PrintTableRow(output_ints, output_strings, args.no_abbrv,
                ms_clblast, ms_clblas, flops, bytes);
}
#include "routines/xgemm.h"
// =================================================================================================
// Main function which calls the common client code with the routine-specific function as argument.
// Selects the ClientABC instantiation that matches the precision given on the command line and
// hands it the Xgemm benchmark together with the list of options this routine understands.
void ClientXgemm(int argc, char *argv[]) {
  const auto options = std::vector<std::string>{kArgM, kArgN, kArgK, kArgLayout,
                                                kArgATransp, kArgBTransp,
                                                kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
                                                kArgAOffset, kArgBOffset, kArgCOffset,
                                                kArgAlpha, kArgBeta};
  const auto precision = GetPrecision(argc, argv);
  switch (precision) {
    case Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case Precision::kSingle:
      ClientABC<float>(argc, argv, PerformanceXgemm<float>, options, false);
      break;
    case Precision::kDouble:
      ClientABC<double>(argc, argv, PerformanceXgemm<double>, options, false);
      break;
    case Precision::kComplexSingle:
      ClientABC<float2>(argc, argv, PerformanceXgemm<float2>, options, false);
      break;
    case Precision::kComplexDouble:
      ClientABC<double2>(argc, argv, PerformanceXgemm<double2>, options, false);
      break;
  }
}
// =================================================================================================
} // namespace clblast
// Main function (not within the clblast namespace)
// Runs the Xgemm performance client for the precision requested on the command line. Half
// precision is rejected with a std::runtime_error.
int main(int argc, char *argv[]) {

  // RunClient is the single entry point: the additional call to clblast::ClientXgemm that used
  // to precede this switch made the benchmark run a second time through the legacy code path.
  switch(clblast::GetPrecision(argc, argv)) {
    case clblast::Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case clblast::Precision::kSingle:
      clblast::RunClient<clblast::TestXgemm<float>, float>(argc, argv); break;
    case clblast::Precision::kDouble:
      clblast::RunClient<clblast::TestXgemm<double>, double>(argc, argv); break;
    case clblast::Precision::kComplexSingle:
      clblast::RunClient<clblast::TestXgemm<clblast::float2>, clblast::float2>(argc, argv); break;
    case clblast::Precision::kComplexDouble:
      clblast::RunClient<clblast::TestXgemm<clblast::double2>, clblast::double2>(argc, argv); break;
  }
  return 0;
}

View file

@ -7,100 +7,29 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xgemv command-line interface tester.
// This file implements the Xgemv command-line interface performance tester.
//
// =================================================================================================
#include <string>
#include <vector>
#include <exception>
#include "wrapper_clblas.h"
#include "performance/client.h"
namespace clblast {
// =================================================================================================
// The client, used for performance testing. It contains the function calls to CLBlast and to other
// libraries to compare against.
// Times Xgemv for CLBlast and for clBLAS on the same buffers and prints one table row with the
// results. Throws std::runtime_error as soon as either library reports a failure.
template <typename T>
void PerformanceXgemv(const Arguments<T> &args,
                      const Buffer &a_mat, const Buffer &x_vec, const Buffer &y_vec,
                      CommandQueue &queue) {

  // Creates the CLBlast lambda
  auto clblast_lambda = [&args, &a_mat, &x_vec, &y_vec, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = Gemv(args.layout, args.a_transpose, args.m, args.n, args.alpha,
                       a_mat(), args.a_offset, args.a_ld,
                       x_vec(), args.x_offset, args.x_inc, args.beta,
                       y_vec(), args.y_offset, args.y_inc,
                       &queue_plain, &event);

    // The status is checked first: after a failed launch the event may never have been set
    if (status != StatusCode::kSuccess) {
      throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event); // avoids leaking one event per timed run
  };

  // Creates the clBLAS lambda (for comparison)
  auto clblas_lambda = [&args, &a_mat, &x_vec, &y_vec, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = clblasXgemv(static_cast<clblasOrder>(args.layout),
                              static_cast<clblasTranspose>(args.a_transpose),
                              args.m, args.n, args.alpha,
                              a_mat(), args.a_offset, args.a_ld,
                              x_vec(), args.x_offset, args.x_inc, args.beta,
                              y_vec(), args.y_offset, args.y_inc,
                              1, &queue_plain, 0, nullptr, &event);

    // Same ordering as above: only wait on (and release) the event of a successful launch
    if (status != CL_SUCCESS) {
      throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event);
  };

  // Runs the routines and collect the timings
  auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda);
  auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda);

  // Prints the performance of both libraries
  const auto flops = 2 * args.m * args.n;
  const auto bytes = (args.m*args.n + 2*args.m + args.n) * sizeof(T);
  const auto output_ints = std::vector<size_t>{args.m, args.n,
                                               static_cast<size_t>(args.layout),
                                               static_cast<size_t>(args.a_transpose),
                                               args.a_ld, args.x_inc, args.y_inc,
                                               args.a_offset, args.x_offset, args.y_offset};
  const auto output_strings = std::vector<std::string>{ToString(args.alpha),
                                                       ToString(args.beta)};
  PrintTableRow(output_ints, output_strings, args.no_abbrv,
                ms_clblast, ms_clblas, flops, bytes);
}
#include "routines/xgemv.h"
// =================================================================================================
// Main function which calls the common client code with the routine-specific function as argument.
// Selects the ClientAXY instantiation that matches the precision given on the command line and
// hands it the Xgemv benchmark together with the list of options this routine understands.
void ClientXgemv(int argc, char *argv[]) {
  const auto options = std::vector<std::string>{kArgM, kArgN, kArgLayout, kArgATransp,
                                                kArgALeadDim, kArgXInc, kArgYInc,
                                                kArgAOffset, kArgXOffset, kArgYOffset,
                                                kArgAlpha, kArgBeta};
  const auto precision = GetPrecision(argc, argv);
  switch (precision) {
    case Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case Precision::kSingle:
      ClientAXY<float>(argc, argv, PerformanceXgemv<float>, options);
      break;
    case Precision::kDouble:
      ClientAXY<double>(argc, argv, PerformanceXgemv<double>, options);
      break;
    case Precision::kComplexSingle:
      ClientAXY<float2>(argc, argv, PerformanceXgemv<float2>, options);
      break;
    case Precision::kComplexDouble:
      ClientAXY<double2>(argc, argv, PerformanceXgemv<double2>, options);
      break;
  }
}
// =================================================================================================
} // namespace clblast
// Main function (not within the clblast namespace)
// Runs the Xgemv performance client for the precision requested on the command line. Half
// precision is rejected with a std::runtime_error.
int main(int argc, char *argv[]) {

  // RunClient is the single entry point: the additional call to clblast::ClientXgemv that used
  // to precede this switch made the benchmark run a second time through the legacy code path.
  switch(clblast::GetPrecision(argc, argv)) {
    case clblast::Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case clblast::Precision::kSingle:
      clblast::RunClient<clblast::TestXgemv<float>, float>(argc, argv); break;
    case clblast::Precision::kDouble:
      clblast::RunClient<clblast::TestXgemv<double>, double>(argc, argv); break;
    case clblast::Precision::kComplexSingle:
      clblast::RunClient<clblast::TestXgemv<clblast::float2>, clblast::float2>(argc, argv); break;
    case clblast::Precision::kComplexDouble:
      clblast::RunClient<clblast::TestXgemv<clblast::double2>, clblast::double2>(argc, argv); break;
  }
  return 0;
}

View file

@ -7,108 +7,29 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xsymm command-line interface tester.
// This file implements the Xsymm command-line interface performance tester.
//
// =================================================================================================
#include <string>
#include <vector>
#include <exception>
#include "wrapper_clblas.h"
#include "performance/client.h"
namespace clblast {
// =================================================================================================
// The client, used for performance testing. It contains the function calls to CLBlast and to other
// libraries to compare against.
// Times Xsymm for CLBlast and for clBLAS on the same buffers and prints one table row with the
// results. Throws std::runtime_error as soon as either library reports a failure.
template <typename T>
void PerformanceXsymm(const Arguments<T> &args,
                      const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
                      CommandQueue &queue) {

  // Creates the CLBlast lambda
  auto clblast_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = Symm(args.layout, args.side, args.triangle,
                       args.m, args.n,
                       args.alpha,
                       a_mat(), args.a_offset, args.a_ld,
                       b_mat(), args.b_offset, args.b_ld,
                       args.beta,
                       c_mat(), args.c_offset, args.c_ld,
                       &queue_plain, &event);

    // The status is checked first: after a failed launch the event may never have been set
    if (status != StatusCode::kSuccess) {
      throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event); // avoids leaking one event per timed run
  };

  // Creates the clBLAS lambda (for comparison)
  auto clblas_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = clblasXsymm(static_cast<clblasOrder>(args.layout),
                              static_cast<clblasSide>(args.side),
                              static_cast<clblasUplo>(args.triangle),
                              args.m, args.n,
                              args.alpha,
                              a_mat(), args.a_offset, args.a_ld,
                              b_mat(), args.b_offset, args.b_ld,
                              args.beta,
                              c_mat(), args.c_offset, args.c_ld,
                              1, &queue_plain, 0, nullptr, &event);

    // Same ordering as above: only wait on (and release) the event of a successful launch
    if (status != CL_SUCCESS) {
      throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event);
  };

  // Runs the routines and collect the timings
  auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda);
  auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda);

  // Prints the performance of both libraries
  const auto flops = 2 * args.m * args.n * args.m;
  const auto bytes = (args.m*args.m + args.m*args.n + 2*args.m*args.n) * sizeof(T);
  const auto output_ints = std::vector<size_t>{args.m, args.n,
                                               static_cast<size_t>(args.layout),
                                               static_cast<size_t>(args.triangle),
                                               static_cast<size_t>(args.side),
                                               args.a_ld, args.b_ld, args.c_ld,
                                               args.a_offset, args.b_offset, args.c_offset};
  const auto output_strings = std::vector<std::string>{ToString(args.alpha),
                                                       ToString(args.beta)};
  PrintTableRow(output_ints, output_strings, args.no_abbrv,
                ms_clblast, ms_clblas, flops, bytes);
}
#include "routines/xsymm.h"
// =================================================================================================
// Main function which calls the common client code with the routine-specific function as argument.
// Selects the ClientABC instantiation that matches the precision given on the command line and
// hands it the Xsymm benchmark together with the list of options this routine understands.
void ClientXsymm(int argc, char *argv[]) {
  const auto options = std::vector<std::string>{kArgM, kArgN, kArgLayout,
                                                kArgTriangle, kArgSide,
                                                kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
                                                kArgAOffset, kArgBOffset, kArgCOffset,
                                                kArgAlpha, kArgBeta};
  const auto precision = GetPrecision(argc, argv);
  switch (precision) {
    case Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case Precision::kSingle:
      ClientABC<float>(argc, argv, PerformanceXsymm<float>, options, false);
      break;
    case Precision::kDouble:
      ClientABC<double>(argc, argv, PerformanceXsymm<double>, options, false);
      break;
    case Precision::kComplexSingle:
      ClientABC<float2>(argc, argv, PerformanceXsymm<float2>, options, false);
      break;
    case Precision::kComplexDouble:
      ClientABC<double2>(argc, argv, PerformanceXsymm<double2>, options, false);
      break;
  }
}
// =================================================================================================
} // namespace clblast
// Main function (not within the clblast namespace)
// Runs the Xsymm performance client for the precision requested on the command line. Half
// precision is rejected with a std::runtime_error.
int main(int argc, char *argv[]) {

  // RunClient is the single entry point: the additional call to clblast::ClientXsymm that used
  // to precede this switch made the benchmark run a second time through the legacy code path.
  switch(clblast::GetPrecision(argc, argv)) {
    case clblast::Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case clblast::Precision::kSingle:
      clblast::RunClient<clblast::TestXsymm<float>, float>(argc, argv); break;
    case clblast::Precision::kDouble:
      clblast::RunClient<clblast::TestXsymm<double>, double>(argc, argv); break;
    case clblast::Precision::kComplexSingle:
      clblast::RunClient<clblast::TestXsymm<clblast::float2>, clblast::float2>(argc, argv); break;
    case clblast::Precision::kComplexDouble:
      clblast::RunClient<clblast::TestXsymm<clblast::double2>, clblast::double2>(argc, argv); break;
  }
  return 0;
}

View file

@ -7,108 +7,29 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xsyr2k command-line interface tester.
// This file implements the Xsyr2k command-line interface performance tester.
//
// =================================================================================================
#include <string>
#include <vector>
#include <exception>
#include "wrapper_clblas.h"
#include "performance/client.h"
namespace clblast {
// =================================================================================================
// The client, used for performance testing. It contains the function calls to CLBlast and to other
// libraries to compare against.
// Times Xsyr2k for CLBlast and for clBLAS on the same buffers and prints one table row with the
// results. Throws std::runtime_error as soon as either library reports a failure.
template <typename T>
void PerformanceXsyr2k(const Arguments<T> &args,
                       const Buffer &a_mat, const Buffer &b_mat, const Buffer &c_mat,
                       CommandQueue &queue) {

  // Creates the CLBlast lambda
  auto clblast_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = Syr2k(args.layout, args.triangle, args.a_transpose,
                        args.n, args.k,
                        args.alpha,
                        a_mat(), args.a_offset, args.a_ld,
                        b_mat(), args.b_offset, args.b_ld,
                        args.beta,
                        c_mat(), args.c_offset, args.c_ld,
                        &queue_plain, &event);

    // The status is checked first: after a failed launch the event may never have been set
    if (status != StatusCode::kSuccess) {
      throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event); // avoids leaking one event per timed run
  };

  // Creates the clBLAS lambda (for comparison)
  auto clblas_lambda = [&args, &a_mat, &b_mat, &c_mat, &queue]() {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = clblasXsyr2k(static_cast<clblasOrder>(args.layout),
                               static_cast<clblasUplo>(args.triangle),
                               static_cast<clblasTranspose>(args.a_transpose),
                               args.n, args.k,
                               args.alpha,
                               a_mat(), args.a_offset, args.a_ld,
                               b_mat(), args.b_offset, args.b_ld,
                               args.beta,
                               c_mat(), args.c_offset, args.c_ld,
                               1, &queue_plain, 0, nullptr, &event);

    // Same ordering as above: only wait on (and release) the event of a successful launch
    if (status != CL_SUCCESS) {
      throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status)));
    }
    clWaitForEvents(1, &event);
    clReleaseEvent(event);
  };

  // Runs the routines and collect the timings
  auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda);
  auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda);

  // Prints the performance of both libraries
  // NOTE(review): the byte count below has a single n*k term although both A and B are passed
  // to the routine -- confirm whether the B matrix should be counted as well.
  const auto flops = 2 * args.n * args.n * args.k;
  const auto bytes = (args.n*args.k + args.n*args.n) * sizeof(T);
  const auto output_ints = std::vector<size_t>{args.n, args.k,
                                               static_cast<size_t>(args.layout),
                                               static_cast<size_t>(args.triangle),
                                               static_cast<size_t>(args.a_transpose),
                                               args.a_ld, args.b_ld, args.c_ld,
                                               args.a_offset, args.b_offset, args.c_offset};
  const auto output_strings = std::vector<std::string>{ToString(args.alpha),
                                                       ToString(args.beta)};
  PrintTableRow(output_ints, output_strings, args.no_abbrv,
                ms_clblast, ms_clblas, flops, bytes);
}
#include "routines/xsyr2k.h"
// =================================================================================================
// Main function which calls the common client code with the routine-specific function as argument.
// Selects the ClientABC instantiation that matches the precision given on the command line and
// hands it the Xsyr2k benchmark together with the list of options this routine understands.
void ClientXsyr2k(int argc, char *argv[]) {
  const auto options = std::vector<std::string>{kArgN, kArgK,
                                                kArgLayout, kArgTriangle, kArgATransp,
                                                kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
                                                kArgAOffset, kArgBOffset, kArgCOffset,
                                                kArgAlpha, kArgBeta};
  const auto precision = GetPrecision(argc, argv);
  switch (precision) {
    case Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case Precision::kSingle:
      ClientABC<float>(argc, argv, PerformanceXsyr2k<float>, options, true);
      break;
    case Precision::kDouble:
      ClientABC<double>(argc, argv, PerformanceXsyr2k<double>, options, true);
      break;
    case Precision::kComplexSingle:
      ClientABC<float2>(argc, argv, PerformanceXsyr2k<float2>, options, true);
      break;
    case Precision::kComplexDouble:
      ClientABC<double2>(argc, argv, PerformanceXsyr2k<double2>, options, true);
      break;
  }
}
// =================================================================================================
} // namespace clblast
// Main function (not within the clblast namespace)
// Runs the Xsyr2k performance client for the precision requested on the command line. Half
// precision is rejected with a std::runtime_error.
int main(int argc, char *argv[]) {

  // RunClient is the single entry point: the additional call to clblast::ClientXsyr2k that used
  // to precede this switch made the benchmark run a second time through the legacy code path.
  switch(clblast::GetPrecision(argc, argv)) {
    case clblast::Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case clblast::Precision::kSingle:
      clblast::RunClient<clblast::TestXsyr2k<float>, float>(argc, argv); break;
    case clblast::Precision::kDouble:
      clblast::RunClient<clblast::TestXsyr2k<double>, double>(argc, argv); break;
    case clblast::Precision::kComplexSingle:
      clblast::RunClient<clblast::TestXsyr2k<clblast::float2>, clblast::float2>(argc, argv); break;
    case clblast::Precision::kComplexDouble:
      clblast::RunClient<clblast::TestXsyr2k<clblast::double2>, clblast::double2>(argc, argv); break;
  }
  return 0;
}

View file

@ -7,106 +7,29 @@
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements the Xsyrk command-line interface tester.
// This file implements the Xsyrk command-line interface performance tester.
//
// =================================================================================================
#include <string>
#include <vector>
#include <exception>
#include "wrapper_clblas.h"
#include "performance/client.h"
namespace clblast {
// =================================================================================================
// The client, used for performance testing. It contains the function calls to CLBlast and to other
// libraries to compare against.
template <typename T>
void PerformanceXsyrk(const Arguments<T> &args,
const Buffer &a_mat, const Buffer &c_mat,
CommandQueue &queue) {
// Creates the CLBlast lambda
auto clblast_lambda = [&args, &a_mat, &c_mat, &queue]() {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Syrk(args.layout, args.triangle, args.a_transpose,
args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
&queue_plain, &event);
clWaitForEvents(1, &event);
if (status != StatusCode::kSuccess) {
throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status)));
}
};
// Creates the clBLAS lambda (for comparison)
auto clblas_lambda = [&args, &a_mat, &c_mat, &queue]() {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXsyrk(static_cast<clblasOrder>(args.layout),
static_cast<clblasUplo>(args.triangle),
static_cast<clblasTranspose>(args.a_transpose),
args.n, args.k,
args.alpha,
a_mat(), args.a_offset, args.a_ld,
args.beta,
c_mat(), args.c_offset, args.c_ld,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
if (status != CL_SUCCESS) {
throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status)));
}
};
// Runs the routines and collect the timings
auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda);
auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda);
// Prints the performance of both libraries
const auto flops = args.n * args.n * args.k;
const auto bytes = (args.n*args.k + args.n*args.n) * sizeof(T);
const auto output_ints = std::vector<size_t>{args.n, args.k,
static_cast<size_t>(args.layout),
static_cast<size_t>(args.triangle),
static_cast<size_t>(args.a_transpose),
args.a_ld, args.c_ld,
args.a_offset, args.c_offset};
const auto output_strings = std::vector<std::string>{ToString(args.alpha),
ToString(args.beta)};
PrintTableRow(output_ints, output_strings, args.no_abbrv,
ms_clblast, ms_clblas, flops, bytes);
}
#include "routines/xsyrk.h"
// =================================================================================================
// Main function which calls the common client code with the routine-specific function as argument.
// Selects the ClientAC instantiation that matches the precision given on the command line and
// hands it the Xsyrk benchmark together with the list of options this routine understands.
void ClientXsyrk(int argc, char *argv[]) {
  const auto options = std::vector<std::string>{kArgN, kArgK,
                                                kArgLayout, kArgTriangle, kArgATransp,
                                                kArgALeadDim, kArgCLeadDim,
                                                kArgAOffset, kArgCOffset,
                                                kArgAlpha, kArgBeta};
  const auto precision = GetPrecision(argc, argv);
  switch (precision) {
    case Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case Precision::kSingle:
      ClientAC<float>(argc, argv, PerformanceXsyrk<float>, options);
      break;
    case Precision::kDouble:
      ClientAC<double>(argc, argv, PerformanceXsyrk<double>, options);
      break;
    case Precision::kComplexSingle:
      ClientAC<float2>(argc, argv, PerformanceXsyrk<float2>, options);
      break;
    case Precision::kComplexDouble:
      ClientAC<double2>(argc, argv, PerformanceXsyrk<double2>, options);
      break;
  }
}
// =================================================================================================
} // namespace clblast
// Main function (not within the clblast namespace)
// Runs the Xsyrk performance client for the precision requested on the command line. Half
// precision is rejected with a std::runtime_error.
int main(int argc, char *argv[]) {

  // RunClient is the single entry point: the additional call to clblast::ClientXsyrk that used
  // to precede this switch made the benchmark run a second time through the legacy code path.
  switch(clblast::GetPrecision(argc, argv)) {
    case clblast::Precision::kHalf:
      throw std::runtime_error("Unsupported precision mode");
    case clblast::Precision::kSingle:
      clblast::RunClient<clblast::TestXsyrk<float>, float>(argc, argv); break;
    case clblast::Precision::kDouble:
      clblast::RunClient<clblast::TestXsyrk<double>, double>(argc, argv); break;
    case clblast::Precision::kComplexSingle:
      clblast::RunClient<clblast::TestXsyrk<clblast::float2>, clblast::float2>(argc, argv); break;
    case clblast::Precision::kComplexDouble:
      clblast::RunClient<clblast::TestXsyrk<clblast::double2>, clblast::double2>(argc, argv); break;
  }
  return 0;
}

113
test/routines/xaxpy.h Normal file
View file

@ -0,0 +1,113 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a class with static methods to describe the Xaxpy routine. Examples of
// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
// static methods are used by the correctness tester and the performance tester.
//
// =================================================================================================
#ifndef CLBLAST_TEST_ROUTINES_XAXPY_H_
#define CLBLAST_TEST_ROUTINES_XAXPY_H_
#include <vector>
#include <string>
#include "wrapper_clblas.h"
namespace clblast {
// =================================================================================================
// See comment at top of file for a description of the class
template <typename T>
class TestXaxpy {
public:
// The list of arguments relevant for this routine
static std::vector<std::string> GetOptions() {
return {kArgN,
kArgXInc, kArgYInc,
kArgXOffset, kArgYOffset,
kArgAlpha};
}
// Describes how to obtain the sizes of the buffers
// Size of the x-vector: n elements spaced x_inc apart, plus the leading offset
static size_t GetSizeX(const Arguments<T> &args) {
  const auto strided_extent = args.n * args.x_inc;
  return strided_extent + args.x_offset;
}
// Size of the y-vector: n elements spaced y_inc apart, plus the leading offset
static size_t GetSizeY(const Arguments<T> &args) {
  const auto strided_extent = args.n * args.y_inc;
  return strided_extent + args.y_offset;
}
// Describes how to set the sizes of all the buffers
static void SetSizes(Arguments<T> &args) {
args.x_size = GetSizeX(args);
args.y_size = GetSizeY(args);
}
// Describes what the default values of the leading dimensions of the matrices are
static size_t DefaultLDA(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine
// Describes how to run the CLBlast routine
static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers,
CommandQueue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = Axpy(args.n, args.alpha,
buffers.x_vec(), args.x_offset, args.x_inc,
buffers.y_vec(), args.y_offset, args.y_inc,
&queue_plain, &event);
clWaitForEvents(1, &event);
return status;
}
// Describes how to run the clBLAS routine (for correctness/performance comparison)
static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers,
CommandQueue &queue) {
auto queue_plain = queue();
auto event = cl_event{};
auto status = clblasXaxpy(args.n, args.alpha,
buffers.x_vec(), args.x_offset, args.x_inc,
buffers.y_vec(), args.y_offset, args.y_inc,
1, &queue_plain, 0, nullptr, &event);
clWaitForEvents(1, &event);
return static_cast<StatusCode>(status);
}
// Describes how to download the results of the computation (more importantly: which buffer)
static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers,
CommandQueue &queue) {
std::vector<T> result(args.y_size, static_cast<T>(0));
buffers.y_vec.ReadBuffer(queue, args.y_size*sizeof(T), result);
return result;
}
// Describes how to compute the indices of the result buffer
static size_t ResultID1(const Arguments<T> &args) { return args.n; }
static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
return id1*args.y_inc + args.y_offset;
}
// Describes how to compute performance metrics
static size_t GetFlops(const Arguments<T> &args) {
return 2 * args.n;
}
static size_t GetBytes(const Arguments<T> &args) {
return (3 * args.n) * sizeof(T);
}
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_ROUTINES_XAXPY_H_
#endif

134
test/routines/xgemm.h Normal file
View file

@ -0,0 +1,134 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a class with static methods to describe the Xgemm routine. Examples of
// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
// static methods are used by the correctness tester and the performance tester.
//
// =================================================================================================
#ifndef CLBLAST_TEST_ROUTINES_XGEMM_H_
#define CLBLAST_TEST_ROUTINES_XGEMM_H_
#include <vector>
#include <string>
#include "wrapper_clblas.h"
namespace clblast {
// =================================================================================================
// Static description of the Xgemm routine: which arguments it takes, how large its buffers are,
// how to launch the CLBlast and clBLAS versions, and where the results are stored. All members
// are static; the class is never instantiated by the code in this file.
template <typename T>
class TestXgemm {
 public:

  // The list of arguments relevant for this routine
  static std::vector<std::string> GetOptions() {
    return {kArgM, kArgN, kArgK,
            kArgLayout, kArgATransp, kArgBTransp,
            kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
            kArgAOffset, kArgBOffset, kArgCOffset,
            kArgAlpha, kArgBeta};
  }

  // Describes how to obtain the sizes of the buffers (in elements). Each size is the number of
  // 'lines' (the dimension not covered by the leading dimension) times the leading dimension,
  // plus the offset. Which of the two matrix dimensions counts the lines depends on the layout
  // and, for A and B, on the transpose setting.
  static size_t GetSizeA(const Arguments<T> &args) {
    auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
                     (args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
    auto a_two = (a_rotated) ? args.m : args.k;
    return a_two * args.a_ld + args.a_offset;
  }
  static size_t GetSizeB(const Arguments<T> &args) {
    auto b_rotated = (args.layout == Layout::kColMajor && args.b_transpose != Transpose::kNo) ||
                     (args.layout == Layout::kRowMajor && args.b_transpose == Transpose::kNo);
    auto b_two = (b_rotated) ? args.k : args.n;
    return b_two * args.b_ld + args.b_offset;
  }
  static size_t GetSizeC(const Arguments<T> &args) {
    auto c_rotated = (args.layout == Layout::kRowMajor);
    auto c_two = (c_rotated) ? args.m : args.n;
    return c_two * args.c_ld + args.c_offset;
  }

  // Describes how to set the sizes of all the buffers
  static void SetSizes(Arguments<T> &args) {
    args.a_size = GetSizeA(args);
    args.b_size = GetSizeB(args);
    args.c_size = GetSizeC(args);
  }

  // Describes what the default values of the leading dimensions of the matrices are
  static size_t DefaultLDA(const Arguments<T> &args) { return args.k; }
  static size_t DefaultLDB(const Arguments<T> &args) { return args.n; }
  static size_t DefaultLDC(const Arguments<T> &args) { return args.n; }

  // Describes how to run the CLBlast routine: launches it on the given queue, waits on the
  // resulting event and returns the routine's status code
  static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers,
                               CommandQueue &queue) {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = Gemm(args.layout, args.a_transpose, args.b_transpose,
                       args.m, args.n, args.k, args.alpha,
                       buffers.a_mat(), args.a_offset, args.a_ld,
                       buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                       buffers.c_mat(), args.c_offset, args.c_ld,
                       &queue_plain, &event);
    clWaitForEvents(1, &event);
    return status;
  }

  // Describes how to run the clBLAS routine (for correctness/performance comparison). The clBLAS
  // status is converted to a CLBlast status code before it is returned.
  static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers,
                                 CommandQueue &queue) {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = clblasXgemm(static_cast<clblasOrder>(args.layout),
                              static_cast<clblasTranspose>(args.a_transpose),
                              static_cast<clblasTranspose>(args.b_transpose),
                              args.m, args.n, args.k, args.alpha,
                              buffers.a_mat(), args.a_offset, args.a_ld,
                              buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                              buffers.c_mat(), args.c_offset, args.c_ld,
                              1, &queue_plain, 0, nullptr, &event);
    clWaitForEvents(1, &event);
    return static_cast<StatusCode>(status);
  }

  // Describes how to download the results of the computation (more importantly: which buffer).
  // For this routine the result lives in the C matrix.
  static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers,
                                       CommandQueue &queue) {
    std::vector<T> result(args.c_size, static_cast<T>(0));
    buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result);
    return result;
  }

  // Describes how to compute the indices of the result buffer: the result is m by n, and the
  // position of element (id1, id2) within the C buffer depends on the layout
  static size_t ResultID1(const Arguments<T> &args) { return args.m; }
  static size_t ResultID2(const Arguments<T> &args) { return args.n; }
  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t id2) {
    return (args.layout == Layout::kRowMajor) ?
      id1*args.c_ld + id2 + args.c_offset:
      id2*args.c_ld + id1 + args.c_offset;
  }

  // Describes how to compute performance metrics. Each of the m*n outputs is a dot-product of
  // length k (one multiply and one add per term), so the inner dimension 'k' has to be part of
  // the operation count. The byte count covers A (m*k), B (k*n) and C (m*n, counted twice).
  static size_t GetFlops(const Arguments<T> &args) {
    return 2 * args.m * args.n * args.k;
  }
  static size_t GetBytes(const Arguments<T> &args) {
    return (args.m*args.k + args.k*args.n + 2*args.m*args.n) * sizeof(T);
  }
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_ROUTINES_XGEMM_H_
#endif

132
test/routines/xgemv.h Normal file
View file

@ -0,0 +1,132 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a class with static methods to describe the Xgemv routine. Examples of
// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
// static methods are used by the correctness tester and the performance tester.
//
// =================================================================================================
#ifndef CLBLAST_TEST_ROUTINES_XGEMV_H_
#define CLBLAST_TEST_ROUTINES_XGEMV_H_
#include <vector>
#include <string>
#include "wrapper_clblas.h"
namespace clblast {
// =================================================================================================
// Static description of the Xgemv routine: which arguments it takes, how large its buffers are,
// how to launch the CLBlast and clBLAS versions, and where the results are stored. All members
// are static; the class is never instantiated by the code in this file.
template <typename T>
class TestXgemv {
 public:

  // Returns the names of the arguments that are relevant for this routine
  static std::vector<std::string> GetOptions() {
    auto options = std::vector<std::string>{kArgM, kArgN,
                                            kArgLayout, kArgATransp,
                                            kArgALeadDim, kArgXInc, kArgYInc,
                                            kArgAOffset, kArgXOffset, kArgYOffset,
                                            kArgAlpha, kArgBeta};
    return options;
  }

  // Size of the matrix buffer (in elements): 'm' lines for row-major data and 'n' lines
  // otherwise, each of length 'a_ld', placed after the offset
  static size_t GetSizeA(const Arguments<T> &args) {
    const auto num_lines = (args.layout == Layout::kRowMajor) ? args.m : args.n;
    return args.a_offset + num_lines * args.a_ld;
  }

  // Sizes of the vector buffers (in elements): x has 'n' elements and y has 'm' elements, with
  // the two swapped when the matrix is transposed
  static size_t GetSizeX(const Arguments<T> &args) {
    const auto x_length = (args.a_transpose == Transpose::kNo) ? args.n : args.m;
    return args.x_offset + x_length * args.x_inc;
  }
  static size_t GetSizeY(const Arguments<T> &args) {
    const auto y_length = (args.a_transpose == Transpose::kNo) ? args.m : args.n;
    return args.y_offset + y_length * args.y_inc;
  }

  // Stores the sizes of all buffers used by this routine into the arguments structure
  static void SetSizes(Arguments<T> &args) {
    args.y_size = GetSizeY(args);
    args.x_size = GetSizeX(args);
    args.a_size = GetSizeA(args);
  }

  // Default leading dimensions of the matrices: only A exists for this routine
  static size_t DefaultLDA(const Arguments<T> &args) { return args.n; }
  static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine
  static size_t DefaultLDC(const Arguments<T> &) { return 1; } // N/A for this routine

  // Launches the CLBlast routine, waits on its event and returns the routine's status code
  static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers,
                               CommandQueue &queue) {
    auto raw_queue = queue();
    cl_event completion{};
    const auto status = Gemv(args.layout, args.a_transpose,
                             args.m, args.n, args.alpha,
                             buffers.a_mat(), args.a_offset, args.a_ld,
                             buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                             buffers.y_vec(), args.y_offset, args.y_inc,
                             &raw_queue, &completion);
    clWaitForEvents(1, &completion);
    return status;
  }

  // Launches the clBLAS routine (for correctness/performance comparison), waits on its event and
  // returns the clBLAS status converted to a CLBlast status code
  static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers,
                                 CommandQueue &queue) {
    auto raw_queue = queue();
    cl_event completion{};
    const auto reference_status = clblasXgemv(static_cast<clblasOrder>(args.layout),
                                              static_cast<clblasTranspose>(args.a_transpose),
                                              args.m, args.n, args.alpha,
                                              buffers.a_mat(), args.a_offset, args.a_ld,
                                              buffers.x_vec(), args.x_offset, args.x_inc, args.beta,
                                              buffers.y_vec(), args.y_offset, args.y_inc,
                                              1, &raw_queue, 0, nullptr, &completion);
    clWaitForEvents(1, &completion);
    return static_cast<StatusCode>(reference_status);
  }

  // Reads back the buffer holding the result of the computation: the y-vector for this routine
  static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers,
                                       CommandQueue &queue) {
    auto host_copy = std::vector<T>(args.y_size, static_cast<T>(0));
    buffers.y_vec.ReadBuffer(queue, args.y_size*sizeof(T), host_copy);
    return host_copy;
  }

  // Extents of the result ('m' by 1, or 'n' by 1 when the matrix is transposed) and the mapping
  // from a result coordinate to a buffer index
  static size_t ResultID1(const Arguments<T> &args) {
    if (args.a_transpose == Transpose::kNo) { return args.m; }
    return args.n;
  }
  static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
    return args.y_offset + id1*args.y_inc;
  }

  // Performance metrics: two operations per matrix element; the transfers cover the m*n matrix
  // plus 2*m and n vector elements
  static size_t GetFlops(const Arguments<T> &args) {
    return 2 * (args.m * args.n);
  }
  static size_t GetBytes(const Arguments<T> &args) {
    const auto num_elements = args.m*args.n + 2*args.m + args.n;
    return num_elements * sizeof(T);
  }
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_ROUTINES_XGEMV_H_
#endif

134
test/routines/xsymm.h Normal file
View file

@ -0,0 +1,134 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a class with static methods to describe the Xsymm routine. Examples of
// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
// static methods are used by the correctness tester and the performance tester.
//
// =================================================================================================
#ifndef CLBLAST_TEST_ROUTINES_XSYMM_H_
#define CLBLAST_TEST_ROUTINES_XSYMM_H_
#include <vector>
#include <string>
#include "wrapper_clblas.h"
namespace clblast {
// =================================================================================================
// Static description of the Xsymm routine: which arguments it takes, how large its buffers are,
// how to launch the CLBlast and clBLAS versions, and where the results are stored. All members
// are static; the class is never instantiated by the code in this file.
template <typename T>
class TestXsymm {
 public:

  // Returns the names of the arguments that are relevant for this routine
  static std::vector<std::string> GetOptions() {
    auto options = std::vector<std::string>{kArgM, kArgN,
                                            kArgLayout, kArgSide, kArgTriangle,
                                            kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
                                            kArgAOffset, kArgBOffset, kArgCOffset,
                                            kArgAlpha, kArgBeta};
    return options;
  }

  // Sizes of the matrix buffers (in elements): a number of lines times the leading dimension,
  // placed after the offset. For A and B the line count depends on the layout and on 'k_value',
  // which is 'm' for a left-side multiplication and 'n' otherwise.
  // NOTE(review): for row-major data A uses 'm' lines and B uses 'k_value' lines regardless of
  // the side; confirm that this is intended when side != kLeft.
  static size_t GetSizeA(const Arguments<T> &args) {
    size_t k_value = args.n;
    if (args.side == Side::kLeft) { k_value = args.m; }
    const auto num_lines = (args.layout == Layout::kRowMajor) ? args.m : k_value;
    return args.a_offset + num_lines * args.a_ld;
  }
  static size_t GetSizeB(const Arguments<T> &args) {
    size_t k_value = args.n;
    if (args.side == Side::kLeft) { k_value = args.m; }
    const auto num_lines = (args.layout == Layout::kRowMajor) ? k_value : args.n;
    return args.b_offset + num_lines * args.b_ld;
  }
  static size_t GetSizeC(const Arguments<T> &args) {
    const auto num_lines = (args.layout == Layout::kRowMajor) ? args.m : args.n;
    return args.c_offset + num_lines * args.c_ld;
  }

  // Stores the sizes of all buffers used by this routine into the arguments structure
  static void SetSizes(Arguments<T> &args) {
    args.c_size = GetSizeC(args);
    args.b_size = GetSizeB(args);
    args.a_size = GetSizeA(args);
  }

  // Default leading dimensions of the three matrices
  static size_t DefaultLDA(const Arguments<T> &args) { return args.m; }
  static size_t DefaultLDB(const Arguments<T> &args) { return args.n; }
  static size_t DefaultLDC(const Arguments<T> &args) { return args.n; }

  // Launches the CLBlast routine, waits on its event and returns the routine's status code
  static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers,
                               CommandQueue &queue) {
    auto raw_queue = queue();
    cl_event completion{};
    const auto status = Symm(args.layout, args.side, args.triangle,
                             args.m, args.n, args.alpha,
                             buffers.a_mat(), args.a_offset, args.a_ld,
                             buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                             buffers.c_mat(), args.c_offset, args.c_ld,
                             &raw_queue, &completion);
    clWaitForEvents(1, &completion);
    return status;
  }

  // Launches the clBLAS routine (for correctness/performance comparison), waits on its event and
  // returns the clBLAS status converted to a CLBlast status code
  static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers,
                                 CommandQueue &queue) {
    auto raw_queue = queue();
    cl_event completion{};
    const auto reference_status = clblasXsymm(static_cast<clblasOrder>(args.layout),
                                              static_cast<clblasSide>(args.side),
                                              static_cast<clblasUplo>(args.triangle),
                                              args.m, args.n, args.alpha,
                                              buffers.a_mat(), args.a_offset, args.a_ld,
                                              buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                                              buffers.c_mat(), args.c_offset, args.c_ld,
                                              1, &raw_queue, 0, nullptr, &completion);
    clWaitForEvents(1, &completion);
    return static_cast<StatusCode>(reference_status);
  }

  // Reads back the buffer holding the result of the computation: the C matrix for this routine
  static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers,
                                       CommandQueue &queue) {
    auto host_copy = std::vector<T>(args.c_size, static_cast<T>(0));
    buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), host_copy);
    return host_copy;
  }

  // Extents of the result (m by n) and the layout-dependent mapping from a result coordinate to
  // an index in the C buffer
  static size_t ResultID1(const Arguments<T> &args) { return args.m; }
  static size_t ResultID2(const Arguments<T> &args) { return args.n; }
  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t id2) {
    if (args.layout == Layout::kRowMajor) {
      return id1*args.c_ld + id2 + args.c_offset;
    }
    return id2*args.c_ld + id1 + args.c_offset;
  }

  // Performance metrics, expressed in terms of 'm' and 'n' only.
  // NOTE(review): both use 'm' as the size of the symmetric matrix, which matches side == kLeft;
  // confirm whether side == kRight should use 'n' instead.
  static size_t GetFlops(const Arguments<T> &args) {
    return 2 * args.m * args.n * args.m;
  }
  static size_t GetBytes(const Arguments<T> &args) {
    const auto num_elements = args.m*args.m + args.m*args.n + 2*args.m*args.n;
    return num_elements * sizeof(T);
  }
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_ROUTINES_XSYMM_H_
#endif

130
test/routines/xsyr2k.h Normal file
View file

@ -0,0 +1,130 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a class with static methods to describe the Xsyr2k routine. Examples of
// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
// static methods are used by the correctness tester and the performance tester.
//
// =================================================================================================
#ifndef CLBLAST_TEST_ROUTINES_XSYR2K_H_
#define CLBLAST_TEST_ROUTINES_XSYR2K_H_
#include <vector>
#include <string>
#include "wrapper_clblas.h"
namespace clblast {
// =================================================================================================
// Static description of the Xsyr2k routine: which arguments it takes, how large its buffers are,
// how to launch the CLBlast and clBLAS versions, and where the results are stored. All members
// are static; the class is never instantiated by the code in this file.
template <typename T>
class TestXsyr2k {
 public:

  // The list of arguments relevant for this routine. Note that there is a single transpose
  // option (kArgATransp): it is the only transpose value passed to the routine below.
  static std::vector<std::string> GetOptions() {
    return {kArgN, kArgK,
            kArgLayout, kArgTriangle, kArgATransp,
            kArgALeadDim, kArgBLeadDim, kArgCLeadDim,
            kArgAOffset, kArgBOffset, kArgCOffset,
            kArgAlpha, kArgBeta};
  }

  // Describes how to obtain the sizes of the buffers (in elements). Each size is a number of
  // 'lines' times the leading dimension, plus the offset.
  static size_t GetSizeA(const Arguments<T> &args) {
    auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
                     (args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
    auto a_two = (a_rotated) ? args.n : args.k;
    return a_two * args.a_ld + args.a_offset;
  }
  // The B matrix is sized from 'a_transpose' as well: this routine has no separate transpose
  // option for B, so 'b_transpose' is not among the arguments listed in GetOptions and must not
  // be consulted here.
  static size_t GetSizeB(const Arguments<T> &args) {
    auto b_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) ||
                     (args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo);
    auto b_two = (b_rotated) ? args.n : args.k;
    return b_two * args.b_ld + args.b_offset;
  }
  static size_t GetSizeC(const Arguments<T> &args) {
    return args.n * args.c_ld + args.c_offset;
  }

  // Describes how to set the sizes of all the buffers
  static void SetSizes(Arguments<T> &args) {
    args.a_size = GetSizeA(args);
    args.b_size = GetSizeB(args);
    args.c_size = GetSizeC(args);
  }

  // Describes what the default values of the leading dimensions of the matrices are
  static size_t DefaultLDA(const Arguments<T> &args) { return args.k; }
  static size_t DefaultLDB(const Arguments<T> &args) { return args.k; }
  static size_t DefaultLDC(const Arguments<T> &args) { return args.n; }

  // Describes how to run the CLBlast routine: launches it on the given queue, waits on the
  // resulting event and returns the routine's status code
  static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers,
                               CommandQueue &queue) {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = Syr2k(args.layout, args.triangle, args.a_transpose,
                        args.n, args.k, args.alpha,
                        buffers.a_mat(), args.a_offset, args.a_ld,
                        buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                        buffers.c_mat(), args.c_offset, args.c_ld,
                        &queue_plain, &event);
    clWaitForEvents(1, &event);
    return status;
  }

  // Describes how to run the clBLAS routine (for correctness/performance comparison). The clBLAS
  // status is converted to a CLBlast status code before it is returned.
  static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers,
                                 CommandQueue &queue) {
    auto queue_plain = queue();
    auto event = cl_event{};
    auto status = clblasXsyr2k(static_cast<clblasOrder>(args.layout),
                               static_cast<clblasUplo>(args.triangle),
                               static_cast<clblasTranspose>(args.a_transpose),
                               args.n, args.k, args.alpha,
                               buffers.a_mat(), args.a_offset, args.a_ld,
                               buffers.b_mat(), args.b_offset, args.b_ld, args.beta,
                               buffers.c_mat(), args.c_offset, args.c_ld,
                               1, &queue_plain, 0, nullptr, &event);
    clWaitForEvents(1, &event);
    return static_cast<StatusCode>(status);
  }

  // Describes how to download the results of the computation (more importantly: which buffer).
  // For this routine the result lives in the C matrix.
  static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers,
                                       CommandQueue &queue) {
    std::vector<T> result(args.c_size, static_cast<T>(0));
    buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), result);
    return result;
  }

  // Describes how to compute the indices of the result buffer: the result is n by n and is
  // indexed identically for both layouts
  static size_t ResultID1(const Arguments<T> &args) { return args.n; }
  static size_t ResultID2(const Arguments<T> &args) { return args.n; }
  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t id2) {
    return id1*args.c_ld + id2 + args.c_offset;
  }

  // Describes how to compute performance metrics.
  // NOTE(review): the byte count includes a single n*k input although both A and B are passed
  // to the routine; confirm whether 2*n*k is intended.
  static size_t GetFlops(const Arguments<T> &args) {
    return 2 * args.n * args.n * args.k;
  }
  static size_t GetBytes(const Arguments<T> &args) {
    return (args.n*args.k + args.n*args.n) * sizeof(T);
  }
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_ROUTINES_XSYR2K_H_
#endif

121
test/routines/xsyrk.h Normal file
View file

@ -0,0 +1,121 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file implements a class with static methods to describe the Xsyrk routine. Examples of
// such 'descriptions' are how to calculate the size of a buffer or how to run the routine. These
// static methods are used by the correctness tester and the performance tester.
//
// =================================================================================================
#ifndef CLBLAST_TEST_ROUTINES_XSYRK_H_
#define CLBLAST_TEST_ROUTINES_XSYRK_H_
#include <vector>
#include <string>
#include "wrapper_clblas.h"
namespace clblast {
// =================================================================================================
// Static description of the Xsyrk routine: which arguments it takes, how large its buffers are,
// how to launch the CLBlast and clBLAS versions, and where the results are stored. All members
// are static; the class is never instantiated by the code in this file.
template <typename T>
class TestXsyrk {
 public:

  // Returns the names of the arguments that are relevant for this routine
  static std::vector<std::string> GetOptions() {
    auto options = std::vector<std::string>{kArgN, kArgK,
                                            kArgLayout, kArgTriangle, kArgATransp,
                                            kArgALeadDim, kArgCLeadDim,
                                            kArgAOffset, kArgCOffset,
                                            kArgAlpha, kArgBeta};
    return options;
  }

  // Size of the A buffer (in elements): 'n' lines when the matrix is column-major and transposed
  // or row-major and not transposed, 'k' lines otherwise; each line has length 'a_ld' and the
  // data starts after the offset
  static size_t GetSizeA(const Arguments<T> &args) {
    const auto col_major_transposed = (args.layout == Layout::kColMajor &&
                                       args.a_transpose != Transpose::kNo);
    const auto row_major_plain = (args.layout == Layout::kRowMajor &&
                                  args.a_transpose == Transpose::kNo);
    const auto num_lines = (col_major_transposed || row_major_plain) ? args.n : args.k;
    return args.a_offset + num_lines * args.a_ld;
  }

  // Size of the C buffer (in elements): 'n' lines of length 'c_ld' after the offset
  static size_t GetSizeC(const Arguments<T> &args) {
    return args.c_offset + args.n * args.c_ld;
  }

  // Stores the sizes of all buffers used by this routine into the arguments structure
  static void SetSizes(Arguments<T> &args) {
    args.c_size = GetSizeC(args);
    args.a_size = GetSizeA(args);
  }

  // Default leading dimensions of the matrices: there is no B matrix for this routine
  static size_t DefaultLDA(const Arguments<T> &args) { return args.k; }
  static size_t DefaultLDB(const Arguments<T> &) { return 1; } // N/A for this routine
  static size_t DefaultLDC(const Arguments<T> &args) { return args.n; }

  // Launches the CLBlast routine, waits on its event and returns the routine's status code
  static StatusCode RunRoutine(const Arguments<T> &args, const Buffers &buffers,
                               CommandQueue &queue) {
    auto raw_queue = queue();
    cl_event completion{};
    const auto status = Syrk(args.layout, args.triangle, args.a_transpose,
                             args.n, args.k, args.alpha,
                             buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
                             buffers.c_mat(), args.c_offset, args.c_ld,
                             &raw_queue, &completion);
    clWaitForEvents(1, &completion);
    return status;
  }

  // Launches the clBLAS routine (for correctness/performance comparison), waits on its event and
  // returns the clBLAS status converted to a CLBlast status code
  static StatusCode RunReference(const Arguments<T> &args, const Buffers &buffers,
                                 CommandQueue &queue) {
    auto raw_queue = queue();
    cl_event completion{};
    const auto reference_status = clblasXsyrk(static_cast<clblasOrder>(args.layout),
                                              static_cast<clblasUplo>(args.triangle),
                                              static_cast<clblasTranspose>(args.a_transpose),
                                              args.n, args.k, args.alpha,
                                              buffers.a_mat(), args.a_offset, args.a_ld, args.beta,
                                              buffers.c_mat(), args.c_offset, args.c_ld,
                                              1, &raw_queue, 0, nullptr, &completion);
    clWaitForEvents(1, &completion);
    return static_cast<StatusCode>(reference_status);
  }

  // Reads back the buffer holding the result of the computation: the C matrix for this routine
  static std::vector<T> DownloadResult(const Arguments<T> &args, Buffers &buffers,
                                       CommandQueue &queue) {
    auto host_copy = std::vector<T>(args.c_size, static_cast<T>(0));
    buffers.c_mat.ReadBuffer(queue, args.c_size*sizeof(T), host_copy);
    return host_copy;
  }

  // Extents of the result (n by n) and the mapping from a result coordinate to an index in the
  // C buffer, which is the same for both layouts
  static size_t ResultID1(const Arguments<T> &args) { return args.n; }
  static size_t ResultID2(const Arguments<T> &args) { return args.n; }
  static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t id2) {
    return args.c_offset + id2 + id1*args.c_ld;
  }

  // Performance metrics: n*n*k operations; the transfers cover the n*k and n*n matrices
  static size_t GetFlops(const Arguments<T> &args) {
    return args.k * (args.n * args.n);
  }
  static size_t GetBytes(const Arguments<T> &args) {
    const auto num_elements = args.n*args.k + args.n*args.n;
    return num_elements * sizeof(T);
  }
};
// =================================================================================================
} // namespace clblast
// CLBLAST_TEST_ROUTINES_XSYRK_H_
#endif

View file

@ -1,6 +1,6 @@
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under the MIT license. This
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//