Add GitHub Actions CI (#464)
This replaces the old Travis CI builds with GitHub Actions that test on both Ubuntu and macOS, with both Clang and GCC. The builds on macOS also run the tests and some other programs; on Ubuntu these are not run because OpenCL is not working there at the moment. Because these tests use new/different compilers, I fixed a few warnings and errors along the way.
pull/468/head
parent
8d2f3540e9
commit
221121b840
|
@ -0,0 +1,59 @@
|
|||
name: CLBlast build
|
||||
|
||||
on:
|
||||
pull_request: {}
|
||||
push:
|
||||
branches: ['master']
|
||||
|
||||
jobs:
|
||||
|
||||
build_and_test:
|
||||
strategy:
|
||||
matrix:
|
||||
config: [
|
||||
{os: ubuntu-latest, c_compiler: gcc, cpp_compiler: g++},
|
||||
{os: ubuntu-latest, c_compiler: clang, cpp_compiler: clang++},
|
||||
{os: macos-latest, c_compiler: clang, cpp_compiler: clang++},
|
||||
]
|
||||
runs-on: ${{ matrix.config.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Install requirements for Ubuntu
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -yq cmake ninja-build ocl-icd-opencl-dev opencl-c-headers libopenblas-dev --no-install-recommends
|
||||
if: ${{ matrix.config.os == 'ubuntu-latest' }}
|
||||
|
||||
- name: Install requirements for macOS
|
||||
run: brew install ninja
|
||||
if: ${{ matrix.config.os == 'macos-latest' }}
|
||||
|
||||
- name: Run CMake
|
||||
run: |
|
||||
export CC=${{ matrix.config.c_compiler }}
|
||||
export CXX=${{ matrix.config.cpp_compiler }}
|
||||
cmake -S . -B build -G Ninja -DTESTS=ON -DCLIENTS=ON -DSAMPLES=ON
|
||||
|
||||
- name: Compile the code
|
||||
run: cmake --build build
|
||||
|
||||
- name: Get the diagnostics info
|
||||
run: ./build/clblast_test_diagnostics
|
||||
if: ${{ matrix.config.os == 'macos-latest' }}
|
||||
|
||||
- name: Run an example client
|
||||
run: ./build/clblast_client_xgemm
|
||||
if: ${{ matrix.config.os == 'macos-latest' }}
|
||||
|
||||
- name: Run an example sample program
|
||||
run: ./build/clblast_sample_dgemv_c
|
||||
if: ${{ matrix.config.os == 'macos-latest' }}
|
||||
|
||||
- name: Run an example tuner
|
||||
run: ./build/clblast_tuner_xdot
|
||||
if: ${{ matrix.config.os == 'macos-latest' }}
|
||||
|
||||
- name: Run the unittests
|
||||
run: ctest --test-dir build
|
||||
if: ${{ matrix.config.os == 'macos-latest' }}
|
69
.travis.yml
69
.travis.yml
|
@ -1,69 +0,0 @@
|
|||
language: cpp
|
||||
sudo: required
|
||||
dist: trusty
|
||||
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
# kubuntu-backports contains newer versions of cmake to install
|
||||
- kubuntu-backports
|
||||
packages:
|
||||
- cmake
|
||||
- ocl-icd-opencl-dev
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
- os: linux
|
||||
compiler: clang
|
||||
- os: osx
|
||||
|
||||
env:
|
||||
global:
|
||||
- CLBLAST_VERSION=1.5.3
|
||||
- CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast
|
||||
- CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION}
|
||||
- CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz
|
||||
|
||||
before_install:
|
||||
- cmake --version;
|
||||
- ${CC} --version;
|
||||
- ${CXX} --version;
|
||||
|
||||
before_script:
|
||||
- mkdir -p ${CLBLAST_INSTALL}
|
||||
- mkdir -p ${CLBLAST_ROOT}
|
||||
- pushd ${CLBLAST_ROOT}
|
||||
- cmake -DTESTS=ON -DCLIENTS=ON -DSAMPLES=ON -DCMAKE_INSTALL_PREFIX=${CLBLAST_INSTALL} ${TRAVIS_BUILD_DIR}
|
||||
|
||||
script:
|
||||
- make
|
||||
- make install
|
||||
|
||||
after_success:
|
||||
- pushd ${TRAVIS_BUILD_DIR}/bin
|
||||
- rm ${CLBLAST_INSTALL}/bin/clblast_client_*
|
||||
- rm ${CLBLAST_INSTALL}/bin/clblast_test_*
|
||||
- echo ${CLBLAST_TAR}
|
||||
- tar -cvf ${CLBLAST_TAR} CLBlast-${CLBLAST_VERSION}
|
||||
- cp ${CLBLAST_TAR} ${TRAVIS_BUILD_DIR}
|
||||
- pushd ${TRAVIS_BUILD_DIR}
|
||||
- ls -l
|
||||
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
|
||||
notifications:
|
||||
email: false
|
||||
|
||||
deploy:
|
||||
provider: releases
|
||||
api_key:
|
||||
secure: oBnP56zfFTiON0v6nm6qiRevtTsojqaxV2E/+ahUP4iyZxZgn1zf9reGNEbB/s6wfHCwXpXKlCk3A0cEQzbfoZeQy3oMzyWHV/xgu+etOENe3z18oVEiVBe/WAd1/hMVmQvX65kHR+q12rce6K6rDm1mEIJC/udf5Dbdl2alVWgiL20Hrj/PSQAYZZuTmZLuMm7OBc1G2xhRmRo5FYgI2u1ZALUHDRov/yLQkoKwxAlzBhURoNTHW2wTAr3Pq01Fk2kfQFRmg7YFieu3cit/JGNzaDdgmT0U5pLRzhuPiD3qziNnC3rG7tnYV0jHQOLKH+AJ0csbNncG47JrUQrKDJGUs0fLBxHG4ErEdVc/s+l/ZTGBT6kOEjk5GLQviNuAzP51em+TATR6YJ4JdgnZEU3iwbyeY/lLPPWhOVDfUgLNVKHX7Sijf83Wp+cqspAdIcnT5lWMXUe7jciKQLC0B+jD6IQ/hCqF0/yX/H8Sa8jA+qSIrXWt/qSy1viKaQ3Sf8+rXyxG6dqYc0jUweQ248FOgUCtzmaZP48SoMBATN7JPCLzhGnY8IiMErGzc6jsevmoqB0MRqZhc2qsLEfTclxsMmfx2yVKt93G+zRMtQuYmf36MvDNbPaH+/tzE8pWxufSY0672qhL0sfvNO+FuCJ8Bsk4UwKV3lTeGjCwN5o=
|
||||
file: ${CLBLAST_TAR}
|
||||
skip_cleanup: true
|
||||
on:
|
||||
repo: CNugteren/CLBlast
|
||||
tags: true
|
|
@ -124,7 +124,7 @@ else()
|
|||
set(FLAGS "${FLAGS} -O2")
|
||||
endif()
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
|
||||
set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn")
|
||||
set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn -Wno-unused-function")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0)
|
||||
set(FLAGS "${FLAGS} -Wno-attributes -Wno-unused-variable")
|
||||
endif()
|
||||
|
@ -138,7 +138,7 @@ else()
|
|||
set(FLAGS "${FLAGS} -Wextra -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded")
|
||||
set(FLAGS "${FLAGS} -Wno-missing-prototypes -Wno-float-equal -Wno-switch-enum -Wno-switch")
|
||||
set(FLAGS "${FLAGS} -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-noreturn")
|
||||
set(FLAGS "${FLAGS} -Wno-deprecated-declarations")
|
||||
set(FLAGS "${FLAGS} -Wno-deprecated-declarations -Wno-unused-function")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9.0) # clang 4.0 or higher
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0) # but not for AppleClang
|
||||
set(FLAGS "${FLAGS} -Wno-undefined-var-template")
|
||||
|
|
|
@ -5,8 +5,8 @@ CLBlast: The tuned OpenCL BLAS library
|
|||
| Platform | Build status |
|
||||
|-----|-----|
|
||||
| Windows | [![Build Status](https://ci.appveyor.com/api/projects/status/github/cnugteren/clblast?branch=master&svg=true)](https://ci.appveyor.com/project/CNugteren/clblast) |
|
||||
| Linux | [![Build Status](https://travis-ci.org/CNugteren/CLBlast.svg?branch=master)](https://travis-ci.org/CNugteren/CLBlast/branches) |
|
||||
| OS X | [![Build Status](https://travis-ci.org/CNugteren/CLBlast.svg?branch=master)](https://travis-ci.org/CNugteren/CLBlast/branches) |
|
||||
| Linux/macOS | ![Build Status](https://github.com/cnugteren/clblast/actions/workflows/build_and_test.yml/badge.svg?branch=master)
|
||||
|
|
||||
|
||||
| Test machine (thanks to [ArrayFire](https://ci.arrayfire.org:8010/#/builders)) | Test status |
|
||||
|-----|-----|
|
||||
|
|
|
@ -18,11 +18,6 @@
|
|||
#ifndef CLBLAST_HALF_H_
|
||||
#define CLBLAST_HALF_H_
|
||||
|
||||
// MSVC 2013 doesn't fully support C99
|
||||
#ifdef _MSC_VER
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// The host data-type for half-precision floating-point (16-bit) is based on the `cl_half` OpenCL
|
||||
|
@ -40,7 +35,7 @@ typedef union ConversionBits_ {
|
|||
// Converts an IEEE-compliant single-precision value to half-precision floating-point. This function
|
||||
// applies simple truncation (round toward zero, but with overflows set to infinity) as rounding
|
||||
// mode.
|
||||
inline half FloatToHalf(const float value) {
|
||||
static half FloatToHalf(const float value) {
|
||||
static const unsigned short base_table[512] = {
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||
|
@ -101,7 +96,7 @@ inline half FloatToHalf(const float value) {
|
|||
}
|
||||
|
||||
// Converts a half-precision value to IEEE-compliant single-precision floating-point
|
||||
inline float HalfToFloat(const half value) {
|
||||
static float HalfToFloat(const half value) {
|
||||
static const unsigned int mantissa_table[2048] = {
|
||||
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
|
||||
0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <string.h>
|
||||
#include <time.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
|
||||
|
||||
// Includes the CLBlast library (C interface)
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
|
||||
|
||||
// Includes the CLBlast library (C interface)
|
||||
|
|
|
@ -24,8 +24,9 @@
|
|||
|
||||
// Includes the C++ OpenCL API. If not yet available, it can be found here:
|
||||
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 210
|
||||
#define CL_TARGET_OPENCL_VERSION 210
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#include "opencl.hpp"
|
||||
|
||||
// Includes the CLBlast library
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
|
||||
|
||||
// Includes the CLBlast library (C interface)
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 110
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
|
||||
|
||||
// Includes the CLBlast library (C interface)
|
||||
|
@ -84,7 +84,7 @@ int main(void) {
|
|||
clEnqueueReadBuffer(queue, device_output, CL_TRUE, 0, 1*sizeof(unsigned int), host_output, 0, NULL, NULL);
|
||||
|
||||
// Example completed. See "clblast_c.h" for status codes (0 -> success).
|
||||
printf("Completed iSAMAX with status %d: array of %d values with staircases from 0..9 repeated, max at index %zu with value %.0lf\n",
|
||||
printf("Completed iSAMAX with status %d: array of %zu values with staircases from 0..9 repeated, max at index %u with value %.0lf\n",
|
||||
status, n, host_output[0], host_input[host_output[0]]);
|
||||
|
||||
// Clean-up
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
|
||||
|
||||
// Includes the CLBlast library (C interface)
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
|
||||
|
||||
// Includes the CLBlast library (C interface)
|
||||
|
|
|
@ -25,8 +25,9 @@
|
|||
|
||||
// Includes the C++ OpenCL API. If not yet available, it can be found here:
|
||||
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 210
|
||||
#define CL_TARGET_OPENCL_VERSION 210
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#include "opencl.hpp"
|
||||
|
||||
// Includes the CLBlast library
|
||||
|
|
|
@ -25,8 +25,9 @@
|
|||
|
||||
// Includes the C++ OpenCL API. If not yet available, it can be found here:
|
||||
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 210
|
||||
#define CL_TARGET_OPENCL_VERSION 210
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#include "opencl.hpp"
|
||||
|
||||
// Includes the CLBlast library
|
||||
|
|
|
@ -21,8 +21,9 @@
|
|||
|
||||
// Includes the C++ OpenCL API. If not yet available, it can be found here:
|
||||
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 210
|
||||
#define CL_TARGET_OPENCL_VERSION 210
|
||||
#define CL_HPP_TARGET_OPENCL_VERSION 120
|
||||
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
|
||||
#define CL_TARGET_OPENCL_VERSION 120
|
||||
#include "opencl.hpp"
|
||||
|
||||
// Includes the CLBlast library
|
||||
|
@ -66,7 +67,7 @@ int main() {
|
|||
|
||||
// Tuning completed. See "clblast.h" for status codes (0 -> success).
|
||||
printf("Completed TuneCopy with status %d (0 == OK), found parameters:\n", static_cast<int>(status));
|
||||
for (const auto parameter: parameters) {
|
||||
for (const auto &parameter: parameters) {
|
||||
printf("> %s = %zu\n", parameter.first.c_str(), parameter.second);
|
||||
}
|
||||
|
||||
|
|
|
@ -157,6 +157,15 @@ template <typename T, typename U> const std::vector<Triangle> TestBlas<T,U>::kTr
|
|||
template <typename T, typename U> const std::vector<Side> TestBlas<T,U>::kSides = {Side::kLeft, Side::kRight};
|
||||
template <typename T, typename U> const std::vector<Diagonal> TestBlas<T,U>::kDiagonals = {Diagonal::kUnit, Diagonal::kNonUnit};
|
||||
|
||||
// The transpose configurations to test with: template parameter dependent, see .cpp file for implementation
|
||||
template <> const std::vector<Transpose> TestBlas<half,half>::kTransposes;
|
||||
template <> const std::vector<Transpose> TestBlas<float,float>::kTransposes;
|
||||
template <> const std::vector<Transpose> TestBlas<double,double>::kTransposes;
|
||||
template <> const std::vector<Transpose> TestBlas<float2,float2>::kTransposes;
|
||||
template <> const std::vector<Transpose> TestBlas<double2,double2>::kTransposes;
|
||||
template <> const std::vector<Transpose> TestBlas<float2,float>::kTransposes;
|
||||
template <> const std::vector<Transpose> TestBlas<double2,double>::kTransposes;
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Bogus reference function, in case a comparison library is not available
|
||||
|
|
|
@ -279,7 +279,7 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
|
|||
TimeResult time_cublas;
|
||||
try {
|
||||
time_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
|
||||
} catch (std::runtime_error e) { }
|
||||
} catch (std::runtime_error &e) { }
|
||||
CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
|
||||
HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
|
||||
timings.push_back(std::pair<std::string, TimeResult>("cuBLAS", time_cublas));
|
||||
|
|
|
@ -201,7 +201,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
|
|||
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
|
||||
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
|
||||
auto dummy = std::vector<float>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
|
||||
auto dummy_uint = std::vector<unsigned int>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
|
||||
auto args2 = Arguments<float>();
|
||||
args2.a_size = args.a_size; args2.b_size = args.b_size;
|
||||
args2.kernel_mode = args.kernel_mode;
|
||||
|
|
|
@ -230,7 +230,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
|
|||
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
|
||||
auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
|
||||
auto dummy = std::vector<float>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, c_buffer2, dummy, dummy};
|
||||
auto dummy_uint = std::vector<unsigned int>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, c_buffer2, dummy, dummy, dummy_uint};
|
||||
auto args2 = Arguments<float>();
|
||||
args2.a_size = args.a_size; args2.b_size = args.b_size; args2.c_size = args.c_size;
|
||||
args2.kernel_mode = args.kernel_mode;
|
||||
|
|
|
@ -39,7 +39,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
|
|||
auto y_buffer2 = HalfToFloatBuffer(buffers_host.y_vec);
|
||||
auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
|
||||
auto dummy = std::vector<float>(0);
|
||||
auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy};
|
||||
auto dummy_uint = std::vector<unsigned int>(0);
|
||||
auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy, dummy_uint};
|
||||
auto args2 = Arguments<float>();
|
||||
args2.x_size = args.x_size; args2.y_size = args.y_size; args2.c_size = args.c_size;
|
||||
args2.x_inc = args.x_inc; args2.y_inc = args.y_inc; args2.n = args.n;
|
||||
|
@ -152,7 +153,7 @@ public:
|
|||
// Describes how to compute the indices of the result buffer
|
||||
static size_t ResultID1(const Arguments<T> &args) { return args.n; }
|
||||
static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
|
||||
static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
|
||||
static size_t GetResultIndex(const Arguments<T> &, const size_t id1, const size_t) {
|
||||
return id1; // * args.z_inc + args.z_offset;
|
||||
}
|
||||
|
||||
|
|
|
@ -200,7 +200,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
|
|||
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
|
||||
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
|
||||
auto dummy = std::vector<float>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
|
||||
auto dummy_uint = std::vector<unsigned int>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
|
||||
auto args2 = Arguments<float>();
|
||||
args2.a_size = args.a_size; args2.b_size = args.b_size;
|
||||
args2.kernel_mode = args.kernel_mode;
|
||||
|
|
|
@ -108,7 +108,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
|
|||
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
|
||||
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
|
||||
auto dummy = std::vector<float>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
|
||||
auto dummy_uint = std::vector<unsigned int>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
|
||||
auto args2 = Arguments<float>();
|
||||
args2.a_size = args.a_size; args2.b_size = args.b_size;
|
||||
args2.a_ld = args.a_ld; args2.m = args.m; args2.n = args.n;
|
||||
|
|
|
@ -59,7 +59,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
|
|||
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
|
||||
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
|
||||
auto dummy = std::vector<float>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
|
||||
auto dummy_uint = std::vector<unsigned int>(0);
|
||||
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
|
||||
auto args2 = Arguments<float>();
|
||||
args2.a_size = args.a_size; args2.b_size = args.b_size;
|
||||
args2.a_ld = args.a_ld; args2.b_ld = args.b_ld; args2.m = args.m; args2.n = args.n;
|
||||
|
|
Loading…
Reference in New Issue