Add Github Actions CI (#464)

This replaces the old Travis CI builds with GitHub Actions workflows that test on both Ubuntu and macOS, with both Clang and GCC. The macOS builds also run the tests and some other programs; on Ubuntu these run steps are skipped because OpenCL is not working there at the moment. Because these builds use new/different compilers, I fixed a few warnings and errors along the way.
pull/468/head
Cedric Nugteren 2023-05-14 11:25:15 +02:00 committed by GitHub
parent 8d2f3540e9
commit 221121b840
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 108 additions and 99 deletions

View File

@ -0,0 +1,59 @@
# Continuous integration for CLBlast: configures and compiles the library together with its
# tests, clients and samples on every pull request and on every push to master.
name: CLBlast build

on:
  pull_request: {}
  push:
    branches:
      - 'master'

jobs:
  build_and_test:
    strategy:
      matrix:
        # One entry per OS/compiler combination; the compilers are exported as CC/CXX below
        config:
          - os: ubuntu-latest
            c_compiler: gcc
            cpp_compiler: g++
          - os: ubuntu-latest
            c_compiler: clang
            cpp_compiler: clang++
          - os: macos-latest
            c_compiler: clang
            cpp_compiler: clang++

    runs-on: ${{ matrix.config.os }}

    steps:
      - uses: actions/checkout@v3

      # Platform-specific build requirements
      - name: Install requirements for Ubuntu
        if: matrix.config.os == 'ubuntu-latest'
        run: |
          sudo apt-get update
          sudo apt-get install -yq cmake ninja-build ocl-icd-opencl-dev opencl-c-headers libopenblas-dev --no-install-recommends

      - name: Install requirements for macOS
        if: matrix.config.os == 'macos-latest'
        run: brew install ninja

      # Configure and build with the compiler pair selected by the matrix entry
      - name: Run CMake
        run: |
          export CC=${{ matrix.config.c_compiler }}
          export CXX=${{ matrix.config.cpp_compiler }}
          cmake -S . -B build -G Ninja -DTESTS=ON -DCLIENTS=ON -DSAMPLES=ON

      - name: Compile the code
        run: cmake --build build

      # Everything below executes the built binaries and is restricted to the macOS runner
      - name: Get the diagnostics info
        if: matrix.config.os == 'macos-latest'
        run: ./build/clblast_test_diagnostics

      - name: Run an example client
        if: matrix.config.os == 'macos-latest'
        run: ./build/clblast_client_xgemm

      - name: Run an example sample program
        if: matrix.config.os == 'macos-latest'
        run: ./build/clblast_sample_dgemv_c

      - name: Run an example tuner
        if: matrix.config.os == 'macos-latest'
        run: ./build/clblast_tuner_xdot

      - name: Run the unittests
        if: matrix.config.os == 'macos-latest'
        run: ctest --test-dir build

View File

@ -1,69 +0,0 @@
# Travis CI configuration: configures CLBlast with tests, clients and samples enabled, builds and
# installs it, and on success packs the installation directory into a tarball.
language: cpp
sudo: required
dist: trusty
# Extra apt sources and packages needed for the build
addons:
apt:
sources:
# kubuntu-backports contains newer versions of cmake to install
- kubuntu-backports
packages:
- cmake
- ocl-icd-opencl-dev
# Build matrix: Linux with GCC, Linux with Clang, and OS X (no compiler specified)
matrix:
include:
- os: linux
compiler: gcc
- os: linux
compiler: clang
- os: osx
# Version and path variables shared by the steps below
env:
global:
- CLBLAST_VERSION=1.5.3
- CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast
- CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION}
- CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz
# Print the tool versions
before_install:
- cmake --version;
- ${CC} --version;
- ${CXX} --version;
# Create the install and build directories, then configure from inside the build directory
before_script:
- mkdir -p ${CLBLAST_INSTALL}
- mkdir -p ${CLBLAST_ROOT}
- pushd ${CLBLAST_ROOT}
- cmake -DTESTS=ON -DCLIENTS=ON -DSAMPLES=ON -DCMAKE_INSTALL_PREFIX=${CLBLAST_INSTALL} ${TRAVIS_BUILD_DIR}
# Build and install into CLBLAST_INSTALL
script:
- make
- make install
# Package the installation: the client and test binaries are removed from the install tree
# first, then the tarball is created and copied to the top-level build directory
after_success:
- pushd ${TRAVIS_BUILD_DIR}/bin
- rm ${CLBLAST_INSTALL}/bin/clblast_client_*
- rm ${CLBLAST_INSTALL}/bin/clblast_test_*
- echo ${CLBLAST_TAR}
- tar -cvf ${CLBLAST_TAR} CLBlast-${CLBLAST_VERSION}
- cp ${CLBLAST_TAR} ${TRAVIS_BUILD_DIR}
- pushd ${TRAVIS_BUILD_DIR}
- ls -l
# Restrict builds to the master branch
branches:
only:
- master
# Disable e-mail notifications
notifications:
email: false
# Deployment: uploads the tarball named by CLBLAST_TAR through the 'releases' provider, using the
# encrypted API key below; only for tagged builds of the CNugteren/CLBlast repository.
# NOTE(review): skip_cleanup presumably keeps the build artifacts in place before deploying -
# confirm against the Travis deployment documentation.
deploy:
provider: releases
api_key:
secure: oBnP56zfFTiON0v6nm6qiRevtTsojqaxV2E/+ahUP4iyZxZgn1zf9reGNEbB/s6wfHCwXpXKlCk3A0cEQzbfoZeQy3oMzyWHV/xgu+etOENe3z18oVEiVBe/WAd1/hMVmQvX65kHR+q12rce6K6rDm1mEIJC/udf5Dbdl2alVWgiL20Hrj/PSQAYZZuTmZLuMm7OBc1G2xhRmRo5FYgI2u1ZALUHDRov/yLQkoKwxAlzBhURoNTHW2wTAr3Pq01Fk2kfQFRmg7YFieu3cit/JGNzaDdgmT0U5pLRzhuPiD3qziNnC3rG7tnYV0jHQOLKH+AJ0csbNncG47JrUQrKDJGUs0fLBxHG4ErEdVc/s+l/ZTGBT6kOEjk5GLQviNuAzP51em+TATR6YJ4JdgnZEU3iwbyeY/lLPPWhOVDfUgLNVKHX7Sijf83Wp+cqspAdIcnT5lWMXUe7jciKQLC0B+jD6IQ/hCqF0/yX/H8Sa8jA+qSIrXWt/qSy1viKaQ3Sf8+rXyxG6dqYc0jUweQ248FOgUCtzmaZP48SoMBATN7JPCLzhGnY8IiMErGzc6jsevmoqB0MRqZhc2qsLEfTclxsMmfx2yVKt93G+zRMtQuYmf36MvDNbPaH+/tzE8pWxufSY0672qhL0sfvNO+FuCJ8Bsk4UwKV3lTeGjCwN5o=
file: ${CLBLAST_TAR}
skip_cleanup: true
on:
repo: CNugteren/CLBlast
tags: true

View File

@ -124,7 +124,7 @@ else()
set(FLAGS "${FLAGS} -O2")
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn")
set(FLAGS "${FLAGS} -Wall -Wno-comment -Wno-return-type -Wno-switch -Wno-missing-noreturn -Wno-unused-function")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.0)
set(FLAGS "${FLAGS} -Wno-attributes -Wno-unused-variable")
endif()
@ -138,7 +138,7 @@ else()
set(FLAGS "${FLAGS} -Wextra -Wno-c++98-compat -Wno-c++98-compat-pedantic -Wno-padded")
set(FLAGS "${FLAGS} -Wno-missing-prototypes -Wno-float-equal -Wno-switch-enum -Wno-switch")
set(FLAGS "${FLAGS} -Wno-exit-time-destructors -Wno-global-constructors -Wno-missing-noreturn")
set(FLAGS "${FLAGS} -Wno-deprecated-declarations")
set(FLAGS "${FLAGS} -Wno-deprecated-declarations -Wno-unused-function")
if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 3.9.0) # clang 4.0 or higher
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 7.0.0) # but not for AppleClang
set(FLAGS "${FLAGS} -Wno-undefined-var-template")

View File

@ -5,8 +5,8 @@ CLBlast: The tuned OpenCL BLAS library
| Platform | Build status |
|-----|-----|
| Windows | [![Build Status](https://ci.appveyor.com/api/projects/status/github/cnugteren/clblast?branch=master&svg=true)](https://ci.appveyor.com/project/CNugteren/clblast) |
| Linux | [![Build Status](https://travis-ci.org/CNugteren/CLBlast.svg?branch=master)](https://travis-ci.org/CNugteren/CLBlast/branches) |
| OS X | [![Build Status](https://travis-ci.org/CNugteren/CLBlast.svg?branch=master)](https://travis-ci.org/CNugteren/CLBlast/branches) |
| Linux/macOS | ![Build Status](https://github.com/cnugteren/clblast/actions/workflows/build_and_test.yml/badge.svg?branch=master) |
| Test machine (thanks to [ArrayFire](https://ci.arrayfire.org:8010/#/builders)) | Test status |
|-----|-----|

View File

@ -18,11 +18,6 @@
#ifndef CLBLAST_HALF_H_
#define CLBLAST_HALF_H_
// MSVC 2013 doesn't fully support C99
#ifdef _MSC_VER
#define inline __inline
#endif
// =================================================================================================
// The host data-type for half-precision floating-point (16-bit) is based on the `cl_half` OpenCL
@ -40,7 +35,7 @@ typedef union ConversionBits_ {
// Converts a IEEE-compliant single-precision value to half-precision floating-point. This function
// applies simple truncation (round toward zero, but with overflows set to infinity) as rounding
// mode.
inline half FloatToHalf(const float value) {
static half FloatToHalf(const float value) {
static const unsigned short base_table[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
@ -101,7 +96,7 @@ inline half FloatToHalf(const float value) {
}
// Converts a half-precision value to IEEE-compliant single-precision floating-point
inline float HalfToFloat(const half value) {
static float HalfToFloat(const half value) {
static const unsigned int mantissa_table[2048] = {
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,

View File

@ -20,6 +20,7 @@
#include <string.h>
#include <time.h>
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
// Includes the CLBlast library (C interface)

View File

@ -19,6 +19,7 @@
#include <stdio.h>
#include <string.h>
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
// Includes the CLBlast library (C interface)

View File

@ -24,8 +24,9 @@
// Includes the C++ OpenCL API. If not yet available, it can be found here:
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
#define CL_HPP_TARGET_OPENCL_VERSION 210
#define CL_TARGET_OPENCL_VERSION 210
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_TARGET_OPENCL_VERSION 120
#include "opencl.hpp"
// Includes the CLBlast library

View File

@ -18,6 +18,7 @@
#include <stdio.h>
#include <string.h>
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
// Includes the CLBlast library (C interface)

View File

@ -19,7 +19,7 @@
#include <stdio.h>
#include <string.h>
#define CL_TARGET_OPENCL_VERSION 110
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
// Includes the CLBlast library (C interface)
@ -84,7 +84,7 @@ int main(void) {
clEnqueueReadBuffer(queue, device_output, CL_TRUE, 0, 1*sizeof(unsigned int), host_output, 0, NULL, NULL);
// Example completed. See "clblast_c.h" for status codes (0 -> success).
printf("Completed iSAMAX with status %d: array of %d values with staircases from 0..9 repeated, max at index %zu with value %.0lf\n",
printf("Completed iSAMAX with status %d: array of %zu values with staircases from 0..9 repeated, max at index %u with value %.0lf\n",
status, n, host_output[0], host_input[host_output[0]]);
// Clean-up

View File

@ -19,6 +19,7 @@
#include <stdio.h>
#include <string.h>
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
// Includes the CLBlast library (C interface)

View File

@ -19,6 +19,7 @@
#include <stdio.h>
#include <string.h>
#define CL_TARGET_OPENCL_VERSION 120
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS // to disable deprecation warnings
// Includes the CLBlast library (C interface)

View File

@ -25,8 +25,9 @@
// Includes the C++ OpenCL API. If not yet available, it can be found here:
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
#define CL_HPP_TARGET_OPENCL_VERSION 210
#define CL_TARGET_OPENCL_VERSION 210
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_TARGET_OPENCL_VERSION 120
#include "opencl.hpp"
// Includes the CLBlast library

View File

@ -25,8 +25,9 @@
// Includes the C++ OpenCL API. If not yet available, it can be found here:
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
#define CL_HPP_TARGET_OPENCL_VERSION 210
#define CL_TARGET_OPENCL_VERSION 210
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_TARGET_OPENCL_VERSION 120
#include "opencl.hpp"
// Includes the CLBlast library

View File

@ -21,8 +21,9 @@
// Includes the C++ OpenCL API. If not yet available, it can be found here:
// https://raw.githubusercontent.com/KhronosGroup/OpenCL-CLHPP/main/include/CL/opencl.hpp
#define CL_HPP_TARGET_OPENCL_VERSION 210
#define CL_TARGET_OPENCL_VERSION 210
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_TARGET_OPENCL_VERSION 120
#include "opencl.hpp"
// Includes the CLBlast library
@ -66,7 +67,7 @@ int main() {
// Tuning completed. See "clblast.h" for status codes (0 -> success).
printf("Completed TuneCopy with status %d (0 == OK), found parameters:\n", static_cast<int>(status));
for (const auto parameter: parameters) {
for (const auto &parameter: parameters) {
printf("> %s = %zu\n", parameter.first.c_str(), parameter.second);
}

View File

@ -157,6 +157,15 @@ template <typename T, typename U> const std::vector<Triangle> TestBlas<T,U>::kTr
template <typename T, typename U> const std::vector<Side> TestBlas<T,U>::kSides = {Side::kLeft, Side::kRight};
template <typename T, typename U> const std::vector<Diagonal> TestBlas<T,U>::kDiagonals = {Diagonal::kUnit, Diagonal::kNonUnit};
// The transpose configurations to test with: template parameter dependent, see .cpp file for
// implementation. Each line below declares (without defining) an explicit specialization of the
// static member kTransposes for one <T,U> combination, so that code including this header uses
// the specialized definitions instead of expecting a generic one.
// NOTE(review): presumably added to silence an undefined-variable-template warning from newer
// Clang versions - confirm against the compiler output.
template <> const std::vector<Transpose> TestBlas<half,half>::kTransposes;
template <> const std::vector<Transpose> TestBlas<float,float>::kTransposes;
template <> const std::vector<Transpose> TestBlas<double,double>::kTransposes;
template <> const std::vector<Transpose> TestBlas<float2,float2>::kTransposes;
template <> const std::vector<Transpose> TestBlas<double2,double2>::kTransposes;
template <> const std::vector<Transpose> TestBlas<float2,float>::kTransposes;
template <> const std::vector<Transpose> TestBlas<double2,double>::kTransposes;
// =================================================================================================
// Bogus reference function, in case a comparison library is not available

View File

@ -279,7 +279,7 @@ void Client<T,U>::PerformanceTest(Arguments<U> &args, const SetMetric set_sizes)
TimeResult time_cublas;
try {
time_cublas = TimedExecution(args.num_runs, args, buffers_cuda, queue, run_reference3_, "cuBLAS");
} catch (std::runtime_error e) { }
} catch (std::runtime_error &e) { }
CUDAToHost(args, buffers_cuda, buffers_host, buffers_out_);
HostToDevice(args, buffers, buffers_host, queue, buffers_out_);
timings.push_back(std::pair<std::string, TimeResult>("cuBLAS", time_cublas));

View File

@ -201,7 +201,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
auto dummy = std::vector<float>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
auto dummy_uint = std::vector<unsigned int>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size;
args2.kernel_mode = args.kernel_mode;

View File

@ -230,7 +230,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
auto dummy = std::vector<float>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, c_buffer2, dummy, dummy};
auto dummy_uint = std::vector<unsigned int>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, c_buffer2, dummy, dummy, dummy_uint};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size; args2.c_size = args.c_size;
args2.kernel_mode = args.kernel_mode;

View File

@ -39,7 +39,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
auto y_buffer2 = HalfToFloatBuffer(buffers_host.y_vec);
auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
auto dummy = std::vector<float>(0);
auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy};
auto dummy_uint = std::vector<unsigned int>(0);
auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy, dummy_uint};
auto args2 = Arguments<float>();
args2.x_size = args.x_size; args2.y_size = args.y_size; args2.c_size = args.c_size;
args2.x_inc = args.x_inc; args2.y_inc = args.y_inc; args2.n = args.n;
@ -152,7 +153,7 @@ public:
// Describes how to compute the indices of the result buffer
static size_t ResultID1(const Arguments<T> &args) { return args.n; }
static size_t ResultID2(const Arguments<T> &) { return 1; } // N/A for this routine
static size_t GetResultIndex(const Arguments<T> &args, const size_t id1, const size_t) {
static size_t GetResultIndex(const Arguments<T> &, const size_t id1, const size_t) {
return id1; // * args.z_inc + args.z_offset;
}

View File

@ -200,7 +200,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
auto dummy = std::vector<float>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
auto dummy_uint = std::vector<unsigned int>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size;
args2.kernel_mode = args.kernel_mode;

View File

@ -108,7 +108,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
auto dummy = std::vector<float>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
auto dummy_uint = std::vector<unsigned int>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size;
args2.a_ld = args.a_ld; args2.m = args.m; args2.n = args.n;

View File

@ -59,7 +59,8 @@ StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &bu
auto a_buffer2 = HalfToFloatBuffer(buffers_host.a_mat);
auto b_buffer2 = HalfToFloatBuffer(buffers_host.b_mat);
auto dummy = std::vector<float>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy};
auto dummy_uint = std::vector<unsigned int>(0);
auto buffers2 = BuffersHost<float>{dummy, dummy, a_buffer2, b_buffer2, dummy, dummy, dummy, dummy_uint};
auto args2 = Arguments<float>();
args2.a_size = args.a_size; args2.b_size = args.b_size;
args2.a_ld = args.a_ld; args2.b_ld = args.b_ld; args2.m = args.m; args2.n = args.n;