mirror of
https://github.com/CNugteren/CLBlast.git
synced 2024-07-15 10:55:42 +02:00
Merge branch 'master' into preparation_for_size_specific_parameters
This commit is contained in:
commit
3b7371f81b
|
@ -58,8 +58,8 @@ build_script:
|
||||||
|
|
||||||
after_build:
|
after_build:
|
||||||
- ps: pushd $env:CLBLAST_BUILD
|
- ps: pushd $env:CLBLAST_BUILD
|
||||||
- 7z a CLBlast-1.0.0-Windows-x64.zip .\install_dir\*
|
- 7z a CLBlast-1.1.0-Windows-x64.zip .\install_dir\*
|
||||||
- ps: mv CLBlast-1.0.0-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER
|
- ps: mv CLBlast-1.1.0-Windows-x64.zip $env:APPVEYOR_BUILD_FOLDER
|
||||||
|
|
||||||
artifacts:
|
artifacts:
|
||||||
- path: '*.zip'
|
- path: '*.zip'
|
||||||
|
|
|
@ -21,7 +21,7 @@ matrix:
|
||||||
|
|
||||||
env:
|
env:
|
||||||
global:
|
global:
|
||||||
- CLBLAST_VERSION=1.0.0
|
- CLBLAST_VERSION=1.1.0
|
||||||
- CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast
|
- CLBLAST_ROOT=${TRAVIS_BUILD_DIR}/bin/clblast
|
||||||
- CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION}
|
- CLBLAST_INSTALL=${TRAVIS_BUILD_DIR}/bin/CLBlast-${CLBLAST_VERSION}
|
||||||
- CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz
|
- CLBLAST_TAR=CLBlast-${CLBLAST_VERSION}-${TRAVIS_OS_NAME}-x64.tar.gz
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
|
|
||||||
Development (next version)
|
Version 1.1.0
|
||||||
- The tuning database now has defaults per architecture (e.g. NVIDIA Kepler SM3.5, AMD Fiji)
|
- The tuning database now has defaults per architecture (e.g. NVIDIA Kepler SM3.5, AMD Fiji)
|
||||||
- The tuning database now has a dictionary to translate vendor/device names to a common set
|
- The tuning database now has a dictionary to translate vendor/device names to a common set
|
||||||
- The tuners can now distinguish between different AMD GPU board names of the same architecture
|
- The tuners can now distinguish between different AMD GPU board names of the same architecture
|
||||||
|
|
|
@ -18,8 +18,8 @@ set(CMAKE_USER_MAKE_RULES_OVERRIDE_CXX ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cxx_fla
|
||||||
# CMake project details
|
# CMake project details
|
||||||
project("clblast" C CXX)
|
project("clblast" C CXX)
|
||||||
set(clblast_VERSION_MAJOR 1)
|
set(clblast_VERSION_MAJOR 1)
|
||||||
set(clblast_VERSION_MINOR 0)
|
set(clblast_VERSION_MINOR 1)
|
||||||
set(clblast_VERSION_PATCH 1)
|
set(clblast_VERSION_PATCH 0)
|
||||||
|
|
||||||
# Options and their default values
|
# Options and their default values
|
||||||
option(BUILD_SHARED_LIBS "Build a shared (ON) or static library (OFF)" ON)
|
option(BUILD_SHARED_LIBS "Build a shared (ON) or static library (OFF)" ON)
|
||||||
|
|
10
README.md
10
README.md
|
@ -316,6 +316,16 @@ Since there is no half-precision data-type in C or C++, OpenCL provides the `cl_
|
||||||
The `samples/haxpy.c` example shows how to use these convenience functions when calling the half-precision BLAS routine HAXPY.
|
The `samples/haxpy.c` example shows how to use these convenience functions when calling the half-precision BLAS routine HAXPY.
|
||||||
|
|
||||||
|
|
||||||
|
Known issues
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Known performance-related issues:
|
||||||
|
|
||||||
|
* Severe performance issues with Beignet v1.3.0 due to missing support for local memory. Please downgrade to v1.2.1 or upgrade to v1.3.1 or newer.
|
||||||
|
|
||||||
|
* Performance issues on ARM Mali GPUs due to missing compiler support for loop unrolling and array-to-register promotion.
|
||||||
|
|
||||||
|
|
||||||
Contributing
|
Contributing
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue