Fixes for the CUDA API

This commit is contained in:
Cedric Nugteren 2018-04-20 21:50:36 +02:00
parent 458e6717a9
commit 3e3a26e0da
2 changed files with 3 additions and 3 deletions

View file

@@ -236,7 +236,6 @@ set(SOURCES
src/routine.cpp src/routine.cpp
src/routines/levelx/xinvert.cpp # only source, don't include it as a test src/routines/levelx/xinvert.cpp # only source, don't include it as a test
src/tuning/configurations.cpp src/tuning/configurations.cpp
- src/tuning/tuning_api.cpp
) )
set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual Studio set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual Studio
include/clblast_half.h include/clblast_half.h
@@ -265,7 +264,7 @@ set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual
src/tuning/routines/routine_tuner.hpp src/tuning/routines/routine_tuner.hpp
) )
if(OPENCL) if(OPENCL)
- set(SOURCES ${SOURCES} src/clblast.cpp src/clblast_c.cpp)
+ set(SOURCES ${SOURCES} src/clblast.cpp src/clblast_c.cpp src/tuning/tuning_api.cpp)
set(HEADERS ${HEADERS} include/clblast.h include/clblast_c.h src/clpp11.hpp) set(HEADERS ${HEADERS} include/clblast.h include/clblast_c.h src/clpp11.hpp)
if(NETLIB) if(NETLIB)
set(SOURCES ${SOURCES} src/clblast_netlib_c.cpp) set(SOURCES ${SOURCES} src/clblast_netlib_c.cpp)

View file

@@ -2593,7 +2593,8 @@ StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose,
else { else {
temp_buffer_size = Xgemm<T>::GetTempSize(layout, a_transpose, b_transpose, m, n, k, temp_buffer_size = Xgemm<T>::GetTempSize(layout, a_transpose, b_transpose, m, n, k,
a_offset, a_ld, b_offset, b_ld, c_offset, c_ld, a_offset, a_ld, b_offset, b_ld, c_offset, c_ld,
- db["MWG"], db["NWG"], db["KWG"]);
+ db["MWG"], db["NWG"], db["KWG"] * db["KREG"],
+ db["GEMMK"]);
} }
temp_buffer_size *= sizeof(T); // translate from num-elements to bytes temp_buffer_size *= sizeof(T); // translate from num-elements to bytes
return StatusCode::kSuccess; return StatusCode::kSuccess;