Added convgemm to the CLBlast database, added initial parameters for Skylake GPU
parent
d929525039
commit
560f7a40f6
|
@ -212,11 +212,10 @@ endif()
|
||||||
|
|
||||||
# Sets the supported routines and the used kernels. New routines and kernels should be added here.
|
# Sets the supported routines and the used kernels. New routines and kernels should be added here.
|
||||||
set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger
|
set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger
|
||||||
xgemm xgemm_direct xgemv invert)
|
xgemm xgemm_direct xgemv invert xconvgemm)
|
||||||
set(KERNELS_EXTRA xconvgemm) # kernels for which not to include a tuner in 'all tuners' target
|
|
||||||
set(DATABASES copy pad padtranspose transpose xaxpy xdot
|
set(DATABASES copy pad padtranspose transpose xaxpy xdot
|
||||||
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert
|
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert
|
||||||
gemm_routine trsv_routine)
|
gemm_routine trsv_routine xconvgemm)
|
||||||
set(ROUTINE_TUNERS xgemm xtrsv)
|
set(ROUTINE_TUNERS xgemm xtrsv)
|
||||||
set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax)
|
set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax)
|
||||||
set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xtrsv
|
set(LEVEL2_ROUTINES xgemv xgbmv xhemv xhbmv xhpmv xsymv xsbmv xspmv xtrmv xtbmv xtpmv xtrsv
|
||||||
|
@ -435,7 +434,7 @@ if(TUNERS)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Adds tuning executables
|
# Adds tuning executables
|
||||||
set(ALLKERNELS ${KERNELS} ${KERNELS_EXTRA})
|
set(ALLKERNELS ${KERNELS})
|
||||||
foreach(KERNEL ${ALLKERNELS})
|
foreach(KERNEL ${ALLKERNELS})
|
||||||
add_executable(clblast_tuner_${KERNEL} ${TUNERS_COMMON} src/tuning/kernels/${KERNEL}.cpp)
|
add_executable(clblast_tuner_${KERNEL} ${TUNERS_COMMON} src/tuning/kernels/${KERNEL}.cpp)
|
||||||
target_link_libraries(clblast_tuner_${KERNEL} ${API_LIBRARIES})
|
target_link_libraries(clblast_tuner_${KERNEL} ${API_LIBRARIES})
|
||||||
|
|
|
@ -49,6 +49,9 @@ const DatabaseEntry XgemmApple = {
|
||||||
const DatabaseEntry XgemmDirectApple = {
|
const DatabaseEntry XgemmDirectApple = {
|
||||||
"XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0 } } } } } } }
|
"XgemmDirect", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0 } } } } } } }
|
||||||
};
|
};
|
||||||
|
const DatabaseEntry XconvgemmApple = {
|
||||||
|
"Xconvgemm", Precision::kAny, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0 } } } } } } }
|
||||||
|
};
|
||||||
const DatabaseEntry CopyApple = {
|
const DatabaseEntry CopyApple = {
|
||||||
"Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
|
"Copy", Precision::kAny, {"COPY_DIMX", "COPY_DIMY", "COPY_VW", "COPY_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "database/kernels/xger/xger.hpp"
|
#include "database/kernels/xger/xger.hpp"
|
||||||
#include "database/kernels/xgemm/xgemm.hpp"
|
#include "database/kernels/xgemm/xgemm.hpp"
|
||||||
#include "database/kernels/xgemm_direct/xgemm_direct.hpp"
|
#include "database/kernels/xgemm_direct/xgemm_direct.hpp"
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm.hpp"
|
||||||
#include "database/kernels/copy/copy.hpp"
|
#include "database/kernels/copy/copy.hpp"
|
||||||
#include "database/kernels/pad/pad.hpp"
|
#include "database/kernels/pad/pad.hpp"
|
||||||
#include "database/kernels/transpose/transpose.hpp"
|
#include "database/kernels/transpose/transpose.hpp"
|
||||||
|
@ -43,7 +44,7 @@ std::vector<database::DatabaseEntry> Database::database = std::vector<database::
|
||||||
const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{
|
const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{
|
||||||
database::XaxpyApple, database::XdotApple,
|
database::XaxpyApple, database::XdotApple,
|
||||||
database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple,
|
database::XgemvApple, database::XgemvFastApple, database::XgemvFastRotApple, database::XgerApple, database::XtrsvApple,
|
||||||
database::XgemmApple, database::XgemmDirectApple,
|
database::XgemmApple, database::XgemmDirectApple, database::XconvgemmApple,
|
||||||
database::CopyApple, database::PadApple, database::TransposeApple, database::PadtransposeApple,
|
database::CopyApple, database::PadApple, database::TransposeApple, database::PadtransposeApple,
|
||||||
database::InvertApple,
|
database::InvertApple,
|
||||||
database::TrsvRoutineApple
|
database::TrsvRoutineApple
|
||||||
|
@ -71,6 +72,7 @@ Database::Database(const Device &device, const std::string &kernel_name,
|
||||||
database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble,
|
database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble,
|
||||||
database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble,
|
database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble,
|
||||||
database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble,
|
database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble,
|
||||||
|
database::XconvgemmHalf, database::XconvgemmSingle, database::XconvgemmDouble, database::XconvgemmComplexSingle, database::XconvgemmComplexDouble,
|
||||||
database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble,
|
database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble,
|
||||||
database::PadHalf, database::PadSingle, database::PadDouble, database::PadComplexSingle, database::PadComplexDouble,
|
database::PadHalf, database::PadSingle, database::PadDouble, database::PadComplexSingle, database::PadComplexDouble,
|
||||||
database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble,
|
database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble,
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm.hpp"
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm_16.hpp"
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm_32.hpp"
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm_3232.hpp"
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm_64.hpp"
|
||||||
|
#include "database/kernels/xconvgemm/xconvgemm_6464.hpp"
|
|
@ -0,0 +1,22 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
#include "database/database_structure.hpp"
|
||||||
|
|
||||||
|
namespace clblast {
|
||||||
|
namespace database {
|
||||||
|
|
||||||
|
extern const DatabaseEntry XconvgemmHalf;
|
||||||
|
extern const DatabaseEntry XconvgemmSingle;
|
||||||
|
extern const DatabaseEntry XconvgemmComplexSingle;
|
||||||
|
extern const DatabaseEntry XconvgemmDouble;
|
||||||
|
extern const DatabaseEntry XconvgemmComplexDouble;
|
||||||
|
|
||||||
|
} // namespace database
|
||||||
|
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm16' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
namespace clblast {
|
||||||
|
namespace database {
|
||||||
|
|
||||||
|
const DatabaseEntry XconvgemmHalf = {
|
||||||
|
"Xconvgemm", Precision::kHalf, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", {
|
||||||
|
{ kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
} },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace database
|
||||||
|
} // namespace clblast
|
|
@ -0,0 +1,34 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm32' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
namespace clblast {
|
||||||
|
namespace database {
|
||||||
|
|
||||||
|
const DatabaseEntry XconvgemmSingle = {
|
||||||
|
"Xconvgemm", Precision::kSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
|
||||||
|
{ // Intel GPUs
|
||||||
|
kDeviceTypeGPU, "Intel", {
|
||||||
|
{ "default", {
|
||||||
|
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
{ kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
} },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", {
|
||||||
|
{ kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
} },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace database
|
||||||
|
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm3232' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
namespace clblast {
|
||||||
|
namespace database {
|
||||||
|
|
||||||
|
const DatabaseEntry XconvgemmComplexSingle = {
|
||||||
|
"Xconvgemm", Precision::kComplexSingle, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", {
|
||||||
|
{ kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
} },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace database
|
||||||
|
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm64' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
namespace clblast {
|
||||||
|
namespace database {
|
||||||
|
|
||||||
|
const DatabaseEntry XconvgemmDouble = {
|
||||||
|
"Xconvgemm", Precision::kDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", {
|
||||||
|
{ kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
} },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace database
|
||||||
|
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
||||||
|
|
||||||
|
// =================================================================================================
|
||||||
|
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||||
|
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||||
|
//
|
||||||
|
// This file populates the database with best-found tuning parameters for the 'Xconvgemm6464' kernels.
|
||||||
|
//
|
||||||
|
// =================================================================================================
|
||||||
|
|
||||||
|
namespace clblast {
|
||||||
|
namespace database {
|
||||||
|
|
||||||
|
const DatabaseEntry XconvgemmComplexDouble = {
|
||||||
|
"Xconvgemm", Precision::kComplexDouble, {"KWID", "MDIMAD", "MDIMCD", "NDIMBD", "NDIMCD", "PADA", "PADB", "VWMD", "VWND", "WGD"}, {
|
||||||
|
{ // Default
|
||||||
|
kDeviceTypeAll, "default", {
|
||||||
|
{ "default", {
|
||||||
|
{ kDeviceNameDefault , Params{ 1, 16, 8, 8, 16, 0, 0, 1, 1, 32, 0, 0, 0, 0, 0, 0 } },
|
||||||
|
} },
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace database
|
||||||
|
} // namespace clblast
|
|
@ -25,7 +25,7 @@ namespace clblast {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
Xconvgemm<T>::Xconvgemm(Queue &queue, EventPointer event, const std::string &name,
|
Xconvgemm<T>::Xconvgemm(Queue &queue, EventPointer event, const std::string &name,
|
||||||
const ConvGemmMethod method):
|
const ConvGemmMethod method):
|
||||||
Routine(queue, event, name, {"XgemmDirect"},
|
Routine(queue, event, name, {"Xconvgemm"},
|
||||||
PrecisionValue<T>(), {}, {
|
PrecisionValue<T>(), {}, {
|
||||||
(method == ConvGemmMethod::kWithIm2Col) ? "#define CONVGEMM_WITH_IM2COL\n" : "",
|
(method == ConvGemmMethod::kWithIm2Col) ? "#define CONVGEMM_WITH_IM2COL\n" : "",
|
||||||
#include "../../kernels/level3/level3.opencl"
|
#include "../../kernels/level3/level3.opencl"
|
||||||
|
|
Loading…
Reference in New Issue