Updated the database to use the new TRSV and Invert tuners
parent
aa7db4f987
commit
b1f52f130c
|
@ -4,6 +4,7 @@ Development (next version)
|
|||
- Made it possible to override the tuning parameters in the clients straight from JSON tuning files
|
||||
- Added OpenCL pre-processor to unroll loops and perform array-to-register promotions for compilers
|
||||
which don't do this themselves (ARM Mali) - greatly improves performance on these platforms
|
||||
- Added first tuners for the TRSV (block size) and TRSM (invert kernel) routines
|
||||
- Various minor fixes and enhancements
|
||||
- Added tuned parameters for various devices (see README)
|
||||
|
||||
|
|
|
@ -195,7 +195,7 @@ endif()
|
|||
set(KERNELS copy_fast copy_pad transpose_fast transpose_pad xaxpy xdot xger
|
||||
xgemm xgemm_direct xgemv invert)
|
||||
set(DATABASES copy pad padtranspose transpose xaxpy xdot
|
||||
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger
|
||||
xgemm xgemm_direct xgemv xgemv_fast xgemv_fast_rot xger invert
|
||||
gemm_routine trsv_routine)
|
||||
set(ROUTINE_TUNERS xgemm xtrsv)
|
||||
set(LEVEL1_ROUTINES xswap xscal xcopy xaxpy xdot xdotu xdotc xnrm2 xasum xamax)
|
||||
|
|
|
@ -29,11 +29,11 @@
|
|||
#include "database/kernels/pad/pad.hpp"
|
||||
#include "database/kernels/transpose/transpose.hpp"
|
||||
#include "database/kernels/padtranspose/padtranspose.hpp"
|
||||
#include "database/kernels/invert/invert.hpp"
|
||||
|
||||
#include "database/kernels/gemm_routine/gemm_routine.hpp"
|
||||
#include "database/kernels/trsv_routine/trsv_routine.hpp"
|
||||
|
||||
#include "database/kernels/xtrsv.hpp"
|
||||
#include "database/kernels/invert.hpp"
|
||||
#include "database/apple_cpu_fallback.hpp"
|
||||
|
||||
namespace clblast {
|
||||
|
@ -47,7 +47,6 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data
|
|||
database::XgemvFastHalf, database::XgemvFastSingle, database::XgemvFastDouble, database::XgemvFastComplexSingle, database::XgemvFastComplexDouble,
|
||||
database::XgemvFastRotHalf, database::XgemvFastRotSingle, database::XgemvFastRotDouble, database::XgemvFastRotComplexSingle, database::XgemvFastRotComplexDouble,
|
||||
database::XgerHalf, database::XgerSingle, database::XgerDouble, database::XgerComplexSingle, database::XgerComplexDouble,
|
||||
database::XtrsvHalf, database::XtrsvSingle, database::XtrsvDouble, database::XtrsvComplexSingle, database::XtrsvComplexDouble,
|
||||
database::XgemmHalf, database::XgemmSingle, database::XgemmDouble, database::XgemmComplexSingle, database::XgemmComplexDouble,
|
||||
database::XgemmDirectHalf, database::XgemmDirectSingle, database::XgemmDirectDouble, database::XgemmDirectComplexSingle, database::XgemmDirectComplexDouble,
|
||||
database::CopyHalf, database::CopySingle, database::CopyDouble, database::CopyComplexSingle, database::CopyComplexDouble,
|
||||
|
@ -55,7 +54,8 @@ const std::vector<database::DatabaseEntry> Database::database = std::vector<data
|
|||
database::TransposeHalf, database::TransposeSingle, database::TransposeDouble, database::TransposeComplexSingle, database::TransposeComplexDouble,
|
||||
database::PadtransposeHalf, database::PadtransposeSingle, database::PadtransposeDouble, database::PadtransposeComplexSingle, database::PadtransposeComplexDouble,
|
||||
database::InvertHalf, database::InvertSingle, database::InvertDouble, database::InvertComplexSingle, database::InvertComplexDouble,
|
||||
database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble
|
||||
database::GemmRoutineHalf, database::GemmRoutineSingle, database::GemmRoutineDouble, database::GemmRoutineComplexSingle, database::GemmRoutineComplexDouble,
|
||||
database::TrsvRoutineHalf, database::TrsvRoutineSingle, database::TrsvRoutineDouble, database::TrsvRoutineComplexSingle, database::TrsvRoutineComplexDouble
|
||||
};
|
||||
const std::vector<database::DatabaseEntry> Database::apple_cpu_fallback = std::vector<database::DatabaseEntry>{
|
||||
database::XaxpyApple, database::XdotApple,
|
||||
|
|
|
@ -1,78 +0,0 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// Tuning parameters for the diagonal matrix inversion kernels
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kHalf.
// Declares a single tuning parameter, INTERNAL_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 16.
// NOTE(review): presumably the first Params value is the INTERNAL_BLOCK_SIZE setting and the
// trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertHalf = {
|
||||
"Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kSingle.
// Declares a single tuning parameter, INTERNAL_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 16.
// NOTE(review): presumably the first Params value is the INTERNAL_BLOCK_SIZE setting and the
// trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertSingle = {
|
||||
"Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kComplexSingle.
// Declares a single tuning parameter, INTERNAL_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 16.
// NOTE(review): presumably the first Params value is the INTERNAL_BLOCK_SIZE setting and the
// trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertComplexSingle = {
|
||||
"Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kDouble.
// Declares a single tuning parameter, INTERNAL_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 16.
// NOTE(review): presumably the first Params value is the INTERNAL_BLOCK_SIZE setting and the
// trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertDouble = {
|
||||
"Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kComplexDouble.
// Declares a single tuning parameter, INTERNAL_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 16.
// NOTE(review): presumably the first Params value is the INTERNAL_BLOCK_SIZE setting and the
// trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertComplexDouble = {
|
||||
"Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Invert' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
#include "database/kernels/invert/invert_16.hpp"
|
||||
#include "database/kernels/invert/invert_32.hpp"
|
||||
#include "database/kernels/invert/invert_3232.hpp"
|
||||
#include "database/kernels/invert/invert_64.hpp"
|
||||
#include "database/kernels/invert/invert_6464.hpp"
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Invert16' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kHalf.
// Declares four tuning parameters: INTERNAL_BLOCK_SIZE, LOCALPAD, TMMWGSX and TMMWGSY.
// Two groups are listed: an Intel GPU group (one named device plus a group-level default) and a
// catch-all default group; every Params list here starts with 16, 0, 4, 4.
// NOTE(review): presumably those four leading values map onto the four parameter names in order
// and the trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
// NOTE(review): the Intel device-name literal ends in a space; verify that is what lookup expects.
const DatabaseEntry InvertHalf = {
|
||||
"Invert", Precision::kHalf, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "default", {
|
||||
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Invert32' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kSingle.
// Declares four tuning parameters: INTERNAL_BLOCK_SIZE, LOCALPAD, TMMWGSX and TMMWGSY.
// Two groups are listed: an Intel GPU group (one named device plus a group-level default) and a
// catch-all default group; every Params list here starts with 16, 0, 4, 4.
// NOTE(review): presumably those four leading values map onto the four parameter names in order
// and the trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
// NOTE(review): the Intel device-name literal ends in a space; verify that is what lookup expects.
const DatabaseEntry InvertSingle = {
|
||||
"Invert", Precision::kSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "default", {
|
||||
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Invert3232' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kComplexSingle.
// Declares four tuning parameters: INTERNAL_BLOCK_SIZE, LOCALPAD, TMMWGSX and TMMWGSY.
// Two groups are listed: an Intel GPU group (one named device plus a group-level default) and a
// catch-all default group; every Params list here starts with 16, 0, 4, 4.
// NOTE(review): presumably those four leading values map onto the four parameter names in order
// and the trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
// NOTE(review): the Intel device-name literal ends in a space; verify that is what lookup expects.
const DatabaseEntry InvertComplexSingle = {
|
||||
"Invert", Precision::kComplexSingle, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "default", {
|
||||
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Invert64' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kDouble.
// Declares four tuning parameters: INTERNAL_BLOCK_SIZE, LOCALPAD, TMMWGSX and TMMWGSY.
// Only a catch-all default group (kDeviceTypeAll / "default") is listed; its Params list starts
// with 16, 0, 4, 4.
// NOTE(review): presumably those four leading values map onto the four parameter names in order
// and the trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertDouble = {
|
||||
"Invert", Precision::kDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Invert6464' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "Invert" for Precision::kComplexDouble.
// Declares four tuning parameters: INTERNAL_BLOCK_SIZE, LOCALPAD, TMMWGSX and TMMWGSY.
// Only a catch-all default group (kDeviceTypeAll / "default") is listed; its Params list starts
// with 16, 0, 4, 4.
// NOTE(review): presumably those four leading values map onto the four parameter names in order
// and the trailing zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry InvertComplexDouble = {
|
||||
"Invert", Precision::kComplexDouble, {"INTERNAL_BLOCK_SIZE", "LOCALPAD", "TMMWGSX", "TMMWGSY"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 16, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,14 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
#include "database/kernels/trsv_routine/trsv_routine_16.hpp"
|
||||
#include "database/kernels/trsv_routine/trsv_routine_32.hpp"
|
||||
#include "database/kernels/trsv_routine/trsv_routine_3232.hpp"
|
||||
#include "database/kernels/trsv_routine/trsv_routine_64.hpp"
|
||||
#include "database/kernels/trsv_routine/trsv_routine_6464.hpp"
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine16' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "TrsvRoutine" for Precision::kHalf.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and lists only a catch-all default group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): presumably the first Params value is the TRSV_BLOCK_SIZE setting and the trailing
// zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry TrsvRoutineHalf = {
|
||||
"TrsvRoutine", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine32' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "TrsvRoutine" for Precision::kSingle.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE.
// Two groups are listed: an Intel GPU group (one named device plus a group-level default) and a
// catch-all default group; every Params list here starts with 32.
// NOTE(review): presumably the first Params value is the TRSV_BLOCK_SIZE setting and the trailing
// zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
// NOTE(review): the Intel device-name literal ends in a space; verify that is what lookup expects.
const DatabaseEntry TrsvRoutineSingle = {
|
||||
"TrsvRoutine", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "default", {
|
||||
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,34 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine3232' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "TrsvRoutine" for Precision::kComplexSingle.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE.
// Two groups are listed: an Intel GPU group (one named device plus a group-level default) and a
// catch-all default group; every Params list here starts with 32.
// NOTE(review): presumably the first Params value is the TRSV_BLOCK_SIZE setting and the trailing
// zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
// NOTE(review): the Intel device-name literal ends in a space; verify that is what lookup expects.
const DatabaseEntry TrsvRoutineComplexSingle = {
|
||||
"TrsvRoutine", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Intel GPUs
|
||||
kDeviceTypeGPU, "Intel", {
|
||||
{ "default", {
|
||||
{ Name{"Intel(R) HD Graphics Skylake ULT GT2 "}, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine64' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "TrsvRoutine" for Precision::kDouble.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and lists only a catch-all default group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): presumably the first Params value is the TRSV_BLOCK_SIZE setting and the trailing
// zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry TrsvRoutineDouble = {
|
||||
"TrsvRoutine", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -0,0 +1,26 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. It
|
||||
// is auto-generated by the 'scripts/database/database.py' Python script.
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Trsv_Routine6464' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
|
||||
// Tuning-database entry named "TrsvRoutine" for Precision::kComplexDouble.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and lists only a catch-all default group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): presumably the first Params value is the TRSV_BLOCK_SIZE setting and the trailing
// zeros are unused slots -- confirm against the Params/DatabaseEntry definitions.
const DatabaseEntry TrsvRoutineComplexDouble = {
|
||||
"TrsvRoutine", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", {
|
||||
{ kDeviceNameDefault , Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
|
||||
} },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -1,78 +0,0 @@
|
|||
|
||||
// =================================================================================================
|
||||
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
|
||||
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
|
||||
// width of 100 characters per line.
|
||||
//
|
||||
// Author(s):
|
||||
// Cedric Nugteren <www.cedricnugteren.nl>
|
||||
//
|
||||
// This file populates the database with best-found tuning parameters for the 'Xtrsv' kernels.
|
||||
//
|
||||
// =================================================================================================
|
||||
|
||||
namespace clblast {
|
||||
namespace database {
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Xtrsv" for Precision::kHalf.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): this Params list appears to hold 13 values, one fewer than the 14 listed by the
// "Invert" and "TrsvRoutine" entries -- confirm the intended width of Params.
const DatabaseEntry XtrsvHalf = {
|
||||
"Xtrsv", Precision::kHalf, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Xtrsv" for Precision::kSingle.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): this Params list appears to hold 13 values, one fewer than the 14 listed by the
// "Invert" and "TrsvRoutine" entries -- confirm the intended width of Params.
const DatabaseEntry XtrsvSingle = {
|
||||
"Xtrsv", Precision::kSingle, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Xtrsv" for Precision::kComplexSingle.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): this Params list appears to hold 13 values, one fewer than the 14 listed by the
// "Invert" and "TrsvRoutine" entries -- confirm the intended width of Params.
const DatabaseEntry XtrsvComplexSingle = {
|
||||
"Xtrsv", Precision::kComplexSingle, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Xtrsv" for Precision::kDouble.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): this Params list appears to hold 13 values, one fewer than the 14 listed by the
// "Invert" and "TrsvRoutine" entries -- confirm the intended width of Params.
const DatabaseEntry XtrsvDouble = {
|
||||
"Xtrsv", Precision::kDouble, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
|
||||
// Tuning-database entry named "Xtrsv" for Precision::kComplexDouble.
// Declares a single tuning parameter, TRSV_BLOCK_SIZE, and provides only a catch-all group
// (kDeviceTypeAll / "default") whose Params list starts with 32.
// NOTE(review): this Params list appears to hold 13 values, one fewer than the 14 listed by the
// "Invert" and "TrsvRoutine" entries -- confirm the intended width of Params.
const DatabaseEntry XtrsvComplexDouble = {
|
||||
"Xtrsv", Precision::kComplexDouble, {"TRSV_BLOCK_SIZE"}, {
|
||||
{ // Default
|
||||
kDeviceTypeAll, "default", {
|
||||
{ "default", { { kDeviceNameDefault, Params{ 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } },
|
||||
}
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
// =================================================================================================
|
||||
} // namespace database
|
||||
} // namespace clblast
|
|
@ -22,7 +22,7 @@ namespace clblast {
|
|||
// Constructor: forwards to base class constructor
|
||||
template <typename T>
|
||||
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
|
||||
Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "Xtrsv"}, PrecisionValue<T>(), {}, {
|
||||
Routine(queue, event, name, {"Xgemv", "XgemvFast", "XgemvFastRot", "TrsvRoutine"}, PrecisionValue<T>(), {}, {
|
||||
#include "../../kernels/level2/xgemv.opencl"
|
||||
#include "../../kernels/level2/xgemv_fast.opencl"
|
||||
#include "../../kernels/level2/xtrsv.opencl"
|
||||
|
|
|
@ -59,7 +59,7 @@ void TuneXtrsv(int argc, char* argv[]) {
|
|||
|
||||
// Values for the block size
|
||||
const auto from = size_t{8};
|
||||
const auto to = size_t{64 + 1};
|
||||
const auto to = size_t{32 + 1};
|
||||
const auto step = size_t{8};
|
||||
|
||||
// OpenCL initialisation
|
||||
|
|
Loading…
Reference in New Issue