From abb4d5ab324878337853685d1fdb705b913deeb4 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 24 Nov 2017 21:16:54 +0100 Subject: [PATCH] Added tuning results for ARM Mali T760 GPU --- README.md | 1 + src/database/kernels/copy/copy_16.hpp | 8 ++++++++ src/database/kernels/copy/copy_32.hpp | 3 ++- src/database/kernels/copy/copy_3232.hpp | 10 +++++++++- src/database/kernels/copy/copy_64.hpp | 5 +++-- src/database/kernels/copy/copy_6464.hpp | 3 ++- src/database/kernels/gemm_routine/gemm_routine_32.hpp | 10 +++++++++- src/database/kernels/pad/pad_16.hpp | 10 +++++++++- src/database/kernels/pad/pad_32.hpp | 3 ++- src/database/kernels/pad/pad_3232.hpp | 3 ++- src/database/kernels/pad/pad_64.hpp | 5 +++-- src/database/kernels/pad/pad_6464.hpp | 1 + src/database/kernels/padtranspose/padtranspose_16.hpp | 8 ++++++++ src/database/kernels/padtranspose/padtranspose_32.hpp | 3 ++- .../kernels/padtranspose/padtranspose_3232.hpp | 1 + src/database/kernels/padtranspose/padtranspose_64.hpp | 3 ++- .../kernels/padtranspose/padtranspose_6464.hpp | 3 ++- src/database/kernels/transpose/transpose_16.hpp | 8 ++++++++ src/database/kernels/transpose/transpose_32.hpp | 3 ++- src/database/kernels/transpose/transpose_3232.hpp | 1 + src/database/kernels/transpose/transpose_64.hpp | 1 + src/database/kernels/transpose/transpose_6464.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_16.hpp | 10 +++++++++- src/database/kernels/xaxpy/xaxpy_32.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_3232.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_64.hpp | 3 ++- src/database/kernels/xaxpy/xaxpy_6464.hpp | 3 ++- src/database/kernels/xdot/xdot_16.hpp | 8 ++++++++ src/database/kernels/xdot/xdot_32.hpp | 8 ++++++++ src/database/kernels/xdot/xdot_3232.hpp | 8 ++++++++ src/database/kernels/xdot/xdot_64.hpp | 8 ++++++++ src/database/kernels/xdot/xdot_6464.hpp | 8 ++++++++ src/database/kernels/xgemm/xgemm_16.hpp | 10 +++++++++- src/database/kernels/xgemm/xgemm_32.hpp | 4 ++-- src/database/kernels/xgemm/xgemm_3232.hpp | 4 ++-- src/database/kernels/xgemm/xgemm_64.hpp | 4 ++-- src/database/kernels/xgemm/xgemm_6464.hpp | 6 +++--- src/database/kernels/xgemm_direct/xgemm_direct_16.hpp | 8 ++++++++ src/database/kernels/xgemm_direct/xgemm_direct_32.hpp | 8 ++++++++ .../kernels/xgemm_direct/xgemm_direct_3232.hpp | 8 ++++++++ src/database/kernels/xgemm_direct/xgemm_direct_64.hpp | 10 +++++++++- .../kernels/xgemm_direct/xgemm_direct_6464.hpp | 8 ++++++++ src/database/kernels/xgemv/xgemv_32.hpp | 8 ++++++++ src/database/kernels/xgemv/xgemv_3232.hpp | 8 ++++++++ src/database/kernels/xgemv/xgemv_64.hpp | 10 +++++++++- src/database/kernels/xgemv/xgemv_6464.hpp | 8 ++++++++ src/database/kernels/xgemv_fast/xgemv_fast_32.hpp | 8 ++++++++ src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp | 8 ++++++++ src/database/kernels/xgemv_fast/xgemv_fast_64.hpp | 8 ++++++++ src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp | 8 ++++++++ .../kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp | 8 ++++++++ .../kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp | 8 ++++++++ .../kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp | 8 ++++++++ .../kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp | 10 +++++++++- src/database/kernels/xger/xger_16.hpp | 8 ++++++++ src/database/kernels/xger/xger_32.hpp | 3 ++- src/database/kernels/xger/xger_3232.hpp | 5 +++-- src/database/kernels/xger/xger_64.hpp | 5 +++-- src/database/kernels/xger/xger_6464.hpp | 3 ++- 59 files changed, 312 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 6c27af51..b255c09e 100644 --- a/README.md +++ b/README.md @@ -173,6 +173,7 @@ The CLBlast library is already tuned for the most commonly used OpenCL devices a - Core i7-6770HQ * Other devices: - ARM Mali-T628 GPU + - ARM Mali-T760 GPU - Qualcomm Adreno 330 GPU - Intel MIC diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp index faf6d2bc..818c4c8c 100644 --- a/src/database/kernels/copy/copy_16.hpp +++ b/src/database/kernels/copy/copy_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry CopyHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 32, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp index 0c6b54e9..7ab20459 100644 --- a/src/database/kernels/copy/copy_32.hpp +++ b/src/database/kernels/copy/copy_32.hpp @@ -60,7 +60,8 @@ const DatabaseEntry CopySingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp index d421eba3..2fc8156d 100644 --- a/src/database/kernels/copy/copy_3232.hpp +++ b/src/database/kernels/copy/copy_3232.hpp @@ -56,6 +56,14 @@ const DatabaseEntry CopyComplexSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { @@ -145,7 +153,7 @@ const DatabaseEntry CopyComplexSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp index bc1b752d..882cab5a 100644 --- a/src/database/kernels/copy/copy_64.hpp +++ b/src/database/kernels/copy/copy_64.hpp @@ -52,7 +52,8 @@ const DatabaseEntry CopyDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 16, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 16, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -125,7 +126,7 @@ const DatabaseEntry CopyDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp index 27b6ded4..b774225b 100644 --- a/src/database/kernels/copy/copy_6464.hpp +++ b/src/database/kernels/copy/copy_6464.hpp @@ -52,6 +52,7 @@ const DatabaseEntry CopyComplexDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } @@ -125,7 +126,7 @@ const DatabaseEntry CopyComplexDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/gemm_routine/gemm_routine_32.hpp b/src/database/kernels/gemm_routine/gemm_routine_32.hpp index 8a300444..37b52b20 100644 --- a/src/database/kernels/gemm_routine/gemm_routine_32.hpp +++ b/src/database/kernels/gemm_routine/gemm_routine_32.hpp @@ -12,6 +12,14 @@ namespace database { const DatabaseEntry GemmRoutineSingle = { "GemmRoutine", Precision::kSingle, {"XGEMM_MIN_INDIRECT_SIZE"}, { + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { @@ -47,7 +55,7 @@ const DatabaseEntry GemmRoutineSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 896, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp index 89684314..3c24fdc9 100644 --- a/src/database/kernels/pad/pad_16.hpp +++ b/src/database/kernels/pad/pad_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry PadHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { @@ -43,7 +51,7 @@ const DatabaseEntry PadHalf = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp index 7b7cbb50..6a06d314 100644 --- a/src/database/kernels/pad/pad_32.hpp +++ b/src/database/kernels/pad/pad_32.hpp @@ -60,7 +60,8 @@ const DatabaseEntry PadSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp index 20fabcb7..c4b8eeec 100644 --- a/src/database/kernels/pad/pad_3232.hpp +++ b/src/database/kernels/pad/pad_3232.hpp @@ -60,7 +60,8 @@ const DatabaseEntry PadComplexSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp index a6960700..be3dc81b 100644 --- a/src/database/kernels/pad/pad_64.hpp +++ b/src/database/kernels/pad/pad_64.hpp @@ -52,7 +52,8 @@ const DatabaseEntry PadDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 16, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -125,7 +126,7 @@ const DatabaseEntry PadDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 32, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp index 0e0d6bad..92e5a194 100644 --- a/src/database/kernels/pad/pad_6464.hpp +++ b/src/database/kernels/pad/pad_6464.hpp @@ -52,6 +52,7 @@ const DatabaseEntry PadComplexDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp index ea09a062..9154d661 100644 --- a/src/database/kernels/padtranspose/padtranspose_16.hpp +++ b/src/database/kernels/padtranspose/padtranspose_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry PadtransposeHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 1, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp index 05cb6562..97c89213 100644 --- a/src/database/kernels/padtranspose/padtranspose_32.hpp +++ b/src/database/kernels/padtranspose/padtranspose_32.hpp @@ -60,7 +60,8 @@ const DatabaseEntry PadtransposeSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp index 570143bc..6e8bce8b 100644 --- a/src/database/kernels/padtranspose/padtranspose_3232.hpp +++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp @@ -60,6 +60,7 @@ const DatabaseEntry PadtransposeComplexSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp index 8b422502..219ca6a9 100644 --- a/src/database/kernels/padtranspose/padtranspose_64.hpp +++ b/src/database/kernels/padtranspose/padtranspose_64.hpp @@ -52,7 +52,8 @@ const DatabaseEntry PadtransposeDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp index 94c957fa..58bd2d60 100644 --- a/src/database/kernels/padtranspose/padtranspose_6464.hpp +++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp @@ -52,7 +52,8 @@ const DatabaseEntry PadtransposeComplexDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp index e63102d1..2b24e192 100644 --- a/src/database/kernels/transpose/transpose_16.hpp +++ b/src/database/kernels/transpose/transpose_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry TransposeHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp index 52f19dbb..4f2b16e6 100644 --- a/src/database/kernels/transpose/transpose_32.hpp +++ b/src/database/kernels/transpose/transpose_32.hpp @@ -60,7 +60,8 @@ const DatabaseEntry TransposeSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp index 3c013a46..1127fab2 100644 --- a/src/database/kernels/transpose/transpose_3232.hpp +++ b/src/database/kernels/transpose/transpose_3232.hpp @@ -60,6 +60,7 @@ const DatabaseEntry TransposeComplexSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp index 90560dfc..a7a808ed 100644 --- a/src/database/kernels/transpose/transpose_64.hpp +++ b/src/database/kernels/transpose/transpose_64.hpp @@ -52,6 +52,7 @@ const DatabaseEntry TransposeDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp index c02d424d..75d0c85b 100644 --- a/src/database/kernels/transpose/transpose_6464.hpp +++ b/src/database/kernels/transpose/transpose_6464.hpp @@ -52,7 +52,8 @@ const DatabaseEntry TransposeComplexDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp index d102bb52..fa91ae9f 100644 --- a/src/database/kernels/xaxpy/xaxpy_16.hpp +++ b/src/database/kernels/xaxpy/xaxpy_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry XaxpyHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { @@ -43,7 +51,7 @@ const DatabaseEntry XaxpyHalf = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp index 4e479193..bda415bd 100644 --- a/src/database/kernels/xaxpy/xaxpy_32.hpp +++ b/src/database/kernels/xaxpy/xaxpy_32.hpp @@ -60,7 +60,8 @@ const DatabaseEntry XaxpySingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp index 959de7b8..fec34fef 100644 --- a/src/database/kernels/xaxpy/xaxpy_3232.hpp +++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp @@ -60,7 +60,8 @@ const DatabaseEntry XaxpyComplexSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 1, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp index 232498cc..53afe2f1 100644 --- a/src/database/kernels/xaxpy/xaxpy_64.hpp +++ b/src/database/kernels/xaxpy/xaxpy_64.hpp @@ -52,7 +52,8 @@ const DatabaseEntry XaxpyDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp index 1ec94388..44f73b3d 100644 --- a/src/database/kernels/xaxpy/xaxpy_6464.hpp +++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp @@ -52,7 +52,8 @@ const DatabaseEntry XaxpyComplexDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp index ba9307dd..5422f88d 100644 --- a/src/database/kernels/xdot/xdot_16.hpp +++ b/src/database/kernels/xdot/xdot_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry XdotHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp index b18de232..014ece99 100644 --- a/src/database/kernels/xdot/xdot_32.hpp +++ b/src/database/kernels/xdot/xdot_32.hpp @@ -52,6 +52,14 @@ const DatabaseEntry XdotSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp index bcfff5bf..6679bcdf 100644 --- a/src/database/kernels/xdot/xdot_3232.hpp +++ b/src/database/kernels/xdot/xdot_3232.hpp @@ -52,6 +52,14 @@ const DatabaseEntry XdotComplexSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp index 26c774d0..d4143ddb 100644 --- a/src/database/kernels/xdot/xdot_64.hpp +++ b/src/database/kernels/xdot/xdot_64.hpp @@ -44,6 +44,14 @@ const DatabaseEntry XdotDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp index 667cc830..84b05560 100644 --- a/src/database/kernels/xdot/xdot_6464.hpp +++ b/src/database/kernels/xdot/xdot_6464.hpp @@ -44,6 +44,14 @@ const DatabaseEntry XdotComplexDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp index 32562415..e2c806d7 100644 --- a/src/database/kernels/xgemm/xgemm_16.hpp +++ b/src/database/kernels/xgemm/xgemm_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry XgemmHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 32, 2, 8, 16, 128, 8, 8, 32, 0, 1, 0, 1, 8, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 16, 128, 8, 8, 32, 0, 1, 0, 1, 8, 4 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { @@ -34,7 +42,7 @@ const DatabaseEntry XgemmHalf = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp index 8fba891f..28b0e11b 100644 --- a/src/database/kernels/xgemm/xgemm_32.hpp +++ b/src/database/kernels/xgemm/xgemm_32.hpp @@ -59,8 +59,8 @@ const DatabaseEntry XgemmSingle = { { // ARM GPUs kDeviceTypeGPU, "ARM", { { "default", { - { Name{"Mali-T628 "}, Params{ 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, - { kDeviceNameDefault , Params{ 16, 2, 8, 8, 64, 8, 16, 16, 0, 0, 1, 1, 8, 1 } }, + { Name{"Mali-T760 "}, Params{ 32, 2, 8, 8, 64, 8, 16, 16, 0, 1, 0, 0, 8, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 8, 16, 16, 0, 1, 0, 0, 8, 1 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp index 50699fea..95dd4585 100644 --- a/src/database/kernels/xgemm/xgemm_3232.hpp +++ b/src/database/kernels/xgemm/xgemm_3232.hpp @@ -59,8 +59,8 @@ const DatabaseEntry XgemmComplexSingle = { { // ARM GPUs kDeviceTypeGPU, "ARM", { { "default", { - { Name{"Mali-T628 "}, Params{ 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, - { kDeviceNameDefault , Params{ 16, 2, 8, 16, 128, 16, 8, 128, 0, 0, 0, 1, 8, 1 } }, + { Name{"Mali-T760 "}, Params{ 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, + { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp index e4671455..35b17702 100644 --- a/src/database/kernels/xgemm/xgemm_64.hpp +++ b/src/database/kernels/xgemm/xgemm_64.hpp @@ -51,8 +51,8 @@ const DatabaseEntry XgemmDouble = { { // ARM GPUs kDeviceTypeGPU, "ARM", { { "default", { - { Name{"Mali-T628 "}, Params{ 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, - { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 8, 8, 16, 0, 1, 1, 0, 8, 2 } }, + { Name{"Mali-T760 "}, Params{ 16, 2, 8, 8, 32, 16, 16, 16, 1, 1, 1, 0, 4, 1 } }, + { kDeviceNameDefault , Params{ 16, 2, 8, 8, 32, 16, 16, 16, 1, 1, 1, 0, 4, 1 } }, } }, } }, diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp index 2d304ce6..9e392aec 100644 --- a/src/database/kernels/xgemm/xgemm_6464.hpp +++ b/src/database/kernels/xgemm/xgemm_6464.hpp @@ -51,8 +51,8 @@ const DatabaseEntry XgemmComplexDouble = { { // ARM GPUs kDeviceTypeGPU, "ARM", { { "default", { - { Name{"Mali-T628 "}, Params{ 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, - { kDeviceNameDefault , Params{ 16, 2, 8, 8, 64, 32, 8, 64, 0, 0, 1, 0, 8, 1 } }, + { Name{"Mali-T760 "}, Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } }, } }, } }, @@ -124,7 +124,7 @@ const DatabaseEntry XgemmComplexDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 2 } }, + { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 8, 8, 16, 1, 1, 0, 0, 1, 2 } }, } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp index 49051b7f..b83382b8 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry XgemmDirectHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp index fee0ac24..7f1cbd10 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp @@ -40,6 +40,14 @@ const DatabaseEntry XgemmDirectSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp index cf5f0fb1..7c3c5a14 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp @@ -36,6 +36,14 @@ const DatabaseEntry XgemmDirectComplexSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp index 87e4d014..54e53583 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp @@ -32,6 +32,14 @@ const DatabaseEntry XgemmDirectDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { @@ -77,7 +85,7 @@ const DatabaseEntry XgemmDirectDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp index ba861e51..c6356469 100644 --- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp +++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp @@ -32,6 +32,14 @@ const DatabaseEntry XgemmDirectComplexDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp index 191a5bd3..6252e2e1 100644 --- a/src/database/kernels/xgemv/xgemv_32.hpp +++ b/src/database/kernels/xgemv/xgemv_32.hpp @@ -56,6 +56,14 @@ const DatabaseEntry XgemvSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp index a3639715..c8c1c749 100644 --- a/src/database/kernels/xgemv/xgemv_3232.hpp +++ b/src/database/kernels/xgemv/xgemv_3232.hpp @@ -56,6 +56,14 @@ const DatabaseEntry XgemvComplexSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp index ccff6f8a..a314e7ac 100644 --- a/src/database/kernels/xgemv/xgemv_64.hpp +++ b/src/database/kernels/xgemv/xgemv_64.hpp @@ -48,6 +48,14 @@ const DatabaseEntry XgemvDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { @@ -116,7 +124,7 @@ const DatabaseEntry XgemvDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp index 5d68ac23..835c7da1 100644 --- a/src/database/kernels/xgemv/xgemv_6464.hpp +++ b/src/database/kernels/xgemv/xgemv_6464.hpp @@ -48,6 +48,14 @@ const DatabaseEntry XgemvComplexDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp index 3e1dfedd..c0a1388f 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp @@ -56,6 +56,14 @@ const DatabaseEntry XgemvFastSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp index 634b8978..0348d0f2 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp @@ -56,6 +56,14 @@ const DatabaseEntry XgemvFastComplexSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp index 41114d75..be274fee 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp @@ -48,6 +48,14 @@ const DatabaseEntry XgemvFastDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp index b294bc37..cb81869f 100644 --- a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp +++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp @@ -48,6 +48,14 @@ const DatabaseEntry XgemvFastComplexDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp index 3c2336be..0b937e40 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp @@ -40,6 +40,14 @@ const DatabaseEntry XgemvFastRotSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp index eb8f0ba8..68509d0c 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp @@ -40,6 +40,14 @@ const DatabaseEntry XgemvFastRotComplexSingle = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 1, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp index a08401d2..af857e0e 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp @@ -32,6 +32,14 @@ const DatabaseEntry XgemvFastRotDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 2, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp index e9956eac..de661153 100644 --- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp +++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp @@ -32,6 +32,14 @@ const DatabaseEntry XgemvFastRotComplexDouble = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 1, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel CPUs kDeviceTypeCPU, "Intel", { { "default", { @@ -67,7 +75,7 @@ const DatabaseEntry XgemvFastRotComplexDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 4, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 2, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp index dae1a675..dee3be5c 100644 --- a/src/database/kernels/xger/xger_16.hpp +++ b/src/database/kernels/xger/xger_16.hpp @@ -23,6 +23,14 @@ const DatabaseEntry XgerHalf = { } }, } }, + { // ARM GPUs + kDeviceTypeGPU, "ARM", { + { "default", { + { Name{"Mali-T760 "}, Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + } }, + } + }, { // Intel GPUs kDeviceTypeGPU, "Intel", { { "default", { diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp index 69c6ae02..31d996e2 100644 --- a/src/database/kernels/xger/xger_32.hpp +++ b/src/database/kernels/xger/xger_32.hpp @@ -60,7 +60,8 @@ const DatabaseEntry XgerSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp index f1e0da07..b7c83659 100644 --- a/src/database/kernels/xger/xger_3232.hpp +++ b/src/database/kernels/xger/xger_3232.hpp @@ -60,7 +60,8 @@ const DatabaseEntry XgerComplexSingle = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -138,7 +139,7 @@ const DatabaseEntry XgerComplexSingle = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp index a3d16692..d67be672 100644 --- a/src/database/kernels/xger/xger_64.hpp +++ b/src/database/kernels/xger/xger_64.hpp @@ -52,7 +52,8 @@ const DatabaseEntry XgerDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, @@ -109,7 +110,7 @@ const DatabaseEntry XgerDouble = { { // Default kDeviceTypeAll, "default", { { "default", { - { kDeviceNameDefault , Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } }, diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp index 48b1f629..59759994 100644 --- a/src/database/kernels/xger/xger_6464.hpp +++ b/src/database/kernels/xger/xger_6464.hpp @@ -52,7 +52,8 @@ const DatabaseEntry XgerComplexDouble = { kDeviceTypeGPU, "ARM", { { "default", { { Name{"Mali-T628 "}, Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, - { kDeviceNameDefault , Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { Name{"Mali-T760 "}, Params{ 4, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, + { kDeviceNameDefault , Params{ 4, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, } }, } },