ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (llama/4787)

pull/1753/head^2
Konstantin Zhuravlyov 2024-01-07 01:52:42 -05:00 committed by Georgi Gerganov
parent c46a74a19d
commit 2865e4710b
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
1 changed file with 1 addition and 1 deletion

View File

@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
c = __builtin_amdgcn_sdot4(a, b, c, false);
-#elif defined(__gfx1100__)
+#elif defined(RDNA3)
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
#elif defined(__gfx1010__) || defined(__gfx900__)
int tmp1;