Fixed a kernel/attribute order bug in the direct GEMM kernels: the __kernel qualifier now precedes __attribute__((reqd_work_group_size(...)))
parent
b06bc01da9
commit
55a802c63d
|
@ -19,8 +19,8 @@ R"(
|
|||
// =================================================================================================
|
||||
|
||||
// Direct version of the batched GEMM kernel with [A, B] = [non-transposed, non-transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
|
||||
const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
|
||||
const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
|
||||
|
@ -40,8 +40,8 @@ __kernel void XgemmDirectBatchedNN(const int kSizeM, const int kSizeN, const int
|
|||
}
|
||||
|
||||
// Direct version of the batched GEMM kernel with [A, B] = [non-transposed, transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
|
||||
const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
|
||||
const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
|
||||
|
@ -61,8 +61,8 @@ __kernel void XgemmDirectBatchedNT(const int kSizeM, const int kSizeN, const int
|
|||
}
|
||||
|
||||
// Direct version of the batched GEMM kernel with [A, B] = [transposed, non-transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
|
||||
const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
|
||||
const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
|
||||
|
@ -82,8 +82,8 @@ __kernel void XgemmDirectBatchedTN(const int kSizeM, const int kSizeN, const int
|
|||
}
|
||||
|
||||
// Direct version of the batched GEMM kernel with [A, B] = [transposed, transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectBatchedTT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const __constant real_arg* arg_alphas, const __constant real_arg* arg_betas,
|
||||
const __global realMD* restrict agm, const __constant int* a_offsets, const int a_ld,
|
||||
const __global realND* restrict bgm, const __constant int* b_offsets, const int b_ld,
|
||||
|
|
|
@ -147,8 +147,8 @@ INLINE_FUNC void XgemmDirect(const int kSizeM, const int kSizeN, const int kSize
|
|||
// =================================================================================================
|
||||
|
||||
// Direct version of the GEMM kernel with [A, B] = [non-transposed, non-transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectNN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectNN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const real_arg arg_alpha, const real_arg arg_beta,
|
||||
const __global realMD* restrict agm, const int a_offset, const int a_ld,
|
||||
const __global realND* restrict bgm, const int b_offset, const int b_ld,
|
||||
|
@ -162,8 +162,8 @@ __kernel void XgemmDirectNN(const int kSizeM, const int kSizeN, const int kSizeK
|
|||
}
|
||||
|
||||
// Direct version of the GEMM kernel with [A, B] = [non-transposed, transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectNT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectNT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const real_arg arg_alpha, const real_arg arg_beta,
|
||||
const __global realMD* restrict agm, const int a_offset, const int a_ld,
|
||||
const __global realND* restrict bgm, const int b_offset, const int b_ld,
|
||||
|
@ -177,8 +177,8 @@ __kernel void XgemmDirectNT(const int kSizeM, const int kSizeN, const int kSizeK
|
|||
}
|
||||
|
||||
// Direct version of the GEMM kernel with [A, B] = [transposed, non-transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectTN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectTN(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const real_arg arg_alpha, const real_arg arg_beta,
|
||||
const __global realMD* restrict agm, const int a_offset, const int a_ld,
|
||||
const __global realND* restrict bgm, const int b_offset, const int b_ld,
|
||||
|
@ -192,8 +192,8 @@ __kernel void XgemmDirectTN(const int kSizeM, const int kSizeN, const int kSizeK
|
|||
}
|
||||
|
||||
// Direct version of the GEMM kernel with [A, B] = [transposed, transposed]
|
||||
__attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
__kernel void XgemmDirectTT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
__kernel __attribute__((reqd_work_group_size(MDIMCD, NDIMCD, 1)))
|
||||
void XgemmDirectTT(const int kSizeM, const int kSizeN, const int kSizeK,
|
||||
const real_arg arg_alpha, const real_arg arg_beta,
|
||||
const __global realMD* restrict agm, const int a_offset, const int a_ld,
|
||||
const __global realND* restrict bgm, const int b_offset, const int b_ld,
|
||||
|
|
Loading…
Reference in New Issue