|
|
|
@ -120,7 +120,7 @@ StatusCode Swap(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xswap<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xswap<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSwap(n,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc,
|
|
|
|
|
Buffer<T>(y_buffer), y_offset, y_inc);
|
|
|
|
@ -158,7 +158,7 @@ StatusCode Scal(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xscal<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xscal<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoScal(n,
|
|
|
|
|
alpha,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -196,7 +196,7 @@ StatusCode Copy(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xcopy<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xcopy<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoCopy(n,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc,
|
|
|
|
|
Buffer<T>(y_buffer), y_offset, y_inc);
|
|
|
|
@ -235,7 +235,7 @@ StatusCode Axpy(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xaxpy<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xaxpy<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoAxpy(n,
|
|
|
|
|
alpha,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc,
|
|
|
|
@ -280,7 +280,7 @@ StatusCode Dot(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xdot<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xdot<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoDot(n,
|
|
|
|
|
Buffer<T>(dot_buffer), dot_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc,
|
|
|
|
@ -315,7 +315,7 @@ StatusCode Dotu(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xdotu<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xdotu<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoDotu(n,
|
|
|
|
|
Buffer<T>(dot_buffer), dot_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc,
|
|
|
|
@ -345,7 +345,7 @@ StatusCode Dotc(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xdotc<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xdotc<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoDotc(n,
|
|
|
|
|
Buffer<T>(dot_buffer), dot_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc,
|
|
|
|
@ -374,7 +374,7 @@ StatusCode Nrm2(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xnrm2<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xnrm2<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoNrm2(n,
|
|
|
|
|
Buffer<T>(nrm2_buffer), nrm2_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -412,7 +412,7 @@ StatusCode Asum(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xasum<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xasum<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoAsum(n,
|
|
|
|
|
Buffer<T>(asum_buffer), asum_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -450,7 +450,7 @@ StatusCode Sum(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsum<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsum<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSum(n,
|
|
|
|
|
Buffer<T>(sum_buffer), sum_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -488,7 +488,7 @@ StatusCode Amax(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xamax<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xamax<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoAmax(n,
|
|
|
|
|
Buffer<unsigned int>(imax_buffer), imax_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -526,7 +526,7 @@ StatusCode Amin(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xamin<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xamin<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoAmin(n,
|
|
|
|
|
Buffer<unsigned int>(imin_buffer), imin_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -564,7 +564,7 @@ StatusCode Max(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xmax<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xmax<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoMax(n,
|
|
|
|
|
Buffer<unsigned int>(imax_buffer), imax_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -602,7 +602,7 @@ StatusCode Min(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xmin<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xmin<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoMin(n,
|
|
|
|
|
Buffer<unsigned int>(imin_buffer), imin_offset,
|
|
|
|
|
Buffer<T>(x_buffer), x_offset, x_inc);
|
|
|
|
@ -648,7 +648,7 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xgemv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xgemv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoGemv(layout, a_transpose,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -714,7 +714,7 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xgbmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xgbmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoGbmv(layout, a_transpose,
|
|
|
|
|
m, n, kl, ku,
|
|
|
|
|
alpha,
|
|
|
|
@ -780,7 +780,7 @@ StatusCode Hemv(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xhemv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xhemv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHemv(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -822,7 +822,7 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xhbmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xhbmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHbmv(layout, triangle,
|
|
|
|
|
n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -864,7 +864,7 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xhpmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xhpmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHpmv(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -906,7 +906,7 @@ StatusCode Symv(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsymv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsymv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSymv(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -956,7 +956,7 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsbmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsbmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSbmv(layout, triangle,
|
|
|
|
|
n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -1006,7 +1006,7 @@ StatusCode Spmv(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xspmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xspmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSpmv(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1053,7 +1053,7 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xtrmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xtrmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoTrmv(layout, triangle, a_transpose, diagonal,
|
|
|
|
|
n,
|
|
|
|
|
Buffer<T>(a_buffer), a_offset, a_ld,
|
|
|
|
@ -1098,7 +1098,7 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xtbmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xtbmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoTbmv(layout, triangle, a_transpose, diagonal,
|
|
|
|
|
n, k,
|
|
|
|
|
Buffer<T>(a_buffer), a_offset, a_ld,
|
|
|
|
@ -1143,7 +1143,7 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xtpmv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xtpmv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoTpmv(layout, triangle, a_transpose, diagonal,
|
|
|
|
|
n,
|
|
|
|
|
Buffer<T>(ap_buffer), ap_offset,
|
|
|
|
@ -1188,7 +1188,7 @@ StatusCode Trsv(const Layout layout, const Triangle triangle, const Transpose a_
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xtrsv<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xtrsv<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoTrsv(layout, triangle, a_transpose, diagonal,
|
|
|
|
|
n,
|
|
|
|
|
Buffer<T>(a_buffer), a_offset, a_ld,
|
|
|
|
@ -1290,7 +1290,7 @@ StatusCode Ger(const Layout layout,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xger<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xger<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoGer(layout,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1335,7 +1335,7 @@ StatusCode Geru(const Layout layout,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xgeru<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xgeru<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoGeru(layout,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1373,7 +1373,7 @@ StatusCode Gerc(const Layout layout,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xgerc<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xgerc<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoGerc(layout,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1410,7 +1410,7 @@ StatusCode Her(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xher<std::complex<T>,T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xher<std::complex<T>,T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHer(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1444,7 +1444,7 @@ StatusCode Hpr(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xhpr<std::complex<T>,T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHpr(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1479,7 +1479,7 @@ StatusCode Her2(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xher2<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xher2<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHer2(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1517,7 +1517,7 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xhpr2<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xhpr2<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHpr2(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1554,7 +1554,7 @@ StatusCode Syr(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsyr<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsyr<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSyr(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1594,7 +1594,7 @@ StatusCode Spr(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xspr<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xspr<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSpr(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1635,7 +1635,7 @@ StatusCode Syr2(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsyr2<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsyr2<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSyr2(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1680,7 +1680,7 @@ StatusCode Spr2(const Layout layout, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xspr2<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xspr2<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSpr2(layout, triangle,
|
|
|
|
|
n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1730,7 +1730,7 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xgemm<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xgemm<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoGemm(layout, a_transpose, b_transpose,
|
|
|
|
|
m, n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -1796,7 +1796,7 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsymm<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsymm<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSymm(layout, side, triangle,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1862,7 +1862,7 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xhemm<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xhemm<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHemm(layout, side, triangle,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -1903,7 +1903,7 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsyrk<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsyrk<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSyrk(layout, triangle, a_transpose,
|
|
|
|
|
n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -1962,7 +1962,7 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xherk<std::complex<T>,T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xherk<std::complex<T>,T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHerk(layout, triangle, a_transpose,
|
|
|
|
|
n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -2001,7 +2001,7 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xsyr2k<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xsyr2k<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoSyr2k(layout, triangle, ab_transpose,
|
|
|
|
|
n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -2067,7 +2067,7 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xher2k<T,U>(queue_cpp, event);
|
|
|
|
|
auto routine = Xher2k<T,U>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoHer2k(layout, triangle, ab_transpose,
|
|
|
|
|
n, k,
|
|
|
|
|
alpha,
|
|
|
|
@ -2107,7 +2107,7 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xtrmm<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xtrmm<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoTrmm(layout, side, triangle, a_transpose, diagonal,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -2159,7 +2159,7 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xtrsm<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xtrsm<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoTrsm(layout, side, triangle, a_transpose, diagonal,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -2209,7 +2209,7 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xomatcopy<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xomatcopy<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoOmatcopy(layout, a_transpose,
|
|
|
|
|
m, n,
|
|
|
|
|
alpha,
|
|
|
|
@ -2259,7 +2259,7 @@ StatusCode Im2col(const size_t channels, const size_t height, const size_t width
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = Xim2col<T>(queue_cpp, event);
|
|
|
|
|
auto routine = Xim2col<T>(queue_cpp, nullptr);
|
|
|
|
|
routine.DoIm2col(channels, height, width, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
|
|
|
|
|
Buffer<T>(im_buffer), im_offset,
|
|
|
|
|
Buffer<T>(col_buffer), col_offset);
|
|
|
|
@ -2299,7 +2299,7 @@ StatusCode AxpyBatched(const size_t n,
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = XaxpyBatched<T>(queue_cpp, event);
|
|
|
|
|
auto routine = XaxpyBatched<T>(queue_cpp, nullptr);
|
|
|
|
|
auto alphas_cpp = std::vector<T>();
|
|
|
|
|
auto x_offsets_cpp = std::vector<size_t>();
|
|
|
|
|
auto y_offsets_cpp = std::vector<size_t>();
|
|
|
|
@ -2362,7 +2362,7 @@ StatusCode GemmBatched(const Layout layout, const Transpose a_transpose, const T
|
|
|
|
|
const auto context_cpp = Context(context);
|
|
|
|
|
const auto device_cpp = Device(device);
|
|
|
|
|
auto queue_cpp = Queue(context_cpp, device_cpp);
|
|
|
|
|
auto routine = XgemmBatched<T>(queue_cpp, event);
|
|
|
|
|
auto routine = XgemmBatched<T>(queue_cpp, nullptr);
|
|
|
|
|
auto alphas_cpp = std::vector<T>();
|
|
|
|
|
auto betas_cpp = std::vector<T>();
|
|
|
|
|
auto a_offsets_cpp = std::vector<size_t>();
|
|
|
|
|