Fixed a vector-size-related bug in the CLBlast Netlib API

pull/125/head
Cedric Nugteren 2016-11-23 22:00:20 +01:00
parent fa42befcc1
commit 792cc8359f
2 changed files with 87 additions and 87 deletions

View File

@ -101,21 +101,21 @@ ROUTINES = [
[ # Level 1: vector-vector
Routine(False, True, "1", "rotg", T, [S,D], [], [], [], ["sa","sb","sc","ss"], ["1","1","1","1"], [], "", "Generate givens plane rotation", "", []),
Routine(False, True, "1", "rotmg", T, [S,D], [], [], ["sy1"], ["sd1","sd2","sx1","sparam"], ["1","1","1","1","1"], [], "", "Generate modified givens plane rotation", "", []),
Routine(False, True, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], ["n","n"], ["cos","sin"],"", "Apply givens plane rotation", "", []),
Routine(False, True, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], ["n","n","1"], [], "", "Apply modified givens plane rotation", "", []),
Routine(True, True, "1", "swap", T, [S,D,C,Z,H], ["n"], [], [], ["x","y"], ["n","n"], [], "", "Swap two vectors", "Interchanges _n_ elements of vectors _x_ and _y_.", []),
Routine(True, True, "1", "scal", T, [S,D,C,Z,H], ["n"], [], [], ["x"], ["n"], ["alpha"], "", "Vector scaling", "Multiplies _n_ elements of vector _x_ by a scalar constant _alpha_.", []),
Routine(True, True, "1", "copy", T, [S,D,C,Z,H], ["n"], [], ["x"], ["y"], ["n","n"], [], "", "Vector copy", "Copies the contents of vector _x_ into vector _y_.", []),
Routine(True, True, "1", "axpy", T, [S,D,C,Z,H], ["n"], [], ["x"], ["y"], ["n","n"], ["alpha"], "", "Vector-times-constant plus vector", "Performs the operation _y = alpha * x + y_, in which _x_ and _y_ are vectors and _alpha_ is a scalar constant.", []),
Routine(True, True, "1", "dot", T, [S,D,H], ["n"], [], ["x","y"], ["dot"], ["n","n","1"], [], "n", "Dot product of two vectors", "Multiplies _n_ elements of the vectors _x_ and _y_ element-wise and accumulates the results. The sum is stored in the _dot_ buffer.", []),
Routine(True, True, "1", "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], ["n","n","1"], [], "n", "Dot product of two complex vectors", "See the regular xDOT routine.", []),
Routine(True, True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], ["n","n","1"], [], "n", "Dot product of two complex vectors, one conjugated", "See the regular xDOT routine.", []),
Routine(True, True, "1", "nrm2", T, [S,D,Sc,Dz,H], ["n"], [], ["x"], ["nrm2"], ["n","1"], [], "2*n", "Euclidian norm of a vector", "Accumulates the square of _n_ elements in the _x_ vector and takes the square root. The resulting L2 norm is stored in the _nrm2_ buffer.", []),
Routine(True, True, "1", "asum", T, [S,D,Sc,Dz,H], ["n"], [], ["x"], ["asum"], ["n","1"], [], "n", "Absolute sum of values in a vector", "Accumulates the absolute value of _n_ elements in the _x_ vector. The results are stored in the _asum_ buffer.", []),
Routine(True, False, "1", "sum", T, [S,D,Sc,Dz,H], ["n"], [], ["x"], ["sum"], ["n","1"], [], "n", "Sum of values in a vector (non-BLAS function)", "Accumulates the values of _n_ elements in the _x_ vector. The results are stored in the _sum_ buffer. This routine is the non-absolute version of the xASUM BLAS routine.", []),
Routine(True, True, "1", "amax", T, [iS,iD,iC,iZ,iH], ["n"], [], ["x"], ["imax"], ["n","1"], [], "2*n", "Index of absolute maximum value in a vector", "Finds the index of the maximum of the absolute values in the _x_ vector. The resulting integer index is stored in the _imax_ buffer.", []),
Routine(True, False, "1", "max", T, [iS,iD,iC,iZ,iH], ["n"], [], ["x"], ["imax"], ["n","1"], [], "2*n", "Index of maximum value in a vector (non-BLAS function)", "Finds the index of the maximum of the values in the _x_ vector. The resulting integer index is stored in the _imax_ buffer. This routine is the non-absolute version of the IxAMAX BLAS routine.", []),
Routine(True, False, "1", "min", T, [iS,iD,iC,iZ,iH], ["n"], [], ["x"], ["imin"], ["n","1"], [], "2*n", "Index of minimum value in a vector (non-BLAS function)", "Finds the index of the minimum of the values in the _x_ vector. The resulting integer index is stored in the _imin_ buffer. This routine is the non-absolute minimum version of the IxAMAX BLAS routine.", []),
Routine(False, True, "1", "rot", T, [S,D], ["n"], [], [], ["x","y"], [xn,yn], ["cos","sin"],"", "Apply givens plane rotation", "", []),
Routine(False, True, "1", "rotm", T, [S,D], ["n"], [], [], ["x","y","sparam"], [xn,yn,"1"], [], "", "Apply modified givens plane rotation", "", []),
Routine(True, True, "1", "swap", T, [S,D,C,Z,H], ["n"], [], [], ["x","y"], [xn,yn], [], "", "Swap two vectors", "Interchanges _n_ elements of vectors _x_ and _y_.", []),
Routine(True, True, "1", "scal", T, [S,D,C,Z,H], ["n"], [], [], ["x"], [xn], ["alpha"], "", "Vector scaling", "Multiplies _n_ elements of vector _x_ by a scalar constant _alpha_.", []),
Routine(True, True, "1", "copy", T, [S,D,C,Z,H], ["n"], [], ["x"], ["y"], [xn,yn], [], "", "Vector copy", "Copies the contents of vector _x_ into vector _y_.", []),
Routine(True, True, "1", "axpy", T, [S,D,C,Z,H], ["n"], [], ["x"], ["y"], [xn,yn], ["alpha"], "", "Vector-times-constant plus vector", "Performs the operation _y = alpha * x + y_, in which _x_ and _y_ are vectors and _alpha_ is a scalar constant.", []),
Routine(True, True, "1", "dot", T, [S,D,H], ["n"], [], ["x","y"], ["dot"], [xn,yn,"1"], [], "n", "Dot product of two vectors", "Multiplies _n_ elements of the vectors _x_ and _y_ element-wise and accumulates the results. The sum is stored in the _dot_ buffer.", []),
Routine(True, True, "1", "dotu", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [xn,yn,"1"], [], "n", "Dot product of two complex vectors", "See the regular xDOT routine.", []),
Routine(True, True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [xn,yn,"1"], [], "n", "Dot product of two complex vectors, one conjugated", "See the regular xDOT routine.", []),
Routine(True, True, "1", "nrm2", T, [S,D,Sc,Dz,H], ["n"], [], ["x"], ["nrm2"], [xn,"1"], [], "2*n", "Euclidian norm of a vector", "Accumulates the square of _n_ elements in the _x_ vector and takes the square root. The resulting L2 norm is stored in the _nrm2_ buffer.", []),
Routine(True, True, "1", "asum", T, [S,D,Sc,Dz,H], ["n"], [], ["x"], ["asum"], [xn,"1"], [], "n", "Absolute sum of values in a vector", "Accumulates the absolute value of _n_ elements in the _x_ vector. The results are stored in the _asum_ buffer.", []),
Routine(True, False, "1", "sum", T, [S,D,Sc,Dz,H], ["n"], [], ["x"], ["sum"], [xn,"1"], [], "n", "Sum of values in a vector (non-BLAS function)", "Accumulates the values of _n_ elements in the _x_ vector. The results are stored in the _sum_ buffer. This routine is the non-absolute version of the xASUM BLAS routine.", []),
Routine(True, True, "1", "amax", T, [iS,iD,iC,iZ,iH], ["n"], [], ["x"], ["imax"], [xn,"1"], [], "2*n", "Index of absolute maximum value in a vector", "Finds the index of the maximum of the absolute values in the _x_ vector. The resulting integer index is stored in the _imax_ buffer.", []),
Routine(True, False, "1", "max", T, [iS,iD,iC,iZ,iH], ["n"], [], ["x"], ["imax"], [xn,"1"], [], "2*n", "Index of maximum value in a vector (non-BLAS function)", "Finds the index of the maximum of the values in the _x_ vector. The resulting integer index is stored in the _imax_ buffer. This routine is the non-absolute version of the IxAMAX BLAS routine.", []),
Routine(True, False, "1", "min", T, [iS,iD,iC,iZ,iH], ["n"], [], ["x"], ["imin"], [xn,"1"], [], "2*n", "Index of minimum value in a vector (non-BLAS function)", "Finds the index of the minimum of the values in the _x_ vector. The resulting integer index is stored in the _imin_ buffer. This routine is the non-absolute minimum version of the IxAMAX BLAS routine.", []),
],
[ # Level 2: matrix-vector
Routine(True, True, "2a", "gemv", T, [S,D,C,Z,H], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], [amn,xmn,ynm], ["alpha","beta"], "", "General matrix-vector multiplication", "Performs the operation _y = alpha * A * x + beta * y_, in which _x_ is an input vector, _y_ is an input and output vector, _A_ is an input matrix, and _alpha_ and _beta_ are scalars. The matrix _A_ can optionally be transposed before performing the operation.", [ald_m]),

View File

@ -192,8 +192,8 @@ void cblas_srot(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto y_buffer = clblast::Buffer<float>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<float*>(x));
@ -219,8 +219,8 @@ void cblas_drot(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto y_buffer = clblast::Buffer<double>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<double*>(x));
@ -247,8 +247,8 @@ void cblas_srotm(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto sparam_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto y_buffer = clblast::Buffer<float>(context, y_size);
@ -276,8 +276,8 @@ void cblas_drotm(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto sparam_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto y_buffer = clblast::Buffer<double>(context, y_size);
@ -306,8 +306,8 @@ void cblas_sswap(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto y_buffer = clblast::Buffer<float>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<float*>(x));
@ -329,8 +329,8 @@ void cblas_dswap(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto y_buffer = clblast::Buffer<double>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<double*>(x));
@ -352,8 +352,8 @@ void cblas_cswap(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto y_buffer = clblast::Buffer<float2>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<float2*>(x));
@ -375,8 +375,8 @@ void cblas_zswap(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto y_buffer = clblast::Buffer<double2>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<double2*>(x));
@ -401,7 +401,7 @@ void cblas_sscal(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = alpha;
const auto x_size = n;
const auto x_size = n * x_inc;
auto x_buffer = clblast::Buffer<float>(context, x_size);
x_buffer.Write(queue, x_size, reinterpret_cast<float*>(x));
auto queue_cl = queue();
@ -421,7 +421,7 @@ void cblas_dscal(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = alpha;
const auto x_size = n;
const auto x_size = n * x_inc;
auto x_buffer = clblast::Buffer<double>(context, x_size);
x_buffer.Write(queue, x_size, reinterpret_cast<double*>(x));
auto queue_cl = queue();
@ -441,7 +441,7 @@ void cblas_cscal(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = float2{reinterpret_cast<const float*>(alpha)[0], reinterpret_cast<const float*>(alpha)[1]};
const auto x_size = n;
const auto x_size = n * x_inc;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
x_buffer.Write(queue, x_size, reinterpret_cast<float2*>(x));
auto queue_cl = queue();
@ -461,7 +461,7 @@ void cblas_zscal(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = double2{reinterpret_cast<const double*>(alpha)[0], reinterpret_cast<const double*>(alpha)[1]};
const auto x_size = n;
const auto x_size = n * x_inc;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
x_buffer.Write(queue, x_size, reinterpret_cast<double2*>(x));
auto queue_cl = queue();
@ -482,8 +482,8 @@ void cblas_scopy(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto y_buffer = clblast::Buffer<float>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const float*>(x));
@ -504,8 +504,8 @@ void cblas_dcopy(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto y_buffer = clblast::Buffer<double>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const double*>(x));
@ -526,8 +526,8 @@ void cblas_ccopy(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto y_buffer = clblast::Buffer<float2>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const float2*>(x));
@ -548,8 +548,8 @@ void cblas_zcopy(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto y_buffer = clblast::Buffer<double2>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const double2*>(x));
@ -574,8 +574,8 @@ void cblas_saxpy(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = alpha;
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto y_buffer = clblast::Buffer<float>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const float*>(x));
@ -599,8 +599,8 @@ void cblas_daxpy(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = alpha;
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto y_buffer = clblast::Buffer<double>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const double*>(x));
@ -624,8 +624,8 @@ void cblas_caxpy(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = float2{reinterpret_cast<const float*>(alpha)[0], reinterpret_cast<const float*>(alpha)[1]};
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto y_buffer = clblast::Buffer<float2>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const float2*>(x));
@ -649,8 +649,8 @@ void cblas_zaxpy(const int n,
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto alpha_cpp = double2{reinterpret_cast<const double*>(alpha)[0], reinterpret_cast<const double*>(alpha)[1]};
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto y_buffer = clblast::Buffer<double2>(context, y_size);
x_buffer.Write(queue, x_size, reinterpret_cast<const double2*>(x));
@ -674,8 +674,8 @@ float cblas_sdot(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto dot_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto y_buffer = clblast::Buffer<float>(context, y_size);
@ -701,8 +701,8 @@ double cblas_ddot(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto dot_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto y_buffer = clblast::Buffer<double>(context, y_size);
@ -731,8 +731,8 @@ void cblas_cdotu_sub(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto dot_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto y_buffer = clblast::Buffer<float2>(context, y_size);
@ -757,8 +757,8 @@ void cblas_zdotu_sub(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto dot_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto y_buffer = clblast::Buffer<double2>(context, y_size);
@ -785,8 +785,8 @@ void cblas_cdotc_sub(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto dot_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto y_buffer = clblast::Buffer<float2>(context, y_size);
@ -811,8 +811,8 @@ void cblas_zdotc_sub(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto y_size = n;
const auto x_size = n * x_inc;
const auto y_size = n * y_inc;
const auto dot_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto y_buffer = clblast::Buffer<double2>(context, y_size);
@ -837,7 +837,7 @@ float cblas_snrm2(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto nrm2_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto nrm2_buffer = clblast::Buffer<float>(context, nrm2_size);
@ -859,7 +859,7 @@ double cblas_dnrm2(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto nrm2_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto nrm2_buffer = clblast::Buffer<double>(context, nrm2_size);
@ -881,7 +881,7 @@ float cblas_scnrm2(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto nrm2_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto nrm2_buffer = clblast::Buffer<float2>(context, nrm2_size);
@ -903,7 +903,7 @@ double cblas_dznrm2(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto nrm2_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto nrm2_buffer = clblast::Buffer<double2>(context, nrm2_size);
@ -927,7 +927,7 @@ float cblas_sasum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto asum_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto asum_buffer = clblast::Buffer<float>(context, asum_size);
@ -949,7 +949,7 @@ double cblas_dasum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto asum_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto asum_buffer = clblast::Buffer<double>(context, asum_size);
@ -971,7 +971,7 @@ float cblas_scasum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto asum_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto asum_buffer = clblast::Buffer<float2>(context, asum_size);
@ -993,7 +993,7 @@ double cblas_dzasum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto asum_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto asum_buffer = clblast::Buffer<double2>(context, asum_size);
@ -1017,7 +1017,7 @@ float cblas_ssum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto sum_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto sum_buffer = clblast::Buffer<float>(context, sum_size);
@ -1039,7 +1039,7 @@ double cblas_dsum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto sum_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto sum_buffer = clblast::Buffer<double>(context, sum_size);
@ -1061,7 +1061,7 @@ float cblas_scsum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto sum_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto sum_buffer = clblast::Buffer<float2>(context, sum_size);
@ -1083,7 +1083,7 @@ double cblas_dzsum(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto sum_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto sum_buffer = clblast::Buffer<double2>(context, sum_size);
@ -1107,7 +1107,7 @@ int cblas_isamax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1129,7 +1129,7 @@ int cblas_idamax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1151,7 +1151,7 @@ int cblas_icamax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1173,7 +1173,7 @@ int cblas_izamax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1197,7 +1197,7 @@ int cblas_ismax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1219,7 +1219,7 @@ int cblas_idmax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1241,7 +1241,7 @@ int cblas_icmax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1263,7 +1263,7 @@ int cblas_izmax(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imax_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto imax_buffer = clblast::Buffer<int>(context, imax_size);
@ -1287,7 +1287,7 @@ int cblas_ismin(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imin_size = 1;
auto x_buffer = clblast::Buffer<float>(context, x_size);
auto imin_buffer = clblast::Buffer<int>(context, imin_size);
@ -1309,7 +1309,7 @@ int cblas_idmin(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imin_size = 1;
auto x_buffer = clblast::Buffer<double>(context, x_size);
auto imin_buffer = clblast::Buffer<int>(context, imin_size);
@ -1331,7 +1331,7 @@ int cblas_icmin(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imin_size = 1;
auto x_buffer = clblast::Buffer<float2>(context, x_size);
auto imin_buffer = clblast::Buffer<int>(context, imin_size);
@ -1353,7 +1353,7 @@ int cblas_izmin(const int n,
auto device = get_device();
auto context = clblast::Context(device);
auto queue = clblast::Queue(context, device);
const auto x_size = n;
const auto x_size = n * x_inc;
const auto imin_size = 1;
auto x_buffer = clblast::Buffer<double2>(context, x_size);
auto imin_buffer = clblast::Buffer<int>(context, imin_size);