TBMV/TPMV/TRSV: Use the minimum x buffer size for copying to a temp buffer (#461)
parent
4f24d92730
commit
d94d086d6f
|
@ -1,6 +1,4 @@
|
|||
Development version (next version)
|
||||
- Fixes a minor issue with the expected input buffer size in the TRMV routine
|
||||
- Fixes two small issues in the plotting script
|
||||
- Modifications to improve performance on Qualcomm Adreno GPUs:
|
||||
* Unique database entries for specific Adreno devices
|
||||
* Toggle OpenCL kernel compilation options for Adreno
|
||||
|
@ -9,6 +7,8 @@ Development version (next version)
|
|||
- Fixed a bug in XAMAX/XAMIN routines related to inadvertently including the increment and offset in the result
|
||||
- Fixed a bug in XAMAX/XAMIN routines that would cause only the real part of a complex number to be taken into account
|
||||
- Fixed a bug that caused tests to not properly do integer-output testing (for XAMAX/XAMIN)
|
||||
- Fixes a minor issue with the expected input buffer size in the TRMV/TBMV/TPMV/TRSV routines
|
||||
- Fixes two small issues in the plotting script
|
||||
- Fixed a documentation bug in the 'ld' requirements
|
||||
- Added tuned parameters for various devices (see doc/tuning.md)
|
||||
|
||||
|
|
|
@ -36,8 +36,9 @@ void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
|
|||
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
|
||||
// Creates a copy of X: a temporary scratch buffer
|
||||
auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
|
||||
x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
|
||||
const auto x_size = (1 + (n - 1) * x_inc) + x_offset;
|
||||
auto scratch_buffer = Buffer<T>(context_, x_size);
|
||||
x_buffer.CopyTo(queue_, x_size, scratch_buffer);
|
||||
|
||||
// The data is either in the upper or lower triangle
|
||||
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
|
||||
|
|
|
@ -36,8 +36,9 @@ void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
|
|||
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
|
||||
|
||||
// Creates a copy of X: a temporary scratch buffer
|
||||
auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
|
||||
x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
|
||||
const auto x_size = (1 + (n - 1) * x_inc) + x_offset;
|
||||
auto scratch_buffer = Buffer<T>(context_, x_size);
|
||||
x_buffer.CopyTo(queue_, x_size, scratch_buffer);
|
||||
|
||||
// The data is either in the upper or lower triangle
|
||||
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
|
||||
|
|
|
@ -99,7 +99,7 @@ void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle,
|
|||
// TODO: Make x with 0 offset and unit increment by creating custom copy-to and copy-from kernels
|
||||
const auto x_offset = b_offset;
|
||||
const auto x_inc = b_inc;
|
||||
const auto x_size = n*x_inc + x_offset;
|
||||
const auto x_size = (1 + (n - 1) * x_inc) + x_offset;
|
||||
auto x_buffer = Buffer<T>(context_, x_size);
|
||||
b_buffer.CopyTo(queue_, x_size, x_buffer);
|
||||
|
||||
|
|
Loading…
Reference in New Issue