block/io: refactor padding

We have similar padding code in bdrv_co_pwritev,
bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify
it.

[Squashed in Vladimir's qemu-iotests 077 fix
--Stefan]

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com
Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
stable-4.2
Vladimir Sementsov-Ogievskiy 2019-06-04 19:15:05 +03:00 committed by Stefan Hajnoczi
parent f76889e7b9
commit 7a3f542fbd
1 changed files with 200 additions and 165 deletions

View File

@ -1415,28 +1415,177 @@ out:
}
/*
* Handle a read request in coroutine context
* Request padding
*
* |<---- align ----->| |<----- align ---->|
* |<- head ->|<------------- bytes ------------->|<-- tail -->|
* | | | | | |
* -*----------$-------*-------- ... --------*-----$------------*---
* | | | | | |
* | offset | | end |
* ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end)
* [buf ... ) [tail_buf )
*
* @buf is an aligned allocation needed to store @head and @tail paddings. @head
* is placed at the beginning of @buf and @tail at the @end.
*
* @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
* around tail, if tail exists.
*
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
size_t buf_len;
uint8_t *tail_buf;
size_t head;
size_t tail;
bool merge_reads;
QEMUIOVector local_qiov;
} BdrvRequestPadding;
static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
BdrvRequestPadding *pad)
{
uint64_t align = bs->bl.request_alignment;
size_t sum;
memset(pad, 0, sizeof(*pad));
pad->head = offset & (align - 1);
pad->tail = ((offset + bytes) & (align - 1));
if (pad->tail) {
pad->tail = align - pad->tail;
}
if ((!pad->head && !pad->tail) || !bytes) {
return false;
}
sum = pad->head + bytes + pad->tail;
pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align;
pad->buf = qemu_blockalign(bs, pad->buf_len);
pad->merge_reads = sum == pad->buf_len;
if (pad->tail) {
pad->tail_buf = pad->buf + pad->buf_len - align;
}
return true;
}
static int bdrv_padding_rmw_read(BdrvChild *child,
BdrvTrackedRequest *req,
BdrvRequestPadding *pad,
bool zero_middle)
{
QEMUIOVector local_qiov;
BlockDriverState *bs = child->bs;
uint64_t align = bs->bl.request_alignment;
int ret;
assert(req->serialising && pad->buf);
if (pad->head || pad->merge_reads) {
uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
if (pad->head) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
}
if (pad->merge_reads && pad->tail) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
}
ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
align, &local_qiov, 0);
if (ret < 0) {
return ret;
}
if (pad->head) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
}
if (pad->merge_reads && pad->tail) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
}
if (pad->merge_reads) {
goto zero_mem;
}
}
if (pad->tail) {
qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(
child, req,
req->overlap_offset + req->overlap_bytes - align,
align, align, &local_qiov, 0);
if (ret < 0) {
return ret;
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
}
zero_mem:
if (zero_middle) {
memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
}
return 0;
}
static void bdrv_padding_destroy(BdrvRequestPadding *pad)
{
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
}
}
/*
* bdrv_pad_request
*
* Exchange request parameters with padded request if needed. Don't include RMW
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
* All parameters except @bs are in-out: they represent original request at
* function call and padded (if padding needed) at function finish.
*
* Function always succeeds.
*/
static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov,
int64_t *offset, unsigned int *bytes,
BdrvRequestPadding *pad)
{
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
return false;
}
qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
*qiov, 0, *bytes,
pad->buf + pad->buf_len - pad->tail, pad->tail);
*bytes += pad->head + pad->tail;
*offset -= pad->head;
*qiov = &pad->local_qiov;
return true;
}
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
uint64_t align = bs->bl.request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
bool use_local_qiov = false;
BdrvRequestPadding pad;
int ret;
trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
if (!drv) {
return -ENOMEDIUM;
}
trace_bdrv_co_preadv(bs, offset, bytes, flags);
ret = bdrv_check_byte_request(bs, offset, bytes);
if (ret < 0) {
@ -1450,43 +1599,16 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
flags |= BDRV_REQ_COPY_ON_READ;
}
/* Align read if necessary by padding qiov */
if (offset & (align - 1)) {
head_buf = qemu_blockalign(bs, align);
qemu_iovec_init(&local_qiov, qiov->niov + 2);
qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
use_local_qiov = true;
bytes += offset & (align - 1);
offset = offset & ~(align - 1);
}
if ((offset + bytes) & (align - 1)) {
if (!use_local_qiov) {
qemu_iovec_init(&local_qiov, qiov->niov + 1);
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
use_local_qiov = true;
}
tail_buf = qemu_blockalign(bs, align);
qemu_iovec_add(&local_qiov, tail_buf,
align - ((offset + bytes) & (align - 1)));
bytes = ROUND_UP(bytes, align);
}
bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad);
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
ret = bdrv_aligned_preadv(child, &req, offset, bytes,
bs->bl.request_alignment,
qiov, flags);
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
if (use_local_qiov) {
qemu_iovec_destroy(&local_qiov);
qemu_vfree(head_buf);
qemu_vfree(tail_buf);
}
bdrv_padding_destroy(&pad);
return ret;
}
@ -1782,44 +1904,34 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
uint64_t align = bs->bl.request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
bool padding;
BdrvRequestPadding pad;
head_padding_bytes = offset & (align - 1);
tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
assert(flags & BDRV_REQ_ZERO_WRITE);
if (head_padding_bytes || tail_padding_bytes) {
buf = qemu_blockalign(bs, align);
qemu_iovec_init_buf(&local_qiov, buf, align);
}
if (head_padding_bytes) {
uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
/* RMW the unaligned part before head. */
padding = bdrv_init_padding(bs, offset, bytes, &pad);
if (padding) {
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
goto fail;
bdrv_padding_rmw_read(child, req, &pad, true);
if (pad.head || pad.merge_reads) {
int64_t aligned_offset = offset & ~(align - 1);
int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0 || pad.merge_reads) {
/* Error or all work is done */
goto out;
}
offset += write_bytes - pad.head;
bytes -= write_bytes - pad.head;
}
offset += zero_bytes;
bytes -= zero_bytes;
}
assert(!bytes || (offset & (align - 1)) == 0);
@ -1829,7 +1941,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
goto out;
}
bytes -= aligned_bytes;
offset += aligned_bytes;
@ -1837,26 +1949,17 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
assert(!bytes || (offset & (align - 1)) == 0);
if (bytes) {
assert(align == tail_padding_bytes + bytes);
/* RMW the unaligned part after tail. */
mark_request_serialising(req, align);
wait_serialising_requests(req);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(child, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
assert(align == pad.tail + bytes);
memset(buf, 0, bytes);
qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
qemu_vfree(buf);
return ret;
out:
bdrv_padding_destroy(&pad);
return ret;
}
/*
@ -1869,10 +1972,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
BlockDriverState *bs = child->bs;
BdrvTrackedRequest req;
uint64_t align = bs->bl.request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
bool use_local_qiov = false;
BdrvRequestPadding pad;
int ret;
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
@ -1899,86 +1999,21 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
goto out;
}
if (offset & (align - 1)) {
QEMUIOVector head_qiov;
if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) {
mark_request_serialising(&req, align);
wait_serialising_requests(&req);
head_buf = qemu_blockalign(bs, align);
qemu_iovec_init_buf(&head_qiov, head_buf, align);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
qemu_iovec_init(&local_qiov, qiov->niov + 2);
qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
use_local_qiov = true;
bytes += offset & (align - 1);
offset = offset & ~(align - 1);
/* We have read the tail already if the request is smaller
* than one aligned block.
*/
if (bytes < align) {
qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
bytes = align;
}
}
if ((offset + bytes) & (align - 1)) {
QEMUIOVector tail_qiov;
size_t tail_bytes;
bool waited;
mark_request_serialising(&req, align);
waited = wait_serialising_requests(&req);
assert(!waited || !use_local_qiov);
tail_buf = qemu_blockalign(bs, align);
qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
align, align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
if (!use_local_qiov) {
qemu_iovec_init(&local_qiov, qiov->niov + 1);
qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
use_local_qiov = true;
}
tail_bytes = (offset + bytes) & (align - 1);
qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
bytes = ROUND_UP(bytes, align);
bdrv_padding_rmw_read(child, &req, &pad, false);
}
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
qiov, flags);
fail:
bdrv_padding_destroy(&pad);
if (use_local_qiov) {
qemu_iovec_destroy(&local_qiov);
}
qemu_vfree(head_buf);
qemu_vfree(tail_buf);
out:
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
return ret;
}