From 45270ad8a86a80cca4c59dfa73d9a9ee0688d781 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sun, 11 Mar 2012 17:52:59 +0400 Subject: [PATCH 01/11] virtio-serial-bus: use correct lengths in control_out() message The original code has one length to process (cur_len), asks iov_to_buf() to convert another (len, which is actually max_len), and then processes yet another value (copied). The whole thing is very difficult to understand, even though it does the right thing. The iov_to_buf() conversion in this case will always return cur_len, because that is the length of the iovec it was asked to process, the size we asked to convert is the same or larger, and iov_to_buf() stops at the end of either the iovec or the buffer. Make the code saner by dropping `copied', which is always the same as `cur_len' and only invites questions. Signed-off-by: Michael Tokarev --- hw/virtio-serial-bus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 72287d10ce..1e477e5a33 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -454,7 +454,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) len = 0; buf = NULL; while (virtqueue_pop(vq, &elem)) { - size_t cur_len, copied; + size_t cur_len; cur_len = iov_size(elem.out_sg, elem.out_num); /* @@ -467,9 +467,9 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) buf = g_malloc(cur_len); len = cur_len; } - copied = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, len); + iov_to_buf(elem.out_sg, elem.out_num, buf, 0, cur_len); - handle_control_message(vser, buf, copied); + handle_control_message(vser, buf, cur_len); virtqueue_push(vq, &elem, 0); } g_free(buf); From dcf6f5e15ecee4f593eeacbe0591c1addc004d92 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sun, 11 Mar 2012 18:05:12 +0400 Subject: [PATCH 02/11] change iov_* function prototypes to be more appropriate Reorder the arguments to be more natural, readable and consistent with the other iov_* functions, and change the argument names, from: iov_from_buf(iov, iov_cnt, buf, iov_off, size) to iov_from_buf(iov, iov_cnt, offset, buf, bytes) The result reads as natural English: copy data to this `iov' vector with `iov_cnt' elements, starting at byte offset `offset', from memory buffer `buf', processing at most `bytes' bytes. (Try to read the original prototype this way.) Also change iov_clear() to the more general iov_memset() (it uses memset() internally anyway). While at it, add comments to the header file describing what the routines actually do. The patch only renames arguments in the header, but keeps the old names in the implementation; the next patch will touch the actual code to match. It might look excessive to pay so much attention to such small things, but we already have so many badly designed interfaces that the whole area has become confusing and error-prone. One example is the previous commit and the small discussion that emerged from it, whose outcome was that utility functions like these aren't well understood, leading to strange usage. That's why I paid quite some attention to this set of functions and a few others in subsequent patches.
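As a purely illustrative sketch (a hypothetical caller, not part of this patch), copying a small header into the front of an iovec would now be written as:

    uint8_t hdr[8];
    /* copy sizeof(hdr) bytes from hdr into the iovec, starting at byte offset 0 */
    iov_from_buf(iov, iov_cnt, 0, hdr, sizeof(hdr));

i.e. the destination (iov, iov_cnt, offset) comes first and the source buffer last, where the old prototype would have been called as iov_from_buf(iov, iov_cnt, hdr, 0, sizeof(hdr)).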
Signed-off-by: Michael Tokarev --- hw/rtl8139.c | 2 +- hw/usb/core.c | 6 +++--- hw/virtio-balloon.c | 4 ++-- hw/virtio-net.c | 4 ++-- hw/virtio-serial-bus.c | 6 +++--- iov.c | 14 +++++++------- iov.h | 44 ++++++++++++++++++++++++++++++++++++------ net.c | 2 +- 8 files changed, 57 insertions(+), 25 deletions(-) diff --git a/hw/rtl8139.c b/hw/rtl8139.c index eb22d04fad..8128b64a0f 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -1783,7 +1783,7 @@ static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size, if (iov) { buf2_size = iov_size(iov, 3); buf2 = g_malloc(buf2_size); - iov_to_buf(iov, 3, buf2, 0, buf2_size); + iov_to_buf(iov, 3, 0, buf2, buf2_size); buf = buf2; } diff --git a/hw/usb/core.c b/hw/usb/core.c index 0e02da7601..2641685a32 100644 --- a/hw/usb/core.c +++ b/hw/usb/core.c @@ -522,10 +522,10 @@ void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes) switch (p->pid) { case USB_TOKEN_SETUP: case USB_TOKEN_OUT: - iov_to_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes); + iov_to_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes); break; case USB_TOKEN_IN: - iov_from_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes); + iov_from_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes); break; default: fprintf(stderr, "%s: invalid pid: %x\n", __func__, p->pid); @@ -539,7 +539,7 @@ void usb_packet_skip(USBPacket *p, size_t bytes) assert(p->result >= 0); assert(p->result + bytes <= p->iov.size); if (p->pid == USB_TOKEN_IN) { - iov_clear(p->iov.iov, p->iov.niov, p->result, bytes); + iov_memset(p->iov.iov, p->iov.niov, p->result, 0, bytes); } p->result += bytes; } diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index 075ed87e37..23effa40a0 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -77,7 +77,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) size_t offset = 0; uint32_t pfn; - while (iov_to_buf(elem.out_sg, elem.out_num, &pfn, offset, 4) == 4) { + while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) { ram_addr_t pa; ram_addr_t addr; @@ -118,7 +118,7 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) */ reset_stats(s); - while (iov_to_buf(elem->out_sg, elem->out_num, &stat, offset, sizeof(stat)) + while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat)) == sizeof(stat)) { uint16_t tag = tswap16(stat.tag); uint64_t val = tswap64(stat.val); diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 3f190d417e..533aa3d0f3 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -656,8 +656,8 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_ } /* copy in packet. 
ugh */ - len = iov_from_buf(sg, elem.in_num, - buf + offset, 0, size - offset); + len = iov_from_buf(sg, elem.in_num, 0, + buf + offset, size - offset); total += len; offset += len; /* If buffers can't be merged, at this point we diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 1e477e5a33..728f218dcf 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -106,8 +106,8 @@ static size_t write_to_port(VirtIOSerialPort *port, break; } - len = iov_from_buf(elem.in_sg, elem.in_num, - buf + offset, 0, size - offset); + len = iov_from_buf(elem.in_sg, elem.in_num, 0, + buf + offset, size - offset); offset += len; virtqueue_push(vq, &elem, len); @@ -467,7 +467,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) buf = g_malloc(cur_len); len = cur_len; } - iov_to_buf(elem.out_sg, elem.out_num, buf, 0, cur_len); + iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len); handle_control_message(vser, buf, cur_len); virtqueue_push(vq, &elem, 0); diff --git a/iov.c b/iov.c index 0f964939d0..bc58cab05a 100644 --- a/iov.c +++ b/iov.c @@ -17,8 +17,8 @@ #include "iov.h" -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, - const void *buf, size_t iov_off, size_t size) +size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t iov_off, + const void *buf, size_t size) { size_t iovec_off, buf_off; unsigned int i; @@ -40,8 +40,8 @@ size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, return buf_off; } -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - void *buf, size_t iov_off, size_t size) +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t iov_off, + void *buf, size_t size) { uint8_t *ptr; size_t iovec_off, buf_off; @@ -65,8 +65,8 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, return buf_off; } -size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, size_t size) +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t iov_off, int fillc, size_t size) { size_t iovec_off, buf_off; unsigned int i; @@ -77,7 +77,7 @@ size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, if (iov_off < (iovec_off + iov[i].iov_len)) { size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - memset(iov[i].iov_base + (iov_off - iovec_off), 0, len); + memset(iov[i].iov_base + (iov_off - iovec_off), fillc, len); buf_off += len; iov_off += len; diff --git a/iov.h b/iov.h index 94d2f78284..0b4acf4a51 100644 --- a/iov.h +++ b/iov.h @@ -12,12 +12,44 @@ #include "qemu-common.h" -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, - const void *buf, size_t iov_off, size_t size); -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - void *buf, size_t iov_off, size_t size); +/** + * count and return data size, in bytes, of an iovec + * starting at `iov' of `iov_cnt' number of elements. + */ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); -size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, size_t size); + +/** + * Copy from single continuous buffer to scatter-gather vector of buffers + * (iovec) and back like memcpy() between two continuous memory regions. + * Data in single continuous buffer starting at address `buf' and + * `bytes' bytes long will be copied to/from an iovec `iov' with + * `iov_cnt' number of elements, starting at byte position `offset' + * within the iovec. 
If the iovec does not contain enough space, + * only part of data will be copied, up to the end of the iovec. + * Number of bytes actually copied will be returned, which is + * min(bytes, iov_size(iov)-offset) + */ +size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes); +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes); + +/** + * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements, + * starting at byte offset `start', to value `fillc', repeating it + * `bytes' number of times. + * If `bytes' is large enough, only last bytes portion of iovec, + * up to the end of it, will be filled with the specified value. + * Function return actual number of bytes processed, which is + * min(size, iov_size(iov) - offset). + */ +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, int fillc, size_t bytes); + +/** + * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements + * in file `fp', prefixing each line with `prefix' and processing not more + * than `limit' data bytes. + */ void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit); diff --git a/net.c b/net.c index 4aa416cffb..abf0fd0a0d 100644 --- a/net.c +++ b/net.c @@ -544,7 +544,7 @@ static ssize_t vc_sendv_compat(VLANClientState *vc, const struct iovec *iov, uint8_t buffer[4096]; size_t offset; - offset = iov_to_buf(iov, iovcnt, buffer, 0, sizeof(buffer)); + offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); return vc->info->receive(vc, buffer, offset); } From 2278a69e7020d86a8c73a28474e7709d3e7d5081 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:08:19 +0400 Subject: [PATCH 03/11] rewrite iov_* functions This changes the implementations of all iov_* functions, completing the previous step. All iov_* functions now ensure that the offset argument is within the iovec (using an assertion), but allow the `bytes' value to be larger than the actual length of the iovec - in this case they stop at the actual end of the iovec. It is also suggested to use the convenient value `-1' as `bytes' to mean just this -- "up to the end". There's one very minor semantic change here: the new requirement is that `offset' points inside the iovec. This is checked only at the end of the functions (assert()); it does not actually need to be enforced, but using any of these functions with an offset pointing past the end of the iovec is wrong anyway. Note: the new code in iov.c uses arithmetic on void pointers. I thought this is not supported everywhere and is a GCC extension (indeed, the C standard does not define void pointer arithmetic). However, the original code already uses void pointer arithmetic in iov_from_buf(): memcpy(..., buf + buf_off, ...), which has apparently worked well so far (it is this way in qemu 1.0). So I left it this way and used it in other places. While at it, add a unit-test file test-iov.c, to check various corner cases with iov_from_buf(), iov_to_buf() and iov_memset().
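As a sketch of the new calling convention (a hypothetical caller, not part of this patch), copying everything from byte offset `off' to the end of an iovec can now be written as:

    /* `off' must lie within the iovec; -1 as `bytes' means "up to the end" */
    size_t done = iov_to_buf(iov, iov_cnt, off, buf, (size_t)-1);

which returns iov_size(iov, iov_cnt) - off, provided `buf' has enough room.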
Signed-off-by: Michael Tokarev --- iov.c | 91 ++++++++++++---------------- iov.h | 12 +++- tests/Makefile | 2 + tests/test-iov.c | 153 +++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 204 insertions(+), 54 deletions(-) create mode 100644 tests/test-iov.c diff --git a/iov.c b/iov.c index bc58cab05a..9657d286b8 100644 --- a/iov.c +++ b/iov.c @@ -7,6 +7,7 @@ * Author(s): * Anthony Liguori * Amit Shah + * Michael Tokarev * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -17,75 +18,61 @@ #include "iov.h" -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t iov_off, - const void *buf, size_t size) +size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) { - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off, size); - - memcpy(iov[i].iov_base + (iov_off - iovec_off), buf + buf_off, len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(iov[i].iov_base + offset, buf + done, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t iov_off, - void *buf, size_t size) +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes) { - uint8_t *ptr; - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - ptr = buf; - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - - memcpy(ptr + buf_off, iov[i].iov_base + (iov_off - iovec_off), len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(buf + done, iov[i].iov_base + offset, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, int fillc, size_t size) + size_t offset, int fillc, size_t bytes) { - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - - memset(iov[i].iov_base + (iov_off - iovec_off), fillc, len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memset(iov[i].iov_base + offset, fillc, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) 
diff --git a/iov.h b/iov.h index 0b4acf4a51..19ee3b3acd 100644 --- a/iov.h +++ b/iov.h @@ -1,10 +1,11 @@ /* - * Helpers for getting linearized buffers from iov / filling buffers into iovs + * Helpers for using (partial) iovecs. * * Copyright (C) 2010 Red Hat, Inc. * * Author(s): * Amit Shah + * Michael Tokarev * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -28,6 +29,12 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); * only part of data will be copied, up to the end of the iovec. * Number of bytes actually copied will be returned, which is * min(bytes, iov_size(iov)-offset) + * `Offset' must point to the inside of iovec. + * It is okay to use very large value for `bytes' since we're + * limited by the size of the iovec anyway, provided that the + * buffer pointed to by buf has enough space. One possible + * such "large" value is -1 (sinice size_t is unsigned), + * so specifying `-1' as `bytes' means 'up to the end of iovec'. */ size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes); @@ -37,11 +44,12 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, /** * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements, * starting at byte offset `start', to value `fillc', repeating it - * `bytes' number of times. + * `bytes' number of times. `Offset' must point to the inside of iovec. * If `bytes' is large enough, only last bytes portion of iovec, * up to the end of it, will be filled with the specified value. * Function return actual number of bytes processed, which is * min(size, iov_size(iov) - offset). + * Again, it is okay to use large value for `bytes' to mean "up to the end". 
*/ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, size_t offset, int fillc, size_t bytes); diff --git a/tests/Makefile b/tests/Makefile index ab7f667009..7340bc5044 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,6 +13,7 @@ check-unit-y += tests/test-qmp-commands$(EXESUF) check-unit-y += tests/test-string-input-visitor$(EXESUF) check-unit-y += tests/test-string-output-visitor$(EXESUF) check-unit-y += tests/test-coroutine$(EXESUF) +check-unit-y += tests/test-iov$(EXESUF) check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -47,6 +48,7 @@ tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o $(tools-obj-y) tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o $(tools-obj-y) tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y) tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) +tests/test-iov$(EXESUF): tests/test-iov.o iov.o tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py diff --git a/tests/test-iov.c b/tests/test-iov.c new file mode 100644 index 0000000000..5f82296a86 --- /dev/null +++ b/tests/test-iov.c @@ -0,0 +1,153 @@ +#include +#include "qemu-common.h" +#include "iov.h" + +/* create a randomly-sized iovec with random vectors */ +static void iov_random(struct iovec **iovp, unsigned *iov_cntp) +{ + unsigned niov = g_test_rand_int_range(3,8); + struct iovec *iov = g_malloc(niov * sizeof(*iov)); + unsigned i; + for (i = 0; i < niov; ++i) { + iov[i].iov_len = g_test_rand_int_range(5,20); + iov[i].iov_base = g_malloc(iov[i].iov_len); + } + *iovp = iov; + *iov_cntp = niov; +} + +static void iov_free(struct iovec *iov, unsigned niov) +{ + unsigned i; + for (i = 0; i < niov; ++i) { + g_free(iov[i].iov_base); + } + g_free(iov); +} + +static void test_iov_bytes(struct iovec *iov, unsigned niov, + size_t offset, size_t bytes) +{ + unsigned i; + size_t j, o; + unsigned char *b; + o = 0; + + /* we walk over all elements, */ + for (i = 0; i < niov; ++i) { + b = iov[i].iov_base; + /* over each char of each element, */ + for (j = 0; j < iov[i].iov_len; ++j) { + /* counting each of them and + * verifying that the ones within [offset,offset+bytes) + * range are equal to the position number (o) */ + if (o >= offset && o < offset + bytes) { + g_assert(b[j] == (o & 255)); + } else { + g_assert(b[j] == 0xff); + } + ++o; + } + } +} + +static void test_to_from_buf_1(void) +{ + unsigned niov; + struct iovec *iov; + size_t sz; + unsigned char *ibuf, *obuf; + unsigned i, j, n; + + iov_random(&iov, &niov); + + sz = iov_size(iov, niov); + + ibuf = g_malloc(sz + 8) + 4; + memcpy(ibuf-4, "aaaa", 4); memcpy(ibuf + sz, "bbbb", 4); + obuf = g_malloc(sz + 8) + 4; + memcpy(obuf-4, "xxxx", 4); memcpy(obuf + sz, "yyyy", 4); + + /* fill in ibuf with 0123456... */ + for (i = 0; i < sz; ++i) { + ibuf[i] = i & 255; + } + + for (i = 0; i <= sz; ++i) { + + /* Test from/to buf for offset(i) in [0..sz] up to the end of buffer. 
+ * For last iteration with offset == sz, the procedure should + * skip whole vector and process exactly 0 bytes */ + + /* first set bytes [i..sz) to some "random" value */ + n = iov_memset(iov, niov, 0, 0xff, -1); + g_assert(n == sz); + + /* next copy bytes [i..sz) from ibuf to iovec */ + n = iov_from_buf(iov, niov, i, ibuf + i, -1); + g_assert(n == sz - i); + + /* clear part of obuf */ + memset(obuf + i, 0, sz - i); + /* and set this part of obuf to values from iovec */ + n = iov_to_buf(iov, niov, i, obuf + i, -1); + g_assert(n == sz - i); + + /* now compare resulting buffers */ + g_assert(memcmp(ibuf, obuf, sz) == 0); + + /* test just one char */ + n = iov_to_buf(iov, niov, i, obuf + i, 1); + g_assert(n == (i < sz)); + if (n) { + g_assert(obuf[i] == (i & 255)); + } + + for (j = i; j <= sz; ++j) { + /* now test num of bytes cap up to byte no. j, + * with j in [i..sz]. */ + + /* clear iovec */ + n = iov_memset(iov, niov, 0, 0xff, -1); + g_assert(n == sz); + + /* copy bytes [i..j) from ibuf to iovec */ + n = iov_from_buf(iov, niov, i, ibuf + i, j - i); + g_assert(n == j - i); + + /* clear part of obuf */ + memset(obuf + i, 0, j - i); + + /* copy bytes [i..j) from iovec to obuf */ + n = iov_to_buf(iov, niov, i, obuf + i, j - i); + g_assert(n == j - i); + + /* verify result */ + g_assert(memcmp(ibuf, obuf, sz) == 0); + + /* now actually check if the iovec contains the right data */ + test_iov_bytes(iov, niov, i, j - i); + } + } + g_assert(!memcmp(ibuf-4, "aaaa", 4) && !memcmp(ibuf+sz, "bbbb", 4)); + g_free(ibuf-4); + g_assert(!memcmp(obuf-4, "xxxx", 4) && !memcmp(obuf+sz, "yyyy", 4)); + g_free(obuf-4); + iov_free(iov, niov); +} + +static void test_to_from_buf(void) +{ + int x; + for (x = 0; x < 4; ++x) { + test_to_from_buf_1(); + } +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_rand_int(); + g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf); + return g_test_run(); +} From 3d9b49254f893f2a3739400e536de25db1cdc5f9 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sat, 10 Mar 2012 16:54:23 +0400 Subject: [PATCH 04/11] consolidate qemu_iovec_memset{,_skip}() into single function and use existing iov_memset() This patch combines two functions into one, and replaces the implementation with already existing iov_memset() from iov.c. The new prototype of qemu_iovec_memset(): size_t qemu_iovec_memset(qiov, size_t offset, int fillc, size_t bytes) It is different from former qemu_iovec_memset_skip(), and I want to make other functions to be consistent with it too: first how much to skip, second what, and 3rd how many of it. It also returns actual number of bytes filled in, which may be less than the requested `bytes' if qiov is smaller than offset+bytes, in the same way iov_memset() does. While at it, use utility function iov_memset() from iov.h in posix-aio-compat.c, where qiov was used. 
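For example, the short-read zero padding in linux-aio.c (see the hunk below) now reads:

    /* zero-fill everything in the qiov after the `ret' bytes actually read */
    qemu_iovec_memset(laiocb->qiov, ret, 0, laiocb->qiov->size - ret);

instead of the old qemu_iovec_memset_skip(laiocb->qiov, 0, laiocb->qiov->size - ret, ret).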
Signed-off-by: Michael Tokarev --- Makefile | 3 ++- Makefile.objs | 4 ++-- block/qcow2.c | 6 +++--- block/qed.c | 4 ++-- cutils.c | 44 ++++---------------------------------------- linux-aio.c | 4 ++-- posix-aio-compat.c | 8 +++----- qemu-common.h | 5 ++--- 8 files changed, 20 insertions(+), 58 deletions(-) diff --git a/Makefile b/Makefile index 9b7a85e4d2..017836bc02 100644 --- a/Makefile +++ b/Makefile @@ -154,7 +154,8 @@ qemu-img.o: qemu-img-cmds.h qemu-img.o qemu-tool.o qemu-nbd.o qemu-io.o cmd.o qemu-ga.o: $(GENERATED_HEADERS) tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ - qemu-timer-common.o main-loop.o notify.o iohandler.o cutils.o async.o + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o tools-obj-$(CONFIG_POSIX) += compatfd.o qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) diff --git a/Makefile.objs b/Makefile.objs index 70c5c79a6e..f173946fd2 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -42,7 +42,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ####################################################################### # block-obj-y is code used by both qemu system emulation and qemu-img -block-obj-y = cutils.o cache-utils.o qemu-option.o module.o async.o +block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o @@ -198,7 +198,7 @@ common-obj-$(CONFIG_XEN_BACKEND) += xen_console.o xenfb.o xen_disk.o xen_nic.o user-obj-y = user-obj-y += envlist.o path.o user-obj-y += tcg-runtime.o host-utils.o -user-obj-y += cutils.o cache-utils.o +user-obj-y += cutils.o iov.o cache-utils.o user-obj-y += module.o user-obj-y += qemu-user.o user-obj-y += $(trace-obj-y) diff --git a/block/qcow2.c b/block/qcow2.c index c2e49cded3..fcbf95273b 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -510,7 +510,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, else n1 = bs->total_sectors - sector_num; - qemu_iovec_memset_skip(qiov, 0, 512 * (nb_sectors - n1), 512 * n1); + qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); return n1; } @@ -571,7 +571,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } } else { /* Note: in this case, no need to wait */ - qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); } break; @@ -580,7 +580,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, ret = -EIO; goto fail; } - qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); break; case QCOW2_CLUSTER_COMPRESSED: diff --git a/block/qed.c b/block/qed.c index 30a31f907f..40bdb5364a 100644 --- a/block/qed.c +++ b/block/qed.c @@ -736,7 +736,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* Zero all sectors if reading beyond the end of the backing file */ if (pos >= backing_length || pos + qiov->size > backing_length) { - qemu_iovec_memset(qiov, 0, qiov->size); + qemu_iovec_memset(qiov, 0, 0, qiov->size); } /* Complete now if there are no backing file sectors to read */ @@ -1251,7 +1251,7 @@ static void qed_aio_read_data(void *opaque, int ret, /* Handle zero cluster and backing file reads */ if (ret == QED_CLUSTER_ZERO) { - qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size); + 
qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); qed_aio_next_io(acb, 0); return; } else if (ret != QED_CLUSTER_FOUND) { diff --git a/cutils.c b/cutils.c index af308cd7b9..0ddf4c7d74 100644 --- a/cutils.c +++ b/cutils.c @@ -26,6 +26,7 @@ #include #include "qemu_socket.h" +#include "iov.h" void pstrcpy(char *buf, int buf_size, const char *str) { @@ -260,47 +261,10 @@ void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) } } -void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count) +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes) { - size_t n; - int i; - - for (i = 0; i < qiov->niov && count; ++i) { - n = MIN(count, qiov->iov[i].iov_len); - memset(qiov->iov[i].iov_base, c, n); - count -= n; - } -} - -void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, - size_t skip) -{ - int i; - size_t done; - void *iov_base; - uint64_t iov_len; - - done = 0; - for (i = 0; (i < qiov->niov) && (done != count); i++) { - if (skip >= qiov->iov[i].iov_len) { - /* Skip the whole iov */ - skip -= qiov->iov[i].iov_len; - continue; - } else { - /* Skip only part (or nothing) of the iov */ - iov_base = (uint8_t*) qiov->iov[i].iov_base + skip; - iov_len = qiov->iov[i].iov_len - skip; - skip = 0; - } - - if (done + iov_len > count) { - memset(iov_base, c, count - done); - break; - } else { - memset(iov_base, c, iov_len); - } - done += iov_len; - } + return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); } /* diff --git a/linux-aio.c b/linux-aio.c index fa0fbf34aa..ce9b5d4be8 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -63,8 +63,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, } else if (ret >= 0) { /* Short reads mean EOF, pad with zeros. */ if (laiocb->is_read) { - qemu_iovec_memset_skip(laiocb->qiov, 0, - laiocb->qiov->size - ret, ret); + qemu_iovec_memset(laiocb->qiov, ret, 0, + laiocb->qiov->size - ret); } else { ret = -EINVAL; } diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 68361f555a..96e4daf505 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -29,6 +29,7 @@ #include "qemu-common.h" #include "trace.h" #include "block_int.h" +#include "iov.h" #include "block/raw-posix-aio.h" @@ -351,11 +352,8 @@ static void *aio_thread(void *unused) if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) { /* A short read means that we have reached EOF. Pad the buffer * with zeros for bytes after EOF. 
*/ - QEMUIOVector qiov; - - qemu_iovec_init_external(&qiov, aiocb->aio_iov, - aiocb->aio_niov); - qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret); + iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, + 0, aiocb->aio_nbytes - ret); ret = aiocb->aio_nbytes; } diff --git a/qemu-common.h b/qemu-common.h index 91e056296d..e752d2b6c1 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -347,9 +347,8 @@ void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); -void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); -void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, - size_t skip); +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes); bool buffer_is_zero(const void *buf, size_t len); From 03396148bca54c0e81ad8eecb12a136456d14c16 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:17:55 +0400 Subject: [PATCH 05/11] allow qemu_iovec_from_buffer() to specify offset from which to start copying Similar to qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, int c, size_t bytes); the new prototype is: qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, const void *buf, size_t bytes); The processing starts at offset bytes within qiov. This way, we may copy a bounce buffer directly to a middle of qiov. This is exactly the same function as iov_from_buf() from iov.c, so use the existing implementation and rename it to qemu_iovec_from_buf() to be shorter and to match the utility function. As with utility implementation, we now assert that the offset is inside actual iovec. Nothing changed for current callers, because `offset' parameter is new. While at it, stop using "bounce-qiov" in block/qcow2.c and copy decrypted data directly from cluster_data instead of recreating a temp qiov for doing that. 
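With the new offset argument, that qcow2 copy (see the hunk below) becomes simply:

    /* place the decrypted sectors directly at byte offset bytes_done in the guest qiov */
    qemu_iovec_from_buf(qiov, bytes_done, cluster_data, 512 * cur_nr_sectors);

with no temporary QEMUIOVector needed.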
Signed-off-by: Michael Tokarev --- block.c | 6 +++--- block/curl.c | 6 +++--- block/qcow.c | 2 +- block/qcow2.c | 9 +++------ block/rbd.c | 2 +- cutils.c | 16 +++------------- qemu-common.h | 3 ++- 7 files changed, 16 insertions(+), 28 deletions(-) diff --git a/block.c b/block.c index 7547051ec2..e0ef95e094 100644 --- a/block.c +++ b/block.c @@ -1821,8 +1821,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, } skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); + qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, + nb_sectors * BDRV_SECTOR_SIZE); err: qemu_vfree(bounce_buffer); @@ -3382,7 +3382,7 @@ static void bdrv_aio_bh_cb(void *opaque) BlockDriverAIOCBSync *acb = opaque; if (!acb->is_write) - qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, acb->ret); qemu_bh_delete(acb->bh); diff --git a/block/curl.c b/block/curl.c index bf3680ba57..e7c3634d35 100644 --- a/block/curl.c +++ b/block/curl.c @@ -140,8 +140,8 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) continue; if ((s->buf_off >= acb->end)) { - qemu_iovec_from_buffer(acb->qiov, s->orig_buf + acb->start, - acb->end - acb->start); + qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start, + acb->end - acb->start); acb->common.cb(acb->common.opaque, 0); qemu_aio_release(acb); s->acb[i] = NULL; @@ -176,7 +176,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, { char *buf = state->orig_buf + (start - state->buf_start); - qemu_iovec_from_buffer(acb->qiov, buf, len); + qemu_iovec_from_buf(acb->qiov, 0, buf, len); acb->common.cb(acb->common.opaque, 0); return FIND_RET_OK; diff --git a/block/qcow.c b/block/qcow.c index 35dff497ae..728010319f 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -540,7 +540,7 @@ done: qemu_co_mutex_unlock(&s->lock); if (qiov->niov > 1) { - qemu_iovec_from_buffer(qiov, orig_buf, qiov->size); + qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); qemu_vfree(orig_buf); } diff --git a/block/qcow2.c b/block/qcow2.c index fcbf95273b..ccc599b519 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -590,7 +590,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, goto fail; } - qemu_iovec_from_buffer(&hd_qiov, + qemu_iovec_from_buf(&hd_qiov, 0, s->cluster_cache + index_in_cluster * 512, 512 * cur_nr_sectors); break; @@ -630,11 +630,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, if (s->crypt_method) { qcow2_encrypt_sectors(s, sector_num, cluster_data, cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key); - qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, - cur_nr_sectors * 512); - qemu_iovec_from_buffer(&hd_qiov, cluster_data, - 512 * cur_nr_sectors); + qemu_iovec_from_buf(qiov, bytes_done, + cluster_data, 512 * cur_nr_sectors); } break; diff --git a/block/rbd.c b/block/rbd.c index 1280d66d3c..8bb3252bc3 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -620,7 +620,7 @@ static void rbd_aio_bh_cb(void *opaque) RBDAIOCB *acb = opaque; if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); } qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 
0 : acb->ret)); diff --git a/cutils.c b/cutils.c index 0ddf4c7d74..b4dd844644 100644 --- a/cutils.c +++ b/cutils.c @@ -245,20 +245,10 @@ void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf) } } -void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes) { - const uint8_t *p = (const uint8_t *)buf; - size_t copy; - int i; - - for (i = 0; i < qiov->niov && count; ++i) { - copy = count; - if (copy > qiov->iov[i].iov_len) - copy = qiov->iov[i].iov_len; - memcpy(qiov->iov[i].iov_base, p, copy); - p += copy; - count -= copy; - } + return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); } size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, diff --git a/qemu-common.h b/qemu-common.h index e752d2b6c1..430ec15a44 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -346,7 +346,8 @@ void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); -void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes); size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, int fillc, size_t bytes); From 1b093c480a32051cc856b6ab2395d8cbc3ae99da Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Mon, 12 Mar 2012 21:28:06 +0400 Subject: [PATCH 06/11] consolidate qemu_iovec_copy() and qemu_iovec_concat() and make them consistent qemu_iovec_concat() is currently a wrapper for qemu_iovec_copy(), use the former (with extra "0" arg) in a few places where it is used. Change skip argument of qemu_iovec_copy() from uint64_t to size_t, since size of qiov itself is size_t, so there's no way to skip larger sizes. Rename it to soffset, to make it clear that the offset is applied to src. Also change the only usage of uint64_t in hw/9pfs/virtio-9p.c, in v9fs_init_qiov_from_pdu() - all callers of it actually uses size_t too, not uint64_t. One added restriction: as for all other iovec-related functions, soffset must point inside src. Order of argumens is already good: qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, int c, size_t bytes) vs: qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t soffset, size_t sbytes) (note soffset is after _src_ not dst, since it applies to src; for memset it applies to qiov). Note that in many places where this function is used, the previous call is qemu_iovec_reset(), which means many callers actually want copy (replacing dst content), not concat. So we may want to add a wrapper like qemu_iovec_copy() with the same arguments but which calls qemu_iovec_reset() before _concat(). Signed-off-by: Michael Tokarev --- block.c | 4 ++-- block/qcow2.c | 4 ++-- block/qed.c | 6 ++--- cutils.c | 54 +++++++++++++++++---------------------------- hw/9pfs/virtio-9p.c | 8 +++---- qemu-common.h | 5 ++--- 6 files changed, 33 insertions(+), 48 deletions(-) diff --git a/block.c b/block.c index e0ef95e094..5b1a86249c 100644 --- a/block.c +++ b/block.c @@ -3101,13 +3101,13 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, // Add the first request to the merged one. If the requests are // overlapping, drop the last sectors of the first request. 
size = (reqs[i].sector - reqs[outidx].sector) << 9; - qemu_iovec_concat(qiov, reqs[outidx].qiov, size); + qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); // We should need to add any zeros between the two requests assert (reqs[i].sector <= oldreq_last); // Add the second request - qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); + qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); reqs[outidx].nb_sectors = qiov->size >> 9; reqs[outidx].qiov = qiov; diff --git a/block/qcow2.c b/block/qcow2.c index ccc599b519..8458d10c10 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -549,7 +549,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, index_in_cluster = sector_num & (s->cluster_sectors - 1); qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); switch (ret) { @@ -720,7 +720,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); if (s->crypt_method) { diff --git a/block/qed.c b/block/qed.c index 40bdb5364a..847417a3d6 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1131,7 +1131,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) acb->cur_nclusters = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, acb->cur_pos) + len); - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); if (acb->flags & QED_AIOCB_ZERO) { /* Skip ahead if the clusters are already zero */ @@ -1177,7 +1177,7 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) /* Calculate the I/O vector */ acb->cur_cluster = offset; - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Do the actual write */ qed_aio_write_main(acb, 0); @@ -1247,7 +1247,7 @@ static void qed_aio_read_data(void *opaque, int ret, goto err; } - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Handle zero cluster and backing file reads */ if (ret == QED_CLUSTER_ZERO) { diff --git a/cutils.c b/cutils.c index b4dd844644..1aeac153ad 100644 --- a/cutils.c +++ b/cutils.c @@ -172,48 +172,34 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) } /* - * Copies iovecs from src to the end of dst. It starts copying after skipping - * the given number of bytes in src and copies until src is completely copied - * or the total size of the copied iovec reaches size.The size of the last - * copied iovec is changed in order to fit the specified total size if it isn't - * a perfect fit already. + * Concatenates (partial) iovecs from src to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src if it comes first. This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. 
*/ -void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, - size_t size) +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes) { int i; size_t done; - void *iov_base; - uint64_t iov_len; - + struct iovec *siov = src->iov; assert(dst->nalloc != -1); - - done = 0; - for (i = 0; (i < src->niov) && (done != size); i++) { - if (skip >= src->iov[i].iov_len) { - /* Skip the whole iov */ - skip -= src->iov[i].iov_len; - continue; + assert(src->size >= soffset); + for (i = 0, done = 0; done < sbytes && i < src->niov; i++) { + if (soffset < siov[i].iov_len) { + size_t len = MIN(siov[i].iov_len - soffset, sbytes - done); + qemu_iovec_add(dst, siov[i].iov_base + soffset, len); + done += len; + soffset = 0; } else { - /* Skip only part (or nothing) of the iov */ - iov_base = (uint8_t*) src->iov[i].iov_base + skip; - iov_len = src->iov[i].iov_len - skip; - skip = 0; + soffset -= siov[i].iov_len; } - - if (done + iov_len > size) { - qemu_iovec_add(dst, iov_base, size - done); - break; - } else { - qemu_iovec_add(dst, iov_base, iov_len); - } - done += iov_len; } -} - -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) -{ - qemu_iovec_copy(dst, src, 0, size); + /* return done; */ } void qemu_iovec_destroy(QEMUIOVector *qiov) diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index c633fb9b7e..f4a7026381 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -1648,7 +1648,7 @@ out: * with qemu_iovec_destroy(). */ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, - uint64_t skip, size_t size, + size_t skip, size_t size, bool is_write) { QEMUIOVector elem; @@ -1665,7 +1665,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, qemu_iovec_init_external(&elem, iov, niov); qemu_iovec_init(qiov, niov); - qemu_iovec_copy(qiov, &elem, skip, size); + qemu_iovec_concat(qiov, &elem, skip, size); } static void v9fs_read(void *opaque) @@ -1715,7 +1715,7 @@ static void v9fs_read(void *opaque) qemu_iovec_init(&qiov, qiov_full.niov); do { qemu_iovec_reset(&qiov); - qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count); + qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count); if (0) { print_sg(qiov.iov, qiov.niov); } @@ -1970,7 +1970,7 @@ static void v9fs_write(void *opaque) qemu_iovec_init(&qiov, qiov_full.niov); do { qemu_iovec_reset(&qiov); - qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total); + qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total); if (0) { print_sg(qiov.iov, qiov.niov); } diff --git a/qemu-common.h b/qemu-common.h index 430ec15a44..cae7bb6608 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -340,9 +340,8 @@ typedef struct QEMUIOVector { void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); -void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, - size_t size); -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); From d5e6b1619c516fa1e2ee4d8d20f08fcda4fb67a0 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:21:06 +0400 Subject: [PATCH 07/11] change qemu_iovec_to_buf() 
to match other to,from_buf functions It now allows specifying offset within qiov to start from and amount of bytes to copy. Actual implementation is just a call to iov_to_buf(). Signed-off-by: Michael Tokarev --- block.c | 2 +- block/iscsi.c | 3 +-- block/qcow.c | 2 +- block/qcow2.c | 2 +- block/rbd.c | 2 +- cutils.c | 11 +++-------- qemu-common.h | 3 ++- 7 files changed, 10 insertions(+), 15 deletions(-) diff --git a/block.c b/block.c index 5b1a86249c..63b7ae083c 100644 --- a/block.c +++ b/block.c @@ -3408,7 +3408,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); if (is_write) { - qemu_iovec_to_buffer(acb->qiov, acb->bounce); + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); } else { acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); diff --git a/block/iscsi.c b/block/iscsi.c index 22888a0845..ecb7a2211e 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -240,8 +240,7 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, /* this will allow us to get rid of 'buf' completely */ size = nb_sectors * BDRV_SECTOR_SIZE; acb->buf = g_malloc(size); - qemu_iovec_to_buffer(acb->qiov, acb->buf); - + qemu_iovec_to_buf(acb->qiov, 0, acb->buf, size); acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { diff --git a/block/qcow.c b/block/qcow.c index 728010319f..7b5ab87d2d 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -569,7 +569,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, if (qiov->niov > 1) { buf = orig_buf = qemu_blockalign(bs, qiov->size); - qemu_iovec_to_buffer(qiov, buf); + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); } else { orig_buf = NULL; buf = (uint8_t *)qiov->iov->iov_base; diff --git a/block/qcow2.c b/block/qcow2.c index 8458d10c10..1b5b36cfc1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -731,7 +731,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert(hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_to_buffer(&hd_qiov, cluster_data); + qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); qcow2_encrypt_sectors(s, sector_num, cluster_data, cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key); diff --git a/block/rbd.c b/block/rbd.c index 8bb3252bc3..49a4787b55 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -674,7 +674,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->bh = NULL; if (cmd == RBD_AIO_WRITE) { - qemu_iovec_to_buffer(acb->qiov, acb->bounce); + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); } buf = acb->bounce; diff --git a/cutils.c b/cutils.c index 1aeac153ad..352bc521b1 100644 --- a/cutils.c +++ b/cutils.c @@ -220,15 +220,10 @@ void qemu_iovec_reset(QEMUIOVector *qiov) qiov->size = 0; } -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf) +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes) { - uint8_t *p = (uint8_t *)buf; - int i; - - for (i = 0; i < qiov->niov; ++i) { - memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); - p += qiov->iov[i].iov_len; - } + return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); } size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, diff --git a/qemu-common.h b/qemu-common.h index cae7bb6608..056e495f30 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -344,7 +344,8 @@ void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t soffset, size_t sbytes); void 
qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes); size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, const void *buf, size_t bytes); size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, From 3e80bf9351f8fec9085c46df6da075efd5e71003 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Sat, 10 Mar 2012 17:00:41 +0400 Subject: [PATCH 08/11] rename qemu_sendv to iov_send, change proto and move declarations to iov.h Rename arguments and use size_t for sizes instead of int, from int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset) to ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes) The main motivation was to make it clear that length and offset are in _bytes_, not in iov elements: it was very confusing before, because all standard functions which deals with iovecs expects number of iovs, not bytes, even the fact that struct iovec has iov_len and iov_ prefix does not help. With "bytes" and "offset", especially since they're now size_t, it is much more explicit. Also change the return type to be ssize_t instead of int. This also changes it to match other iov-related functons, but not _quite_: there's still no argument indicating where iovec ends, ie, no iov_cnt parameter as used in iov_size() and friends. If will be added in subsequent patch/rewrite. All callers of qemu_sendv() and qemu_recvv() and related, like qemu_co_sendv() and qemu_co_recvv(), were checked to verify that it is safe to use unsigned datatype instead of int. Note that the order of arguments is changed to: offset and bytes (len and iov_offset) are swapped with each other. This is to make them consistent with very similar functions from qemu_iovec family, where offset always follows qiov, to mean the place in it to start from. Signed-off-by: Michael Tokarev --- cutils.c | 41 +++++++++++++---------------------------- iov.h | 18 ++++++++++++++++++ qemu-common.h | 3 --- qemu-coroutine-io.c | 5 +++-- 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/cutils.c b/cutils.c index 352bc521b1..2ad5fa3a9b 100644 --- a/cutils.c +++ b/cutils.c @@ -376,43 +376,28 @@ int qemu_parse_fd(const char *param) return fd; } -/* - * Send/recv data with iovec buffers - * - * This function send/recv data from/to the iovec buffer directly. - * The first `offset' bytes in the iovec buffer are skipped and next - * `len' bytes are used. - * - * For example, - * - * do_sendv_recvv(sockfd, iov, len, offset, 1); - * - * is equal to - * - * char *buf = malloc(size); - * iov_to_buf(iov, iovcnt, buf, offset, size); - * send(sockfd, buf, size, 0); - * free(buf); - */ -static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, +static ssize_t do_sendv_recvv(int sockfd, struct iovec *iov, + size_t offset, size_t bytes, int do_sendv) { - int ret, diff, iovlen; + int iovlen; + ssize_t ret; + size_t diff; struct iovec *last_iov; /* last_iov is inclusive, so count from one. 
*/ iovlen = 1; last_iov = iov; - len += offset; + bytes += offset; - while (last_iov->iov_len < len) { - len -= last_iov->iov_len; + while (last_iov->iov_len < bytes) { + bytes -= last_iov->iov_len; last_iov++; iovlen++; } - diff = last_iov->iov_len - len; + diff = last_iov->iov_len - bytes; last_iov->iov_len -= diff; while (iov->iov_len <= offset) { @@ -474,13 +459,13 @@ static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, return ret; } -int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset) +ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes) { - return do_sendv_recvv(sockfd, iov, len, iov_offset, 0); + return do_sendv_recvv(sockfd, iov, offset, bytes, 0); } -int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset) +ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes) { - return do_sendv_recvv(sockfd, iov, len, iov_offset, 1); + return do_sendv_recvv(sockfd, iov, offset, bytes, 1); } diff --git a/iov.h b/iov.h index 19ee3b3acd..5aa2f455de 100644 --- a/iov.h +++ b/iov.h @@ -54,6 +54,24 @@ size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, size_t offset, int fillc, size_t bytes); +/* + * Send/recv data from/to iovec buffers directly + * + * `offset' bytes in the beginning of iovec buffer are skipped and + * next `bytes' bytes are used, which must be within data of iovec. + * + * r = iov_send(sockfd, iov, offset, bytes); + * + * is logically equivalent to + * + * char *buf = malloc(bytes); + * iov_to_buf(iov, iovcnt, offset, buf, bytes); + * r = send(sockfd, buf, bytes, 0); + * free(buf); + */ +ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes); +ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes); + /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements * in file `fp', prefixing each line with `prefix' and processing not more diff --git a/qemu-common.h b/qemu-common.h index 056e495f30..41b8ae77cb 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -205,9 +205,6 @@ int qemu_pipe(int pipefd[2]); #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags) #endif -int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset); -int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset); - /* Error handling. */ void QEMU_NORETURN hw_error(const char *fmt, ...) 
GCC_FMT_ATTR(1, 2); diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 40fd514395..0461a9aae6 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -25,6 +25,7 @@ #include "qemu-common.h" #include "qemu_socket.h" #include "qemu-coroutine.h" +#include "iov.h" int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, int len, int iov_offset) @@ -32,7 +33,7 @@ int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, int total = 0; int ret; while (len) { - ret = qemu_recvv(sockfd, iov, len, iov_offset + total); + ret = iov_recv(sockfd, iov, iov_offset + total, len); if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); @@ -58,7 +59,7 @@ int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov, int total = 0; int ret; while (len) { - ret = qemu_sendv(sockfd, iov, len, iov_offset + total); + ret = iov_send(sockfd, iov, iov_offset + total, len); if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); From e3e87df4c94319b15017f958e22761aba03c452a Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 14 Mar 2012 10:56:04 +0400 Subject: [PATCH 09/11] export iov_send_recv() and use it in iov_send() and iov_recv() Rename do_sendv_recvv() to iov_send_recv(), change its last arg (do_send) from int to bool, export it in iov.h, and made the two callers of it (iov_send() and iov_recv()) to be trivial #defines just adding 5th arg. iov_send_recv() will be used later. Signed-off-by: Michael Tokarev --- cutils.c | 17 +++-------------- iov.h | 10 +++++++--- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/cutils.c b/cutils.c index 2ad5fa3a9b..cb6f63848c 100644 --- a/cutils.c +++ b/cutils.c @@ -376,9 +376,9 @@ int qemu_parse_fd(const char *param) return fd; } -static ssize_t do_sendv_recvv(int sockfd, struct iovec *iov, - size_t offset, size_t bytes, - int do_sendv) +ssize_t iov_send_recv(int sockfd, struct iovec *iov, + size_t offset, size_t bytes, + bool do_sendv) { int iovlen; ssize_t ret; @@ -458,14 +458,3 @@ static ssize_t do_sendv_recvv(int sockfd, struct iovec *iov, last_iov->iov_len += diff; return ret; } - -ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes) -{ - return do_sendv_recvv(sockfd, iov, offset, bytes, 0); -} - -ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes) -{ - return do_sendv_recvv(sockfd, iov, offset, bytes, 1); -} - diff --git a/iov.h b/iov.h index 5aa2f455de..9b6a883922 100644 --- a/iov.h +++ b/iov.h @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * `offset' bytes in the beginning of iovec buffer are skipped and * next `bytes' bytes are used, which must be within data of iovec. 
* - * r = iov_send(sockfd, iov, offset, bytes); + * r = iov_send_recv(sockfd, iov, offset, bytes, true); * * is logically equivalent to * @@ -69,8 +69,12 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * r = send(sockfd, buf, bytes, 0); * free(buf); */ -ssize_t iov_recv(int sockfd, struct iovec *iov, size_t offset, size_t bytes); -ssize_t iov_send(int sockfd, struct iovec *iov, size_t offset, size_t bytes); +ssize_t iov_send_recv(int sockfd, struct iovec *iov, + size_t offset, size_t bytes, bool do_send); +#define iov_recv(sockfd, iov, offset, bytes) \ + iov_send_recv(sockfd, iov, offset, bytes, false) +#define iov_send(sockfd, iov, offset, bytes) \ + iov_send_recv(sockfd, iov, offset, bytes, true) /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements From 2fc8ae1dd77fbc55146b602f703add6dc314dea4 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Thu, 7 Jun 2012 20:22:46 +0400 Subject: [PATCH 10/11] cleanup qemu_co_sendv(), qemu_co_recvv() and friends The same as for non-coroutine versions in previous patches: rename arguments to be more obvious, change type of arguments from int to size_t where appropriate, and use common code for send and receive paths (with one extra argument) since these are exactly the same. Use common iov_send_recv() directly. qemu_co_sendv(), qemu_co_recvv(), and qemu_co_recv() are now trivial #define's merely adding one extra arg. qemu_co_sendv() and qemu_co_recvv() callers are converted to different argument order and extra `iov_cnt' argument. Signed-off-by: Michael Tokarev --- block/nbd.c | 18 +++++----- block/sheepdog.c | 6 ++-- qemu-common.h | 37 ++++++++++---------- qemu-coroutine-io.c | 82 ++++++++++++++------------------------------- 4 files changed, 55 insertions(+), 88 deletions(-) diff --git a/block/nbd.c b/block/nbd.c index 1212614223..2bce47bf7a 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -196,7 +196,7 @@ static void nbd_restart_write(void *opaque) } static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, - struct iovec *iov, int offset) + QEMUIOVector *qiov, int offset) { int rc, ret; @@ -205,8 +205,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, nbd_have_request, s); rc = nbd_send_request(s->sock, request); - if (rc >= 0 && iov) { - ret = qemu_co_sendv(s->sock, iov, request->len, offset); + if (rc >= 0 && qiov) { + ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, + offset, request->len); if (ret != request->len) { return -EIO; } @@ -220,7 +221,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request, struct nbd_reply *reply, - struct iovec *iov, int offset) + QEMUIOVector *qiov, int offset) { int ret; @@ -231,8 +232,9 @@ static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request, if (reply->handle != request->handle) { reply->error = EIO; } else { - if (iov && reply->error == 0) { - ret = qemu_co_recvv(s->sock, iov, request->len, offset); + if (qiov && reply->error == 0) { + ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov, + offset, request->len); if (ret != request->len) { reply->error = EIO; } @@ -349,7 +351,7 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset); + nbd_co_receive_reply(s, &request, &reply, qiov, offset); } 
nbd_coroutine_end(s, &request); return -reply.error; @@ -374,7 +376,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(s, &request); - ret = nbd_co_send_request(s, &request, qiov->iov, offset); + ret = nbd_co_send_request(s, &request, qiov, offset); if (ret < 0) { reply.error = -ret; } else { diff --git a/block/sheepdog.c b/block/sheepdog.c index f46ca8fb69..2c7aecec4f 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -719,8 +719,8 @@ static void coroutine_fn aio_read_response(void *opaque) } break; case AIOCB_READ_UDATA: - ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length, - aio_req->iov_offset); + ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, + aio_req->iov_offset, rsp.data_length); if (ret < 0) { error_report("failed to get the data, %s", strerror(errno)); goto out; @@ -992,7 +992,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } if (wlen) { - ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset); + ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); if (ret < 0) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a data, %s", strerror(errno)); diff --git a/qemu-common.h b/qemu-common.h index 41b8ae77cb..71395771db 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -300,32 +300,29 @@ struct qemu_work_item { void qemu_init_vcpu(void *env); #endif -/** - * Sends an iovec (or optionally a part of it) down a socket, yielding - * when the socket is full. - */ -int qemu_co_sendv(int sockfd, struct iovec *iov, - int len, int iov_offset); /** - * Receives data into an iovec (or optionally into a part of it) from - * a socket, yielding when there is no data in the socket. + * Sends a (part of) iovec down a socket, yielding when the socket is full, or + * Receives data into a (part of) iovec from a socket, + * yielding when there is no data in the socket. + * The same interface as qemu_sendv_recvv(), with added yielding. + * XXX should mark these as coroutine_fn */ -int qemu_co_recvv(int sockfd, struct iovec *iov, - int len, int iov_offset); - +ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send); +#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false) +#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true) /** - * Sends a buffer down a socket, yielding when the socket is full. + * The same as above, but with just a single buffer */ -int qemu_co_send(int sockfd, void *buf, int len); - -/** - * Receives data into a buffer from a socket, yielding when there - * is no data in the socket. 
- */ -int qemu_co_recv(int sockfd, void *buf, int len); - +ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send); +#define qemu_co_recv(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, false) +#define qemu_co_send(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, true) typedef struct QEMUIOVector { struct iovec *iov; diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 0461a9aae6..6693c7824b 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -27,71 +27,39 @@ #include "qemu-coroutine.h" #include "iov.h" -int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, - int len, int iov_offset) +ssize_t coroutine_fn +qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send) { - int total = 0; - int ret; - while (len) { - ret = iov_recv(sockfd, iov, iov_offset + total, len); - if (ret < 0) { + size_t done = 0; + ssize_t ret; + while (done < bytes) { + ret = iov_send_recv(sockfd, iov, + offset + done, bytes - done, do_send); + if (ret > 0) { + done += ret; + } else if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); - continue; - } - if (total == 0) { - total = -1; + } else if (done == 0) { + return -1; + } else { + break; } + } else if (ret == 0 && !do_send) { + /* write (send) should never return 0. + * read (recv) returns 0 for end-of-file (-data). + * In both cases there's little point retrying, + * but we do for write anyway, just in case */ break; } - if (ret == 0) { - break; - } - total += ret, len -= ret; } - - return total; + return done; } -int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov, - int len, int iov_offset) +ssize_t coroutine_fn +qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send) { - int total = 0; - int ret; - while (len) { - ret = iov_send(sockfd, iov, iov_offset + total, len); - if (ret < 0) { - if (errno == EAGAIN) { - qemu_coroutine_yield(); - continue; - } - if (total == 0) { - total = -1; - } - break; - } - total += ret, len -= ret; - } - - return total; -} - -int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len) -{ - struct iovec iov; - - iov.iov_base = buf; - iov.iov_len = len; - - return qemu_co_recvv(sockfd, &iov, len, 0); -} - -int coroutine_fn qemu_co_send(int sockfd, void *buf, int len) -{ - struct iovec iov; - - iov.iov_base = buf; - iov.iov_len = len; - - return qemu_co_sendv(sockfd, &iov, len, 0); + struct iovec iov = { .iov_base = buf, .iov_len = bytes }; + return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send); } From 25e5e4c7e9d5ec3e95c9526d1abaca40ada50ab0 Mon Sep 17 00:00:00 2001 From: Michael Tokarev Date: Wed, 14 Mar 2012 11:18:54 +0400 Subject: [PATCH 11/11] rewrite iov_send_recv() and move it to iov.c Make it much more understandable, add a missing iov_cnt argument (number of iovs in the iov), and add comments to it. The new implementation has been extensively tested by splitting a large buffer into many small randomly-sized chunks, sending it over socket to another, slow process and verifying the receiving data is the same. Also add a unit test for iov_send_recv(), sending/ receiving data between two processes over a socketpair using random vectors and random sizes. 
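To illustrate the resulting calling convention (example only, not
part of the patch; it assumes an already-connected stream socket
`sockfd' and the usual QEMU includes pulled in by iov.h):

  #include <stdio.h>   /* perror() */
  #include "iov.h"     /* iov_send(), iov_send_recv() */

  /* Send bytes [3 .. 3+8) of the data described by a two-element
   * vector.  iov_send_recv() adjusts the vector internally and
   * restores it before returning, so `v' is unchanged afterwards. */
  static void send_middle(int sockfd)
  {
      char a[] = "Hello, ", b[] = "world!\n";
      struct iovec v[2] = {
          { .iov_base = a, .iov_len = 7 },
          { .iov_base = b, .iov_len = 7 },
      };
      ssize_t r = iov_send(sockfd, v, 2, 3, 8);   /* sends "lo, worl" */

      if (r < 0) {
          perror("iov_send");
      }
  }

The receive direction is symmetrical: iov_recv(sockfd, v, 2, 3, 8)
would store up to 8 bytes starting 3 bytes into the same vector.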
Signed-off-by: Michael Tokarev --- cutils.c | 83 ---------------------------------- iov.c | 103 ++++++++++++++++++++++++++++++++++++++++++ iov.h | 15 ++++--- qemu-coroutine-io.c | 2 +- tests/test-iov.c | 107 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 220 insertions(+), 90 deletions(-) diff --git a/cutils.c b/cutils.c index cb6f63848c..e2bc1b89df 100644 --- a/cutils.c +++ b/cutils.c @@ -375,86 +375,3 @@ int qemu_parse_fd(const char *param) } return fd; } - -ssize_t iov_send_recv(int sockfd, struct iovec *iov, - size_t offset, size_t bytes, - bool do_sendv) -{ - int iovlen; - ssize_t ret; - size_t diff; - struct iovec *last_iov; - - /* last_iov is inclusive, so count from one. */ - iovlen = 1; - last_iov = iov; - bytes += offset; - - while (last_iov->iov_len < bytes) { - bytes -= last_iov->iov_len; - - last_iov++; - iovlen++; - } - - diff = last_iov->iov_len - bytes; - last_iov->iov_len -= diff; - - while (iov->iov_len <= offset) { - offset -= iov->iov_len; - - iov++; - iovlen--; - } - - iov->iov_base = (char *) iov->iov_base + offset; - iov->iov_len -= offset; - - { -#if defined CONFIG_IOVEC && defined CONFIG_POSIX - struct msghdr msg; - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = iov; - msg.msg_iovlen = iovlen; - - do { - if (do_sendv) { - ret = sendmsg(sockfd, &msg, 0); - } else { - ret = recvmsg(sockfd, &msg, 0); - } - } while (ret == -1 && errno == EINTR); -#else - struct iovec *p = iov; - ret = 0; - while (iovlen > 0) { - int rc; - if (do_sendv) { - rc = send(sockfd, p->iov_base, p->iov_len, 0); - } else { - rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0); - } - if (rc == -1) { - if (errno == EINTR) { - continue; - } - if (ret == 0) { - ret = -1; - } - break; - } - if (rc == 0) { - break; - } - ret += rc; - iovlen--, p++; - } -#endif - } - - /* Undo the changes above */ - iov->iov_base = (char *) iov->iov_base - offset; - iov->iov_len += offset; - last_iov->iov_len += diff; - return ret; -} diff --git a/iov.c b/iov.c index 9657d286b8..7cc08f0fe4 100644 --- a/iov.c +++ b/iov.c @@ -18,6 +18,14 @@ #include "iov.h" +#ifdef _WIN32 +# include +# include +#else +# include +# include +#endif + size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, size_t offset, const void *buf, size_t bytes) { @@ -87,6 +95,101 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) return len; } +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#if defined CONFIG_IOVEC && defined CONFIG_POSIX + ssize_t ret; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_cnt; + do { + ret = do_send + ? sendmsg(sockfd, &msg, 0) + : recvmsg(sockfd, &msg, 0); + } while (ret < 0 && errno == EINTR); + return ret; +#else + /* else send piece-by-piece */ + /*XXX Note: windows has WSASend() and WSARecv() */ + unsigned i; + size_t count = 0; + for (i = 0; i < iov_cnt; ++i) { + ssize_t r = do_send + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. 
*/ + if (ret == 0) { + return -1; + } + break; + } + } + return count; +#endif +} + +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, + bool do_send) +{ + ssize_t ret; + unsigned si, ei; /* start and end indexes */ + + /* Find the start position, skipping `offset' bytes: + * first, skip all full-sized vector elements, */ + for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) { + offset -= iov[si].iov_len; + } + if (offset) { + assert(si < iov_cnt); + /* second, skip `offset' bytes from the (now) first element, + * undo it on exit */ + iov[si].iov_base += offset; + iov[si].iov_len -= offset; + } + /* Find the end position skipping `bytes' bytes: */ + /* first, skip all full-sized elements */ + for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) { + bytes -= iov[ei].iov_len; + } + if (bytes) { + /* second, fixup the last element, and remember + * the length we've cut from the end of it in `bytes' */ + size_t tail; + assert(ei < iov_cnt); + assert(iov[ei].iov_len > bytes); + tail = iov[ei].iov_len - bytes; + iov[ei].iov_len = bytes; + bytes = tail; /* bytes is now equal to the tail size */ + ++ei; + } + + ret = do_send_recv(sockfd, iov + si, ei - si, do_send); + + /* Undo the changes above */ + if (offset) { + iov[si].iov_base -= offset; + iov[si].iov_len += offset; + } + if (bytes) { + iov[ei-1].iov_len += bytes; + } + + return ret; +} + + void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit) { diff --git a/iov.h b/iov.h index 9b6a883922..381f37a546 100644 --- a/iov.h +++ b/iov.h @@ -60,7 +60,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * `offset' bytes in the beginning of iovec buffer are skipped and * next `bytes' bytes are used, which must be within data of iovec. * - * r = iov_send_recv(sockfd, iov, offset, bytes, true); + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); * * is logically equivalent to * @@ -68,13 +68,16 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * iov_to_buf(iov, iovcnt, offset, buf, bytes); * r = send(sockfd, buf, bytes, 0); * free(buf); + * + * For iov_send_recv() _whole_ area being sent or received + * should be within the iovec, not only beginning of it. 
*/ -ssize_t iov_send_recv(int sockfd, struct iovec *iov, +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send); -#define iov_recv(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, false) -#define iov_send(sockfd, iov, offset, bytes) \ - iov_send_recv(sockfd, iov, offset, bytes, true) +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) /** * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 6693c7824b..5734965003 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -34,7 +34,7 @@ qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, size_t done = 0; ssize_t ret; while (done < bytes) { - ret = iov_send_recv(sockfd, iov, + ret = iov_send_recv(sockfd, iov, iov_cnt, offset + done, bytes - done, do_send); if (ret > 0) { done += ret; diff --git a/tests/test-iov.c b/tests/test-iov.c index 5f82296a86..cbe7a8955c 100644 --- a/tests/test-iov.c +++ b/tests/test-iov.c @@ -1,6 +1,7 @@ #include #include "qemu-common.h" #include "iov.h" +#include "qemu_socket.h" /* create a randomly-sized iovec with random vectors */ static void iov_random(struct iovec **iovp, unsigned *iov_cntp) @@ -144,10 +145,116 @@ static void test_to_from_buf(void) } } +static void test_io(void) +{ +#ifndef _WIN32 +/* socketpair(PF_UNIX) which does not exist on windows */ + + int sv[2]; + int r; + unsigned i, j, k, s, t; + fd_set fds; + unsigned niov; + struct iovec *iov, *siov; + unsigned char *buf; + size_t sz; + + iov_random(&iov, &niov); + sz = iov_size(iov, niov); + buf = g_malloc(sz); + for (i = 0; i < sz; ++i) { + buf[i] = i & 255; + } + iov_from_buf(iov, niov, 0, buf, sz); + + siov = g_malloc(sizeof(*iov) * niov); + memcpy(siov, iov, sizeof(*iov) * niov); + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) { + perror("socketpair"); + exit(1); + } + + FD_ZERO(&fds); + + t = 0; + if (fork() == 0) { + /* writer */ + + close(sv[0]); + FD_SET(sv[1], &fds); + fcntl(sv[1], F_SETFL, O_RDWR|O_NONBLOCK); + r = g_test_rand_int_range(sz / 2, sz); + setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &r, sizeof(r)); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + do { + s = g_test_rand_int_range(0, j - k + 1); + r = iov_send(sv[1], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r >= 0) { + k += r; + t += r; + usleep(g_test_rand_int_range(0, 30)); + } else if (errno == EAGAIN) { + select(sv[1]+1, NULL, &fds, NULL, NULL); + continue; + } else { + perror("send"); + exit(1); + } + } while(k < j); + } + } + exit(0); + + } else { + /* reader & verifier */ + + close(sv[1]); + FD_SET(sv[0], &fds); + fcntl(sv[0], F_SETFL, O_RDWR|O_NONBLOCK); + r = g_test_rand_int_range(sz / 2, sz); + setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &r, sizeof(r)); + usleep(500000); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + iov_memset(iov, niov, 0, 0xff, -1); + do { + s = g_test_rand_int_range(0, j - k + 1); + r = iov_recv(sv[0], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r > 0) { + k += r; + t += r; + } else if (!r) { + if (s) { + break; + } + } else if (errno == EAGAIN) { + select(sv[0]+1, &fds, NULL, NULL, NULL); + continue; + } else { + perror("recv"); + exit(1); + } + } while(k < j); + 
test_iov_bytes(iov, niov, i, j - i); + } + } + } +#endif +} + int main(int argc, char **argv) { g_test_init(&argc, &argv, NULL); g_test_rand_int(); g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf); + g_test_add_func("/basic/iov/io", test_io); return g_test_run(); }
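For reference, the argument-order change that the qemu_co_* cleanup
imposes on callers can be sketched as follows (illustration only, not
part of the series; `qiov', `offset' and `len' are stand-ins modelled
on the nbd driver conversion above, and the function must run in
coroutine context):

  #include <errno.h>
  #include "qemu-common.h"     /* qemu_co_recvv(), QEMUIOVector */

  /* Read `len' bytes into `qiov', starting `offset' bytes into it,
   * yielding whenever the socket has no data. */
  static int read_payload(int sockfd, QEMUIOVector *qiov,
                          size_t offset, size_t len)
  {
      /* old: qemu_co_recvv(sockfd, qiov->iov, len, offset);
       * new: explicit element count, offset before byte count */
      ssize_t ret = qemu_co_recvv(sockfd, qiov->iov, qiov->niov,
                                  offset, len);

      return ret == (ssize_t)len ? 0 : -EIO;
  }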