diff --git a/Makefile b/Makefile index bad0e3142a..34d6a9e52c 100644 --- a/Makefile +++ b/Makefile @@ -156,7 +156,8 @@ vscclient$(EXESUF): $(libcacard-y) $(oslib-obj-y) $(trace-obj-y) qemu-timer-comm qemu-img.o: qemu-img-cmds.h tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \ - qemu-timer-common.o main-loop.o notify.o iohandler.o cutils.o async.o + qemu-timer-common.o main-loop.o notify.o \ + iohandler.o cutils.o iov.o async.o tools-obj-$(CONFIG_POSIX) += compatfd.o qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) diff --git a/Makefile.objs b/Makefile.objs index 625c4d5da7..5ebbcfa171 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -41,7 +41,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o ####################################################################### # block-obj-y is code used by both qemu system emulation and qemu-img -block-obj-y = cutils.o cache-utils.o qemu-option.o module.o async.o +block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o @@ -101,7 +101,7 @@ common-obj-$(CONFIG_SLIRP) += slirp/ user-obj-y = user-obj-y += envlist.o path.o user-obj-y += tcg-runtime.o host-utils.o -user-obj-y += cutils.o cache-utils.o +user-obj-y += cutils.o iov.o cache-utils.o user-obj-y += module.o user-obj-y += qemu-user.o user-obj-y += $(trace-obj-y) diff --git a/block.c b/block.c index b410ed6467..0c923f2ae9 100644 --- a/block.c +++ b/block.c @@ -1871,8 +1871,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs, } skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE; - qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes, - nb_sectors * BDRV_SECTOR_SIZE); + qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, + nb_sectors * BDRV_SECTOR_SIZE); err: qemu_vfree(bounce_buffer); @@ -3200,13 +3200,13 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, // Add the first request to the merged one. If the requests are // overlapping, drop the last sectors of the first request. size = (reqs[i].sector - reqs[outidx].sector) << 9; - qemu_iovec_concat(qiov, reqs[outidx].qiov, size); + qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size); // We should need to add any zeros between the two requests assert (reqs[i].sector <= oldreq_last); // Add the second request - qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size); + qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size); reqs[outidx].nb_sectors = qiov->size >> 9; reqs[outidx].qiov = qiov; @@ -3481,7 +3481,7 @@ static void bdrv_aio_bh_cb(void *opaque) BlockDriverAIOCBSync *acb = opaque; if (!acb->is_write) - qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, acb->ret); qemu_bh_delete(acb->bh); @@ -3507,7 +3507,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); if (is_write) { - qemu_iovec_to_buffer(acb->qiov, acb->bounce); + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors); } else { acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors); diff --git a/block/curl.c b/block/curl.c index bf3680ba57..e7c3634d35 100644 --- a/block/curl.c +++ b/block/curl.c @@ -140,8 +140,8 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque) continue; if ((s->buf_off >= acb->end)) { - qemu_iovec_from_buffer(acb->qiov, s->orig_buf + acb->start, - acb->end - acb->start); + qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start, + acb->end - acb->start); acb->common.cb(acb->common.opaque, 0); qemu_aio_release(acb); s->acb[i] = NULL; @@ -176,7 +176,7 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len, { char *buf = state->orig_buf + (start - state->buf_start); - qemu_iovec_from_buffer(acb->qiov, buf, len); + qemu_iovec_from_buf(acb->qiov, 0, buf, len); acb->common.cb(acb->common.opaque, 0); return FIND_RET_OK; diff --git a/block/iscsi.c b/block/iscsi.c index ac65277048..993a86d829 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -247,8 +247,7 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num, /* this will allow us to get rid of 'buf' completely */ size = nb_sectors * BDRV_SECTOR_SIZE; acb->buf = g_malloc(size); - qemu_iovec_to_buffer(acb->qiov, acb->buf); - + qemu_iovec_to_buf(acb->qiov, 0, acb->buf, size); acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { diff --git a/block/nbd.c b/block/nbd.c index 1212614223..2bce47bf7a 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -196,7 +196,7 @@ static void nbd_restart_write(void *opaque) } static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, - struct iovec *iov, int offset) + QEMUIOVector *qiov, int offset) { int rc, ret; @@ -205,8 +205,9 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, nbd_have_request, s); rc = nbd_send_request(s->sock, request); - if (rc >= 0 && iov) { - ret = qemu_co_sendv(s->sock, iov, request->len, offset); + if (rc >= 0 && qiov) { + ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, + offset, request->len); if (ret != request->len) { return -EIO; } @@ -220,7 +221,7 @@ static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request, static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request, struct nbd_reply *reply, - struct iovec *iov, int offset) + QEMUIOVector *qiov, int offset) { int ret; @@ -231,8 +232,9 @@ static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request, if (reply->handle != request->handle) { reply->error = EIO; } else { - if (iov && reply->error == 0) { - ret = qemu_co_recvv(s->sock, iov, request->len, offset); + if (qiov && reply->error == 0) { + ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov, + offset, request->len); if (ret != request->len) { reply->error = EIO; } @@ -349,7 +351,7 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { reply.error = -ret; } else { - nbd_co_receive_reply(s, &request, &reply, qiov->iov, offset); + nbd_co_receive_reply(s, &request, &reply, qiov, offset); } nbd_coroutine_end(s, &request); return -reply.error; @@ -374,7 +376,7 @@ static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, request.len = nb_sectors * 512; nbd_coroutine_start(s, &request); - ret = nbd_co_send_request(s, &request, qiov->iov, offset); + ret = nbd_co_send_request(s, &request, qiov, offset); if (ret < 0) { reply.error = -ret; } else { diff --git a/block/qcow.c b/block/qcow.c index 35dff497ae..7b5ab87d2d 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -540,7 +540,7 @@ done: qemu_co_mutex_unlock(&s->lock); if (qiov->niov > 1) { - qemu_iovec_from_buffer(qiov, orig_buf, qiov->size); + qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size); qemu_vfree(orig_buf); } @@ -569,7 +569,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, if (qiov->niov > 1) { buf = orig_buf = qemu_blockalign(bs, qiov->size); - qemu_iovec_to_buffer(qiov, buf); + qemu_iovec_to_buf(qiov, 0, buf, qiov->size); } else { orig_buf = NULL; buf = (uint8_t *)qiov->iov->iov_base; diff --git a/block/qcow2.c b/block/qcow2.c index 5be5ace694..870148ddf8 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -508,7 +508,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, else n1 = bs->total_sectors - sector_num; - qemu_iovec_memset_skip(qiov, 0, 512 * (nb_sectors - n1), 512 * n1); + qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1)); return n1; } @@ -547,7 +547,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, index_in_cluster = sector_num & (s->cluster_sectors - 1); qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); switch (ret) { @@ -569,7 +569,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, } } else { /* Note: in this case, no need to wait */ - qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); } break; @@ -578,7 +578,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, ret = -EIO; goto fail; } - qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors); + qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors); break; case QCOW2_CLUSTER_COMPRESSED: @@ -588,7 +588,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, goto fail; } - qemu_iovec_from_buffer(&hd_qiov, + qemu_iovec_from_buf(&hd_qiov, 0, s->cluster_cache + index_in_cluster * 512, 512 * cur_nr_sectors); break; @@ -628,11 +628,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, if (s->crypt_method) { qcow2_encrypt_sectors(s, sector_num, cluster_data, cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key); - qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, - cur_nr_sectors * 512); - qemu_iovec_from_buffer(&hd_qiov, cluster_data, - 512 * cur_nr_sectors); + qemu_iovec_from_buf(qiov, bytes_done, + cluster_data, 512 * cur_nr_sectors); } break; @@ -721,7 +718,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert((cluster_offset & 511) == 0); qemu_iovec_reset(&hd_qiov); - qemu_iovec_copy(&hd_qiov, qiov, bytes_done, + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512); if (s->crypt_method) { @@ -732,7 +729,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, assert(hd_qiov.size <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); - qemu_iovec_to_buffer(&hd_qiov, cluster_data); + qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); qcow2_encrypt_sectors(s, sector_num, cluster_data, cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key); diff --git a/block/qed.c b/block/qed.c index dd2832a93b..5f3eefa3af 100644 --- a/block/qed.c +++ b/block/qed.c @@ -736,7 +736,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* Zero all sectors if reading beyond the end of the backing file */ if (pos >= backing_length || pos + qiov->size > backing_length) { - qemu_iovec_memset(qiov, 0, qiov->size); + qemu_iovec_memset(qiov, 0, 0, qiov->size); } /* Complete now if there are no backing file sectors to read */ @@ -1131,7 +1131,7 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) acb->cur_nclusters = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, acb->cur_pos) + len); - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); if (acb->flags & QED_AIOCB_ZERO) { /* Skip ahead if the clusters are already zero */ @@ -1177,7 +1177,7 @@ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) /* Calculate the I/O vector */ acb->cur_cluster = offset; - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Do the actual write */ qed_aio_write_main(acb, 0); @@ -1247,11 +1247,11 @@ static void qed_aio_read_data(void *opaque, int ret, goto err; } - qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); /* Handle zero cluster and backing file reads */ if (ret == QED_CLUSTER_ZERO) { - qemu_iovec_memset(&acb->cur_qiov, 0, acb->cur_qiov.size); + qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size); qed_aio_next_io(acb, 0); return; } else if (ret != QED_CLUSTER_FOUND) { diff --git a/block/rbd.c b/block/rbd.c index eebc334462..5a0f79fc8f 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -639,7 +639,7 @@ static void rbd_aio_bh_cb(void *opaque) RBDAIOCB *acb = opaque; if (acb->cmd == RBD_AIO_READ) { - qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size); + qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size); } qemu_vfree(acb->bounce); acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret)); @@ -693,7 +693,7 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs, acb->bh = NULL; if (cmd == RBD_AIO_WRITE) { - qemu_iovec_to_buffer(acb->qiov, acb->bounce); + qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); } buf = acb->bounce; diff --git a/block/sheepdog.c b/block/sheepdog.c index 6e73efbad1..809df39d9e 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -745,8 +745,8 @@ static void coroutine_fn aio_read_response(void *opaque) } break; case AIOCB_READ_UDATA: - ret = qemu_co_recvv(fd, acb->qiov->iov, rsp.data_length, - aio_req->iov_offset); + ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov, + aio_req->iov_offset, rsp.data_length); if (ret < 0) { error_report("failed to get the data, %s", strerror(errno)); goto out; @@ -1019,7 +1019,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } if (wlen) { - ret = qemu_co_sendv(s->fd, iov, wlen, aio_req->iov_offset); + ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen); if (ret < 0) { qemu_co_mutex_unlock(&s->lock); error_report("failed to send a data, %s", strerror(errno)); diff --git a/cutils.c b/cutils.c index af308cd7b9..e2bc1b89df 100644 --- a/cutils.c +++ b/cutils.c @@ -26,6 +26,7 @@ #include #include "qemu_socket.h" +#include "iov.h" void pstrcpy(char *buf, int buf_size, const char *str) { @@ -171,48 +172,34 @@ void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) } /* - * Copies iovecs from src to the end of dst. It starts copying after skipping - * the given number of bytes in src and copies until src is completely copied - * or the total size of the copied iovec reaches size.The size of the last - * copied iovec is changed in order to fit the specified total size if it isn't - * a perfect fit already. + * Concatenates (partial) iovecs from src to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src if it comes first. This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. */ -void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, - size_t size) +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes) { int i; size_t done; - void *iov_base; - uint64_t iov_len; - + struct iovec *siov = src->iov; assert(dst->nalloc != -1); - - done = 0; - for (i = 0; (i < src->niov) && (done != size); i++) { - if (skip >= src->iov[i].iov_len) { - /* Skip the whole iov */ - skip -= src->iov[i].iov_len; - continue; + assert(src->size >= soffset); + for (i = 0, done = 0; done < sbytes && i < src->niov; i++) { + if (soffset < siov[i].iov_len) { + size_t len = MIN(siov[i].iov_len - soffset, sbytes - done); + qemu_iovec_add(dst, siov[i].iov_base + soffset, len); + done += len; + soffset = 0; } else { - /* Skip only part (or nothing) of the iov */ - iov_base = (uint8_t*) src->iov[i].iov_base + skip; - iov_len = src->iov[i].iov_len - skip; - skip = 0; + soffset -= siov[i].iov_len; } - - if (done + iov_len > size) { - qemu_iovec_add(dst, iov_base, size - done); - break; - } else { - qemu_iovec_add(dst, iov_base, iov_len); - } - done += iov_len; } -} - -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size) -{ - qemu_iovec_copy(dst, src, 0, size); + /* return done; */ } void qemu_iovec_destroy(QEMUIOVector *qiov) @@ -233,74 +220,22 @@ void qemu_iovec_reset(QEMUIOVector *qiov) qiov->size = 0; } -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf) +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes) { - uint8_t *p = (uint8_t *)buf; - int i; - - for (i = 0; i < qiov->niov; ++i) { - memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); - p += qiov->iov[i].iov_len; - } + return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); } -void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count) +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes) { - const uint8_t *p = (const uint8_t *)buf; - size_t copy; - int i; - - for (i = 0; i < qiov->niov && count; ++i) { - copy = count; - if (copy > qiov->iov[i].iov_len) - copy = qiov->iov[i].iov_len; - memcpy(qiov->iov[i].iov_base, p, copy); - p += copy; - count -= copy; - } + return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); } -void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count) +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes) { - size_t n; - int i; - - for (i = 0; i < qiov->niov && count; ++i) { - n = MIN(count, qiov->iov[i].iov_len); - memset(qiov->iov[i].iov_base, c, n); - count -= n; - } -} - -void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, - size_t skip) -{ - int i; - size_t done; - void *iov_base; - uint64_t iov_len; - - done = 0; - for (i = 0; (i < qiov->niov) && (done != count); i++) { - if (skip >= qiov->iov[i].iov_len) { - /* Skip the whole iov */ - skip -= qiov->iov[i].iov_len; - continue; - } else { - /* Skip only part (or nothing) of the iov */ - iov_base = (uint8_t*) qiov->iov[i].iov_base + skip; - iov_len = qiov->iov[i].iov_len - skip; - skip = 0; - } - - if (done + iov_len > count) { - memset(iov_base, c, count - done); - break; - } else { - memset(iov_base, c, iov_len); - } - done += iov_len; - } + return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); } /* @@ -440,112 +375,3 @@ int qemu_parse_fd(const char *param) } return fd; } - -/* - * Send/recv data with iovec buffers - * - * This function send/recv data from/to the iovec buffer directly. - * The first `offset' bytes in the iovec buffer are skipped and next - * `len' bytes are used. - * - * For example, - * - * do_sendv_recvv(sockfd, iov, len, offset, 1); - * - * is equal to - * - * char *buf = malloc(size); - * iov_to_buf(iov, iovcnt, buf, offset, size); - * send(sockfd, buf, size, 0); - * free(buf); - */ -static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, - int do_sendv) -{ - int ret, diff, iovlen; - struct iovec *last_iov; - - /* last_iov is inclusive, so count from one. */ - iovlen = 1; - last_iov = iov; - len += offset; - - while (last_iov->iov_len < len) { - len -= last_iov->iov_len; - - last_iov++; - iovlen++; - } - - diff = last_iov->iov_len - len; - last_iov->iov_len -= diff; - - while (iov->iov_len <= offset) { - offset -= iov->iov_len; - - iov++; - iovlen--; - } - - iov->iov_base = (char *) iov->iov_base + offset; - iov->iov_len -= offset; - - { -#if defined CONFIG_IOVEC && defined CONFIG_POSIX - struct msghdr msg; - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = iov; - msg.msg_iovlen = iovlen; - - do { - if (do_sendv) { - ret = sendmsg(sockfd, &msg, 0); - } else { - ret = recvmsg(sockfd, &msg, 0); - } - } while (ret == -1 && errno == EINTR); -#else - struct iovec *p = iov; - ret = 0; - while (iovlen > 0) { - int rc; - if (do_sendv) { - rc = send(sockfd, p->iov_base, p->iov_len, 0); - } else { - rc = qemu_recv(sockfd, p->iov_base, p->iov_len, 0); - } - if (rc == -1) { - if (errno == EINTR) { - continue; - } - if (ret == 0) { - ret = -1; - } - break; - } - if (rc == 0) { - break; - } - ret += rc; - iovlen--, p++; - } -#endif - } - - /* Undo the changes above */ - iov->iov_base = (char *) iov->iov_base - offset; - iov->iov_len += offset; - last_iov->iov_len += diff; - return ret; -} - -int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset) -{ - return do_sendv_recvv(sockfd, iov, len, iov_offset, 0); -} - -int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset) -{ - return do_sendv_recvv(sockfd, iov, len, iov_offset, 1); -} - diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c index c633fb9b7e..f4a7026381 100644 --- a/hw/9pfs/virtio-9p.c +++ b/hw/9pfs/virtio-9p.c @@ -1648,7 +1648,7 @@ out: * with qemu_iovec_destroy(). */ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, - uint64_t skip, size_t size, + size_t skip, size_t size, bool is_write) { QEMUIOVector elem; @@ -1665,7 +1665,7 @@ static void v9fs_init_qiov_from_pdu(QEMUIOVector *qiov, V9fsPDU *pdu, qemu_iovec_init_external(&elem, iov, niov); qemu_iovec_init(qiov, niov); - qemu_iovec_copy(qiov, &elem, skip, size); + qemu_iovec_concat(qiov, &elem, skip, size); } static void v9fs_read(void *opaque) @@ -1715,7 +1715,7 @@ static void v9fs_read(void *opaque) qemu_iovec_init(&qiov, qiov_full.niov); do { qemu_iovec_reset(&qiov); - qemu_iovec_copy(&qiov, &qiov_full, count, qiov_full.size - count); + qemu_iovec_concat(&qiov, &qiov_full, count, qiov_full.size - count); if (0) { print_sg(qiov.iov, qiov.niov); } @@ -1970,7 +1970,7 @@ static void v9fs_write(void *opaque) qemu_iovec_init(&qiov, qiov_full.niov); do { qemu_iovec_reset(&qiov); - qemu_iovec_copy(&qiov, &qiov_full, total, qiov_full.size - total); + qemu_iovec_concat(&qiov, &qiov_full, total, qiov_full.size - total); if (0) { print_sg(qiov.iov, qiov.niov); } diff --git a/hw/rtl8139.c b/hw/rtl8139.c index 7b150475f4..436b015c64 100644 --- a/hw/rtl8139.c +++ b/hw/rtl8139.c @@ -1785,7 +1785,7 @@ static void rtl8139_transfer_frame(RTL8139State *s, uint8_t *buf, int size, if (iov) { buf2_size = iov_size(iov, 3); buf2 = g_malloc(buf2_size); - iov_to_buf(iov, 3, buf2, 0, buf2_size); + iov_to_buf(iov, 3, 0, buf2, buf2_size); buf = buf2; } diff --git a/hw/usb/core.c b/hw/usb/core.c index 0614f76f4f..01a7622837 100644 --- a/hw/usb/core.c +++ b/hw/usb/core.c @@ -522,10 +522,10 @@ void usb_packet_copy(USBPacket *p, void *ptr, size_t bytes) switch (p->pid) { case USB_TOKEN_SETUP: case USB_TOKEN_OUT: - iov_to_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes); + iov_to_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes); break; case USB_TOKEN_IN: - iov_from_buf(p->iov.iov, p->iov.niov, ptr, p->result, bytes); + iov_from_buf(p->iov.iov, p->iov.niov, p->result, ptr, bytes); break; default: fprintf(stderr, "%s: invalid pid: %x\n", __func__, p->pid); @@ -539,7 +539,7 @@ void usb_packet_skip(USBPacket *p, size_t bytes) assert(p->result >= 0); assert(p->result + bytes <= p->iov.size); if (p->pid == USB_TOKEN_IN) { - iov_clear(p->iov.iov, p->iov.niov, p->result, bytes); + iov_memset(p->iov.iov, p->iov.niov, p->result, 0, bytes); } p->result += bytes; } diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c index d048cef50f..dd1a6506cf 100644 --- a/hw/virtio-balloon.c +++ b/hw/virtio-balloon.c @@ -77,7 +77,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) size_t offset = 0; uint32_t pfn; - while (iov_to_buf(elem.out_sg, elem.out_num, &pfn, offset, 4) == 4) { + while (iov_to_buf(elem.out_sg, elem.out_num, offset, &pfn, 4) == 4) { ram_addr_t pa; ram_addr_t addr; @@ -118,7 +118,7 @@ static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) */ reset_stats(s); - while (iov_to_buf(elem->out_sg, elem->out_num, &stat, offset, sizeof(stat)) + while (iov_to_buf(elem->out_sg, elem->out_num, offset, &stat, sizeof(stat)) == sizeof(stat)) { uint16_t tag = tswap16(stat.tag); uint64_t val = tswap64(stat.val); diff --git a/hw/virtio-net.c b/hw/virtio-net.c index 3f190d417e..533aa3d0f3 100644 --- a/hw/virtio-net.c +++ b/hw/virtio-net.c @@ -656,8 +656,8 @@ static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_ } /* copy in packet. ugh */ - len = iov_from_buf(sg, elem.in_num, - buf + offset, 0, size - offset); + len = iov_from_buf(sg, elem.in_num, 0, + buf + offset, size - offset); total += len; offset += len; /* If buffers can't be merged, at this point we diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c index 96382a4ea1..82073f5dc2 100644 --- a/hw/virtio-serial-bus.c +++ b/hw/virtio-serial-bus.c @@ -106,8 +106,8 @@ static size_t write_to_port(VirtIOSerialPort *port, break; } - len = iov_from_buf(elem.in_sg, elem.in_num, - buf + offset, 0, size - offset); + len = iov_from_buf(elem.in_sg, elem.in_num, 0, + buf + offset, size - offset); offset += len; virtqueue_push(vq, &elem, len); @@ -454,7 +454,7 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) len = 0; buf = NULL; while (virtqueue_pop(vq, &elem)) { - size_t cur_len, copied; + size_t cur_len; cur_len = iov_size(elem.out_sg, elem.out_num); /* @@ -467,9 +467,9 @@ static void control_out(VirtIODevice *vdev, VirtQueue *vq) buf = g_malloc(cur_len); len = cur_len; } - copied = iov_to_buf(elem.out_sg, elem.out_num, buf, 0, len); + iov_to_buf(elem.out_sg, elem.out_num, 0, buf, cur_len); - handle_control_message(vser, buf, copied); + handle_control_message(vser, buf, cur_len); virtqueue_push(vq, &elem, 0); } g_free(buf); diff --git a/iov.c b/iov.c index 0f964939d0..7cc08f0fe4 100644 --- a/iov.c +++ b/iov.c @@ -7,6 +7,7 @@ * Author(s): * Anthony Liguori * Amit Shah + * Michael Tokarev * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -17,75 +18,69 @@ #include "iov.h" +#ifdef _WIN32 +# include +# include +#else +# include +# include +#endif + size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, - const void *buf, size_t iov_off, size_t size) + size_t offset, const void *buf, size_t bytes) { - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off, size); - - memcpy(iov[i].iov_base + (iov_off - iovec_off), buf + buf_off, len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(iov[i].iov_base + offset, buf + done, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - void *buf, size_t iov_off, size_t size) + size_t offset, void *buf, size_t bytes) { - uint8_t *ptr; - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - ptr = buf; - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - - memcpy(ptr + buf_off, iov[i].iov_base + (iov_off - iovec_off), len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(buf + done, iov[i].iov_base + offset, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } -size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, size_t size) +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, int fillc, size_t bytes) { - size_t iovec_off, buf_off; + size_t done; unsigned int i; - - iovec_off = 0; - buf_off = 0; - for (i = 0; i < iov_cnt && size; i++) { - if (iov_off < (iovec_off + iov[i].iov_len)) { - size_t len = MIN((iovec_off + iov[i].iov_len) - iov_off , size); - - memset(iov[i].iov_base + (iov_off - iovec_off), 0, len); - - buf_off += len; - iov_off += len; - size -= len; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memset(iov[i].iov_base + offset, fillc, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; } - iovec_off += iov[i].iov_len; } - return buf_off; + assert(offset == 0); + return done; } size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) @@ -100,6 +95,101 @@ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) return len; } +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#if defined CONFIG_IOVEC && defined CONFIG_POSIX + ssize_t ret; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_cnt; + do { + ret = do_send + ? sendmsg(sockfd, &msg, 0) + : recvmsg(sockfd, &msg, 0); + } while (ret < 0 && errno == EINTR); + return ret; +#else + /* else send piece-by-piece */ + /*XXX Note: windows has WSASend() and WSARecv() */ + unsigned i; + size_t count = 0; + for (i = 0; i < iov_cnt; ++i) { + ssize_t r = do_send + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. */ + if (ret == 0) { + return -1; + } + break; + } + } + return count; +#endif +} + +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, + bool do_send) +{ + ssize_t ret; + unsigned si, ei; /* start and end indexes */ + + /* Find the start position, skipping `offset' bytes: + * first, skip all full-sized vector elements, */ + for (si = 0; si < iov_cnt && offset >= iov[si].iov_len; ++si) { + offset -= iov[si].iov_len; + } + if (offset) { + assert(si < iov_cnt); + /* second, skip `offset' bytes from the (now) first element, + * undo it on exit */ + iov[si].iov_base += offset; + iov[si].iov_len -= offset; + } + /* Find the end position skipping `bytes' bytes: */ + /* first, skip all full-sized elements */ + for (ei = si; ei < iov_cnt && iov[ei].iov_len <= bytes; ++ei) { + bytes -= iov[ei].iov_len; + } + if (bytes) { + /* second, fixup the last element, and remember + * the length we've cut from the end of it in `bytes' */ + size_t tail; + assert(ei < iov_cnt); + assert(iov[ei].iov_len > bytes); + tail = iov[ei].iov_len - bytes; + iov[ei].iov_len = bytes; + bytes = tail; /* bytes is now equal to the tail size */ + ++ei; + } + + ret = do_send_recv(sockfd, iov + si, ei - si, do_send); + + /* Undo the changes above */ + if (offset) { + iov[si].iov_base -= offset; + iov[si].iov_len += offset; + } + if (bytes) { + iov[ei-1].iov_len += bytes; + } + + return ret; +} + + void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit) { diff --git a/iov.h b/iov.h index 94d2f78284..381f37a546 100644 --- a/iov.h +++ b/iov.h @@ -1,10 +1,11 @@ /* - * Helpers for getting linearized buffers from iov / filling buffers into iovs + * Helpers for using (partial) iovecs. * * Copyright (C) 2010 Red Hat, Inc. * * Author(s): * Amit Shah + * Michael Tokarev * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. @@ -12,12 +13,76 @@ #include "qemu-common.h" -size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, - const void *buf, size_t iov_off, size_t size); -size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, - void *buf, size_t iov_off, size_t size); +/** + * count and return data size, in bytes, of an iovec + * starting at `iov' of `iov_cnt' number of elements. + */ size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt); -size_t iov_clear(const struct iovec *iov, const unsigned int iov_cnt, - size_t iov_off, size_t size); + +/** + * Copy from single continuous buffer to scatter-gather vector of buffers + * (iovec) and back like memcpy() between two continuous memory regions. + * Data in single continuous buffer starting at address `buf' and + * `bytes' bytes long will be copied to/from an iovec `iov' with + * `iov_cnt' number of elements, starting at byte position `offset' + * within the iovec. If the iovec does not contain enough space, + * only part of data will be copied, up to the end of the iovec. + * Number of bytes actually copied will be returned, which is + * min(bytes, iov_size(iov)-offset) + * `Offset' must point to the inside of iovec. + * It is okay to use very large value for `bytes' since we're + * limited by the size of the iovec anyway, provided that the + * buffer pointed to by buf has enough space. One possible + * such "large" value is -1 (sinice size_t is unsigned), + * so specifying `-1' as `bytes' means 'up to the end of iovec'. + */ +size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes); +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes); + +/** + * Set data bytes pointed out by iovec `iov' of size `iov_cnt' elements, + * starting at byte offset `start', to value `fillc', repeating it + * `bytes' number of times. `Offset' must point to the inside of iovec. + * If `bytes' is large enough, only last bytes portion of iovec, + * up to the end of it, will be filled with the specified value. + * Function return actual number of bytes processed, which is + * min(size, iov_size(iov) - offset). + * Again, it is okay to use large value for `bytes' to mean "up to the end". + */ +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, int fillc, size_t bytes); + +/* + * Send/recv data from/to iovec buffers directly + * + * `offset' bytes in the beginning of iovec buffer are skipped and + * next `bytes' bytes are used, which must be within data of iovec. + * + * r = iov_send_recv(sockfd, iov, iovcnt, offset, bytes, true); + * + * is logically equivalent to + * + * char *buf = malloc(bytes); + * iov_to_buf(iov, iovcnt, offset, buf, bytes); + * r = send(sockfd, buf, bytes, 0); + * free(buf); + * + * For iov_send_recv() _whole_ area being sent or received + * should be within the iovec, not only beginning of it. + */ +ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send); +#define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) +#define iov_send(sockfd, iov, iov_cnt, offset, bytes) \ + iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, true) + +/** + * Produce a text hexdump of iovec `iov' with `iov_cnt' number of elements + * in file `fp', prefixing each line with `prefix' and processing not more + * than `limit' data bytes. + */ void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, FILE *fp, const char *prefix, size_t limit); diff --git a/linux-aio.c b/linux-aio.c index fa0fbf34aa..ce9b5d4be8 100644 --- a/linux-aio.c +++ b/linux-aio.c @@ -63,8 +63,8 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s, } else if (ret >= 0) { /* Short reads mean EOF, pad with zeros. */ if (laiocb->is_read) { - qemu_iovec_memset_skip(laiocb->qiov, 0, - laiocb->qiov->size - ret, ret); + qemu_iovec_memset(laiocb->qiov, ret, 0, + laiocb->qiov->size - ret); } else { ret = -EINVAL; } diff --git a/net.c b/net.c index 4aa416cffb..abf0fd0a0d 100644 --- a/net.c +++ b/net.c @@ -544,7 +544,7 @@ static ssize_t vc_sendv_compat(VLANClientState *vc, const struct iovec *iov, uint8_t buffer[4096]; size_t offset; - offset = iov_to_buf(iov, iovcnt, buffer, 0, sizeof(buffer)); + offset = iov_to_buf(iov, iovcnt, 0, buffer, sizeof(buffer)); return vc->info->receive(vc, buffer, offset); } diff --git a/posix-aio-compat.c b/posix-aio-compat.c index 68361f555a..96e4daf505 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -29,6 +29,7 @@ #include "qemu-common.h" #include "trace.h" #include "block_int.h" +#include "iov.h" #include "block/raw-posix-aio.h" @@ -351,11 +352,8 @@ static void *aio_thread(void *unused) if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->common.bs->growable) { /* A short read means that we have reached EOF. Pad the buffer * with zeros for bytes after EOF. */ - QEMUIOVector qiov; - - qemu_iovec_init_external(&qiov, aiocb->aio_iov, - aiocb->aio_niov); - qemu_iovec_memset_skip(&qiov, 0, aiocb->aio_nbytes - ret, ret); + iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret, + 0, aiocb->aio_nbytes - ret); ret = aiocb->aio_nbytes; } diff --git a/qemu-common.h b/qemu-common.h index c8c6b2ae35..09676f529f 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -206,9 +206,6 @@ int qemu_pipe(int pipefd[2]); #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags) #endif -int qemu_recvv(int sockfd, struct iovec *iov, int len, int iov_offset); -int qemu_sendv(int sockfd, struct iovec *iov, int len, int iov_offset); - /* Error handling. */ void QEMU_NORETURN hw_error(const char *fmt, ...) GCC_FMT_ATTR(1, 2); @@ -312,32 +309,29 @@ struct qemu_work_item { void qemu_init_vcpu(void *env); #endif -/** - * Sends an iovec (or optionally a part of it) down a socket, yielding - * when the socket is full. - */ -int qemu_co_sendv(int sockfd, struct iovec *iov, - int len, int iov_offset); /** - * Receives data into an iovec (or optionally into a part of it) from - * a socket, yielding when there is no data in the socket. + * Sends a (part of) iovec down a socket, yielding when the socket is full, or + * Receives data into a (part of) iovec from a socket, + * yielding when there is no data in the socket. + * The same interface as qemu_sendv_recvv(), with added yielding. + * XXX should mark these as coroutine_fn */ -int qemu_co_recvv(int sockfd, struct iovec *iov, - int len, int iov_offset); - +ssize_t qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send); +#define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false) +#define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \ + qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true) /** - * Sends a buffer down a socket, yielding when the socket is full. + * The same as above, but with just a single buffer */ -int qemu_co_send(int sockfd, void *buf, int len); - -/** - * Receives data into a buffer from a socket, yielding when there - * is no data in the socket. - */ -int qemu_co_recv(int sockfd, void *buf, int len); - +ssize_t qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send); +#define qemu_co_recv(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, false) +#define qemu_co_send(sockfd, buf, bytes) \ + qemu_co_send_recv(sockfd, buf, bytes, true) typedef struct QEMUIOVector { struct iovec *iov; @@ -349,16 +343,16 @@ typedef struct QEMUIOVector { void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); -void qemu_iovec_copy(QEMUIOVector *dst, QEMUIOVector *src, uint64_t skip, - size_t size); -void qemu_iovec_concat(QEMUIOVector *dst, QEMUIOVector *src, size_t size); +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes); void qemu_iovec_destroy(QEMUIOVector *qiov); void qemu_iovec_reset(QEMUIOVector *qiov); -void qemu_iovec_to_buffer(QEMUIOVector *qiov, void *buf); -void qemu_iovec_from_buffer(QEMUIOVector *qiov, const void *buf, size_t count); -void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count); -void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count, - size_t skip); +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes); +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes); +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes); bool buffer_is_zero(const void *buf, size_t len); diff --git a/qemu-coroutine-io.c b/qemu-coroutine-io.c index 40fd514395..5734965003 100644 --- a/qemu-coroutine-io.c +++ b/qemu-coroutine-io.c @@ -25,72 +25,41 @@ #include "qemu-common.h" #include "qemu_socket.h" #include "qemu-coroutine.h" +#include "iov.h" -int coroutine_fn qemu_co_recvv(int sockfd, struct iovec *iov, - int len, int iov_offset) +ssize_t coroutine_fn +qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send) { - int total = 0; - int ret; - while (len) { - ret = qemu_recvv(sockfd, iov, len, iov_offset + total); - if (ret < 0) { + size_t done = 0; + ssize_t ret; + while (done < bytes) { + ret = iov_send_recv(sockfd, iov, iov_cnt, + offset + done, bytes - done, do_send); + if (ret > 0) { + done += ret; + } else if (ret < 0) { if (errno == EAGAIN) { qemu_coroutine_yield(); - continue; - } - if (total == 0) { - total = -1; + } else if (done == 0) { + return -1; + } else { + break; } + } else if (ret == 0 && !do_send) { + /* write (send) should never return 0. + * read (recv) returns 0 for end-of-file (-data). + * In both cases there's little point retrying, + * but we do for write anyway, just in case */ break; } - if (ret == 0) { - break; - } - total += ret, len -= ret; } - - return total; + return done; } -int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov, - int len, int iov_offset) +ssize_t coroutine_fn +qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send) { - int total = 0; - int ret; - while (len) { - ret = qemu_sendv(sockfd, iov, len, iov_offset + total); - if (ret < 0) { - if (errno == EAGAIN) { - qemu_coroutine_yield(); - continue; - } - if (total == 0) { - total = -1; - } - break; - } - total += ret, len -= ret; - } - - return total; -} - -int coroutine_fn qemu_co_recv(int sockfd, void *buf, int len) -{ - struct iovec iov; - - iov.iov_base = buf; - iov.iov_len = len; - - return qemu_co_recvv(sockfd, &iov, len, 0); -} - -int coroutine_fn qemu_co_send(int sockfd, void *buf, int len) -{ - struct iovec iov; - - iov.iov_base = buf; - iov.iov_len = len; - - return qemu_co_sendv(sockfd, &iov, len, 0); + struct iovec iov = { .iov_base = buf, .iov_len = bytes }; + return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send); } diff --git a/tests/Makefile b/tests/Makefile index d66ab196a7..d687ecce3f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -14,6 +14,7 @@ check-unit-y += tests/test-string-input-visitor$(EXESUF) check-unit-y += tests/test-string-output-visitor$(EXESUF) check-unit-y += tests/test-coroutine$(EXESUF) check-unit-y += tests/test-visitor-serialization$(EXESUF) +check-unit-y += tests/test-iov$(EXESUF) check-block-$(CONFIG_POSIX) += tests/qemu-iotests-quick.sh @@ -47,6 +48,7 @@ tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o $(tools-obj-y) tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o $(tools-obj-y) tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y) tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y) +tests/test-iov$(EXESUF): tests/test-iov.o iov.o tests/test-qapi-types.c tests/test-qapi-types.h :\ $(SRC_PATH)/qapi-schema-test.json $(SRC_PATH)/scripts/qapi-types.py diff --git a/tests/test-iov.c b/tests/test-iov.c new file mode 100644 index 0000000000..cbe7a8955c --- /dev/null +++ b/tests/test-iov.c @@ -0,0 +1,260 @@ +#include +#include "qemu-common.h" +#include "iov.h" +#include "qemu_socket.h" + +/* create a randomly-sized iovec with random vectors */ +static void iov_random(struct iovec **iovp, unsigned *iov_cntp) +{ + unsigned niov = g_test_rand_int_range(3,8); + struct iovec *iov = g_malloc(niov * sizeof(*iov)); + unsigned i; + for (i = 0; i < niov; ++i) { + iov[i].iov_len = g_test_rand_int_range(5,20); + iov[i].iov_base = g_malloc(iov[i].iov_len); + } + *iovp = iov; + *iov_cntp = niov; +} + +static void iov_free(struct iovec *iov, unsigned niov) +{ + unsigned i; + for (i = 0; i < niov; ++i) { + g_free(iov[i].iov_base); + } + g_free(iov); +} + +static void test_iov_bytes(struct iovec *iov, unsigned niov, + size_t offset, size_t bytes) +{ + unsigned i; + size_t j, o; + unsigned char *b; + o = 0; + + /* we walk over all elements, */ + for (i = 0; i < niov; ++i) { + b = iov[i].iov_base; + /* over each char of each element, */ + for (j = 0; j < iov[i].iov_len; ++j) { + /* counting each of them and + * verifying that the ones within [offset,offset+bytes) + * range are equal to the position number (o) */ + if (o >= offset && o < offset + bytes) { + g_assert(b[j] == (o & 255)); + } else { + g_assert(b[j] == 0xff); + } + ++o; + } + } +} + +static void test_to_from_buf_1(void) +{ + unsigned niov; + struct iovec *iov; + size_t sz; + unsigned char *ibuf, *obuf; + unsigned i, j, n; + + iov_random(&iov, &niov); + + sz = iov_size(iov, niov); + + ibuf = g_malloc(sz + 8) + 4; + memcpy(ibuf-4, "aaaa", 4); memcpy(ibuf + sz, "bbbb", 4); + obuf = g_malloc(sz + 8) + 4; + memcpy(obuf-4, "xxxx", 4); memcpy(obuf + sz, "yyyy", 4); + + /* fill in ibuf with 0123456... */ + for (i = 0; i < sz; ++i) { + ibuf[i] = i & 255; + } + + for (i = 0; i <= sz; ++i) { + + /* Test from/to buf for offset(i) in [0..sz] up to the end of buffer. + * For last iteration with offset == sz, the procedure should + * skip whole vector and process exactly 0 bytes */ + + /* first set bytes [i..sz) to some "random" value */ + n = iov_memset(iov, niov, 0, 0xff, -1); + g_assert(n == sz); + + /* next copy bytes [i..sz) from ibuf to iovec */ + n = iov_from_buf(iov, niov, i, ibuf + i, -1); + g_assert(n == sz - i); + + /* clear part of obuf */ + memset(obuf + i, 0, sz - i); + /* and set this part of obuf to values from iovec */ + n = iov_to_buf(iov, niov, i, obuf + i, -1); + g_assert(n == sz - i); + + /* now compare resulting buffers */ + g_assert(memcmp(ibuf, obuf, sz) == 0); + + /* test just one char */ + n = iov_to_buf(iov, niov, i, obuf + i, 1); + g_assert(n == (i < sz)); + if (n) { + g_assert(obuf[i] == (i & 255)); + } + + for (j = i; j <= sz; ++j) { + /* now test num of bytes cap up to byte no. j, + * with j in [i..sz]. */ + + /* clear iovec */ + n = iov_memset(iov, niov, 0, 0xff, -1); + g_assert(n == sz); + + /* copy bytes [i..j) from ibuf to iovec */ + n = iov_from_buf(iov, niov, i, ibuf + i, j - i); + g_assert(n == j - i); + + /* clear part of obuf */ + memset(obuf + i, 0, j - i); + + /* copy bytes [i..j) from iovec to obuf */ + n = iov_to_buf(iov, niov, i, obuf + i, j - i); + g_assert(n == j - i); + + /* verify result */ + g_assert(memcmp(ibuf, obuf, sz) == 0); + + /* now actually check if the iovec contains the right data */ + test_iov_bytes(iov, niov, i, j - i); + } + } + g_assert(!memcmp(ibuf-4, "aaaa", 4) && !memcmp(ibuf+sz, "bbbb", 4)); + g_free(ibuf-4); + g_assert(!memcmp(obuf-4, "xxxx", 4) && !memcmp(obuf+sz, "yyyy", 4)); + g_free(obuf-4); + iov_free(iov, niov); +} + +static void test_to_from_buf(void) +{ + int x; + for (x = 0; x < 4; ++x) { + test_to_from_buf_1(); + } +} + +static void test_io(void) +{ +#ifndef _WIN32 +/* socketpair(PF_UNIX) which does not exist on windows */ + + int sv[2]; + int r; + unsigned i, j, k, s, t; + fd_set fds; + unsigned niov; + struct iovec *iov, *siov; + unsigned char *buf; + size_t sz; + + iov_random(&iov, &niov); + sz = iov_size(iov, niov); + buf = g_malloc(sz); + for (i = 0; i < sz; ++i) { + buf[i] = i & 255; + } + iov_from_buf(iov, niov, 0, buf, sz); + + siov = g_malloc(sizeof(*iov) * niov); + memcpy(siov, iov, sizeof(*iov) * niov); + + if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) < 0) { + perror("socketpair"); + exit(1); + } + + FD_ZERO(&fds); + + t = 0; + if (fork() == 0) { + /* writer */ + + close(sv[0]); + FD_SET(sv[1], &fds); + fcntl(sv[1], F_SETFL, O_RDWR|O_NONBLOCK); + r = g_test_rand_int_range(sz / 2, sz); + setsockopt(sv[1], SOL_SOCKET, SO_SNDBUF, &r, sizeof(r)); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + do { + s = g_test_rand_int_range(0, j - k + 1); + r = iov_send(sv[1], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r >= 0) { + k += r; + t += r; + usleep(g_test_rand_int_range(0, 30)); + } else if (errno == EAGAIN) { + select(sv[1]+1, NULL, &fds, NULL, NULL); + continue; + } else { + perror("send"); + exit(1); + } + } while(k < j); + } + } + exit(0); + + } else { + /* reader & verifier */ + + close(sv[1]); + FD_SET(sv[0], &fds); + fcntl(sv[0], F_SETFL, O_RDWR|O_NONBLOCK); + r = g_test_rand_int_range(sz / 2, sz); + setsockopt(sv[0], SOL_SOCKET, SO_RCVBUF, &r, sizeof(r)); + usleep(500000); + + for (i = 0; i <= sz; ++i) { + for (j = i; j <= sz; ++j) { + k = i; + iov_memset(iov, niov, 0, 0xff, -1); + do { + s = g_test_rand_int_range(0, j - k + 1); + r = iov_recv(sv[0], iov, niov, k, s); + g_assert(memcmp(iov, siov, sizeof(*iov)*niov) == 0); + if (r > 0) { + k += r; + t += r; + } else if (!r) { + if (s) { + break; + } + } else if (errno == EAGAIN) { + select(sv[0]+1, &fds, NULL, NULL, NULL); + continue; + } else { + perror("recv"); + exit(1); + } + } while(k < j); + test_iov_bytes(iov, niov, i, j - i); + } + } + } +#endif +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + g_test_rand_int(); + g_test_add_func("/basic/iov/from-to-buf", test_to_from_buf); + g_test_add_func("/basic/iov/io", test_io); + return g_test_run(); +}