diff --git a/block.c b/block.c index b5e64ecf11..6fa7c90144 100644 --- a/block.c +++ b/block.c @@ -155,10 +155,6 @@ void bdrv_io_limits_enable(BlockDriverState *bs) { qemu_co_queue_init(&bs->throttled_reqs); bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs); - bs->slice_time = 5 * BLOCK_IO_SLICE_TIME; - bs->slice_start = qemu_get_clock_ns(vm_clock); - bs->slice_end = bs->slice_start + bs->slice_time; - memset(&bs->io_base, 0, sizeof(bs->io_base)); bs->io_limits_enabled = true; } @@ -4174,7 +4170,13 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return -EIO; } else if (bs->read_only) { return -EROFS; - } else if (bs->drv->bdrv_co_discard) { + } + + if (bs->dirty_bitmap) { + set_dirty_bitmap(bs, sector_num, nb_sectors, 0); + } + + if (bs->drv->bdrv_co_discard) { return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors); } else if (bs->drv->bdrv_aio_discard) { BlockDriverAIOCB *acb; diff --git a/block/mirror.c b/block/mirror.c index 8aeacbf12c..6180aa30e5 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -225,7 +225,7 @@ static void coroutine_fn mirror_run(void *opaque) } immediate_exit: - g_free(s->buf); + qemu_vfree(s->buf); bdrv_set_dirty_tracking(bs, false); bdrv_iostatus_disable(s->target); if (s->should_complete && ret == 0) { diff --git a/block/qcow2.c b/block/qcow2.c index d603f98a9c..f6abff6111 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -759,7 +759,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, QEMUIOVector hd_qiov; uint64_t bytes_done = 0; uint8_t *cluster_data = NULL; - QCowL2Meta *l2meta; + QCowL2Meta *l2meta = NULL; trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num, remaining_sectors); diff --git a/block/raw-aio.h b/block/raw-aio.h index e77f361148..c61f1595d9 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -20,11 +20,14 @@ #define QEMU_AIO_WRITE 0x0002 #define QEMU_AIO_IOCTL 0x0004 #define QEMU_AIO_FLUSH 0x0008 +#define QEMU_AIO_DISCARD 0x0010 #define QEMU_AIO_TYPE_MASK \ - (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH) + (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \ + QEMU_AIO_DISCARD) /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 +#define QEMU_AIO_BLKDEV 0x2000 /* linux-aio.c - Linux native implementation */ diff --git a/block/raw-posix.c b/block/raw-posix.c index c3d7fda7b7..679fcc5113 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -59,6 +59,9 @@ #ifdef CONFIG_FIEMAP #include #endif +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE +#include +#endif #if defined (__FreeBSD__) || defined(__FreeBSD_kernel__) #include #include @@ -138,6 +141,7 @@ typedef struct BDRVRawState { #ifdef CONFIG_XFS bool is_xfs : 1; #endif + bool has_discard : 1; } BDRVRawState; typedef struct BDRVRawReopenState { @@ -159,7 +163,7 @@ typedef struct RawPosixAIOData { void *aio_ioctl_buf; }; int aio_niov; - size_t aio_nbytes; + uint64_t aio_nbytes; #define aio_ioctl_cmd aio_nbytes /* for QEMU_AIO_IOCTL */ off_t aio_offset; int aio_type; @@ -289,6 +293,7 @@ static int raw_open_common(BlockDriverState *bs, const char *filename, } #endif + s->has_discard = 1; #ifdef CONFIG_XFS if (platform_test_xfs_fd(s->fd)) { s->is_xfs = 1; @@ -618,6 +623,72 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) return nbytes; } +#ifdef CONFIG_XFS +static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) +{ + struct xfs_flock64 fl; + + memset(&fl, 0, sizeof(fl)); + fl.l_whence = SEEK_SET; + fl.l_start = offset; + fl.l_len = bytes; + + if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { + DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); + return -errno; + } + + return 0; +} +#endif + +static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb) +{ + int ret = -EOPNOTSUPP; + BDRVRawState *s = aiocb->bs->opaque; + + if (s->has_discard == 0) { + return 0; + } + + if (aiocb->aio_type & QEMU_AIO_BLKDEV) { +#ifdef BLKDISCARD + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } else { +#ifdef CONFIG_XFS + if (s->is_xfs) { + return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes); + } +#endif + +#ifdef CONFIG_FALLOCATE_PUNCH_HOLE + do { + if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + aiocb->aio_offset, aiocb->aio_nbytes) == 0) { + return 0; + } + } while (errno == EINTR); + + ret = -errno; +#endif + } + + if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP || + ret == -ENOTTY) { + s->has_discard = 0; + ret = 0; + } + return ret; +} + static int aio_worker(void *arg) { RawPosixAIOData *aiocb = arg; @@ -652,6 +723,9 @@ static int aio_worker(void *arg) case QEMU_AIO_IOCTL: ret = handle_aiocb_ioctl(aiocb); break; + case QEMU_AIO_DISCARD: + ret = handle_aiocb_discard(aiocb); + break; default: fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type); ret = -EINVAL; @@ -1052,37 +1126,14 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, } } -#ifdef CONFIG_XFS -static int xfs_discard(BDRVRawState *s, int64_t sector_num, int nb_sectors) +static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) { - struct xfs_flock64 fl; - - memset(&fl, 0, sizeof(fl)); - fl.l_whence = SEEK_SET; - fl.l_start = sector_num << 9; - fl.l_len = (int64_t)nb_sectors << 9; - - if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { - DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); - return -errno; - } - - return 0; -} -#endif - -static coroutine_fn int raw_co_discard(BlockDriverState *bs, - int64_t sector_num, int nb_sectors) -{ -#ifdef CONFIG_XFS BDRVRawState *s = bs->opaque; - if (s->is_xfs) { - return xfs_discard(s, sector_num, nb_sectors); - } -#endif - - return 0; + return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + cb, opaque, QEMU_AIO_DISCARD); } static QEMUOptionParameter raw_create_options[] = { @@ -1105,12 +1156,12 @@ static BlockDriver bdrv_file = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, - .bdrv_co_discard = raw_co_discard, .bdrv_co_is_allocated = raw_co_is_allocated, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_aio_discard = raw_aio_discard, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, @@ -1320,6 +1371,19 @@ static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs, return thread_pool_submit_aio(aio_worker, acb, cb, opaque); } +static coroutine_fn BlockDriverAIOCB *hdev_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + BDRVRawState *s = bs->opaque; + + if (fd_open(bs) < 0) { + return NULL; + } + return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors, + cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV); +} + #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) static int fd_open(BlockDriverState *bs) { @@ -1391,6 +1455,7 @@ static BlockDriver bdrv_host_device = { .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, .bdrv_aio_flush = raw_aio_flush, + .bdrv_aio_discard = hdev_aio_discard, .bdrv_truncate = raw_truncate, .bdrv_getlength = raw_getlength, diff --git a/block/sheepdog.c b/block/sheepdog.c index 462c4b2d5d..3e49bb83bb 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -266,6 +266,7 @@ typedef struct AIOReq { enum AIOCBState { AIOCB_WRITE_UDATA, AIOCB_READ_UDATA, + AIOCB_FLUSH_CACHE, }; struct SheepdogAIOCB { @@ -299,7 +300,6 @@ typedef struct BDRVSheepdogState { char *addr; char *port; int fd; - int flush_fd; CoMutex lock; Coroutine *co_send; @@ -427,12 +427,11 @@ static const AIOCBInfo sd_aiocb_info = { }; static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, - int64_t sector_num, int nb_sectors, - BlockDriverCompletionFunc *cb, void *opaque) + int64_t sector_num, int nb_sectors) { SheepdogAIOCB *acb; - acb = qemu_aio_get(&sd_aiocb_info, bs, cb, opaque); + acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL); acb->qiov = qiov; @@ -736,6 +735,13 @@ static void coroutine_fn aio_read_response(void *opaque) goto out; } break; + case AIOCB_FLUSH_CACHE: + if (rsp.result == SD_RES_INVALID_PARMS) { + dprintf("disable cache since the server doesn't support it\n"); + s->cache_flags = SD_FLAG_CMD_DIRECT; + rsp.result = SD_RES_SUCCESS; + } + break; } if (rsp.result != SD_RES_SUCCESS) { @@ -950,7 +956,7 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, { int nr_copies = s->inode.nr_copies; SheepdogObjReq hdr; - unsigned int wlen; + unsigned int wlen = 0; int ret; uint64_t oid = aio_req->oid; unsigned int datalen = aio_req->data_len; @@ -964,18 +970,23 @@ static int coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, memset(&hdr, 0, sizeof(hdr)); - if (aiocb_type == AIOCB_READ_UDATA) { - wlen = 0; + switch (aiocb_type) { + case AIOCB_FLUSH_CACHE: + hdr.opcode = SD_OP_FLUSH_VDI; + break; + case AIOCB_READ_UDATA: hdr.opcode = SD_OP_READ_OBJ; hdr.flags = flags; - } else if (create) { + break; + case AIOCB_WRITE_UDATA: + if (create) { + hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; + } else { + hdr.opcode = SD_OP_WRITE_OBJ; + } wlen = datalen; - hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ; - hdr.flags = SD_FLAG_CMD_WRITE | flags; - } else { - wlen = datalen; - hdr.opcode = SD_OP_WRITE_OBJ; hdr.flags = SD_FLAG_CMD_WRITE | flags; + break; } if (s->cache_flags) { @@ -1127,15 +1138,6 @@ static int sd_open(BlockDriverState *bs, const char *filename, int flags) s->cache_flags = SD_FLAG_CMD_DIRECT; } - if (s->cache_flags == SD_FLAG_CMD_CACHE) { - s->flush_fd = connect_to_sdog(s->addr, s->port); - if (s->flush_fd < 0) { - error_report("failed to connect"); - ret = s->flush_fd; - goto out; - } - } - if (snapid || tag[0] != '\0') { dprintf("%" PRIx32 " snapshot inode was open.\n", vid); s->is_snapshot = true; @@ -1397,9 +1399,6 @@ static void sd_close(BlockDriverState *bs) qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL, NULL); closesocket(s->fd); - if (s->cache_flags) { - closesocket(s->flush_fd); - } g_free(s->addr); } @@ -1672,7 +1671,7 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, bs->total_sectors = sector_num + nb_sectors; } - acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL); + acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); acb->aio_done_func = sd_write_done; acb->aiocb_type = AIOCB_WRITE_UDATA; @@ -1693,7 +1692,7 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, SheepdogAIOCB *acb; int ret; - acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors, NULL, NULL); + acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors); acb->aiocb_type = AIOCB_READ_UDATA; acb->aio_done_func = sd_finish_aiocb; @@ -1711,39 +1710,31 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs) { BDRVSheepdogState *s = bs->opaque; - SheepdogObjReq hdr = { 0 }; - SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr; - SheepdogInode *inode = &s->inode; + SheepdogAIOCB *acb; + AIOReq *aio_req; int ret; - unsigned int wlen = 0, rlen = 0; if (s->cache_flags != SD_FLAG_CMD_CACHE) { return 0; } - hdr.opcode = SD_OP_FLUSH_VDI; - hdr.oid = vid_to_vdi_oid(inode->vdi_id); + acb = sd_aio_setup(bs, NULL, 0, 0); + acb->aiocb_type = AIOCB_FLUSH_CACHE; + acb->aio_done_func = sd_finish_aiocb; - ret = do_req(s->flush_fd, (SheepdogReq *)&hdr, NULL, &wlen, &rlen); - if (ret) { - error_report("failed to send a request to the sheep"); + aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id), + 0, 0, 0, 0, 0); + QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); + ret = add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type); + if (ret < 0) { + error_report("add_aio_request is failed"); + free_aio_req(s, aio_req); + qemu_aio_release(acb); return ret; } - if (rsp->result == SD_RES_INVALID_PARMS) { - dprintf("disable write cache since the server doesn't support it\n"); - - s->cache_flags = SD_FLAG_CMD_DIRECT; - closesocket(s->flush_fd); - return 0; - } - - if (rsp->result != SD_RES_SUCCESS) { - error_report("%s", sd_strerror(rsp->result)); - return -EIO; - } - - return 0; + qemu_coroutine_yield(); + return acb->ret; } static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) diff --git a/block/win32-aio.c b/block/win32-aio.c index 46a5db78cc..03833700b4 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -87,7 +87,7 @@ static void win32_aio_process_completion(QEMUWin32AIOState *s, memcpy(p, qiov->iov[i].iov_base, qiov->iov[i].iov_len); p += qiov->iov[i].iov_len; } - g_free(waiocb->buf); + qemu_vfree(waiocb->buf); } } diff --git a/configure b/configure index c908f66583..40d250ccac 100755 --- a/configure +++ b/configure @@ -2581,6 +2581,22 @@ if compile_prog "" "" ; then fallocate=yes fi +# check for fallocate hole punching +fallocate_punch_hole=no +cat > $TMPC << EOF +#include +#include + +int main(void) +{ + fallocate(0, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, 0); + return 0; +} +EOF +if compile_prog "" "" ; then + fallocate_punch_hole=yes +fi + # check for sync_file_range sync_file_range=no cat > $TMPC << EOF @@ -3490,6 +3506,9 @@ fi if test "$fallocate" = "yes" ; then echo "CONFIG_FALLOCATE=y" >> $config_host_mak fi +if test "$fallocate_punch_hole" = "yes" ; then + echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak +fi if test "$sync_file_range" = "yes" ; then echo "CONFIG_SYNC_FILE_RANGE=y" >> $config_host_mak fi diff --git a/hw/ide/core.c b/hw/ide/core.c index 6f1938a0a8..14ad0799c3 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -325,14 +325,26 @@ typedef struct TrimAIOCB { BlockDriverAIOCB common; QEMUBH *bh; int ret; + QEMUIOVector *qiov; + BlockDriverAIOCB *aiocb; + int i, j; } TrimAIOCB; static void trim_aio_cancel(BlockDriverAIOCB *acb) { TrimAIOCB *iocb = container_of(acb, TrimAIOCB, common); + /* Exit the loop in case bdrv_aio_cancel calls ide_issue_trim_cb again. */ + iocb->j = iocb->qiov->niov - 1; + iocb->i = (iocb->qiov->iov[iocb->j].iov_len / 8) - 1; + + /* Tell ide_issue_trim_cb not to trigger the completion, too. */ qemu_bh_delete(iocb->bh); iocb->bh = NULL; + + if (iocb->aiocb) { + bdrv_aio_cancel(iocb->aiocb); + } qemu_aio_release(iocb); } @@ -349,43 +361,60 @@ static void ide_trim_bh_cb(void *opaque) qemu_bh_delete(iocb->bh); iocb->bh = NULL; - qemu_aio_release(iocb); } +static void ide_issue_trim_cb(void *opaque, int ret) +{ + TrimAIOCB *iocb = opaque; + if (ret >= 0) { + while (iocb->j < iocb->qiov->niov) { + int j = iocb->j; + while (++iocb->i < iocb->qiov->iov[j].iov_len / 8) { + int i = iocb->i; + uint64_t *buffer = iocb->qiov->iov[j].iov_base; + + /* 6-byte LBA + 2-byte range per entry */ + uint64_t entry = le64_to_cpu(buffer[i]); + uint64_t sector = entry & 0x0000ffffffffffffULL; + uint16_t count = entry >> 48; + + if (count == 0) { + continue; + } + + /* Got an entry! Submit and exit. */ + iocb->aiocb = bdrv_aio_discard(iocb->common.bs, sector, count, + ide_issue_trim_cb, opaque); + return; + } + + iocb->j++; + iocb->i = -1; + } + } else { + iocb->ret = ret; + } + + iocb->aiocb = NULL; + if (iocb->bh) { + qemu_bh_schedule(iocb->bh); + } +} + BlockDriverAIOCB *ide_issue_trim(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockDriverCompletionFunc *cb, void *opaque) { TrimAIOCB *iocb; - int i, j, ret; iocb = qemu_aio_get(&trim_aiocb_info, bs, cb, opaque); iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); iocb->ret = 0; - - for (j = 0; j < qiov->niov; j++) { - uint64_t *buffer = qiov->iov[j].iov_base; - - for (i = 0; i < qiov->iov[j].iov_len / 8; i++) { - /* 6-byte LBA + 2-byte range per entry */ - uint64_t entry = le64_to_cpu(buffer[i]); - uint64_t sector = entry & 0x0000ffffffffffffULL; - uint16_t count = entry >> 48; - - if (count == 0) { - break; - } - - ret = bdrv_discard(bs, sector, count); - if (!iocb->ret) { - iocb->ret = ret; - } - } - } - - qemu_bh_schedule(iocb->bh); - + iocb->qiov = qiov; + iocb->i = -1; + iocb->j = 0; + ide_issue_trim_cb(iocb, 0); return &iocb->common; } diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index f8d7ef3374..96db9a73c7 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -85,9 +85,7 @@ static void scsi_free_request(SCSIRequest *req) { SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); - if (r->iov.iov_base) { - qemu_vfree(r->iov.iov_base); - } + qemu_vfree(r->iov.iov_base); } /* Helper function for command completion with sense. */ diff --git a/util/oslib-win32.c b/util/oslib-win32.c index e7e283e875..640194c0cf 100644 --- a/util/oslib-win32.c +++ b/util/oslib-win32.c @@ -71,7 +71,9 @@ void *qemu_vmalloc(size_t size) void qemu_vfree(void *ptr) { trace_qemu_vfree(ptr); - VirtualFree(ptr, 0, MEM_RELEASE); + if (ptr) { + VirtualFree(ptr, 0, MEM_RELEASE); + } } /* FIXME: add proper locking */