diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 39b6016460..4b24ec900d 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -18,6 +18,28 @@ Example: "data": { "actual": 944766976 }, "timestamp": { "seconds": 1267020223, "microseconds": 435656 } } +BLOCK_IMAGE_CORRUPTED +--------------------- + +Emitted when a disk image is being marked corrupt. + +Data: + +- "device": Device name (json-string) +- "msg": Informative message (e.g., reason for the corruption) (json-string) +- "offset": If the corruption resulted from an image access, this is the access + offset into the image (json-int) +- "size": If the corruption resulted from an image access, this is the access + size (json-int) + +Example: + +{ "event": "BLOCK_IMAGE_CORRUPTED", + "data": { "device": "ide0-hd0", + "msg": "Prevented active L1 table overwrite", "offset": 196608, + "size": 65536 }, + "timestamp": { "seconds": 1378126126, "microseconds": 966463 } } + BLOCK_IO_ERROR -------------- diff --git a/block-migration.c b/block-migration.c index f803f2006f..daf9ec1eab 100644 --- a/block-migration.c +++ b/block-migration.c @@ -336,8 +336,8 @@ static void init_blk_migration_it(void *opaque, BlockDriverState *bs) bmds->completed_sectors = 0; bmds->shared_base = block_mig_state.shared_base; alloc_aio_bitmap(bmds); - drive_get_ref(drive_get_by_blockdev(bs)); bdrv_set_in_use(bs, 1); + bdrv_ref(bs); block_mig_state.total_sector_sum += sectors; @@ -575,7 +575,7 @@ static void blk_mig_cleanup(void) while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); bdrv_set_in_use(bmds->bs, 0); - drive_put_ref(drive_get_by_blockdev(bmds->bs)); + bdrv_unref(bmds->bs); g_free(bmds->aio_bitmap); g_free(bmds); } diff --git a/block.c b/block.c index 26639e8b70..a325efcb21 100644 --- a/block.c +++ b/block.c @@ -86,13 +86,6 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque); static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors); -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait); -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait); - static QTAILQ_HEAD(, BlockDriverState) bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states); @@ -123,69 +116,101 @@ int is_windows_drive(const char *filename) #endif /* throttling disk I/O limits */ +void bdrv_set_io_limits(BlockDriverState *bs, + ThrottleConfig *cfg) +{ + int i; + + throttle_config(&bs->throttle_state, cfg); + + for (i = 0; i < 2; i++) { + qemu_co_enter_next(&bs->throttled_reqs[i]); + } +} + +/* this function drain all the throttled IOs */ +static bool bdrv_start_throttled_reqs(BlockDriverState *bs) +{ + bool drained = false; + bool enabled = bs->io_limits_enabled; + int i; + + bs->io_limits_enabled = false; + + for (i = 0; i < 2; i++) { + while (qemu_co_enter_next(&bs->throttled_reqs[i])) { + drained = true; + } + } + + bs->io_limits_enabled = enabled; + + return drained; +} + void bdrv_io_limits_disable(BlockDriverState *bs) { bs->io_limits_enabled = false; - do {} while (qemu_co_enter_next(&bs->throttled_reqs)); + bdrv_start_throttled_reqs(bs); - if (bs->block_timer) { - timer_del(bs->block_timer); - timer_free(bs->block_timer); - bs->block_timer = NULL; - } - - bs->slice_start = 0; - bs->slice_end = 0; + throttle_destroy(&bs->throttle_state); } -static void bdrv_block_timer(void *opaque) +static void bdrv_throttle_read_timer_cb(void *opaque) { BlockDriverState *bs = opaque; - - qemu_co_enter_next(&bs->throttled_reqs); + qemu_co_enter_next(&bs->throttled_reqs[0]); } +static void bdrv_throttle_write_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + qemu_co_enter_next(&bs->throttled_reqs[1]); +} + +/* should be called before bdrv_set_io_limits if a limit is set */ void bdrv_io_limits_enable(BlockDriverState *bs) { - bs->block_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, bdrv_block_timer, bs); + assert(!bs->io_limits_enabled); + throttle_init(&bs->throttle_state, + QEMU_CLOCK_VIRTUAL, + bdrv_throttle_read_timer_cb, + bdrv_throttle_write_timer_cb, + bs); bs->io_limits_enabled = true; } -bool bdrv_io_limits_enabled(BlockDriverState *bs) -{ - BlockIOLimit *io_limits = &bs->io_limits; - return io_limits->bps[BLOCK_IO_LIMIT_READ] - || io_limits->bps[BLOCK_IO_LIMIT_WRITE] - || io_limits->bps[BLOCK_IO_LIMIT_TOTAL] - || io_limits->iops[BLOCK_IO_LIMIT_READ] - || io_limits->iops[BLOCK_IO_LIMIT_WRITE] - || io_limits->iops[BLOCK_IO_LIMIT_TOTAL]; -} - +/* This function makes an IO wait if needed + * + * @nb_sectors: the number of sectors of the IO + * @is_write: is the IO a write + */ static void bdrv_io_limits_intercept(BlockDriverState *bs, - bool is_write, int nb_sectors) + int nb_sectors, + bool is_write) { - int64_t wait_time = -1; + /* does this io must wait */ + bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write); - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { - qemu_co_queue_wait(&bs->throttled_reqs); + /* if must wait or any request of this type throttled queue the IO */ + if (must_wait || + !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) { + qemu_co_queue_wait(&bs->throttled_reqs[is_write]); } - /* In fact, we hope to keep each request's timing, in FIFO mode. The next - * throttled requests will not be dequeued until the current request is - * allowed to be serviced. So if the current request still exceeds the - * limits, it will be inserted to the head. All requests followed it will - * be still in throttled_reqs queue. - */ + /* the IO will be executed, do the accounting */ + throttle_account(&bs->throttle_state, + is_write, + nb_sectors * BDRV_SECTOR_SIZE); - while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) { - timer_mod(bs->block_timer, - wait_time + qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - qemu_co_queue_wait_insert_head(&bs->throttled_reqs); + /* if the next request must wait -> do nothing */ + if (throttle_schedule_timer(&bs->throttle_state, is_write)) { + return; } - qemu_co_queue_next(&bs->throttled_reqs); + /* else queue next request for execution */ + qemu_co_queue_next(&bs->throttled_reqs[is_write]); } /* check if the path starts with ":" */ @@ -305,7 +330,9 @@ BlockDriverState *bdrv_new(const char *device_name) bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); notifier_with_return_list_init(&bs->before_write_notifiers); - qemu_co_queue_init(&bs->throttled_reqs); + qemu_co_queue_init(&bs->throttled_reqs[0]); + qemu_co_queue_init(&bs->throttled_reqs[1]); + bs->refcnt = 1; return bs; } @@ -876,7 +903,7 @@ fail: if (!bs->drv) { QDECREF(bs->options); } - bdrv_delete(bs); + bdrv_unref(bs); return ret; } @@ -927,7 +954,7 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options) *backing_filename ? backing_filename : NULL, options, back_flags, back_drv); if (ret < 0) { - bdrv_delete(bs->backing_hd); + bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; bs->open_flags |= BDRV_O_NO_BACKING; return ret; @@ -1002,12 +1029,12 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, bs1 = bdrv_new(""); ret = bdrv_open(bs1, filename, NULL, 0, drv); if (ret < 0) { - bdrv_delete(bs1); + bdrv_unref(bs1); goto fail; } total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK; - bdrv_delete(bs1); + bdrv_unref(bs1); ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename)); if (ret < 0) { @@ -1081,7 +1108,7 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, } if (bs->file != file) { - bdrv_delete(file); + bdrv_unref(file); file = NULL; } @@ -1112,16 +1139,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename, QDict *options, bdrv_dev_change_media_cb(bs, true); } - /* throttling disk I/O limits */ - if (bs->io_limits_enabled) { - bdrv_io_limits_enable(bs); - } - return 0; unlink_and_fail: if (file != NULL) { - bdrv_delete(file); + bdrv_unref(file); } if (bs->is_temporary) { unlink(filename); @@ -1382,7 +1404,7 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { - bdrv_delete(bs->backing_hd); + bdrv_unref(bs->backing_hd); bs->backing_hd = NULL; } bs->drv->bdrv_close(bs); @@ -1407,7 +1429,7 @@ void bdrv_close(BlockDriverState *bs) bs->options = NULL; if (bs->file != NULL) { - bdrv_delete(bs->file); + bdrv_unref(bs->file); bs->file = NULL; } } @@ -1435,7 +1457,10 @@ static bool bdrv_requests_pending(BlockDriverState *bs) if (!QLIST_EMPTY(&bs->tracked_requests)) { return true; } - if (!qemu_co_queue_empty(&bs->throttled_reqs)) { + if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) { + return true; + } + if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { return true; } if (bs->file && bdrv_requests_pending(bs->file)) { @@ -1481,7 +1506,7 @@ void bdrv_drain_all(void) * a busy wait. */ QTAILQ_FOREACH(bs, &bdrv_states, list) { - while (qemu_co_enter_next(&bs->throttled_reqs)) { + if (bdrv_start_throttled_reqs(bs)) { busy = true; } } @@ -1523,13 +1548,12 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->enable_write_cache = bs_src->enable_write_cache; - /* i/o timing parameters */ - bs_dest->slice_start = bs_src->slice_start; - bs_dest->slice_end = bs_src->slice_end; - bs_dest->slice_submitted = bs_src->slice_submitted; - bs_dest->io_limits = bs_src->io_limits; - bs_dest->throttled_reqs = bs_src->throttled_reqs; - bs_dest->block_timer = bs_src->block_timer; + /* i/o throttled req */ + memcpy(&bs_dest->throttle_state, + &bs_src->throttle_state, + sizeof(ThrottleState)); + bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0]; + bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1]; bs_dest->io_limits_enabled = bs_src->io_limits_enabled; /* r/w error */ @@ -1543,6 +1567,9 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, /* dirty bitmap */ bs_dest->dirty_bitmap = bs_src->dirty_bitmap; + /* reference count */ + bs_dest->refcnt = bs_src->refcnt; + /* job */ bs_dest->in_use = bs_src->in_use; bs_dest->job = bs_src->job; @@ -1576,7 +1603,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->dev == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); tmp = *bs_new; *bs_new = *bs_old; @@ -1595,7 +1622,7 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(bs_new->job == NULL); assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); - assert(bs_new->block_timer == NULL); + assert(!throttle_have_timer(&bs_new->throttle_state)); bdrv_rebind(bs_new); bdrv_rebind(bs_old); @@ -1626,11 +1653,12 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) bs_new->drv ? bs_new->drv->format_name : ""); } -void bdrv_delete(BlockDriverState *bs) +static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); assert(!bs->in_use); + assert(!bs->refcnt); bdrv_close(bs); @@ -1829,8 +1857,11 @@ int bdrv_commit(BlockDriverState *bs) buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE); for (sector = 0; sector < total_sectors; sector += n) { - if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) { - + ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n); + if (ret < 0) { + goto ro_cleanup; + } + if (ret) { if (bdrv_read(bs, sector, buf, n) != 0) { ret = -EIO; goto ro_cleanup; @@ -2146,7 +2177,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { /* so that bdrv_close() does not recursively close the chain */ intermediate_state->bs->backing_hd = NULL; - bdrv_delete(intermediate_state->bs); + bdrv_unref(intermediate_state->bs); } ret = 0; @@ -2538,11 +2569,6 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, return -EIO; } - /* throttling disk read I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, false, nb_sectors); - } - if (bs->copy_on_read) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -2554,12 +2580,17 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs, wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, false); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, false); if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; - ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum); + ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum); if (ret < 0) { goto out; } @@ -2679,15 +2710,15 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, return -EIO; } - /* throttling disk write I/O */ - if (bs->io_limits_enabled) { - bdrv_io_limits_intercept(bs, true, nb_sectors); - } - if (bs->copy_on_read_in_flight) { wait_for_overlapping_requests(bs, sector_num, nb_sectors); } + /* throttling disk I/O */ + if (bs->io_limits_enabled) { + bdrv_io_limits_intercept(bs, nb_sectors, true); + } + tracked_request_begin(&req, bs, sector_num, nb_sectors, true); ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); @@ -2711,6 +2742,9 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, if (bs->wr_highest_sector < sector_num + nb_sectors - 1) { bs->wr_highest_sector = sector_num + nb_sectors - 1; } + if (bs->growable && ret >= 0) { + bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); + } tracked_request_end(&req); @@ -2785,7 +2819,7 @@ int64_t bdrv_getlength(BlockDriverState *bs) if (!drv) return -ENOMEDIUM; - if (bs->growable || bdrv_dev_has_removable_media(bs)) { + if (bdrv_dev_has_removable_media(bs)) { if (drv->bdrv_getlength) { return drv->bdrv_getlength(bs); } @@ -2805,14 +2839,6 @@ void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr) *nb_sectors_ptr = length; } -/* throttling disk io limits */ -void bdrv_set_io_limits(BlockDriverState *bs, - BlockIOLimit *io_limits) -{ - bs->io_limits = *io_limits; - bs->io_limits_enabled = bdrv_io_limits_enabled(bs); -} - void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { @@ -3005,6 +3031,11 @@ int bdrv_has_zero_init(BlockDriverState *bs) { assert(bs->drv); + /* If BS is a copy on write image, it is initialized to + the contents of the base image, which may not be zeroes. */ + if (bs->backing_hd) { + return 0; + } if (bs->drv->bdrv_has_zero_init) { return bs->drv->bdrv_has_zero_init(bs); } @@ -3013,15 +3044,15 @@ int bdrv_has_zero_init(BlockDriverState *bs) return 0; } -typedef struct BdrvCoIsAllocatedData { +typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; int64_t sector_num; int nb_sectors; int *pnum; - int ret; + int64_t ret; bool done; -} BdrvCoIsAllocatedData; +} BdrvCoGetBlockStatusData; /* * Returns true iff the specified sector is present in the disk image. Drivers @@ -3038,12 +3069,20 @@ typedef struct BdrvCoIsAllocatedData { * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes * beyond the end of the disk image it will be clamped. */ -int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) +static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) { + int64_t length; int64_t n; + int64_t ret, ret2; - if (sector_num >= bs->total_sectors) { + length = bdrv_getlength(bs); + if (length < 0) { + return length; + } + + if (sector_num >= (length >> BDRV_SECTOR_BITS)) { *pnum = 0; return 0; } @@ -3053,35 +3092,69 @@ int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, nb_sectors = n; } - if (!bs->drv->bdrv_co_is_allocated) { + if (!bs->drv->bdrv_co_get_block_status) { *pnum = nb_sectors; - return 1; + ret = BDRV_BLOCK_DATA; + if (bs->drv->protocol_name) { + ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); + } + return ret; } - return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum); + ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); + if (ret < 0) { + return ret; + } + + if (!(ret & BDRV_BLOCK_DATA)) { + if (bdrv_has_zero_init(bs)) { + ret |= BDRV_BLOCK_ZERO; + } else { + BlockDriverState *bs2 = bs->backing_hd; + int64_t length2 = bdrv_getlength(bs2); + if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) { + ret |= BDRV_BLOCK_ZERO; + } + } + } + + if (bs->file && + (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && + (ret & BDRV_BLOCK_OFFSET_VALID)) { + ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, + *pnum, pnum); + if (ret2 >= 0) { + /* Ignore errors. This is just providing extra information, it + * is useful but not necessary. + */ + ret |= (ret2 & BDRV_BLOCK_ZERO); + } + } + + return ret; } -/* Coroutine wrapper for bdrv_is_allocated() */ -static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque) +/* Coroutine wrapper for bdrv_get_block_status() */ +static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque) { - BdrvCoIsAllocatedData *data = opaque; + BdrvCoGetBlockStatusData *data = opaque; BlockDriverState *bs = data->bs; - data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors, - data->pnum); + data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors, + data->pnum); data->done = true; } /* - * Synchronous wrapper around bdrv_co_is_allocated(). + * Synchronous wrapper around bdrv_co_get_block_status(). * - * See bdrv_co_is_allocated() for details. + * See bdrv_co_get_block_status() for details. */ -int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum) +int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) { Coroutine *co; - BdrvCoIsAllocatedData data = { + BdrvCoGetBlockStatusData data = { .bs = bs, .sector_num = sector_num, .nb_sectors = nb_sectors, @@ -3089,14 +3162,31 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, .done = false, }; - co = qemu_coroutine_create(bdrv_is_allocated_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - qemu_aio_wait(); + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_get_block_status_co_entry(&data); + } else { + co = qemu_coroutine_create(bdrv_get_block_status_co_entry); + qemu_coroutine_enter(co, &data); + while (!data.done) { + qemu_aio_wait(); + } } return data.ret; } +int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); + if (ret < 0) { + return ret; + } + return + (ret & BDRV_BLOCK_DATA) || + ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs)); +} + /* * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP] * @@ -3109,10 +3199,10 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, * allocated/unallocated state. * */ -int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum) +int bdrv_is_allocated_above(BlockDriverState *top, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum) { BlockDriverState *intermediate; int ret, n = nb_sectors; @@ -3120,8 +3210,8 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, intermediate = top; while (intermediate && intermediate != base) { int pnum_inter; - ret = bdrv_co_is_allocated(intermediate, sector_num, nb_sectors, - &pnum_inter); + ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors, + &pnum_inter); if (ret < 0) { return ret; } else if (ret) { @@ -3148,44 +3238,6 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, return 0; } -/* Coroutine wrapper for bdrv_is_allocated_above() */ -static void coroutine_fn bdrv_is_allocated_above_co_entry(void *opaque) -{ - BdrvCoIsAllocatedData *data = opaque; - BlockDriverState *top = data->bs; - BlockDriverState *base = data->base; - - data->ret = bdrv_co_is_allocated_above(top, base, data->sector_num, - data->nb_sectors, data->pnum); - data->done = true; -} - -/* - * Synchronous wrapper around bdrv_co_is_allocated_above(). - * - * See bdrv_co_is_allocated_above() for details. - */ -int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, - int64_t sector_num, int nb_sectors, int *pnum) -{ - Coroutine *co; - BdrvCoIsAllocatedData data = { - .bs = top, - .base = base, - .sector_num = sector_num, - .nb_sectors = nb_sectors, - .pnum = pnum, - .done = false, - }; - - co = qemu_coroutine_create(bdrv_is_allocated_above_co_entry); - qemu_coroutine_enter(co, &data); - while (!data.done) { - qemu_aio_wait(); - } - return data.ret; -} - const char *bdrv_get_encrypted_filename(BlockDriverState *bs) { if (bs->backing_hd && bs->backing_hd->encrypted) @@ -3622,169 +3674,6 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb) acb->aiocb_info->cancel(acb); } -/* block I/O throttling */ -static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, double elapsed_time, uint64_t *wait) -{ - uint64_t bps_limit = 0; - uint64_t extension; - double bytes_limit, bytes_base, bytes_res; - double slice_time, wait_time; - - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.bps[is_write]) { - bps_limit = bs->io_limits.bps[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - bytes_limit = bps_limit * slice_time; - bytes_base = bs->slice_submitted.bytes[is_write]; - if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) { - bytes_base += bs->slice_submitted.bytes[!is_write]; - } - - /* bytes_base: the bytes of data which have been read/written; and - * it is obtained from the history statistic info. - * bytes_res: the remaining bytes of data which need to be read/written. - * (bytes_base + bytes_res) / bps_limit: used to calcuate - * the total time for completing reading/writting all data. - */ - bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE; - - if (bytes_base + bytes_res <= bytes_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch */ - wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time; - - /* When the I/O rate at runtime exceeds the limits, - * bs->slice_end need to be extended in order that the current statistic - * info can be kept until the timer fire, so it is increased and tuned - * based on the result of experiment. - */ - extension = wait_time * NANOSECONDS_PER_SECOND; - extension = DIV_ROUND_UP(extension, BLOCK_IO_SLICE_TIME) * - BLOCK_IO_SLICE_TIME; - bs->slice_end += extension; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write, - double elapsed_time, uint64_t *wait) -{ - uint64_t iops_limit = 0; - double ios_limit, ios_base; - double slice_time, wait_time; - - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - } else if (bs->io_limits.iops[is_write]) { - iops_limit = bs->io_limits.iops[is_write]; - } else { - if (wait) { - *wait = 0; - } - - return false; - } - - slice_time = bs->slice_end - bs->slice_start; - slice_time /= (NANOSECONDS_PER_SECOND); - ios_limit = iops_limit * slice_time; - ios_base = bs->slice_submitted.ios[is_write]; - if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) { - ios_base += bs->slice_submitted.ios[!is_write]; - } - - if (ios_base + 1 <= ios_limit) { - if (wait) { - *wait = 0; - } - - return false; - } - - /* Calc approx time to dispatch, in seconds */ - wait_time = (ios_base + 1) / iops_limit; - if (wait_time > elapsed_time) { - wait_time = wait_time - elapsed_time; - } else { - wait_time = 0; - } - - /* Exceeded current slice, extend it by another slice time */ - bs->slice_end += BLOCK_IO_SLICE_TIME; - if (wait) { - *wait = wait_time * NANOSECONDS_PER_SECOND; - } - - return true; -} - -static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors, - bool is_write, int64_t *wait) -{ - int64_t now, max_wait; - uint64_t bps_wait = 0, iops_wait = 0; - double elapsed_time; - int bps_ret, iops_ret; - - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (now > bs->slice_end) { - bs->slice_start = now; - bs->slice_end = now + BLOCK_IO_SLICE_TIME; - memset(&bs->slice_submitted, 0, sizeof(bs->slice_submitted)); - } - - elapsed_time = now - bs->slice_start; - elapsed_time /= (NANOSECONDS_PER_SECOND); - - bps_ret = bdrv_exceed_bps_limits(bs, nb_sectors, - is_write, elapsed_time, &bps_wait); - iops_ret = bdrv_exceed_iops_limits(bs, is_write, - elapsed_time, &iops_wait); - if (bps_ret || iops_ret) { - max_wait = bps_wait > iops_wait ? bps_wait : iops_wait; - if (wait) { - *wait = max_wait; - } - - now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - if (bs->slice_end < now + max_wait) { - bs->slice_end = now + max_wait; - } - - return true; - } - - if (wait) { - *wait = 0; - } - - bs->slice_submitted.bytes[is_write] += (int64_t)nb_sectors * - BDRV_SECTOR_SIZE; - bs->slice_submitted.ios[is_write]++; - - return false; -} - /**************************************************************/ /* async block device emulation */ @@ -4445,6 +4334,23 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs) } } +/* Get a reference to bs */ +void bdrv_ref(BlockDriverState *bs) +{ + bs->refcnt++; +} + +/* Release a previously grabbed reference to bs. + * If after releasing, reference count is zero, the BlockDriverState is + * deleted. */ +void bdrv_unref(BlockDriverState *bs) +{ + assert(bs->refcnt > 0); + if (--bs->refcnt == 0) { + bdrv_delete(bs); + } +} + void bdrv_set_in_use(BlockDriverState *bs, int in_use) { assert(bs->in_use != in_use); @@ -4658,7 +4564,7 @@ out: free_option_parameters(param); if (bs) { - bdrv_delete(bs); + bdrv_unref(bs); } } diff --git a/block/backup.c b/block/backup.c index 23c7264488..04c4b5c263 100644 --- a/block/backup.c +++ b/block/backup.c @@ -289,14 +289,14 @@ static void coroutine_fn backup_run(void *opaque) * backing file. */ for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) { - /* bdrv_co_is_allocated() only returns true/false based + /* bdrv_is_allocated() only returns true/false based * on the first set of sectors it comes across that * are are all in the same state. * For that reason we must verify each sector in the * backup cluster length. We end up copying more than * needed but at some point that is always the case. */ alloced = - bdrv_co_is_allocated(bs, + bdrv_is_allocated(bs, start * BACKUP_SECTORS_PER_CLUSTER + i, BACKUP_SECTORS_PER_CLUSTER - i, &n); i += n; @@ -338,7 +338,7 @@ static void coroutine_fn backup_run(void *opaque) hbitmap_free(job->bitmap); bdrv_iostatus_disable(target); - bdrv_delete(target); + bdrv_unref(target); block_job_completed(&job->common, ret); } diff --git a/block/blkverify.c b/block/blkverify.c index 1d58cc3932..c4e961eeb1 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -155,7 +155,7 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags) s->test_file = bdrv_new(""); ret = bdrv_open(s->test_file, filename, NULL, flags, NULL); if (ret < 0) { - bdrv_delete(s->test_file); + bdrv_unref(s->test_file); s->test_file = NULL; goto fail; } @@ -169,7 +169,7 @@ static void blkverify_close(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; - bdrv_delete(s->test_file); + bdrv_unref(s->test_file); s->test_file = NULL; } diff --git a/block/commit.c b/block/commit.c index 51a1ab3678..ac4b7ccbc9 100644 --- a/block/commit.c +++ b/block/commit.c @@ -108,9 +108,9 @@ wait: break; } /* Copy if allocated above the base */ - ret = bdrv_co_is_allocated_above(top, base, sector_num, - COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, - &n); + ret = bdrv_is_allocated_above(top, base, sector_num, + COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); copy = (ret == 1); trace_commit_one_iteration(s, sector_num, n, ret); if (copy) { diff --git a/block/cow.c b/block/cow.c index 1cc2e89c7c..764b93fae0 100644 --- a/block/cow.c +++ b/block/cow.c @@ -106,7 +106,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags) * XXX(hch): right now these functions are extremely inefficient. * We should just read the whole bitmap we'll need in one go instead. */ -static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum) +static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum, bool *first) { uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8; uint8_t bitmap; @@ -117,27 +117,52 @@ static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum) return ret; } + if (bitmap & (1 << (bitnum % 8))) { + return 0; + } + + if (*first) { + ret = bdrv_flush(bs->file); + if (ret < 0) { + return ret; + } + *first = false; + } + bitmap |= (1 << (bitnum % 8)); - ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap)); + ret = bdrv_pwrite(bs->file, offset, &bitmap, sizeof(bitmap)); if (ret < 0) { return ret; } return 0; } -static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) +#define BITS_PER_BITMAP_SECTOR (512 * 8) + +/* Cannot use bitmap.c on big-endian machines. */ +static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap) { - uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8; - uint8_t bitmap; - int ret; + return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0; +} - ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); - if (ret < 0) { - return ret; +static int cow_find_streak(const uint8_t *bitmap, int value, int start, int nb_sectors) +{ + int streak_value = value ? 0xFF : 0; + int last = MIN(start + nb_sectors, BITS_PER_BITMAP_SECTOR); + int bitnum = start; + while (bitnum < last) { + if ((bitnum & 7) == 0 && bitmap[bitnum / 8] == streak_value) { + bitnum += 8; + continue; + } + if (cow_test_bit(bitnum, bitmap) == value) { + bitnum++; + continue; + } + break; } - - return !!(bitmap & (1 << (bitnum % 8))); + return MIN(bitnum, last) - start; } /* Return true if first block has been changed (ie. current version is @@ -146,34 +171,44 @@ static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum) static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *num_same) { + int64_t bitnum = sector_num + sizeof(struct cow_header_v2) * 8; + uint64_t offset = (bitnum / 8) & -BDRV_SECTOR_SIZE; + uint8_t bitmap[BDRV_SECTOR_SIZE]; + int ret; int changed; - if (nb_sectors == 0) { - *num_same = nb_sectors; - return 0; - } - - changed = is_bit_set(bs, sector_num); - if (changed < 0) { - return 0; /* XXX: how to return I/O errors? */ - } - - for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) { - if (is_bit_set(bs, sector_num + *num_same) != changed) - break; + ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap)); + if (ret < 0) { + return ret; } + bitnum &= BITS_PER_BITMAP_SECTOR - 1; + changed = cow_test_bit(bitnum, bitmap); + *num_same = cow_find_streak(bitmap, changed, bitnum, nb_sectors); return changed; } +static int64_t coroutine_fn cow_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *num_same) +{ + BDRVCowState *s = bs->opaque; + int ret = cow_co_is_allocated(bs, sector_num, nb_sectors, num_same); + int64_t offset = s->cow_sectors_offset + (sector_num << BDRV_SECTOR_BITS); + if (ret < 0) { + return ret; + } + return (ret ? BDRV_BLOCK_DATA : 0) | offset | BDRV_BLOCK_OFFSET_VALID; +} + static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { int error = 0; int i; + bool first = true; for (i = 0; i < nb_sectors; i++) { - error = cow_set_bit(bs, sector_num + i); + error = cow_set_bit(bs, sector_num + i, &first); if (error) { break; } @@ -189,7 +224,11 @@ static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num, int ret, n; while (nb_sectors > 0) { - if (bdrv_co_is_allocated(bs, sector_num, nb_sectors, &n)) { + ret = cow_co_is_allocated(bs, sector_num, nb_sectors, &n); + if (ret < 0) { + return ret; + } + if (ret) { ret = bdrv_pread(bs->file, s->cow_sectors_offset + sector_num * 512, buf, n * 512); @@ -314,7 +353,7 @@ static int cow_create(const char *filename, QEMUOptionParameter *options) } exit: - bdrv_delete(cow_bs); + bdrv_unref(cow_bs); return ret; } @@ -344,7 +383,7 @@ static BlockDriver bdrv_cow = { .bdrv_read = cow_co_read, .bdrv_write = cow_co_write, - .bdrv_co_is_allocated = cow_co_is_allocated, + .bdrv_co_get_block_status = cow_co_get_block_status, .create_options = cow_create_options, }; diff --git a/block/iscsi.c b/block/iscsi.c index 2bbee1f6e5..813abd8fef 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -1241,11 +1241,11 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options) { int ret = 0; int64_t total_size = 0; - BlockDriverState bs; + BlockDriverState *bs; IscsiLun *iscsilun = NULL; QDict *bs_options; - memset(&bs, 0, sizeof(BlockDriverState)); + bs = bdrv_new(""); /* Read out options */ while (options && options->name) { @@ -1255,12 +1255,12 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options) options++; } - bs.opaque = g_malloc0(sizeof(struct IscsiLun)); - iscsilun = bs.opaque; + bs->opaque = g_malloc0(sizeof(struct IscsiLun)); + iscsilun = bs->opaque; bs_options = qdict_new(); qdict_put(bs_options, "filename", qstring_from_str(filename)); - ret = iscsi_open(&bs, bs_options, 0); + ret = iscsi_open(bs, bs_options, 0); QDECREF(bs_options); if (ret != 0) { @@ -1274,7 +1274,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options) ret = -ENODEV; goto out; } - if (bs.total_sectors < total_size) { + if (bs->total_sectors < total_size) { ret = -ENOSPC; goto out; } @@ -1284,7 +1284,9 @@ out: if (iscsilun->iscsi != NULL) { iscsi_destroy_context(iscsilun->iscsi); } - g_free(bs.opaque); + g_free(bs->opaque); + bs->opaque = NULL; + bdrv_unref(bs); return ret; } diff --git a/block/mirror.c b/block/mirror.c index 86de4582b4..f61a7799de 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -338,8 +338,8 @@ static void coroutine_fn mirror_run(void *opaque) base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; for (sector_num = 0; sector_num < end; ) { int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1; - ret = bdrv_co_is_allocated_above(bs, base, - sector_num, next - sector_num, &n); + ret = bdrv_is_allocated_above(bs, base, + sector_num, next - sector_num, &n); if (ret < 0) { goto immediate_exit; @@ -480,7 +480,7 @@ immediate_exit: bdrv_swap(s->target, s->common.bs); } bdrv_close(s->target); - bdrv_delete(s->target); + bdrv_unref(s->target); block_job_completed(&s->common, ret); } diff --git a/block/qapi.c b/block/qapi.c index a4bc4113b7..782051c65d 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -223,18 +223,44 @@ void bdrv_query_info(BlockDriverState *bs, info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs); if (bs->io_limits_enabled) { - info->inserted->bps = - bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]; - info->inserted->bps_rd = - bs->io_limits.bps[BLOCK_IO_LIMIT_READ]; - info->inserted->bps_wr = - bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE]; - info->inserted->iops = - bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]; - info->inserted->iops_rd = - bs->io_limits.iops[BLOCK_IO_LIMIT_READ]; - info->inserted->iops_wr = - bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE]; + ThrottleConfig cfg; + throttle_get_config(&bs->throttle_state, &cfg); + info->inserted->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg; + info->inserted->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg; + info->inserted->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg; + + info->inserted->iops = cfg.buckets[THROTTLE_OPS_TOTAL].avg; + info->inserted->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg; + info->inserted->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg; + + info->inserted->has_bps_max = + cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->inserted->bps_max = + cfg.buckets[THROTTLE_BPS_TOTAL].max; + info->inserted->has_bps_rd_max = + cfg.buckets[THROTTLE_BPS_READ].max; + info->inserted->bps_rd_max = + cfg.buckets[THROTTLE_BPS_READ].max; + info->inserted->has_bps_wr_max = + cfg.buckets[THROTTLE_BPS_WRITE].max; + info->inserted->bps_wr_max = + cfg.buckets[THROTTLE_BPS_WRITE].max; + + info->inserted->has_iops_max = + cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->inserted->iops_max = + cfg.buckets[THROTTLE_OPS_TOTAL].max; + info->inserted->has_iops_rd_max = + cfg.buckets[THROTTLE_OPS_READ].max; + info->inserted->iops_rd_max = + cfg.buckets[THROTTLE_OPS_READ].max; + info->inserted->has_iops_wr_max = + cfg.buckets[THROTTLE_OPS_WRITE].max; + info->inserted->iops_wr_max = + cfg.buckets[THROTTLE_OPS_WRITE].max; + + info->inserted->has_iops_size = cfg.op_size; + info->inserted->iops_size = cfg.op_size; } bs0 = bs; diff --git a/block/qcow.c b/block/qcow.c index 5239bd68f1..93a993bb44 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -395,7 +395,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, return cluster_offset; } -static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVQcowState *s = bs->opaque; @@ -410,7 +410,14 @@ static int coroutine_fn qcow_co_is_allocated(BlockDriverState *bs, if (n > nb_sectors) n = nb_sectors; *pnum = n; - return (cluster_offset != 0); + if (!cluster_offset) { + return 0; + } + if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) { + return BDRV_BLOCK_DATA; + } + cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset; } static int decompress_buffer(uint8_t *out_buf, int out_buf_size, @@ -751,7 +758,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options) g_free(tmp); ret = 0; exit: - bdrv_delete(qcow_bs); + bdrv_unref(qcow_bs); return ret; } @@ -896,7 +903,7 @@ static BlockDriver bdrv_qcow = { .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, - .bdrv_co_is_allocated = qcow_co_is_allocated, + .bdrv_co_get_block_status = qcow_co_get_block_status, .bdrv_set_key = qcow_set_key, .bdrv_make_empty = qcow_make_empty, diff --git a/block/qcow2.c b/block/qcow2.c index 4bc679a155..578792f0a3 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -688,24 +688,34 @@ static int qcow2_reopen_prepare(BDRVReopenState *state, return 0; } -static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVQcowState *s = bs->opaque; uint64_t cluster_offset; - int ret; + int index_in_cluster, ret; + int64_t status = 0; *pnum = nb_sectors; - /* FIXME We can get errors here, but the bdrv_co_is_allocated interface - * can't pass them on today */ qemu_co_mutex_lock(&s->lock); ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { - *pnum = 0; + return ret; } - return (cluster_offset != 0) || (ret == QCOW2_CLUSTER_ZERO); + if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED && + !s->crypt_method) { + index_in_cluster = sector_num & (s->cluster_sectors - 1); + cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; + } + if (ret == QCOW2_CLUSTER_ZERO) { + status |= BDRV_BLOCK_ZERO; + } else if (ret != QCOW2_CLUSTER_UNALLOCATED) { + status |= BDRV_BLOCK_DATA; + } + return status; } /* handle reading after the end of the backing file */ @@ -1452,7 +1462,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, ret = 0; out: - bdrv_delete(bs); + bdrv_unref(bs); return ret; } @@ -1868,7 +1878,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_is_allocated = qcow2_co_is_allocated, + .bdrv_co_get_block_status = qcow2_co_get_block_status, .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/qed.c b/block/qed.c index cc904c4834..49b3a37ed5 100644 --- a/block/qed.c +++ b/block/qed.c @@ -599,7 +599,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, ret = 0; /* success */ out: g_free(l1_table); - bdrv_delete(bs); + bdrv_unref(bs); return ret; } @@ -652,45 +652,66 @@ static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options) } typedef struct { + BlockDriverState *bs; Coroutine *co; - int is_allocated; + uint64_t pos; + int64_t status; int *pnum; } QEDIsAllocatedCB; static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) { QEDIsAllocatedCB *cb = opaque; + BDRVQEDState *s = cb->bs->opaque; *cb->pnum = len / BDRV_SECTOR_SIZE; - cb->is_allocated = (ret == QED_CLUSTER_FOUND || ret == QED_CLUSTER_ZERO); + switch (ret) { + case QED_CLUSTER_FOUND: + offset |= qed_offset_into_cluster(s, cb->pos); + cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; + break; + case QED_CLUSTER_ZERO: + cb->status = BDRV_BLOCK_ZERO; + break; + case QED_CLUSTER_L2: + case QED_CLUSTER_L1: + cb->status = 0; + break; + default: + assert(ret < 0); + cb->status = ret; + break; + } + if (cb->co) { qemu_coroutine_enter(cb->co, NULL); } } -static int coroutine_fn bdrv_qed_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVQEDState *s = bs->opaque; - uint64_t pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; QEDIsAllocatedCB cb = { - .is_allocated = -1, + .bs = bs, + .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE, + .status = BDRV_BLOCK_OFFSET_MASK, .pnum = pnum, }; QEDRequest request = { .l2_table = NULL }; - qed_find_cluster(s, &request, pos, len, qed_is_allocated_cb, &cb); + qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb); /* Now sleep if the callback wasn't invoked immediately */ - while (cb.is_allocated == -1) { + while (cb.status == BDRV_BLOCK_OFFSET_MASK) { cb.co = qemu_coroutine_self(); qemu_coroutine_yield(); } qed_unref_l2_cache_entry(request.l2_table); - return cb.is_allocated; + return cb.status; } static int bdrv_qed_make_empty(BlockDriverState *bs) @@ -1575,7 +1596,7 @@ static BlockDriver bdrv_qed = { .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, .bdrv_create = bdrv_qed_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_is_allocated = bdrv_qed_co_is_allocated, + .bdrv_co_get_block_status = bdrv_qed_co_get_block_status, .bdrv_make_empty = bdrv_qed_make_empty, .bdrv_aio_readv = bdrv_qed_aio_readv, .bdrv_aio_writev = bdrv_qed_aio_writev, diff --git a/block/raw-posix.c b/block/raw-posix.c index ba721d3f5b..1b41ea3356 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1084,12 +1084,12 @@ static int raw_create(const char *filename, QEMUOptionParameter *options) * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes * beyond the end of the disk image it will be clamped. */ -static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { off_t start, data, hole; - int ret; + int64_t ret; ret = fd_open(bs); if (ret < 0) { @@ -1097,6 +1097,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, } start = sector_num * BDRV_SECTOR_SIZE; + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; #ifdef CONFIG_FIEMAP @@ -1114,7 +1115,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) { /* Assume everything is allocated. */ *pnum = nb_sectors; - return 1; + return ret; } if (f.fm.fm_mapped_extents == 0) { @@ -1127,6 +1128,9 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, } else { data = f.fe.fe_logical; hole = f.fe.fe_logical + f.fe.fe_length; + if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) { + ret |= BDRV_BLOCK_ZERO; + } } #elif defined SEEK_HOLE && defined SEEK_DATA @@ -1141,7 +1145,7 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, /* Most likely EINVAL. Assume everything is allocated. */ *pnum = nb_sectors; - return 1; + return ret; } if (hole > start) { @@ -1154,19 +1158,21 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, } } #else - *pnum = nb_sectors; - return 1; + data = 0; + hole = start + nb_sectors * BDRV_SECTOR_SIZE; #endif if (data <= start) { /* On a data extent, compute sectors to the end of the extent. */ *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE); - return 1; } else { /* On a hole, compute sectors to the beginning of the next extent. */ *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); - return 0; + ret &= ~BDRV_BLOCK_DATA; + ret |= BDRV_BLOCK_ZERO; } + + return ret; } static coroutine_fn BlockDriverAIOCB *raw_aio_discard(BlockDriverState *bs, @@ -1200,7 +1206,7 @@ static BlockDriver bdrv_file = { .bdrv_close = raw_close, .bdrv_create = raw_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_is_allocated = raw_co_is_allocated, + .bdrv_co_get_block_status = raw_co_get_block_status, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, diff --git a/block/raw-win32.c b/block/raw-win32.c index 9b5b2af4e8..d2d2d9f4d4 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -535,13 +535,29 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags) { BDRVRawState *s = bs->opaque; int access_flags, create_flags; + int ret = 0; DWORD overlapped; char device_name[64]; - const char *filename = qdict_get_str(options, "filename"); + + Error *local_err = NULL; + const char *filename; + + QemuOpts *opts = qemu_opts_create_nofail(&raw_runtime_opts); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (error_is_set(&local_err)) { + qerror_report_err(local_err); + error_free(local_err); + ret = -EINVAL; + goto done; + } + + filename = qemu_opt_get(opts, "filename"); if (strstart(filename, "/dev/cdrom", NULL)) { - if (find_cdrom(device_name, sizeof(device_name)) < 0) - return -ENOENT; + if (find_cdrom(device_name, sizeof(device_name)) < 0) { + ret = -ENOENT; + goto done; + } filename = device_name; } else { /* transform drive letters into device name */ @@ -564,11 +580,17 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags) if (s->hfile == INVALID_HANDLE_VALUE) { int err = GetLastError(); - if (err == ERROR_ACCESS_DENIED) - return -EACCES; - return -1; + if (err == ERROR_ACCESS_DENIED) { + ret = -EACCES; + } else { + ret = -1; + } + goto done; } - return 0; + +done: + qemu_opts_del(opts); + return ret; } static BlockDriver bdrv_host_device = { diff --git a/block/raw_bsd.c b/block/raw_bsd.c index ab2b0fd7d2..a9060caec4 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -58,11 +58,11 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov); } -static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - int *pnum) +static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum) { - return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum); + return bdrv_get_block_status(bs->file, sector_num, nb_sectors, pnum); } static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, @@ -164,7 +164,7 @@ static BlockDriver bdrv_raw = { .bdrv_co_writev = &raw_co_writev, .bdrv_co_write_zeroes = &raw_co_write_zeroes, .bdrv_co_discard = &raw_co_discard, - .bdrv_co_is_allocated = &raw_co_is_allocated, + .bdrv_co_get_block_status = &raw_co_get_block_status, .bdrv_truncate = &raw_truncate, .bdrv_getlength = &raw_getlength, .bdrv_get_info = &raw_get_info, diff --git a/block/sheepdog.c b/block/sheepdog.c index 1ad4d070e7..f9988d35ba 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1430,7 +1430,7 @@ static int sd_prealloc(const char *filename) } out: if (bs) { - bdrv_delete(bs); + bdrv_unref(bs); } g_free(buf); @@ -1509,13 +1509,13 @@ static int sd_create(const char *filename, QEMUOptionParameter *options) if (!is_snapshot(&s->inode)) { error_report("cannot clone from a non snapshot vdi"); - bdrv_delete(bs); + bdrv_unref(bs); ret = -EINVAL; goto out; } base_vid = s->inode.vdi_id; - bdrv_delete(bs); + bdrv_unref(bs); } ret = do_sd_create(s, vdi, vdi_size, base_vid, &vid, 0); @@ -2270,9 +2270,9 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, return acb->ret; } -static coroutine_fn int -sd_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum) +static coroutine_fn int64_t +sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + int *pnum) { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; @@ -2280,7 +2280,7 @@ sd_co_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, end = DIV_ROUND_UP((sector_num + nb_sectors) * BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE); unsigned long idx; - int ret = 1; + int64_t ret = BDRV_BLOCK_DATA; for (idx = start; idx < end; idx++) { if (inode->data_vdi_id[idx] == 0) { @@ -2338,7 +2338,7 @@ static BlockDriver bdrv_sheepdog = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_discard = sd_co_discard, - .bdrv_co_is_allocated = sd_co_is_allocated, + .bdrv_co_get_block_status = sd_co_get_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, @@ -2366,7 +2366,7 @@ static BlockDriver bdrv_sheepdog_tcp = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_discard = sd_co_discard, - .bdrv_co_is_allocated = sd_co_is_allocated, + .bdrv_co_get_block_status = sd_co_get_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, @@ -2394,7 +2394,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_co_writev = sd_co_writev, .bdrv_co_flush_to_disk = sd_co_flush_to_disk, .bdrv_co_discard = sd_co_discard, - .bdrv_co_is_allocated = sd_co_is_allocated, + .bdrv_co_get_block_status = sd_co_get_block_status, .bdrv_snapshot_create = sd_snapshot_create, .bdrv_snapshot_goto = sd_snapshot_goto, diff --git a/block/snapshot.c b/block/snapshot.c index 6c6d9deea1..8f61cc0745 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -99,7 +99,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs, ret = bdrv_snapshot_goto(bs->file, snapshot_id); open_ret = drv->bdrv_open(bs, NULL, bs->open_flags); if (open_ret < 0) { - bdrv_delete(bs->file); + bdrv_unref(bs->file); bs->drv = NULL; return open_ret; } diff --git a/block/stream.c b/block/stream.c index 99821252b1..078ce4aa6a 100644 --- a/block/stream.c +++ b/block/stream.c @@ -73,7 +73,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base, unused = intermediate; intermediate = intermediate->backing_hd; unused->backing_hd = NULL; - bdrv_delete(unused); + bdrv_unref(unused); } } @@ -119,16 +119,16 @@ wait: break; } - ret = bdrv_co_is_allocated(bs, sector_num, - STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); + ret = bdrv_is_allocated(bs, sector_num, + STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n); if (ret == 1) { /* Allocated in the top, no need to copy. */ copy = false; - } else { + } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the * known-unallocated area [sector_num, sector_num+n). */ - ret = bdrv_co_is_allocated_above(bs->backing_hd, base, - sector_num, n, &n); + ret = bdrv_is_allocated_above(bs->backing_hd, base, + sector_num, n, &n); /* Finish early if end of backing file has been reached */ if (ret == 0 && n == 0) { diff --git a/block/vdi.c b/block/vdi.c index 8a915257e8..1bf7dc575a 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -470,7 +470,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state, return 0; } -static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ @@ -479,12 +479,23 @@ static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs, size_t sector_in_block = sector_num % s->block_sectors; int n_sectors = s->block_sectors - sector_in_block; uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]); + uint64_t offset; + int result; + logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum); if (n_sectors > nb_sectors) { n_sectors = nb_sectors; } *pnum = n_sectors; - return VDI_IS_ALLOCATED(bmap_entry); + result = VDI_IS_ALLOCATED(bmap_entry); + if (!result) { + return 0; + } + + offset = s->header.offset_data + + (uint64_t)bmap_entry * s->block_size + + sector_in_block * SECTOR_SIZE; + return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; } static int vdi_co_read(BlockDriverState *bs, @@ -780,7 +791,7 @@ static BlockDriver bdrv_vdi = { .bdrv_reopen_prepare = vdi_reopen_prepare, .bdrv_create = vdi_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_is_allocated = vdi_co_is_allocated, + .bdrv_co_get_block_status = vdi_co_get_block_status, .bdrv_make_empty = vdi_make_empty, .bdrv_read = vdi_co_read, diff --git a/block/vmdk.c b/block/vmdk.c index 63b489d29e..fb5b5297ce 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -216,7 +216,7 @@ static void vmdk_free_extents(BlockDriverState *bs) g_free(e->l2_cache); g_free(e->l1_backup_table); if (e->file != bs->file) { - bdrv_delete(e->file); + bdrv_unref(e->file); } } g_free(s->extents); @@ -746,7 +746,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/ ret = vmdk_open_sparse(bs, extent_file, bs->open_flags); if (ret) { - bdrv_delete(extent_file); + bdrv_unref(extent_file); return ret; } } else { @@ -1042,7 +1042,7 @@ static VmdkExtent *find_extent(BDRVVmdkState *s, return NULL; } -static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { BDRVVmdkState *s = bs->opaque; @@ -1059,7 +1059,24 @@ static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs, sector_num * 512, 0, &offset); qemu_co_mutex_unlock(&s->lock); - ret = (ret == VMDK_OK || ret == VMDK_ZEROED); + switch (ret) { + case VMDK_ERROR: + ret = -EIO; + break; + case VMDK_UNALLOC: + ret = 0; + break; + case VMDK_ZEROED: + ret = BDRV_BLOCK_ZERO; + break; + case VMDK_OK: + ret = BDRV_BLOCK_DATA; + if (extent->file == bs->file) { + ret |= BDRV_BLOCK_OFFSET_VALID | offset; + } + + break; + } index_in_cluster = sector_num % extent->cluster_sectors; n = extent->cluster_sectors - index_in_cluster; @@ -1636,15 +1653,15 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) BlockDriverState *bs = bdrv_new(""); ret = bdrv_open(bs, backing_file, NULL, 0, NULL); if (ret != 0) { - bdrv_delete(bs); + bdrv_unref(bs); return ret; } if (strcmp(bs->drv->format_name, "vmdk")) { - bdrv_delete(bs); + bdrv_unref(bs); return -EINVAL; } parent_cid = vmdk_read_cid(bs, 0); - bdrv_delete(bs); + bdrv_unref(bs); snprintf(parent_desc_line, sizeof(parent_desc_line), "parentFileNameHint=\"%s\"", backing_file); } @@ -1837,7 +1854,7 @@ static BlockDriver bdrv_vmdk = { .bdrv_close = vmdk_close, .bdrv_create = vmdk_create, .bdrv_co_flush_to_disk = vmdk_co_flush, - .bdrv_co_is_allocated = vmdk_co_is_allocated, + .bdrv_co_get_block_status = vmdk_co_get_block_status, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_has_zero_init = vmdk_has_zero_init, diff --git a/block/vvfat.c b/block/vvfat.c index cd3b8edd9f..0129195e29 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -2874,16 +2874,17 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num, return ret; } -static int coroutine_fn vvfat_co_is_allocated(BlockDriverState *bs, +static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int* n) { BDRVVVFATState* s = bs->opaque; *n = s->sector_count - sector_num; - if (*n > nb_sectors) - *n = nb_sectors; - else if (*n < 0) - return 0; - return 1; + if (*n > nb_sectors) { + *n = nb_sectors; + } else if (*n < 0) { + return 0; + } + return BDRV_BLOCK_DATA; } static int write_target_commit(BlockDriverState *bs, int64_t sector_num, @@ -2894,7 +2895,7 @@ static int write_target_commit(BlockDriverState *bs, int64_t sector_num, static void write_target_close(BlockDriverState *bs) { BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque); - bdrv_delete(s->qcow); + bdrv_unref(s->qcow); g_free(s->qcow_filename); } @@ -2935,7 +2936,7 @@ static int enable_write_target(BDRVVVFATState *s) ret = bdrv_open(s->qcow, s->qcow_filename, NULL, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow); if (ret < 0) { - bdrv_delete(s->qcow); + bdrv_unref(s->qcow); goto err; } @@ -2943,7 +2944,7 @@ static int enable_write_target(BDRVVVFATState *s) unlink(s->qcow_filename); #endif - s->bs->backing_hd = calloc(sizeof(BlockDriverState), 1); + s->bs->backing_hd = bdrv_new(""); s->bs->backing_hd->drv = &vvfat_write_target; s->bs->backing_hd->opaque = g_malloc(sizeof(void*)); *(void**)s->bs->backing_hd->opaque = s; @@ -2984,7 +2985,7 @@ static BlockDriver bdrv_vvfat = { .bdrv_read = vvfat_co_read, .bdrv_write = vvfat_co_write, - .bdrv_co_is_allocated = vvfat_co_is_allocated, + .bdrv_co_get_block_status = vvfat_co_get_block_status, }; static void bdrv_vvfat_init(void) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 95f10c81e3..922cf5657b 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -69,12 +69,6 @@ static void nbd_close_notifier(Notifier *n, void *data) g_free(cn); } -static void nbd_server_put_ref(NBDExport *exp) -{ - BlockDriverState *bs = nbd_export_get_blockdev(exp); - drive_put_ref(drive_get_by_blockdev(bs)); -} - void qmp_nbd_server_add(const char *device, bool has_writable, bool writable, Error **errp) { @@ -105,11 +99,9 @@ void qmp_nbd_server_add(const char *device, bool has_writable, bool writable, writable = false; } - exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, - nbd_server_put_ref); + exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL); nbd_export_set_name(exp, device); - drive_get_ref(drive_get_by_blockdev(bs)); n = g_malloc0(sizeof(NBDCloseNotifier)); n->n.notify = nbd_close_notifier; diff --git a/blockdev.c b/blockdev.c index e70e16e4de..07dac05a2c 100644 --- a/blockdev.c +++ b/blockdev.c @@ -212,7 +212,7 @@ static void bdrv_format_print(void *opaque, const char *name) static void drive_uninit(DriveInfo *dinfo) { qemu_opts_del(dinfo->opts); - bdrv_delete(dinfo->bdrv); + bdrv_unref(dinfo->bdrv); g_free(dinfo->id); QTAILQ_REMOVE(&drives, dinfo, next); g_free(dinfo->serial); @@ -234,32 +234,32 @@ void drive_get_ref(DriveInfo *dinfo) typedef struct { QEMUBH *bh; - DriveInfo *dinfo; -} DrivePutRefBH; + BlockDriverState *bs; +} BDRVPutRefBH; -static void drive_put_ref_bh(void *opaque) +static void bdrv_put_ref_bh(void *opaque) { - DrivePutRefBH *s = opaque; + BDRVPutRefBH *s = opaque; - drive_put_ref(s->dinfo); + bdrv_unref(s->bs); qemu_bh_delete(s->bh); g_free(s); } /* - * Release a drive reference in a BH + * Release a BDS reference in a BH * - * It is not possible to use drive_put_ref() from a callback function when the - * callers still need the drive. In such cases we schedule a BH to release the - * reference. + * It is not safe to use bdrv_unref() from a callback function when the callers + * still need the BlockDriverState. In such cases we schedule a BH to release + * the reference. */ -static void drive_put_ref_bh_schedule(DriveInfo *dinfo) +static void bdrv_put_ref_bh_schedule(BlockDriverState *bs) { - DrivePutRefBH *s; + BDRVPutRefBH *s; - s = g_new(DrivePutRefBH, 1); - s->bh = qemu_bh_new(drive_put_ref_bh, s); - s->dinfo = dinfo; + s = g_new(BDRVPutRefBH, 1); + s->bh = qemu_bh_new(bdrv_put_ref_bh, s); + s->bs = bs; qemu_bh_schedule(s->bh); } @@ -280,32 +280,16 @@ static int parse_block_error_action(const char *buf, bool is_read) } } -static bool do_check_io_limits(BlockIOLimit *io_limits, Error **errp) +static bool check_throttle_config(ThrottleConfig *cfg, Error **errp) { - bool bps_flag; - bool iops_flag; - - assert(io_limits); - - bps_flag = (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] != 0) - && ((io_limits->bps[BLOCK_IO_LIMIT_READ] != 0) - || (io_limits->bps[BLOCK_IO_LIMIT_WRITE] != 0)); - iops_flag = (io_limits->iops[BLOCK_IO_LIMIT_TOTAL] != 0) - && ((io_limits->iops[BLOCK_IO_LIMIT_READ] != 0) - || (io_limits->iops[BLOCK_IO_LIMIT_WRITE] != 0)); - if (bps_flag || iops_flag) { - error_setg(errp, "bps(iops) and bps_rd/bps_wr(iops_rd/iops_wr) " - "cannot be used at the same time"); + if (throttle_conflicting(cfg)) { + error_setg(errp, "bps/iops/max total values and read/write values" + " cannot be used at the same time"); return false; } - if (io_limits->bps[BLOCK_IO_LIMIT_TOTAL] < 0 || - io_limits->bps[BLOCK_IO_LIMIT_WRITE] < 0 || - io_limits->bps[BLOCK_IO_LIMIT_READ] < 0 || - io_limits->iops[BLOCK_IO_LIMIT_TOTAL] < 0 || - io_limits->iops[BLOCK_IO_LIMIT_WRITE] < 0 || - io_limits->iops[BLOCK_IO_LIMIT_READ] < 0) { - error_setg(errp, "bps and iops values must be 0 or greater"); + if (!throttle_is_valid(cfg)) { + error_setg(errp, "bps/iops/maxs values must be 0 or greater"); return false; } @@ -330,7 +314,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts, int on_read_error, on_write_error; const char *devaddr; DriveInfo *dinfo; - BlockIOLimit io_limits; + ThrottleConfig cfg; int snapshot = 0; bool copy_on_read; int ret; @@ -496,20 +480,36 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts, } /* disk I/O throttling */ - io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = + memset(&cfg, 0, sizeof(cfg)); + cfg.buckets[THROTTLE_BPS_TOTAL].avg = qemu_opt_get_number(opts, "throttling.bps-total", 0); - io_limits.bps[BLOCK_IO_LIMIT_READ] = + cfg.buckets[THROTTLE_BPS_READ].avg = qemu_opt_get_number(opts, "throttling.bps-read", 0); - io_limits.bps[BLOCK_IO_LIMIT_WRITE] = + cfg.buckets[THROTTLE_BPS_WRITE].avg = qemu_opt_get_number(opts, "throttling.bps-write", 0); - io_limits.iops[BLOCK_IO_LIMIT_TOTAL] = + cfg.buckets[THROTTLE_OPS_TOTAL].avg = qemu_opt_get_number(opts, "throttling.iops-total", 0); - io_limits.iops[BLOCK_IO_LIMIT_READ] = + cfg.buckets[THROTTLE_OPS_READ].avg = qemu_opt_get_number(opts, "throttling.iops-read", 0); - io_limits.iops[BLOCK_IO_LIMIT_WRITE] = + cfg.buckets[THROTTLE_OPS_WRITE].avg = qemu_opt_get_number(opts, "throttling.iops-write", 0); - if (!do_check_io_limits(&io_limits, &error)) { + cfg.buckets[THROTTLE_BPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.bps-total-max", 0); + cfg.buckets[THROTTLE_BPS_READ].max = + qemu_opt_get_number(opts, "throttling.bps-read-max", 0); + cfg.buckets[THROTTLE_BPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.bps-write-max", 0); + cfg.buckets[THROTTLE_OPS_TOTAL].max = + qemu_opt_get_number(opts, "throttling.iops-total-max", 0); + cfg.buckets[THROTTLE_OPS_READ].max = + qemu_opt_get_number(opts, "throttling.iops-read-max", 0); + cfg.buckets[THROTTLE_OPS_WRITE].max = + qemu_opt_get_number(opts, "throttling.iops-write-max", 0); + + cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0); + + if (!check_throttle_config(&cfg, &error)) { error_report("%s", error_get_pretty(error)); error_free(error); return NULL; @@ -636,7 +636,10 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts, bdrv_set_on_error(dinfo->bdrv, on_read_error, on_write_error); /* disk I/O throttling */ - bdrv_set_io_limits(dinfo->bdrv, &io_limits); + if (throttle_enabled(&cfg)) { + bdrv_io_limits_enable(dinfo->bdrv); + bdrv_set_io_limits(dinfo->bdrv, &cfg); + } switch(type) { case IF_IDE: @@ -732,7 +735,7 @@ static DriveInfo *blockdev_init(QemuOpts *all_opts, err: qemu_opts_del(opts); QDECREF(bs_opts); - bdrv_delete(dinfo->bdrv); + bdrv_unref(dinfo->bdrv); g_free(dinfo->id); QTAILQ_REMOVE(&drives, dinfo, next); g_free(dinfo); @@ -763,6 +766,17 @@ DriveInfo *drive_init(QemuOpts *all_opts, BlockInterfaceType block_default_type) qemu_opt_rename(all_opts, "bps_rd", "throttling.bps-read"); qemu_opt_rename(all_opts, "bps_wr", "throttling.bps-write"); + qemu_opt_rename(all_opts, "iops_max", "throttling.iops-total-max"); + qemu_opt_rename(all_opts, "iops_rd_max", "throttling.iops-read-max"); + qemu_opt_rename(all_opts, "iops_wr_max", "throttling.iops-write-max"); + + qemu_opt_rename(all_opts, "bps_max", "throttling.bps-total-max"); + qemu_opt_rename(all_opts, "bps_rd_max", "throttling.bps-read-max"); + qemu_opt_rename(all_opts, "bps_wr_max", "throttling.bps-write-max"); + + qemu_opt_rename(all_opts, + "iops_size", "throttling.iops-size"); + qemu_opt_rename(all_opts, "readonly", "read-only"); value = qemu_opt_get(all_opts, "cache"); @@ -982,7 +996,7 @@ static void external_snapshot_abort(BlkTransactionState *common) ExternalSnapshotState *state = DO_UPCAST(ExternalSnapshotState, common, common); if (state->new_bs) { - bdrv_delete(state->new_bs); + bdrv_unref(state->new_bs); } } @@ -1247,10 +1261,26 @@ void qmp_change_blockdev(const char *device, const char *filename, /* throttling disk I/O limits */ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, - int64_t bps_wr, int64_t iops, int64_t iops_rd, - int64_t iops_wr, Error **errp) + int64_t bps_wr, + int64_t iops, + int64_t iops_rd, + int64_t iops_wr, + bool has_bps_max, + int64_t bps_max, + bool has_bps_rd_max, + int64_t bps_rd_max, + bool has_bps_wr_max, + int64_t bps_wr_max, + bool has_iops_max, + int64_t iops_max, + bool has_iops_rd_max, + int64_t iops_rd_max, + bool has_iops_wr_max, + int64_t iops_wr_max, + bool has_iops_size, + int64_t iops_size, Error **errp) { - BlockIOLimit io_limits; + ThrottleConfig cfg; BlockDriverState *bs; bs = bdrv_find(device); @@ -1259,27 +1289,50 @@ void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd, return; } - io_limits.bps[BLOCK_IO_LIMIT_TOTAL] = bps; - io_limits.bps[BLOCK_IO_LIMIT_READ] = bps_rd; - io_limits.bps[BLOCK_IO_LIMIT_WRITE] = bps_wr; - io_limits.iops[BLOCK_IO_LIMIT_TOTAL]= iops; - io_limits.iops[BLOCK_IO_LIMIT_READ] = iops_rd; - io_limits.iops[BLOCK_IO_LIMIT_WRITE]= iops_wr; + memset(&cfg, 0, sizeof(cfg)); + cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps; + cfg.buckets[THROTTLE_BPS_READ].avg = bps_rd; + cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr; - if (!do_check_io_limits(&io_limits, errp)) { + cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops; + cfg.buckets[THROTTLE_OPS_READ].avg = iops_rd; + cfg.buckets[THROTTLE_OPS_WRITE].avg = iops_wr; + + if (has_bps_max) { + cfg.buckets[THROTTLE_BPS_TOTAL].max = bps_max; + } + if (has_bps_rd_max) { + cfg.buckets[THROTTLE_BPS_READ].max = bps_rd_max; + } + if (has_bps_wr_max) { + cfg.buckets[THROTTLE_BPS_WRITE].max = bps_wr_max; + } + if (has_iops_max) { + cfg.buckets[THROTTLE_OPS_TOTAL].max = iops_max; + } + if (has_iops_rd_max) { + cfg.buckets[THROTTLE_OPS_READ].max = iops_rd_max; + } + if (has_iops_wr_max) { + cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max; + } + + if (has_iops_size) { + cfg.op_size = iops_size; + } + + if (!check_throttle_config(&cfg, errp)) { return; } - bs->io_limits = io_limits; - - if (!bs->io_limits_enabled && bdrv_io_limits_enabled(bs)) { + if (!bs->io_limits_enabled && throttle_enabled(&cfg)) { bdrv_io_limits_enable(bs); - } else if (bs->io_limits_enabled && !bdrv_io_limits_enabled(bs)) { + } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) { bdrv_io_limits_disable(bs); - } else { - if (bs->block_timer) { - timer_mod(bs->block_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL)); - } + } + + if (bs->io_limits_enabled) { + bdrv_set_io_limits(bs, &cfg); } } @@ -1383,7 +1436,7 @@ static void block_job_cb(void *opaque, int ret) } qobject_decref(obj); - drive_put_ref_bh_schedule(drive_get_by_blockdev(bs)); + bdrv_put_ref_bh_schedule(bs); } void qmp_block_stream(const char *device, bool has_base, @@ -1420,11 +1473,6 @@ void qmp_block_stream(const char *device, bool has_base, return; } - /* Grab a reference so hotplug does not delete the BlockDriverState from - * underneath us. - */ - drive_get_ref(drive_get_by_blockdev(bs)); - trace_qmp_block_stream(bs, bs->job); } @@ -1481,10 +1529,6 @@ void qmp_block_commit(const char *device, error_propagate(errp, local_err); return; } - /* Grab a reference so hotplug does not delete the BlockDriverState from - * underneath us. - */ - drive_get_ref(drive_get_by_blockdev(bs)); } void qmp_drive_backup(const char *device, const char *target, @@ -1585,7 +1629,7 @@ void qmp_drive_backup(const char *device, const char *target, target_bs = bdrv_new(""); ret = bdrv_open(target_bs, target, NULL, flags, drv); if (ret < 0) { - bdrv_delete(target_bs); + bdrv_unref(target_bs); error_setg_file_open(errp, -ret, target); return; } @@ -1593,15 +1637,10 @@ void qmp_drive_backup(const char *device, const char *target, backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { - bdrv_delete(target_bs); + bdrv_unref(target_bs); error_propagate(errp, local_err); return; } - - /* Grab a reference so hotplug does not delete the BlockDriverState from - * underneath us. - */ - drive_get_ref(drive_get_by_blockdev(bs)); } #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) @@ -1725,7 +1764,7 @@ void qmp_drive_mirror(const char *device, const char *target, target_bs = bdrv_new(""); ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv); if (ret < 0) { - bdrv_delete(target_bs); + bdrv_unref(target_bs); error_setg_file_open(errp, -ret, target); return; } @@ -1734,15 +1773,10 @@ void qmp_drive_mirror(const char *device, const char *target, on_source_error, on_target_error, block_job_cb, bs, &local_err); if (local_err != NULL) { - bdrv_delete(target_bs); + bdrv_unref(target_bs); error_propagate(errp, local_err); return; } - - /* Grab a reference so hotplug does not delete the BlockDriverState from - * underneath us. - */ - drive_get_ref(drive_get_by_blockdev(bs)); } static BlockJob *find_block_job(const char *device) @@ -1967,6 +2001,34 @@ QemuOptsList qemu_common_drive_opts = { .name = "throttling.bps-write", .type = QEMU_OPT_NUMBER, .help = "limit write bytes per second", + },{ + .name = "throttling.iops-total-max", + .type = QEMU_OPT_NUMBER, + .help = "I/O operations burst", + },{ + .name = "throttling.iops-read-max", + .type = QEMU_OPT_NUMBER, + .help = "I/O operations read burst", + },{ + .name = "throttling.iops-write-max", + .type = QEMU_OPT_NUMBER, + .help = "I/O operations write burst", + },{ + .name = "throttling.bps-total-max", + .type = QEMU_OPT_NUMBER, + .help = "total bytes burst", + },{ + .name = "throttling.bps-read-max", + .type = QEMU_OPT_NUMBER, + .help = "total bytes read burst", + },{ + .name = "throttling.bps-write-max", + .type = QEMU_OPT_NUMBER, + .help = "total bytes write burst", + },{ + .name = "throttling.iops-size", + .type = QEMU_OPT_NUMBER, + .help = "when limiting by iops max size of an I/O in bytes", },{ .name = "copy-on-read", .type = QEMU_OPT_BOOL, diff --git a/blockjob.c b/blockjob.c index 7edc945119..e7d49b7169 100644 --- a/blockjob.c +++ b/blockjob.c @@ -45,6 +45,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); return NULL; } + bdrv_ref(bs); bdrv_set_in_use(bs, 1); job = g_malloc0(job_type->instance_size); diff --git a/hmp.c b/hmp.c index fcca6aea8f..2bd31d1c69 100644 --- a/hmp.c +++ b/hmp.c @@ -344,14 +344,30 @@ void hmp_info_block(Monitor *mon, const QDict *qdict) { monitor_printf(mon, " I/O throttling: bps=%" PRId64 " bps_rd=%" PRId64 " bps_wr=%" PRId64 + " bps_max=%" PRId64 + " bps_rd_max=%" PRId64 + " bps_wr_max=%" PRId64 " iops=%" PRId64 " iops_rd=%" PRId64 - " iops_wr=%" PRId64 "\n", + " iops_wr=%" PRId64 + " iops_max=%" PRId64 + " iops_rd_max=%" PRId64 + " iops_wr_max=%" PRId64 + " iops_size=%" PRId64 "\n", info->value->inserted->bps, info->value->inserted->bps_rd, info->value->inserted->bps_wr, + info->value->inserted->bps_max, + info->value->inserted->bps_rd_max, + info->value->inserted->bps_wr_max, info->value->inserted->iops, info->value->inserted->iops_rd, - info->value->inserted->iops_wr); + info->value->inserted->iops_wr, + info->value->inserted->iops_max, + info->value->inserted->iops_rd_max, + info->value->inserted->iops_wr_max, + info->value->inserted->iops_size); + } else { + monitor_printf(mon, " [not inserted]"); } if (verbose) { @@ -1098,7 +1114,21 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict) qdict_get_int(qdict, "bps_wr"), qdict_get_int(qdict, "iops"), qdict_get_int(qdict, "iops_rd"), - qdict_get_int(qdict, "iops_wr"), &err); + qdict_get_int(qdict, "iops_wr"), + false, /* no burst max via HMP */ + 0, + false, + 0, + false, + 0, + false, + 0, + false, + 0, + false, + 0, + false, /* No default I/O size */ + 0, &err); hmp_handle_error(mon, &err); } diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 5a96ccd416..f2d7350a50 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -42,6 +42,7 @@ typedef struct { struct VirtIOBlockDataPlane { bool started; + bool starting; bool stopping; QEMUBH *start_bh; QemuThread thread; @@ -451,8 +452,15 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) return; } + if (s->starting) { + return; + } + + s->starting = true; + vq = virtio_get_queue(s->vdev, 0); if (!vring_setup(&s->vring, s->vdev, 0)) { + s->starting = false; return; } @@ -482,6 +490,7 @@ void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s) s->io_notifier = *ioq_get_notifier(&s->ioqueue); aio_set_event_notifier(s->ctx, &s->io_notifier, handle_io); + s->starting = false; s->started = true; trace_virtio_blk_data_plane_start(s); diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c index 727f4331c0..668cc069ff 100644 --- a/hw/block/xen_disk.c +++ b/hw/block/xen_disk.c @@ -813,7 +813,7 @@ static int blk_connect(struct XenDevice *xendev) readonly); if (bdrv_open(blkdev->bs, blkdev->filename, NULL, qflags, drv) != 0) { - bdrv_delete(blkdev->bs); + bdrv_unref(blkdev->bs); blkdev->bs = NULL; } } @@ -824,6 +824,9 @@ static int blk_connect(struct XenDevice *xendev) /* setup via qemu cmdline -> already setup for us */ xen_be_printf(&blkdev->xendev, 2, "get configured bdrv (cmdline setup)\n"); blkdev->bs = blkdev->dinfo->bdrv; + /* blkdev->bs is not create by us, we get a reference + * so we can bdrv_unref() unconditionally */ + bdrv_ref(blkdev->bs); } bdrv_attach_dev_nofail(blkdev->bs, blkdev); blkdev->file_size = bdrv_getlength(blkdev->bs); @@ -922,12 +925,8 @@ static void blk_disconnect(struct XenDevice *xendev) struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev); if (blkdev->bs) { - if (!blkdev->dinfo) { - /* close/delete only if we created it ourself */ - bdrv_close(blkdev->bs); - bdrv_detach_dev(blkdev->bs, blkdev); - bdrv_delete(blkdev->bs); - } + bdrv_detach_dev(blkdev->bs, blkdev); + bdrv_unref(blkdev->bs); blkdev->bs = NULL; } xen_be_unbind_evtchn(&blkdev->xendev); diff --git a/include/block/block.h b/include/block/block.h index e6b391ce88..728ec1aebf 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -81,6 +81,32 @@ typedef struct BlockDevOps { #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) +/* BDRV_BLOCK_DATA: data is read from bs->file or another file + * BDRV_BLOCK_ZERO: sectors read as zero + * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data + * + * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in + * bs->file where sector data can be read from as raw data. + * + * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present. + * + * DATA ZERO OFFSET_VALID + * t t t sectors read as zero, bs->file is zero at offset + * t f t sectors read as valid from bs->file at offset + * f t t sectors preallocated, read as zero, bs->file not + * necessarily zero at offset + * f f t sectors preallocated but read from backing_hd, + * bs->file contains garbage at offset + * t t f sectors preallocated, read as zero, unknown offset + * t f f sectors read from unknown file or offset + * f t f not allocated or unknown offset, read as zero + * f f f not allocated or unknown offset, read from backing_hd + */ +#define BDRV_BLOCK_DATA 1 +#define BDRV_BLOCK_ZERO 2 +#define BDRV_BLOCK_OFFSET_VALID 4 +#define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK + typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP } BlockErrorAction; @@ -107,7 +133,6 @@ void bdrv_info_stats(Monitor *mon, QObject **ret_data); /* disk I/O throttling */ void bdrv_io_limits_enable(BlockDriverState *bs); void bdrv_io_limits_disable(BlockDriverState *bs); -bool bdrv_io_limits_enabled(BlockDriverState *bs); void bdrv_init(void); void bdrv_init_with_whitelist(void); @@ -123,7 +148,6 @@ BlockDriverState *bdrv_new(const char *device_name); void bdrv_make_anon(BlockDriverState *bs); void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old); void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); -void bdrv_delete(BlockDriverState *bs); int bdrv_parse_cache_flags(const char *mode, int *flags); int bdrv_parse_discard_flags(const char *mode, int *flags); int bdrv_file_open(BlockDriverState **pbs, const char *filename, @@ -181,12 +205,6 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, */ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors); -int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum); -int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top, - BlockDriverState *base, - int64_t sector_num, - int nb_sectors, int *pnum); BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, const char *backing_file); int bdrv_get_backing_file_depth(BlockDriverState *bs); @@ -277,6 +295,8 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); +int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, @@ -356,6 +376,8 @@ int64_t bdrv_get_dirty_count(BlockDriverState *bs); void bdrv_enable_copy_on_read(BlockDriverState *bs); void bdrv_disable_copy_on_read(BlockDriverState *bs); +void bdrv_ref(BlockDriverState *bs); +void bdrv_unref(BlockDriverState *bs); void bdrv_set_in_use(BlockDriverState *bs, int in_use); int bdrv_in_use(BlockDriverState *bs); diff --git a/include/block/block_int.h b/include/block/block_int.h index 8012e253c9..7c35198ad7 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -35,18 +35,12 @@ #include "qemu/hbitmap.h" #include "block/snapshot.h" #include "qemu/main-loop.h" +#include "qemu/throttle.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 #define BLOCK_FLAG_LAZY_REFCOUNTS 8 -#define BLOCK_IO_LIMIT_READ 0 -#define BLOCK_IO_LIMIT_WRITE 1 -#define BLOCK_IO_LIMIT_TOTAL 2 - -#define BLOCK_IO_SLICE_TIME 100000000 -#define NANOSECONDS_PER_SECOND 1000000000.0 - #define BLOCK_OPT_SIZE "size" #define BLOCK_OPT_ENCRYPT "encryption" #define BLOCK_OPT_COMPAT6 "compat6" @@ -70,17 +64,6 @@ typedef struct BdrvTrackedRequest { CoQueue wait_queue; /* coroutines blocked on this request */ } BdrvTrackedRequest; - -typedef struct BlockIOLimit { - int64_t bps[3]; - int64_t iops[3]; -} BlockIOLimit; - -typedef struct BlockIOBaseValue { - uint64_t bytes[2]; - uint64_t ios[2]; -} BlockIOBaseValue; - struct BlockDriver { const char *format_name; int instance_size; @@ -135,7 +118,7 @@ struct BlockDriver { int64_t sector_num, int nb_sectors); int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); - int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs, + int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); /* @@ -264,13 +247,9 @@ struct BlockDriverState { /* number of in-flight copy-on-read requests */ unsigned int copy_on_read_in_flight; - /* the time for latest disk I/O */ - int64_t slice_start; - int64_t slice_end; - BlockIOLimit io_limits; - BlockIOBaseValue slice_submitted; - CoQueue throttled_reqs; - QEMUTimer *block_timer; + /* I/O throttling */ + ThrottleState throttle_state; + CoQueue throttled_reqs[2]; bool io_limits_enabled; /* I/O stats (display with "info blockstats"). */ @@ -298,6 +277,7 @@ struct BlockDriverState { BlockDeviceIoStatus iostatus; char device_name[32]; HBitmap *dirty_bitmap; + int refcnt; int in_use; /* users other than guest access, eg. block migration */ QTAILQ_ENTRY(BlockDriverState) list; @@ -312,7 +292,8 @@ struct BlockDriverState { int get_tmp_filename(char *filename, int size); void bdrv_set_io_limits(BlockDriverState *bs, - BlockIOLimit *io_limits); + ThrottleConfig *cfg); + /** * bdrv_add_before_write_notifier: diff --git a/include/qemu/throttle.h b/include/qemu/throttle.h new file mode 100644 index 0000000000..ab29b0b918 --- /dev/null +++ b/include/qemu/throttle.h @@ -0,0 +1,110 @@ +/* + * QEMU throttling infrastructure + * + * Copyright (C) Nodalink, SARL. 2013 + * + * Author: + * Benoît Canet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef THROTTLE_H +#define THROTTLE_H + +#include +#include "qemu-common.h" +#include "qemu/timer.h" + +#define NANOSECONDS_PER_SECOND 1000000000.0 + +typedef enum { + THROTTLE_BPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_BPS_WRITE, + THROTTLE_OPS_TOTAL, + THROTTLE_OPS_READ, + THROTTLE_OPS_WRITE, + BUCKETS_COUNT, +} BucketType; + +/* + * The max parameter of the leaky bucket throttling algorithm can be used to + * allow the guest to do bursts. + * The max value is a pool of I/O that the guest can use without being throttled + * at all. Throttling is triggered once this pool is empty. + */ + +typedef struct LeakyBucket { + double avg; /* average goal in units per second */ + double max; /* leaky bucket max burst in units */ + double level; /* bucket level in units */ +} LeakyBucket; + +/* The following structure is used to configure a ThrottleState + * It contains a bit of state: the bucket field of the LeakyBucket structure. + * However it allows to keep the code clean and the bucket field is reset to + * zero at the right time. + */ +typedef struct ThrottleConfig { + LeakyBucket buckets[BUCKETS_COUNT]; /* leaky buckets */ + uint64_t op_size; /* size of an operation in bytes */ +} ThrottleConfig; + +typedef struct ThrottleState { + ThrottleConfig cfg; /* configuration */ + int64_t previous_leak; /* timestamp of the last leak done */ + QEMUTimer * timers[2]; /* timers used to do the throttling */ + QEMUClockType clock_type; /* the clock used */ +} ThrottleState; + +/* operations on single leaky buckets */ +void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta); + +int64_t throttle_compute_wait(LeakyBucket *bkt); + +/* expose timer computation function for unit tests */ +bool throttle_compute_timer(ThrottleState *ts, + bool is_write, + int64_t now, + int64_t *next_timestamp); + +/* init/destroy cycle */ +void throttle_init(ThrottleState *ts, + QEMUClockType clock_type, + void (read_timer)(void *), + void (write_timer)(void *), + void *timer_opaque); + +void throttle_destroy(ThrottleState *ts); + +bool throttle_have_timer(ThrottleState *ts); + +/* configuration */ +bool throttle_enabled(ThrottleConfig *cfg); + +bool throttle_conflicting(ThrottleConfig *cfg); + +bool throttle_is_valid(ThrottleConfig *cfg); + +void throttle_config(ThrottleState *ts, ThrottleConfig *cfg); + +void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg); + +/* usage */ +bool throttle_schedule_timer(ThrottleState *ts, bool is_write); + +void throttle_account(ThrottleState *ts, bool is_write, uint64_t size); + +#endif diff --git a/nbd.c b/nbd.c index 0fd05836ca..f847940f3e 100644 --- a/nbd.c +++ b/nbd.c @@ -882,6 +882,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, exp->nbdflags = nbdflags; exp->size = size == -1 ? bdrv_getlength(bs) : size; exp->close = close; + bdrv_ref(bs); return exp; } @@ -928,6 +929,10 @@ void nbd_export_close(NBDExport *exp) } nbd_export_set_name(exp, NULL); nbd_export_put(exp); + if (exp->bs) { + bdrv_unref(exp->bs); + exp->bs = NULL; + } } void nbd_export_get(NBDExport *exp) diff --git a/qapi-schema.json b/qapi-schema.json index a51f7d2d6e..2b2c8bce07 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -785,6 +785,20 @@ # # @image: the info of image used (since: 1.6) # +# @bps_max: #optional total max in bytes (Since 1.7) +# +# @bps_rd_max: #optional read max in bytes (Since 1.7) +# +# @bps_wr_max: #optional write max in bytes (Since 1.7) +# +# @iops_max: #optional total I/O operations max (Since 1.7) +# +# @iops_rd_max: #optional read I/O operations max (Since 1.7) +# +# @iops_wr_max: #optional write I/O operations max (Since 1.7) +# +# @iops_size: #optional an I/O size in bytes (Since 1.7) +# # Since: 0.14.0 # # Notes: This interface is only found in @BlockInfo. @@ -795,7 +809,11 @@ 'encrypted': 'bool', 'encryption_key_missing': 'bool', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', - 'image': 'ImageInfo' } } + 'image': 'ImageInfo', + '*bps_max': 'int', '*bps_rd_max': 'int', + '*bps_wr_max': 'int', '*iops_max': 'int', + '*iops_rd_max': 'int', '*iops_wr_max': 'int', + '*iops_size': 'int' } } ## # @BlockDeviceIoStatus: @@ -812,6 +830,35 @@ ## { 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] } +## +# @BlockDeviceMapEntry: +# +# Entry in the metadata map of the device (returned by "qemu-img map") +# +# @start: Offset in the image of the first byte described by this entry +# (in bytes) +# +# @length: Length of the range described by this entry (in bytes) +# +# @depth: Number of layers (0 = top image, 1 = top image's backing file, etc.) +# before reaching one for which the range is allocated. The value is +# in the range 0 to the depth of the image chain - 1. +# +# @zero: the sectors in this range read as zeros +# +# @data: reading the image will actually read data from a file (in particular, +# if @offset is present this means that the sectors are not simply +# preallocated, but contain actual data in raw format) +# +# @offset: if present, the image file stores the data for this range in +# raw format at the given offset. +# +# Since 1.7 +## +{ 'type': 'BlockDeviceMapEntry', + 'data': { 'start': 'int', 'length': 'int', 'depth': 'int', 'zero': 'bool', + 'data': 'bool', '*offset': 'int' } } + ## # @BlockDirtyInfo: # @@ -2174,6 +2221,20 @@ # # @iops_wr: write I/O operations per second # +# @bps_max: #optional total max in bytes (Since 1.7) +# +# @bps_rd_max: #optional read max in bytes (Since 1.7) +# +# @bps_wr_max: #optional write max in bytes (Since 1.7) +# +# @iops_max: #optional total I/O operations max (Since 1.7) +# +# @iops_rd_max: #optional read I/O operations max (Since 1.7) +# +# @iops_wr_max: #optional write I/O operations max (Since 1.7) +# +# @iops_size: #optional an I/O size in bytes (Since 1.7) +# # Returns: Nothing on success # If @device is not a valid block device, DeviceNotFound # @@ -2181,7 +2242,11 @@ ## { 'command': 'block_set_io_throttle', 'data': { 'device': 'str', 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int', - 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int' } } + 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int', + '*bps_max': 'int', '*bps_rd_max': 'int', + '*bps_wr_max': 'int', '*iops_max': 'int', + '*iops_rd_max': 'int', '*iops_wr_max': 'int', + '*iops_size': 'int' } } ## # @block-stream: diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx index 4ca7e95655..0c36e5968f 100644 --- a/qemu-img-cmds.hx +++ b/qemu-img-cmds.hx @@ -34,9 +34,9 @@ STEXI ETEXI DEF("convert", img_convert, - "convert [-c] [-p] [-q] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename") + "convert [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-O output_fmt] [-o options] [-s snapshot_name] [-S sparse_size] filename [filename2 [...]] output_filename") STEXI -@item convert [-c] [-p] [-q] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} ETEXI DEF("info", img_info, @@ -45,6 +45,12 @@ STEXI @item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} ETEXI +DEF("map", img_map, + "map [-f fmt] [--output=ofmt] filename") +STEXI +@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} +ETEXI + DEF("snapshot", img_snapshot, "snapshot [-q] [-l | -a snapshot | -c snapshot | -d snapshot] filename") STEXI diff --git a/qemu-img.c b/qemu-img.c index b9a848db74..3e5e388d1c 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -103,6 +103,8 @@ static void help(void) " '-S' indicates the consecutive number of bytes that must contain only zeros\n" " for qemu-img to create a sparse image during conversion\n" " '--output' takes the format in which the output must be done (human or json)\n" + " '-n' skips the target volume creation (useful if the volume is created\n" + " prior to running qemu-img)\n" "\n" "Parameters to check subcommand:\n" " '-r' tries to repair any inconsistencies that are found during the check.\n" @@ -298,7 +300,7 @@ static BlockDriverState *bdrv_new_open(const char *filename, return bs; fail: if (bs) { - bdrv_delete(bs); + bdrv_unref(bs); } return NULL; } @@ -652,7 +654,7 @@ static int img_check(int argc, char **argv) fail: qapi_free_ImageCheck(check); - bdrv_delete(bs); + bdrv_unref(bs); return ret; } @@ -722,7 +724,7 @@ static int img_commit(int argc, char **argv) break; } - bdrv_delete(bs); + bdrv_unref(bs); if (ret) { return 1; } @@ -1104,11 +1106,11 @@ static int img_compare(int argc, char **argv) ret = 0; out: - bdrv_delete(bs2); + bdrv_unref(bs2); qemu_vfree(buf1); qemu_vfree(buf2); out2: - bdrv_delete(bs1); + bdrv_unref(bs1); out3: qemu_progress_end(); return ret; @@ -1116,7 +1118,8 @@ out3: static int img_convert(int argc, char **argv) { - int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, cluster_sectors; + int c, ret = 0, n, n1, bs_n, bs_i, compress, cluster_size, + cluster_sectors, skip_create; int progress = 0, flags; const char *fmt, *out_fmt, *cache, *out_baseimg, *out_filename; BlockDriver *drv, *proto_drv; @@ -1139,8 +1142,9 @@ static int img_convert(int argc, char **argv) cache = "unsafe"; out_baseimg = NULL; compress = 0; + skip_create = 0; for(;;) { - c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:q"); + c = getopt(argc, argv, "f:O:B:s:hce6o:pS:t:qn"); if (c == -1) { break; } @@ -1197,6 +1201,9 @@ static int img_convert(int argc, char **argv) case 'q': quiet = true; break; + case 'n': + skip_create = 1; + break; } } @@ -1329,20 +1336,22 @@ static int img_convert(int argc, char **argv) } } - /* Create the new image */ - ret = bdrv_create(drv, out_filename, param); - if (ret < 0) { - if (ret == -ENOTSUP) { - error_report("Formatting not supported for file format '%s'", - out_fmt); - } else if (ret == -EFBIG) { - error_report("The image size is too large for file format '%s'", - out_fmt); - } else { - error_report("%s: error while converting %s: %s", - out_filename, out_fmt, strerror(-ret)); + if (!skip_create) { + /* Create the new image */ + ret = bdrv_create(drv, out_filename, param); + if (ret < 0) { + if (ret == -ENOTSUP) { + error_report("Formatting not supported for file format '%s'", + out_fmt); + } else if (ret == -EFBIG) { + error_report("The image size is too large for file format '%s'", + out_fmt); + } else { + error_report("%s: error while converting %s: %s", + out_filename, out_fmt, strerror(-ret)); + } + goto out; } - goto out; } flags = BDRV_O_RDWR; @@ -1363,6 +1372,20 @@ static int img_convert(int argc, char **argv) bdrv_get_geometry(bs[0], &bs_sectors); buf = qemu_blockalign(out_bs, IO_BUF_SIZE); + if (skip_create) { + int64_t output_length = bdrv_getlength(out_bs); + if (output_length < 0) { + error_report("unable to get output image length: %s\n", + strerror(-output_length)); + ret = -1; + goto out; + } else if (output_length < total_sectors << BDRV_SECTOR_BITS) { + error_report("output file is smaller than input file"); + ret = -1; + goto out; + } + } + if (compress) { ret = bdrv_get_info(out_bs, &bdi); if (ret < 0) { @@ -1479,21 +1502,26 @@ static int img_convert(int argc, char **argv) n = bs_offset + bs_sectors - sector_num; } - if (has_zero_init) { - /* If the output image is being created as a copy on write image, - assume that sectors which are unallocated in the input image - are present in both the output's and input's base images (no - need to copy them). */ - if (out_baseimg) { - if (!bdrv_is_allocated(bs[bs_i], sector_num - bs_offset, - n, &n1)) { - sector_num += n1; - continue; - } - /* The next 'n1' sectors are allocated in the input image. Copy - only those as they may be followed by unallocated sectors. */ - n = n1; + /* If the output image is being created as a copy on write image, + assume that sectors which are unallocated in the input image + are present in both the output's and input's base images (no + need to copy them). */ + if (out_baseimg) { + ret = bdrv_is_allocated(bs[bs_i], sector_num - bs_offset, + n, &n1); + if (ret < 0) { + error_report("error while reading metadata for sector " + "%" PRId64 ": %s", + sector_num - bs_offset, strerror(-ret)); + goto out; } + if (!ret) { + sector_num += n1; + continue; + } + /* The next 'n1' sectors are allocated in the input image. Copy + only those as they may be followed by unallocated sectors. */ + n = n1; } else { n1 = n; } @@ -1509,14 +1537,7 @@ static int img_convert(int argc, char **argv) should add a specific call to have the info to go faster */ buf1 = buf; while (n > 0) { - /* If the output image is being created as a copy on write image, - copy all sectors even the ones containing only NUL bytes, - because they may differ from the sectors in the base image. - - If the output is to a host device, we also write out - sectors that are entirely 0, since whatever data was - already there is garbage, not 0s. */ - if (!has_zero_init || out_baseimg || + if (!has_zero_init || is_allocated_sectors_min(buf1, n, &n1, min_sparse)) { ret = bdrv_write(out_bs, sector_num, buf1, n1); if (ret < 0) { @@ -1538,12 +1559,12 @@ out: free_option_parameters(param); qemu_vfree(buf); if (out_bs) { - bdrv_delete(out_bs); + bdrv_unref(out_bs); } if (bs) { for (bs_i = 0; bs_i < bs_n; bs_i++) { if (bs[bs_i]) { - bdrv_delete(bs[bs_i]); + bdrv_unref(bs[bs_i]); } } g_free(bs); @@ -1681,7 +1702,7 @@ static ImageInfoList *collect_image_info_list(const char *filename, *last = elem; last = &elem->next; - bdrv_delete(bs); + bdrv_unref(bs); filename = fmt = NULL; if (chain) { @@ -1780,6 +1801,197 @@ static int img_info(int argc, char **argv) return 0; } + +typedef struct MapEntry { + int flags; + int depth; + int64_t start; + int64_t length; + int64_t offset; + BlockDriverState *bs; +} MapEntry; + +static void dump_map_entry(OutputFormat output_format, MapEntry *e, + MapEntry *next) +{ + switch (output_format) { + case OFORMAT_HUMAN: + if ((e->flags & BDRV_BLOCK_DATA) && + !(e->flags & BDRV_BLOCK_OFFSET_VALID)) { + error_report("File contains external, encrypted or compressed clusters."); + exit(1); + } + if ((e->flags & (BDRV_BLOCK_DATA|BDRV_BLOCK_ZERO)) == BDRV_BLOCK_DATA) { + printf("%#-16"PRIx64"%#-16"PRIx64"%#-16"PRIx64"%s\n", + e->start, e->length, e->offset, e->bs->filename); + } + /* This format ignores the distinction between 0, ZERO and ZERO|DATA. + * Modify the flags here to allow more coalescing. + */ + if (next && + (next->flags & (BDRV_BLOCK_DATA|BDRV_BLOCK_ZERO)) != BDRV_BLOCK_DATA) { + next->flags &= ~BDRV_BLOCK_DATA; + next->flags |= BDRV_BLOCK_ZERO; + } + break; + case OFORMAT_JSON: + printf("%s{ \"start\": %"PRId64", \"length\": %"PRId64", \"depth\": %d," + " \"zero\": %s, \"data\": %s", + (e->start == 0 ? "[" : ",\n"), + e->start, e->length, e->depth, + (e->flags & BDRV_BLOCK_ZERO) ? "true" : "false", + (e->flags & BDRV_BLOCK_DATA) ? "true" : "false"); + if (e->flags & BDRV_BLOCK_OFFSET_VALID) { + printf(", 'offset': %"PRId64"", e->offset); + } + putchar('}'); + + if (!next) { + printf("]\n"); + } + break; + } +} + +static int get_block_status(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, MapEntry *e) +{ + int64_t ret; + int depth; + + /* As an optimization, we could cache the current range of unallocated + * clusters in each file of the chain, and avoid querying the same + * range repeatedly. + */ + + depth = 0; + for (;;) { + ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &nb_sectors); + if (ret < 0) { + return ret; + } + assert(nb_sectors); + if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) { + break; + } + bs = bs->backing_hd; + if (bs == NULL) { + ret = 0; + break; + } + + depth++; + } + + e->start = sector_num * BDRV_SECTOR_SIZE; + e->length = nb_sectors * BDRV_SECTOR_SIZE; + e->flags = ret & ~BDRV_BLOCK_OFFSET_MASK; + e->offset = ret & BDRV_BLOCK_OFFSET_MASK; + e->depth = depth; + e->bs = bs; + return 0; +} + +static int img_map(int argc, char **argv) +{ + int c; + OutputFormat output_format = OFORMAT_HUMAN; + BlockDriverState *bs; + const char *filename, *fmt, *output; + int64_t length; + MapEntry curr = { .length = 0 }, next; + int ret = 0; + + fmt = NULL; + output = NULL; + for (;;) { + int option_index = 0; + static const struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"format", required_argument, 0, 'f'}, + {"output", required_argument, 0, OPTION_OUTPUT}, + {0, 0, 0, 0} + }; + c = getopt_long(argc, argv, "f:h", + long_options, &option_index); + if (c == -1) { + break; + } + switch (c) { + case '?': + case 'h': + help(); + break; + case 'f': + fmt = optarg; + break; + case OPTION_OUTPUT: + output = optarg; + break; + } + } + if (optind >= argc) { + help(); + } + filename = argv[optind++]; + + if (output && !strcmp(output, "json")) { + output_format = OFORMAT_JSON; + } else if (output && !strcmp(output, "human")) { + output_format = OFORMAT_HUMAN; + } else if (output) { + error_report("--output must be used with human or json as argument."); + return 1; + } + + bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS, true, false); + if (!bs) { + return 1; + } + + if (output_format == OFORMAT_HUMAN) { + printf("%-16s%-16s%-16s%s\n", "Offset", "Length", "Mapped to", "File"); + } + + length = bdrv_getlength(bs); + while (curr.start + curr.length < length) { + int64_t nsectors_left; + int64_t sector_num; + int n; + + sector_num = (curr.start + curr.length) >> BDRV_SECTOR_BITS; + + /* Probe up to 1 GiB at a time. */ + nsectors_left = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE) - sector_num; + n = MIN(1 << (30 - BDRV_SECTOR_BITS), nsectors_left); + ret = get_block_status(bs, sector_num, n, &next); + + if (ret < 0) { + error_report("Could not read file metadata: %s", strerror(-ret)); + goto out; + } + + if (curr.length != 0 && curr.flags == next.flags && + curr.depth == next.depth && + ((curr.flags & BDRV_BLOCK_OFFSET_VALID) == 0 || + curr.offset + curr.length == next.offset)) { + curr.length += next.length; + continue; + } + + if (curr.length > 0) { + dump_map_entry(output_format, &curr, &next); + } + curr = next; + } + + dump_map_entry(output_format, &curr, NULL); + +out: + bdrv_unref(bs); + return ret < 0; +} + #define SNAPSHOT_LIST 1 #define SNAPSHOT_CREATE 2 #define SNAPSHOT_APPLY 3 @@ -1895,7 +2107,7 @@ static int img_snapshot(int argc, char **argv) } /* Cleanup */ - bdrv_delete(bs); + bdrv_unref(bs); if (ret) { return 1; } @@ -2076,6 +2288,11 @@ static int img_rebase(int argc, char **argv) /* If the cluster is allocated, we don't need to take action */ ret = bdrv_is_allocated(bs, sector, n, &n); + if (ret < 0) { + error_report("error while reading image metadata: %s", + strerror(-ret)); + goto out; + } if (ret) { continue; } @@ -2170,14 +2387,14 @@ out: /* Cleanup */ if (!unsafe) { if (bs_old_backing != NULL) { - bdrv_delete(bs_old_backing); + bdrv_unref(bs_old_backing); } if (bs_new_backing != NULL) { - bdrv_delete(bs_new_backing); + bdrv_unref(bs_new_backing); } } - bdrv_delete(bs); + bdrv_unref(bs); if (ret) { return 1; } @@ -2300,7 +2517,7 @@ static int img_resize(int argc, char **argv) } out: if (bs) { - bdrv_delete(bs); + bdrv_unref(bs); } if (ret) { return 1; diff --git a/qemu-img.texi b/qemu-img.texi index 69f1bda6ae..43ee4eb5c4 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -96,6 +96,14 @@ Second image format Strict mode - fail on on different image size or sector allocation @end table +Parameters to convert subcommand: + +@table @option + +@item -n +Skip the creation of the target volume +@end table + Command description: @table @option @@ -171,7 +179,7 @@ Error on reading data @end table -@item convert [-c] [-p] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} +@item convert [-c] [-p] [-n] [-f @var{fmt}] [-t @var{cache}] [-O @var{output_fmt}] [-o @var{options}] [-s @var{snapshot_name}] [-S @var{sparse_size}] @var{filename} [@var{filename2} [...]] @var{output_filename} Convert the disk image @var{filename} or a snapshot @var{snapshot_name} to disk image @var{output_filename} using format @var{output_fmt}. It can be optionally compressed (@code{-c} @@ -190,6 +198,11 @@ created as a copy on write image of the specified base image; the @var{backing_file} should have the same content as the input's base image, however the path, image format, etc may differ. +If the @code{-n} option is specified, the target volume creation will be +skipped. This is useful for formats such as @code{rbd} if the target +volume has already been created with site specific options that cannot +be supplied through qemu-img. + @item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename} Give information about the disk image @var{filename}. Use it in @@ -213,6 +226,61 @@ To enumerate information about each disk image in the above chain, starting from qemu-img info --backing-chain snap2.qcow2 @end example +@item map [-f @var{fmt}] [--output=@var{ofmt}] @var{filename} + +Dump the metadata of image @var{filename} and its backing file chain. +In particular, this commands dumps the allocation state of every sector +of @var{filename}, together with the topmost file that allocates it in +the backing file chain. + +Two option formats are possible. The default format (@code{human}) +only dumps known-nonzero areas of the file. Known-zero parts of the +file are omitted altogether, and likewise for parts that are not allocated +throughout the chain. @command{qemu-img} output will identify a file +from where the data can be read, and the offset in the file. Each line +will include four fields, the first three of which are hexadecimal +numbers. For example the first line of: +@example +Offset Length Mapped to File +0 0x20000 0x50000 /tmp/overlay.qcow2 +0x100000 0x10000 0x95380000 /tmp/backing.qcow2 +@end example +@noindent +means that 0x20000 (131072) bytes starting at offset 0 in the image are +available in /tmp/overlay.qcow2 (opened in @code{raw} format) starting +at offset 0x50000 (327680). Data that is compressed, encrypted, or +otherwise not available in raw format will cause an error if @code{human} +format is in use. Note that file names can include newlines, thus it is +not safe to parse this output format in scripts. + +The alternative format @code{json} will return an array of dictionaries +in JSON format. It will include similar information in +the @code{start}, @code{length}, @code{offset} fields; +it will also include other more specific information: +@itemize @minus +@item +whether the sectors contain actual data or not (boolean field @code{data}; +if false, the sectors are either unallocated or stored as optimized +all-zero clusters); + +@item +whether the data is known to read as zero (boolean field @code{zero}); + +@item +in order to make the output shorter, the target file is expressed as +a @code{depth}; for example, a depth of 2 refers to the backing file +of the backing file of @var{filename}. +@end itemize + +In JSON format, the @code{offset} field is optional; it is absent in +cases where @code{human} format would omit the entry or exit with an error. +If @code{data} is false and the @code{offset} field is present, the +corresponding sectors in the file are not yet in use, but they are +preallocated. + +For more information, consult @file{include/block/block.h} in QEMU's +source code. + @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename} List, apply, create or delete snapshots in image @var{filename}. diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index f91b6c4f02..8565d49336 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -1830,6 +1830,10 @@ static int alloc_f(BlockDriverState *bs, int argc, char **argv) sector_num = offset >> 9; while (remaining) { ret = bdrv_is_allocated(bs, sector_num, remaining, &num); + if (ret < 0) { + printf("is_allocated failed: %s\n", strerror(-ret)); + return 0; + } sector_num += num; remaining -= num; if (ret) { diff --git a/qemu-io.c b/qemu-io.c index d54dc86921..71f4ff1302 100644 --- a/qemu-io.c +++ b/qemu-io.c @@ -32,7 +32,7 @@ static char **cmdline; static int close_f(BlockDriverState *bs, int argc, char **argv) { - bdrv_delete(bs); + bdrv_unref(bs); qemuio_bs = NULL; return 0; } @@ -61,7 +61,7 @@ static int openfile(char *name, int flags, int growable) if (bdrv_open(qemuio_bs, name, NULL, flags, NULL) < 0) { fprintf(stderr, "%s: can't open device %s\n", progname, name); - bdrv_delete(qemuio_bs); + bdrv_unref(qemuio_bs); qemuio_bs = NULL; return 1; } @@ -422,7 +422,7 @@ int main(int argc, char **argv) bdrv_drain_all(); if (qemuio_bs) { - bdrv_delete(qemuio_bs); + bdrv_unref(qemuio_bs); } return 0; } diff --git a/qemu-options.hx b/qemu-options.hx index d15338e879..5dc8b75cdb 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -409,7 +409,11 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive, " [,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n" " [,serial=s][,addr=A][,id=name][,aio=threads|native]\n" " [,readonly=on|off][,copy-on-read=on|off]\n" - " [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]][[,iops=i]|[[,iops_rd=r][,iops_wr=w]]\n" + " [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n" + " [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n" + " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n" + " [[,iops_max=im]|[[,iops_rd_max=irm][,iops_wr_max=iwm]]]\n" + " [[,iops_size=is]]\n" " use 'file' as a drive image\n", QEMU_ARCH_ALL) STEXI @item -drive @var{option}[,@var{option}[,@var{option}[,...]]] diff --git a/qmp-commands.hx b/qmp-commands.hx index 8a8f342eab..008cad95a2 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -1389,7 +1389,7 @@ EQMP { .name = "block_set_io_throttle", - .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l", + .args_type = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l,bps_max:l?,bps_rd_max:l?,bps_wr_max:l?,iops_max:l?,iops_rd_max:l?,iops_wr_max:l?,iops_size:l?", .mhandler.cmd_new = qmp_marshal_input_block_set_io_throttle, }, @@ -1408,6 +1408,13 @@ Arguments: - "iops": total I/O operations per second (json-int) - "iops_rd": read I/O operations per second (json-int) - "iops_wr": write I/O operations per second (json-int) +- "bps_max": total max in bytes (json-int) +- "bps_rd_max": read max in bytes (json-int) +- "bps_wr_max": write max in bytes (json-int) +- "iops_max": total I/O operations max (json-int) +- "iops_rd_max": read I/O operations max (json-int) +- "iops_wr_max": write I/O operations max (json-int) +- "iops_size": I/O size in bytes when limiting (json-int) Example: @@ -1417,7 +1424,14 @@ Example: "bps_wr": 0, "iops": 0, "iops_rd": 0, - "iops_wr": 0 } } + "iops_wr": 0, + "bps_max": 8000000, + "bps_rd_max": 0, + "bps_wr_max": 0, + "iops_max": 0, + "iops_rd_max": 0, + "iops_wr_max": 0, + "iops_size": 0 } } <- { "return": {} } EQMP @@ -1758,6 +1772,13 @@ Each json-object contain the following: - "iops": limit total I/O operations per second (json-int) - "iops_rd": limit read operations per second (json-int) - "iops_wr": limit write operations per second (json-int) + - "bps_max": total max in bytes (json-int) + - "bps_rd_max": read max in bytes (json-int) + - "bps_wr_max": write max in bytes (json-int) + - "iops_max": total I/O operations max (json-int) + - "iops_rd_max": read I/O operations max (json-int) + - "iops_wr_max": write I/O operations max (json-int) + - "iops_size": I/O size when limiting by iops (json-int) - "image": the detail of the image, it is a json-object containing the following: - "filename": image file name (json-string) @@ -1827,6 +1848,13 @@ Example: "iops":1000000, "iops_rd":0, "iops_wr":0, + "bps_max": 8000000, + "bps_rd_max": 0, + "bps_wr_max": 0, + "iops_max": 0, + "iops_rd_max": 0, + "iops_wr_max": 0, + "iops_size": 0, "image":{ "filename":"disks/test.qcow2", "format":"qcow2", diff --git a/tests/Makefile b/tests/Makefile index baba9e95ad..c13fefc314 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -31,6 +31,7 @@ check-unit-y += tests/test-visitor-serialization$(EXESUF) check-unit-y += tests/test-iov$(EXESUF) gcov-files-test-iov-y = util/iov.c check-unit-y += tests/test-aio$(EXESUF) +check-unit-y += tests/test-throttle$(EXESUF) gcov-files-test-aio-$(CONFIG_WIN32) = aio-win32.c gcov-files-test-aio-$(CONFIG_POSIX) = aio-posix.c check-unit-y += tests/test-thread-pool$(EXESUF) @@ -120,6 +121,7 @@ tests/check-qfloat$(EXESUF): tests/check-qfloat.o libqemuutil.a tests/check-qjson$(EXESUF): tests/check-qjson.o libqemuutil.a libqemustub.a tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-aio$(EXESUF): tests/test-aio.o $(block-obj-y) libqemuutil.a libqemustub.a +tests/test-throttle$(EXESUF): tests/test-throttle.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(block-obj-y) libqemuutil.a libqemustub.a tests/test-iov$(EXESUF): tests/test-iov.o libqemuutil.a tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o libqemuutil.a libqemustub.a diff --git a/tests/qemu-iotests/026.out b/tests/qemu-iotests/026.out index fb4f20e7cd..0764389f8e 100644 --- a/tests/qemu-iotests/026.out +++ b/tests/qemu-iotests/026.out @@ -126,62 +126,64 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: on; write write failed: Input/output error -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: on; write -b write failed: Input/output error -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: off; write write failed: Input/output error -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 5; imm: off; once: off; write -b write failed: Input/output error -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: on; write write failed: No space left on device -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: on; write -b write failed: No space left on device -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: off; write write failed: No space left on device -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_update; errno: 28; imm: off; once: off; write -b write failed: No space left on device -128 leaked clusters were found on the image. +127 leaked clusters were found on the image. This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_alloc.write; errno: 5; imm: off; once: on; write write failed: Input/output error -No errors were found on the image. + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_alloc.write; errno: 5; imm: off; once: on; write -b @@ -205,7 +207,9 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_alloc.write; errno: 28; imm: off; once: on; write write failed: No space left on device -No errors were found on the image. + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l2_alloc.write; errno: 28; imm: off; once: on; write -b @@ -575,7 +579,6 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow.write_table; errno: 5; imm: off; once: off -qcow2_free_clusters failed: Input/output error write failed: Input/output error No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 @@ -586,7 +589,6 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow.write_table; errno: 28; imm: off; once: off -qcow2_free_clusters failed: No space left on device write failed: No space left on device No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 @@ -597,7 +599,6 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow.activate_table; errno: 5; imm: off; once: off -qcow2_free_clusters failed: Input/output error write failed: Input/output error 96 leaked clusters were found on the image. @@ -610,7 +611,6 @@ No errors were found on the image. Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 Event: l1_grow.activate_table; errno: 28; imm: off; once: off -qcow2_free_clusters failed: No space left on device write failed: No space left on device 96 leaked clusters were found on the image. diff --git a/tests/qemu-iotests/026.out.nocache b/tests/qemu-iotests/026.out.nocache new file mode 100644 index 0000000000..33bad0d6ae --- /dev/null +++ b/tests/qemu-iotests/026.out.nocache @@ -0,0 +1,626 @@ +QA output created by 026 +Errors while writing 128 kB + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 5; imm: off; once: on; write +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 5; imm: off; once: on; write -b +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 5; imm: off; once: off; write +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 5; imm: off; once: off; write -b +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 28; imm: off; once: on; write +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 28; imm: off; once: on; write -b +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 28; imm: off; once: off; write +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_update; errno: 28; imm: off; once: off; write -b +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 5; imm: off; once: on; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: Input/output error +read failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 5; imm: off; once: on; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: Input/output error +read failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 5; imm: off; once: off; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: Input/output error +read failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 5; imm: off; once: off; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: Input/output error +read failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 28; imm: off; once: on; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: No space left on device +read failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 28; imm: off; once: on; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: No space left on device +read failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 28; imm: off; once: off; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: No space left on device +read failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_load; errno: 28; imm: off; once: off; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +write failed: No space left on device +read failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 5; imm: off; once: on; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 5; imm: off; once: on; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 5; imm: off; once: off; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 5; imm: off; once: off; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 28; imm: off; once: on; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 28; imm: off; once: on; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 28; imm: off; once: off; write +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_update; errno: 28; imm: off; once: off; write -b +wrote 131072/131072 bytes at offset 0 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +127 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 5; imm: off; once: on; write +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 5; imm: off; once: on; write -b +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 5; imm: off; once: off; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 5; imm: off; once: off; write -b +write failed: Input/output error + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 28; imm: off; once: on; write +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 28; imm: off; once: on; write -b +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l2_alloc.write; errno: 28; imm: off; once: off; write -b +write failed: No space left on device + +1 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 5; imm: off; once: on; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 5; imm: off; once: on; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 5; imm: off; once: off; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 5; imm: off; once: off; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: write_aio; errno: 28; imm: off; once: off; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 5; imm: off; once: on; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 5; imm: off; once: on; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 5; imm: off; once: off; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 5; imm: off; once: off; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_load; errno: 28; imm: off; once: off; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 5; imm: off; once: on; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 5; imm: off; once: on; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 5; imm: off; once: off; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 5; imm: off; once: off; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_update_part; errno: 28; imm: off; once: off; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 5; imm: off; once: on; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 5; imm: off; once: on; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 5; imm: off; once: off; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 5; imm: off; once: off; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc; errno: 28; imm: off; once: off; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 5; imm: off; once: on; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 5; imm: off; once: on; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 5; imm: off; once: off; write +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 5; imm: off; once: off; write -b +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: cluster_alloc; errno: 28; imm: off; once: off; write -b +write failed: No space left on device +No errors were found on the image. + +=== Refcout table growth tests === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.hookup; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.hookup; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.hookup; errno: 28; imm: off; once: off; write +write failed: No space left on device + +55 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.hookup; errno: 28; imm: off; once: off; write -b +write failed: No space left on device + +251 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write; errno: 28; imm: off; once: off; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write; errno: 28; imm: off; once: off; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write +write failed: No space left on device + +10 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_blocks; errno: 28; imm: off; once: off; write -b +write failed: No space left on device + +23 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_table; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_table; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write +write failed: No space left on device + +10 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.write_table; errno: 28; imm: off; once: off; write -b +write failed: No space left on device + +23 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.switch_table; errno: 28; imm: off; once: on; write +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.switch_table; errno: 28; imm: off; once: on; write -b +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write +write failed: No space left on device + +10 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: refblock_alloc.switch_table; errno: 28; imm: off; once: off; write -b +write failed: No space left on device + +23 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. + +=== L1 growth tests === + +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.alloc_table; errno: 5; imm: off; once: on +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.alloc_table; errno: 5; imm: off; once: off +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.alloc_table; errno: 28; imm: off; once: on +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.alloc_table; errno: 28; imm: off; once: off +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.write_table; errno: 5; imm: off; once: on +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.write_table; errno: 5; imm: off; once: off +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.write_table; errno: 28; imm: off; once: on +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.write_table; errno: 28; imm: off; once: off +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.activate_table; errno: 5; imm: off; once: on +write failed: Input/output error +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.activate_table; errno: 5; imm: off; once: off +write failed: Input/output error + +96 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.activate_table; errno: 28; imm: off; once: on +write failed: No space left on device +No errors were found on the image. +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824 + +Event: l1_grow.activate_table; errno: 28; imm: off; once: off +write failed: No space left on device + +96 leaked clusters were found on the image. +This means waste of disk space, but no harm to data. +*** done diff --git a/tests/qemu-iotests/039.out b/tests/qemu-iotests/039.out index cb510d6716..077fa64cbf 100644 --- a/tests/qemu-iotests/039.out +++ b/tests/qemu-iotests/039.out @@ -12,8 +12,8 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) incompatible_features 0x1 -ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0 ERROR cluster 5 refcount=0 reference=1 +ERROR OFLAG_COPIED data cluster: l2_entry=8000000000050000 refcount=0 2 errors were found on the image. Data may be corrupted, or further writes to the image may corrupt it. @@ -24,7 +24,6 @@ read 512/512 bytes at offset 0 incompatible_features 0x1 == Repairing the image file must succeed == -ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0 Repairing cluster 5 refcount=0 reference=1 The following inconsistencies were found and repaired: @@ -44,7 +43,6 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) incompatible_features 0x1 -ERROR OFLAG_COPIED: offset=8000000000050000 refcount=0 Repairing cluster 5 refcount=0 reference=1 wrote 512/512 bytes at offset 0 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) diff --git a/tests/qemu-iotests/063 b/tests/qemu-iotests/063 new file mode 100755 index 0000000000..de0cbbd8bb --- /dev/null +++ b/tests/qemu-iotests/063 @@ -0,0 +1,97 @@ +#!/bin/bash +# +# test of qemu-img convert -n - convert without creation +# +# Copyright (C) 2009 Red Hat, Inc. +# Copyright (C) 2013 Alex Bligh (alex@alex.org.uk) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=alex@alex.org.uk + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img + rm -f $TEST_IMG.orig $TEST_IMG.raw $TEST_IMG.raw2 +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter +. ./common.pattern + +_supported_fmt qcow qcow2 vmdk qed raw +_supported_proto generic +_supported_os Linux + +_make_test_img 4M + +echo "== Testing conversion with -n fails with no target file ==" +# check .orig file does not exist +rm -f $TEST_IMG.orig +if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG $TEST_IMG.orig >/dev/null 2>&1; then + exit 1 +fi + +echo "== Testing conversion with -n succeeds with a target file ==" +rm -f $TEST_IMG.orig +cp $TEST_IMG $TEST_IMG.orig +if ! $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG $TEST_IMG.orig ; then + exit 1 +fi + +echo "== Testing conversion to raw is the same after conversion with -n ==" +# compare the raw files +if ! $QEMU_IMG convert -f $IMGFMT -O raw $TEST_IMG $TEST_IMG.raw1 ; then + exit 1 +fi + +if ! $QEMU_IMG convert -f $IMGFMT -O raw $TEST_IMG.orig $TEST_IMG.raw2 ; then + exit 1 +fi + +if ! cmp $TEST_IMG.raw1 $TEST_IMG.raw2 ; then + exit 1 +fi + +echo "== Testing conversion back to original format ==" +if ! $QEMU_IMG convert -f raw -O $IMGFMT -n $TEST_IMG.raw2 $TEST_IMG ; then + exit 1 +fi +_check_test_img + +echo "== Testing conversion to a smaller file fails ==" +rm -f $TEST_IMG.orig +mv $TEST_IMG $TEST_IMG.orig +_make_test_img 2M +if $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n $TEST_IMG.orig $TEST_IMG >/dev/null 2>&1; then + exit 1 +fi + +rm -f $TEST_IMG.orig $TEST_IMG.raw $TEST_IMG.raw2 + +echo "*** done" +rm -f $seq.full +status=0 +exit 0 diff --git a/tests/qemu-iotests/063.out b/tests/qemu-iotests/063.out new file mode 100644 index 0000000000..de1c99afd8 --- /dev/null +++ b/tests/qemu-iotests/063.out @@ -0,0 +1,10 @@ +QA output created by 063 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== Testing conversion with -n fails with no target file == +== Testing conversion with -n succeeds with a target file == +== Testing conversion to raw is the same after conversion with -n == +== Testing conversion back to original format == +No errors were found on the image. +== Testing conversion to a smaller file fails == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=2097152 +*** done diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check index 74628ae637..4ecf497d8e 100755 --- a/tests/qemu-iotests/check +++ b/tests/qemu-iotests/check @@ -78,50 +78,50 @@ _wrapup() if $showme then - : + : elif $needwrap then - if [ -f check.time -a -f $tmp.time ] - then - cat check.time $tmp.time \ - | $AWK_PROG ' - { t[$1] = $2 } -END { if (NR > 0) { - for (i in t) print i " " t[i] - } - }' \ - | sort -n >$tmp.out - mv $tmp.out check.time - fi + if [ -f check.time -a -f $tmp.time ] + then + cat check.time $tmp.time \ + | $AWK_PROG ' + { t[$1] = $2 } +END { if (NR > 0) { + for (i in t) print i " " t[i] + } + }' \ + | sort -n >$tmp.out + mv $tmp.out check.time + fi - if [ -f $tmp.expunged ] - then - notrun=`wc -l <$tmp.expunged | sed -e 's/ *//g'` - try=`expr $try - $notrun` - list=`echo "$list" | sed -f $tmp.expunged` - fi + if [ -f $tmp.expunged ] + then + notrun=`wc -l <$tmp.expunged | sed -e 's/ *//g'` + try=`expr $try - $notrun` + list=`echo "$list" | sed -f $tmp.expunged` + fi - echo "" >>check.log - date >>check.log - echo $list | fmt | sed -e 's/^/ /' >>check.log - $interrupt && echo "Interrupted!" >>check.log - - if [ ! -z "$notrun" ] - then - echo "Not run:$notrun" - echo "Not run:$notrun" >>check.log - fi + echo "" >>check.log + date >>check.log + echo $list | fmt | sed -e 's/^/ /' >>check.log + $interrupt && echo "Interrupted!" >>check.log + + if [ ! -z "$notrun" ] + then + echo "Not run:$notrun" + echo "Not run:$notrun" >>check.log + fi if [ ! -z "$n_bad" -a $n_bad != 0 ] - then - echo "Failures:$bad" - echo "Failed $n_bad of $try tests" - echo "Failures:$bad" | fmt >>check.log - echo "Failed $n_bad of $try tests" >>check.log - else - echo "Passed all $try tests" - echo "Passed all $try tests" >>check.log - fi - needwrap=false + then + echo "Failures:$bad" + echo "Failed $n_bad of $try tests" + echo "Failures:$bad" | fmt >>check.log + echo "Failed $n_bad of $try tests" >>check.log + else + echo "Passed all $try tests" + echo "Passed all $try tests" >>check.log + fi + needwrap=false fi rm -f /tmp/*.out /tmp/*.err /tmp/*.time @@ -185,82 +185,88 @@ do if $showme then - echo - continue - elif [ -f expunged ] && $expunge && egrep "^$seq([ ]|\$)" expunged >/dev/null + echo + continue + elif [ -f expunged ] && $expunge && egrep "^$seq([ ]|\$)" expunged >/dev/null then - echo " - expunged" - rm -f $seq.out.bad - echo "/^$seq\$/d" >>$tmp.expunged + echo " - expunged" + rm -f $seq.out.bad + echo "/^$seq\$/d" >>$tmp.expunged elif [ ! -f $seq ] then - echo " - no such test?" - echo "/^$seq\$/d" >>$tmp.expunged + echo " - no such test?" + echo "/^$seq\$/d" >>$tmp.expunged else - # really going to try and run this one - # - rm -f $seq.out.bad - lasttime=`sed -n -e "/^$seq /s/.* //p" /tmp/check.sts + # for hangcheck ... + echo "$seq" >/tmp/check.sts - start=`_wallclock` - $timestamp && echo -n " ["`date "+%T"`"]" - [ ! -x $seq ] && chmod u+x $seq # ensure we can run it - MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \ - ./$seq >$tmp.out 2>&1 - sts=$? - $timestamp && _timestamp - stop=`_wallclock` + start=`_wallclock` + $timestamp && echo -n " ["`date "+%T"`"]" + [ ! -x $seq ] && chmod u+x $seq # ensure we can run it + MALLOC_PERTURB_=${MALLOC_PERTURB_:-$(($RANDOM % 255 + 1))} \ + ./$seq >$tmp.out 2>&1 + sts=$? + $timestamp && _timestamp + stop=`_wallclock` - if [ -f core ] - then - echo -n " [dumped core]" - mv core $seq.core - err=true - fi + if [ -f core ] + then + echo -n " [dumped core]" + mv core $seq.core + err=true + fi - if [ -f $seq.notrun ] - then - $timestamp || echo -n " [not run] " - $timestamp && echo " [not run]" && echo -n " $seq -- " - cat $seq.notrun - notrun="$notrun $seq" - else - if [ $sts -ne 0 ] - then - echo -n " [failed, exit status $sts]" - err=true - fi - if [ ! -f $seq.out ] - then - echo " - no qualified output" - err=true - else - if diff -w $seq.out $tmp.out >/dev/null 2>&1 - then - echo "" - if $err - then - : - else - echo "$seq `expr $stop - $start`" >>$tmp.time - fi - else - echo " - output mismatch (see $seq.out.bad)" - mv $tmp.out $seq.out.bad - $diff -w $seq.out $seq.out.bad - err=true - fi - fi - fi + if [ -f $seq.notrun ] + then + $timestamp || echo -n " [not run] " + $timestamp && echo " [not run]" && echo -n " $seq -- " + cat $seq.notrun + notrun="$notrun $seq" + else + if [ $sts -ne 0 ] + then + echo -n " [failed, exit status $sts]" + err=true + fi + + reference=$seq.out + if (echo $QEMU_IO_OPTIONS | grep -s -- '--nocache' > /dev/null); then + [ -f $seq.out.nocache ] && reference=$seq.out.nocache + fi + + if [ ! -f $reference ] + then + echo " - no qualified output" + err=true + else + if diff -w $reference $tmp.out >/dev/null 2>&1 + then + echo "" + if $err + then + : + else + echo "$seq `expr $stop - $start`" >>$tmp.time + fi + else + echo " - output mismatch (see $seq.out.bad)" + mv $tmp.out $seq.out.bad + $diff -w $reference $seq.out.bad + err=true + fi + fi + fi fi @@ -268,12 +274,12 @@ do # if $err then - bad="$bad $seq" - n_bad=`expr $n_bad + 1` - quick=false + bad="$bad $seq" + n_bad=`expr $n_bad + 1` + quick=false fi [ -f $seq.notrun ] || try=`expr $try + 1` - + seq="after_$seq" done diff --git a/tests/qemu-iotests/common b/tests/qemu-iotests/common index 6826ea72fe..fecaf85074 100644 --- a/tests/qemu-iotests/common +++ b/tests/qemu-iotests/common @@ -54,58 +54,58 @@ do if $group then - # arg after -g - group_list=`sed -n /dev/null - then - : - else - echo "$t" >>$tmp.list - fi - done - group=false - continue + if [ -z "$group_list" ] + then + echo "Group \"$r\" is empty or not defined?" + exit 1 + fi + [ ! -s $tmp.list ] && touch $tmp.list + for t in $group_list + do + if grep -s "^$t\$" $tmp.list >/dev/null + then + : + else + echo "$t" >>$tmp.list + fi + done + group=false + continue elif $xgroup then - # arg after -x - [ ! -s $tmp.list ] && ls [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] >$tmp.list 2>/dev/null - group_list=`sed -n $tmp.list 2>/dev/null + group_list=`sed -n $tmp.tmp - mv $tmp.tmp $tmp.list - numsed=0 - rm -f $tmp.sed - fi - echo "/^$t\$/d" >>$tmp.sed - numsed=`expr $numsed + 1` - done - sed -f $tmp.sed <$tmp.list >$tmp.tmp - mv $tmp.tmp $tmp.list - xgroup=false - continue + if [ -z "$group_list" ] + then + echo "Group \"$r\" is empty or not defined?" + exit 1 + fi + numsed=0 + rm -f $tmp.sed + for t in $group_list + do + if [ $numsed -gt 100 ] + then + sed -f $tmp.sed <$tmp.list >$tmp.tmp + mv $tmp.tmp $tmp.list + numsed=0 + rm -f $tmp.sed + fi + echo "/^$t\$/d" >>$tmp.sed + numsed=`expr $numsed + 1` + done + sed -f $tmp.sed <$tmp.list >$tmp.tmp + mv $tmp.tmp $tmp.list + xgroup=false + continue elif $imgopts then @@ -119,11 +119,11 @@ s/ .*//p case "$r" in - -\? | -h | --help) # usage - echo "Usage: $0 [options] [testlist]"' + -\? | -h | --help) # usage + echo "Usage: $0 [options] [testlist]"' common options - -v verbose + -v verbose check options -raw test raw (default) @@ -138,162 +138,162 @@ check options -sheepdog test sheepdog -nbd test nbd -ssh test ssh - -xdiff graphical mode diff - -nocache use O_DIRECT on backing file - -misalign misalign memory allocations - -n show me, do not run tests + -xdiff graphical mode diff + -nocache use O_DIRECT on backing file + -misalign misalign memory allocations + -n show me, do not run tests -o options -o options to pass to qemu-img create/convert - -T output timestamps - -r randomize test order - + -T output timestamps + -r randomize test order + testlist options - -g group[,group...] include tests from these groups - -x group[,group...] exclude tests from these groups - NNN include test NNN - NNN-NNN include test range (eg. 012-021) + -g group[,group...] include tests from these groups + -x group[,group...] exclude tests from these groups + NNN include test NNN + NNN-NNN include test range (eg. 012-021) ' - exit 0 - ;; + exit 0 + ;; - -raw) - IMGFMT=raw - xpand=false - ;; + -raw) + IMGFMT=raw + xpand=false + ;; - -cow) - IMGFMT=cow - xpand=false - ;; + -cow) + IMGFMT=cow + xpand=false + ;; - -qcow) - IMGFMT=qcow - xpand=false - ;; + -qcow) + IMGFMT=qcow + xpand=false + ;; - -qcow2) - IMGFMT=qcow2 - xpand=false - ;; + -qcow2) + IMGFMT=qcow2 + xpand=false + ;; - -qed) - IMGFMT=qed - xpand=false - ;; + -qed) + IMGFMT=qed + xpand=false + ;; - -vdi) - IMGFMT=vdi - xpand=false - ;; + -vdi) + IMGFMT=vdi + xpand=false + ;; - -vmdk) - IMGFMT=vmdk - xpand=false - ;; + -vmdk) + IMGFMT=vmdk + xpand=false + ;; - -vpc) - IMGFMT=vpc - xpand=false - ;; + -vpc) + IMGFMT=vpc + xpand=false + ;; - -rbd) - IMGPROTO=rbd - xpand=false - ;; - -sheepdog) - IMGPROTO=sheepdog - xpand=false - ;; - -nbd) - IMGPROTO=nbd - xpand=false - ;; + -rbd) + IMGPROTO=rbd + xpand=false + ;; + -sheepdog) + IMGPROTO=sheepdog + xpand=false + ;; + -nbd) + IMGPROTO=nbd + xpand=false + ;; -ssh) IMGPROTO=ssh xpand=false ;; - -nocache) - QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache" - xpand=false - ;; + -nocache) + QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --nocache" + xpand=false + ;; - -misalign) - QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign" - xpand=false - ;; + -misalign) + QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --misalign" + xpand=false + ;; -valgrind) valgrind=true - xpand=false + xpand=false ;; - -g) # -g group ... pick from group file - group=true - xpand=false - ;; + -g) # -g group ... pick from group file + group=true + xpand=false + ;; - -xdiff) # graphical diff mode - xpand=false + -xdiff) # graphical diff mode + xpand=false - if [ ! -z "$DISPLAY" ] - then - which xdiff >/dev/null 2>&1 && diff=xdiff - which gdiff >/dev/null 2>&1 && diff=gdiff - which tkdiff >/dev/null 2>&1 && diff=tkdiff - which xxdiff >/dev/null 2>&1 && diff=xxdiff - fi - ;; + if [ ! -z "$DISPLAY" ] + then + which xdiff >/dev/null 2>&1 && diff=xdiff + which gdiff >/dev/null 2>&1 && diff=gdiff + which tkdiff >/dev/null 2>&1 && diff=tkdiff + which xxdiff >/dev/null 2>&1 && diff=xxdiff + fi + ;; - -n) # show me, don't do it - showme=true - xpand=false - ;; + -n) # show me, don't do it + showme=true + xpand=false + ;; -o) imgopts=true xpand=false ;; - -r) # randomize test order - randomize=true - xpand=false - ;; + -r) # randomize test order + randomize=true + xpand=false + ;; - -T) # turn on timestamp output - timestamp=true - xpand=false - ;; + -T) # turn on timestamp output + timestamp=true + xpand=false + ;; - -v) - verbose=true - xpand=false - ;; - -x) # -x group ... exclude from group file - xgroup=true - xpand=false - ;; - '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]') - echo "No tests?" - status=1 - exit $status - ;; + -v) + verbose=true + xpand=false + ;; + -x) # -x group ... exclude from group file + xgroup=true + xpand=false + ;; + '[0-9][0-9][0-9] [0-9][0-9][0-9][0-9]') + echo "No tests?" + status=1 + exit $status + ;; - [0-9]*-[0-9]*) - eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'` - ;; + [0-9]*-[0-9]*) + eval `echo $r | sed -e 's/^/start=/' -e 's/-/ end=/'` + ;; - [0-9]*-) - eval `echo $r | sed -e 's/^/start=/' -e 's/-//'` - end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/ *$//' -e 's/.* //'` - if [ -z "$end" ] - then - echo "No tests in range \"$r\"?" - status=1 - exit $status - fi - ;; + [0-9]*-) + eval `echo $r | sed -e 's/^/start=/' -e 's/-//'` + end=`echo [0-9][0-9][0-9] [0-9][0-9][0-9][0-9] | sed -e 's/\[0-9]//g' -e 's/ *$//' -e 's/.* //'` + if [ -z "$end" ] + then + echo "No tests in range \"$r\"?" + status=1 + exit $status + fi + ;; - *) - start=$r - end=$r - ;; + *) + start=$r + end=$r + ;; esac @@ -303,26 +303,26 @@ testlist options if $xpand then - have_test_arg=true - $AWK_PROG /dev/null - then - # in group file ... OK - echo $id >>$tmp.list - else - if [ -f expunged ] && $expunge && egrep "^$id([ ]|\$)" expunged >/dev/null - then - # expunged ... will be reported, but not run, later - echo $id >>$tmp.list - else - # oops - echo "$id - unknown test, ignored" - fi - fi - done + have_test_arg=true + $AWK_PROG /dev/null + then + # in group file ... OK + echo $id >>$tmp.list + else + if [ -f expunged ] && $expunge && egrep "^$id([ ]|\$)" expunged >/dev/null + then + # expunged ... will be reported, but not run, later + echo $id >>$tmp.list + else + # oops + echo "$id - unknown test, ignored" + fi + fi + done fi done @@ -337,11 +337,11 @@ then else if $have_test_arg then - # had test numbers, but none in group file ... do nothing - touch $tmp.list + # had test numbers, but none in group file ... do nothing + touch $tmp.list else - # no test numbers, do everything from group file - sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' $tmp.list + # no test numbers, do everything from group file + sed -n -e '/^[0-9][0-9][0-9]*/s/[ ].*//p' $tmp.list fi fi diff --git a/tests/qemu-iotests/common.config b/tests/qemu-iotests/common.config index 08a3f100b8..d794e624e7 100644 --- a/tests/qemu-iotests/common.config +++ b/tests/qemu-iotests/common.config @@ -19,7 +19,7 @@ # setup and check for config parameters, and in particular # # EMAIL - email of the script runner. -# TEST_DIR - scratch test directory +# TEST_DIR - scratch test directory # # - These can be added to $HOST_CONFIG_DIR (witch default to ./config) # below or a separate local configuration file can be used (using @@ -111,11 +111,11 @@ export QEMU_NBD=$QEMU_NBD_PROG [ -f /etc/qemu-iotest.config ] && . /etc/qemu-iotest.config if [ -z "$TEST_DIR" ]; then - TEST_DIR=`pwd`/scratch + TEST_DIR=`pwd`/scratch fi if [ ! -e "$TEST_DIR" ]; then - mkdir "$TEST_DIR" + mkdir "$TEST_DIR" fi if [ ! -d "$TEST_DIR" ]; then diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter index 97a31ff0b1..5dfda63e59 100644 --- a/tests/qemu-iotests/common.filter +++ b/tests/qemu-iotests/common.filter @@ -25,19 +25,19 @@ # Outputs suitable message to stdout if it's not in range. # # A verbose option, -v, may be used as the LAST argument -# -# e.g. +# +# e.g. # foo: 0.0298 = 0.03 +/- 5% -# _within_tolerance "foo" 0.0298 0.03 5% -# +# _within_tolerance "foo" 0.0298 0.03 5% +# # foo: 0.0298 = 0.03 +/- 0.01 # _within_tolerance "foo" 0.0298 0.03 0.01 # # foo: 0.0298 = 0.03 -0.01 +0.002 # _within_tolerance "foo" 0.0298 0.03 0.01 0.002 # -# foo: verbose output of 0.0298 = 0.03 +/- 5% -# _within_tolerance "foo" 0.0298 0.03 5% -v +# foo: verbose output of 0.0298 = 0.03 +/- 5% +# _within_tolerance "foo" 0.0298 0.03 5% -v _within_tolerance() { _name=$1 @@ -51,10 +51,10 @@ _within_tolerance() # maxtol arg is optional # verbose arg is optional if [ $# -ge 5 ] - then + then if [ "$5" = "-v" ] then - _verbose=1 + _verbose=1 else _maxtol=$5 fi @@ -65,18 +65,18 @@ _within_tolerance() fi # find min with or without % - _mintolerance=`echo $_mintol | sed -e 's/%//'` + _mintolerance=`echo $_mintol | sed -e 's/%//'` if [ $_mintol = $_mintolerance ] - then + then _min=`echo "scale=5; $_correct_val-$_mintolerance" | bc` else _min=`echo "scale=5; $_correct_val-$_mintolerance*0.01*$_correct_val" | bc` fi # find max with or without % - _maxtolerance=`echo $_maxtol | sed -e 's/%//'` + _maxtolerance=`echo $_maxtol | sed -e 's/%//'` if [ $_maxtol = $_maxtolerance ] - then + then _max=`echo "scale=5; $_correct_val+$_maxtolerance" | bc` else _max=`echo "scale=5; $_correct_val+$_maxtolerance*0.01*$_correct_val" | bc` @@ -88,7 +88,7 @@ _within_tolerance() cat <$tmp.bc.1 scale=5; if ($_min <= $_given_val) 1; -if ($_min > $_given_val) 0; +if ($_min > $_given_val) 0; EOF cat <$tmp.bc.2 @@ -102,21 +102,21 @@ EOF rm -f $tmp.bc.[12] - _in_range=`expr $_above_min \& $_below_max` + _in_range=`expr $_above_min \& $_below_max` # fix up min, max precision for output # can vary for 5.3, 6.2 _min=`echo $_min | sed -e 's/0*$//'` # get rid of trailling zeroes _max=`echo $_max | sed -e 's/0*$//'` # get rid of trailling zeroes - if [ $_in_range -eq 1 ] + if [ $_in_range -eq 1 ] then - [ $_verbose -eq 1 ] && echo $_name is in range - return 0 + [ $_verbose -eq 1 ] && echo $_name is in range + return 0 else - [ $_verbose -eq 1 ] && echo $_name has value of $_given_val - [ $_verbose -eq 1 ] && echo $_name is NOT in range $_min .. $_max - return 1 + [ $_verbose -eq 1 ] && echo $_name has value of $_given_val + [ $_verbose -eq 1 ] && echo $_name is NOT in range $_min .. $_max + return 1 fi } @@ -125,7 +125,7 @@ EOF _filter_date() { sed \ - -e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z] *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/' + -e 's/[A-Z][a-z][a-z] [A-z][a-z][a-z] *[0-9][0-9]* [0-9][0-9]:[0-9][0-9]:[0-9][0-9] [0-9][0-9][0-9][0-9]$/DATE/' } # replace occurrences of the actual TEST_DIR value with TEST_DIR diff --git a/tests/qemu-iotests/common.pattern b/tests/qemu-iotests/common.pattern index 85a40eecc0..00e0f605fd 100644 --- a/tests/qemu-iotests/common.pattern +++ b/tests/qemu-iotests/common.pattern @@ -106,8 +106,8 @@ function io_test2() { local num=$3 # Pattern (repeat after 9 clusters): - # used - used - free - used - compressed - compressed - - # free - free - compressed + # used - used - free - used - compressed - compressed - + # free - free - compressed # Write the clusters to be compressed echo === Clusters to be compressed [1] diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc index 5e077c3573..88fecf7870 100644 --- a/tests/qemu-iotests/common.rc +++ b/tests/qemu-iotests/common.rc @@ -20,17 +20,17 @@ dd() { if [ "$HOSTOS" == "Linux" ] - then - command dd --help | grep noxfer > /dev/null 2>&1 - - if [ "$?" -eq 0 ] - then - command dd status=noxfer $@ - else - command dd $@ - fi + then + command dd --help | grep noxfer > /dev/null 2>&1 + + if [ "$?" -eq 0 ] + then + command dd status=noxfer $@ + else + command dd $@ + fi else - command dd $@ + command dd $@ fi } @@ -193,8 +193,8 @@ _get_pids_by_name() { if [ $# -ne 1 ] then - echo "Usage: _get_pids_by_name process-name" 1>&2 - exit 1 + echo "Usage: _get_pids_by_name process-name" 1>&2 + exit 1 fi # Algorithm ... all ps(1) variants have a time of the form MM:SS or @@ -206,12 +206,12 @@ _get_pids_by_name() ps $PS_ALL_FLAGS \ | sed -n \ - -e 's/$/ /' \ - -e 's/[ ][ ]*/ /g' \ - -e 's/^ //' \ - -e 's/^[^ ]* //' \ - -e "/[0-9]:[0-9][0-9] *[^ ]*\/$1 /s/ .*//p" \ - -e "/[0-9]:[0-9][0-9] *$1 /s/ .*//p" + -e 's/$/ /' \ + -e 's/[ ][ ]*/ /g' \ + -e 's/^ //' \ + -e 's/^[^ ]* //' \ + -e "/[0-9]:[0-9][0-9] *[^ ]*\/$1 /s/ .*//p" \ + -e "/[0-9]:[0-9][0-9] *$1 /s/ .*//p" } # fqdn for localhost @@ -229,8 +229,8 @@ _need_to_be_root() id=`id | $SED_PROG -e 's/(.*//' -e 's/.*=//'` if [ "$id" -ne 0 ] then - echo "Arrgh ... you need to be root (not uid=$id) to run this test" - exit 1 + echo "Arrgh ... you need to be root (not uid=$id) to run this test" + exit 1 fi } @@ -248,33 +248,33 @@ _need_to_be_root() _do() { if [ $# -eq 1 ]; then - _cmd=$1 + _cmd=$1 elif [ $# -eq 2 ]; then - _note=$1 - _cmd=$2 - echo -n "$_note... " + _note=$1 + _cmd=$2 + echo -n "$_note... " else - echo "Usage: _do [note] cmd" 1>&2 - status=1; exit + echo "Usage: _do [note] cmd" 1>&2 + status=1; exit fi (eval "echo '---' \"$_cmd\"") >>$here/$seq.full (eval "$_cmd") >$tmp._out 2>&1; ret=$? cat $tmp._out >>$here/$seq.full if [ $# -eq 2 ]; then - if [ $ret -eq 0 ]; then - echo "done" - else - echo "fail" - fi + if [ $ret -eq 0 ]; then + echo "done" + else + echo "fail" + fi fi if [ $ret -ne 0 ] \ - && [ "$_do_die_on_error" = "always" \ - -o \( $# -eq 2 -a "$_do_die_on_error" = "message_only" \) ] + && [ "$_do_die_on_error" = "always" \ + -o \( $# -eq 2 -a "$_do_die_on_error" = "message_only" \) ] then - [ $# -ne 2 ] && echo - eval "echo \"$_cmd\" failed \(returned $ret\): see $seq.full" - status=1; exit + [ $# -ne 2 ] && echo + eval "echo \"$_cmd\" failed \(returned $ret\): see $seq.full" + status=1; exit fi return $ret @@ -305,9 +305,9 @@ _fail() _supported_fmt() { for f; do - if [ "$f" = "$IMGFMT" -o "$f" = "generic" ]; then - return - fi + if [ "$f" = "$IMGFMT" -o "$f" = "generic" ]; then + return + fi done _notrun "not suitable for this image format: $IMGFMT" @@ -318,9 +318,9 @@ _supported_fmt() _supported_proto() { for f; do - if [ "$f" = "$IMGPROTO" -o "$f" = "generic" ]; then - return - fi + if [ "$f" = "$IMGPROTO" -o "$f" = "generic" ]; then + return + fi done _notrun "not suitable for this image protocol: $IMGPROTO" @@ -332,10 +332,10 @@ _supported_os() { for h do - if [ "$h" = "$HOSTOS" ] - then - return - fi + if [ "$h" = "$HOSTOS" ] + then + return + fi done _notrun "not suitable for this OS: $HOSTOS" diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index b6962421fa..316b1dd75c 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -66,3 +66,4 @@ 059 rw auto 060 rw auto 062 rw auto +063 rw auto diff --git a/tests/test-aio.c b/tests/test-aio.c index 532a1de3f9..c4fe0fc3b7 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -13,6 +13,7 @@ #include #include "block/aio.h" #include "qemu/timer.h" +#include "qemu/sockets.h" AioContext *ctx; @@ -375,7 +376,10 @@ static void test_timer_schedule(void) /* aio_poll will not block to wait for timers to complete unless it has * an fd to wait on. Fixing this breaks other tests. So create a dummy one. */ - g_assert(!pipe2(pipefd, O_NONBLOCK)); + g_assert(!qemu_pipe(pipefd)); + qemu_set_nonblock(pipefd[0]); + qemu_set_nonblock(pipefd[1]); + aio_set_fd_handler(ctx, pipefd[0], dummy_io_handler_read, NULL, NULL); aio_poll(ctx, false); @@ -716,7 +720,10 @@ static void test_source_timer_schedule(void) /* aio_poll will not block to wait for timers to complete unless it has * an fd to wait on. Fixing this breaks other tests. So create a dummy one. */ - g_assert(!pipe2(pipefd, O_NONBLOCK)); + g_assert(!qemu_pipe(pipefd)); + qemu_set_nonblock(pipefd[0]); + qemu_set_nonblock(pipefd[1]); + aio_set_fd_handler(ctx, pipefd[0], dummy_io_handler_read, NULL, NULL); do {} while (g_main_context_iteration(NULL, false)); diff --git a/tests/test-throttle.c b/tests/test-throttle.c new file mode 100644 index 0000000000..760812645b --- /dev/null +++ b/tests/test-throttle.c @@ -0,0 +1,481 @@ +/* + * Throttle infrastructure tests + * + * Copyright Nodalink, SARL. 2013 + * + * Authors: + * Benoît Canet + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ + +#include +#include +#include "qemu/throttle.h" + +LeakyBucket bkt; +ThrottleConfig cfg; +ThrottleState ts; + +/* usefull function */ +static bool double_cmp(double x, double y) +{ + return fabsl(x - y) < 1e-6; +} + +/* tests for single bucket operations */ +static void test_leak_bucket(void) +{ + /* set initial value */ + bkt.avg = 150; + bkt.max = 15; + bkt.level = 1.5; + + /* leak an op work of time */ + throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150); + g_assert(bkt.avg == 150); + g_assert(bkt.max == 15); + g_assert(double_cmp(bkt.level, 0.5)); + + /* leak again emptying the bucket */ + throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150); + g_assert(bkt.avg == 150); + g_assert(bkt.max == 15); + g_assert(double_cmp(bkt.level, 0)); + + /* check that the bucket level won't go lower */ + throttle_leak_bucket(&bkt, NANOSECONDS_PER_SECOND / 150); + g_assert(bkt.avg == 150); + g_assert(bkt.max == 15); + g_assert(double_cmp(bkt.level, 0)); +} + +static void test_compute_wait(void) +{ + int64_t wait; + int64_t result; + + /* no operation limit set */ + bkt.avg = 0; + bkt.max = 15; + bkt.level = 1.5; + wait = throttle_compute_wait(&bkt); + g_assert(!wait); + + /* zero delta */ + bkt.avg = 150; + bkt.max = 15; + bkt.level = 15; + wait = throttle_compute_wait(&bkt); + g_assert(!wait); + + /* below zero delta */ + bkt.avg = 150; + bkt.max = 15; + bkt.level = 9; + wait = throttle_compute_wait(&bkt); + g_assert(!wait); + + /* half an operation above max */ + bkt.avg = 150; + bkt.max = 15; + bkt.level = 15.5; + wait = throttle_compute_wait(&bkt); + /* time required to do half an operation */ + result = (int64_t) NANOSECONDS_PER_SECOND / 150 / 2; + g_assert(wait == result); +} + +/* functions to test ThrottleState initialization/destroy methods */ +static void read_timer_cb(void *opaque) +{ +} + +static void write_timer_cb(void *opaque) +{ +} + +static void test_init(void) +{ + int i; + + /* fill the structure with crap */ + memset(&ts, 1, sizeof(ts)); + + /* init the structure */ + throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + + /* check initialized fields */ + g_assert(ts.clock_type == QEMU_CLOCK_VIRTUAL); + g_assert(ts.timers[0]); + g_assert(ts.timers[1]); + + /* check other fields where cleared */ + g_assert(!ts.previous_leak); + g_assert(!ts.cfg.op_size); + for (i = 0; i < BUCKETS_COUNT; i++) { + g_assert(!ts.cfg.buckets[i].avg); + g_assert(!ts.cfg.buckets[i].max); + g_assert(!ts.cfg.buckets[i].level); + } + + throttle_destroy(&ts); +} + +static void test_destroy(void) +{ + int i; + throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_destroy(&ts); + for (i = 0; i < 2; i++) { + g_assert(!ts.timers[i]); + } +} + +/* function to test throttle_config and throttle_get_config */ +static void test_config_functions(void) +{ + int i; + ThrottleConfig orig_cfg, final_cfg; + + orig_cfg.buckets[THROTTLE_BPS_TOTAL].avg = 153; + orig_cfg.buckets[THROTTLE_BPS_READ].avg = 56; + orig_cfg.buckets[THROTTLE_BPS_WRITE].avg = 1; + + orig_cfg.buckets[THROTTLE_OPS_TOTAL].avg = 150; + orig_cfg.buckets[THROTTLE_OPS_READ].avg = 69; + orig_cfg.buckets[THROTTLE_OPS_WRITE].avg = 23; + + orig_cfg.buckets[THROTTLE_BPS_TOTAL].max = 0; /* should be corrected */ + orig_cfg.buckets[THROTTLE_BPS_READ].max = 1; /* should not be corrected */ + orig_cfg.buckets[THROTTLE_BPS_WRITE].max = 120; + + orig_cfg.buckets[THROTTLE_OPS_TOTAL].max = 150; + orig_cfg.buckets[THROTTLE_OPS_READ].max = 400; + orig_cfg.buckets[THROTTLE_OPS_WRITE].max = 500; + + orig_cfg.buckets[THROTTLE_BPS_TOTAL].level = 45; + orig_cfg.buckets[THROTTLE_BPS_READ].level = 65; + orig_cfg.buckets[THROTTLE_BPS_WRITE].level = 23; + + orig_cfg.buckets[THROTTLE_OPS_TOTAL].level = 1; + orig_cfg.buckets[THROTTLE_OPS_READ].level = 90; + orig_cfg.buckets[THROTTLE_OPS_WRITE].level = 75; + + orig_cfg.op_size = 1; + + throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + /* structure reset by throttle_init previous_leak should be null */ + g_assert(!ts.previous_leak); + throttle_config(&ts, &orig_cfg); + + /* has previous leak been initialized by throttle_config ? */ + g_assert(ts.previous_leak); + + /* get back the fixed configuration */ + throttle_get_config(&ts, &final_cfg); + + throttle_destroy(&ts); + + g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].avg == 153); + g_assert(final_cfg.buckets[THROTTLE_BPS_READ].avg == 56); + g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].avg == 1); + + g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].avg == 150); + g_assert(final_cfg.buckets[THROTTLE_OPS_READ].avg == 69); + g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].avg == 23); + + g_assert(final_cfg.buckets[THROTTLE_BPS_TOTAL].max == 15.3);/* fixed */ + g_assert(final_cfg.buckets[THROTTLE_BPS_READ].max == 1); /* not fixed */ + g_assert(final_cfg.buckets[THROTTLE_BPS_WRITE].max == 120); + + g_assert(final_cfg.buckets[THROTTLE_OPS_TOTAL].max == 150); + g_assert(final_cfg.buckets[THROTTLE_OPS_READ].max == 400); + g_assert(final_cfg.buckets[THROTTLE_OPS_WRITE].max == 500); + + g_assert(final_cfg.op_size == 1); + + /* check bucket have been cleared */ + for (i = 0; i < BUCKETS_COUNT; i++) { + g_assert(!final_cfg.buckets[i].level); + } +} + +/* functions to test is throttle is enabled by a config */ +static void set_cfg_value(bool is_max, int index, int value) +{ + if (is_max) { + cfg.buckets[index].max = value; + } else { + cfg.buckets[index].avg = value; + } +} + +static void test_enabled(void) +{ + int i; + + memset(&cfg, 0, sizeof(cfg)); + g_assert(!throttle_enabled(&cfg)); + + for (i = 0; i < BUCKETS_COUNT; i++) { + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(false, i, 150); + g_assert(throttle_enabled(&cfg)); + } + + for (i = 0; i < BUCKETS_COUNT; i++) { + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(false, i, -150); + g_assert(!throttle_enabled(&cfg)); + } +} + +/* tests functions for throttle_conflicting */ + +static void test_conflicts_for_one_set(bool is_max, + int total, + int read, + int write) +{ + memset(&cfg, 0, sizeof(cfg)); + g_assert(!throttle_conflicting(&cfg)); + + set_cfg_value(is_max, total, 1); + set_cfg_value(is_max, read, 1); + g_assert(throttle_conflicting(&cfg)); + + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(is_max, total, 1); + set_cfg_value(is_max, write, 1); + g_assert(throttle_conflicting(&cfg)); + + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(is_max, total, 1); + set_cfg_value(is_max, read, 1); + set_cfg_value(is_max, write, 1); + g_assert(throttle_conflicting(&cfg)); + + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(is_max, total, 1); + g_assert(!throttle_conflicting(&cfg)); + + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(is_max, read, 1); + set_cfg_value(is_max, write, 1); + g_assert(!throttle_conflicting(&cfg)); +} + +static void test_conflicting_config(void) +{ + /* bps average conflicts */ + test_conflicts_for_one_set(false, + THROTTLE_BPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_BPS_WRITE); + + /* ops average conflicts */ + test_conflicts_for_one_set(false, + THROTTLE_OPS_TOTAL, + THROTTLE_OPS_READ, + THROTTLE_OPS_WRITE); + + /* bps average conflicts */ + test_conflicts_for_one_set(true, + THROTTLE_BPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_BPS_WRITE); + /* ops average conflicts */ + test_conflicts_for_one_set(true, + THROTTLE_OPS_TOTAL, + THROTTLE_OPS_READ, + THROTTLE_OPS_WRITE); +} +/* functions to test the throttle_is_valid function */ +static void test_is_valid_for_value(int value, bool should_be_valid) +{ + int is_max, index; + for (is_max = 0; is_max < 2; is_max++) { + for (index = 0; index < BUCKETS_COUNT; index++) { + memset(&cfg, 0, sizeof(cfg)); + set_cfg_value(is_max, index, value); + g_assert(throttle_is_valid(&cfg) == should_be_valid); + } + } +} + +static void test_is_valid(void) +{ + /* negative number are invalid */ + test_is_valid_for_value(-1, false); + /* zero are valids */ + test_is_valid_for_value(0, true); + /* positives numers are valids */ + test_is_valid_for_value(1, true); +} + +static void test_have_timer(void) +{ + /* zero the structure */ + memset(&ts, 0, sizeof(ts)); + + /* no timer set shoudl return false */ + g_assert(!throttle_have_timer(&ts)); + + /* init the structure */ + throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + + /* timer set by init should return true */ + g_assert(throttle_have_timer(&ts)); + + throttle_destroy(&ts); +} + +static bool do_test_accounting(bool is_ops, /* are we testing bps or ops */ + int size, /* size of the operation to do */ + double avg, /* io limit */ + uint64_t op_size, /* ideal size of an io */ + double total_result, + double read_result, + double write_result) +{ + BucketType to_test[2][3] = { { THROTTLE_BPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_BPS_WRITE, }, + { THROTTLE_OPS_TOTAL, + THROTTLE_OPS_READ, + THROTTLE_OPS_WRITE, } }; + ThrottleConfig cfg; + BucketType index; + int i; + + for (i = 0; i < 3; i++) { + BucketType index = to_test[is_ops][i]; + cfg.buckets[index].avg = avg; + } + + cfg.op_size = op_size; + + throttle_init(&ts, QEMU_CLOCK_VIRTUAL, read_timer_cb, write_timer_cb, &ts); + throttle_config(&ts, &cfg); + + /* account a read */ + throttle_account(&ts, false, size); + /* account a write */ + throttle_account(&ts, true, size); + + /* check total result */ + index = to_test[is_ops][0]; + if (!double_cmp(ts.cfg.buckets[index].level, total_result)) { + return false; + } + + /* check read result */ + index = to_test[is_ops][1]; + if (!double_cmp(ts.cfg.buckets[index].level, read_result)) { + return false; + } + + /* check write result */ + index = to_test[is_ops][2]; + if (!double_cmp(ts.cfg.buckets[index].level, write_result)) { + return false; + } + + throttle_destroy(&ts); + + return true; +} + +static void test_accounting(void) +{ + /* tests for bps */ + + /* op of size 1 */ + g_assert(do_test_accounting(false, + 1 * 512, + 150, + 0, + 1024, + 512, + 512)); + + /* op of size 2 */ + g_assert(do_test_accounting(false, + 2 * 512, + 150, + 0, + 2048, + 1024, + 1024)); + + /* op of size 2 and orthogonal parameter change */ + g_assert(do_test_accounting(false, + 2 * 512, + 150, + 17, + 2048, + 1024, + 1024)); + + + /* tests for ops */ + + /* op of size 1 */ + g_assert(do_test_accounting(true, + 1 * 512, + 150, + 0, + 2, + 1, + 1)); + + /* op of size 2 */ + g_assert(do_test_accounting(true, + 2 * 512, + 150, + 0, + 2, + 1, + 1)); + + /* jumbo op accounting fragmentation : size 64 with op size of 13 units */ + g_assert(do_test_accounting(true, + 64 * 512, + 150, + 13 * 512, + (64.0 * 2) / 13, + (64.0 / 13), + (64.0 / 13))); + + /* same with orthogonal parameters changes */ + g_assert(do_test_accounting(true, + 64 * 512, + 300, + 13 * 512, + (64.0 * 2) / 13, + (64.0 / 13), + (64.0 / 13))); +} + +int main(int argc, char **argv) +{ + init_clocks(); + do {} while (g_main_context_iteration(NULL, false)); + + /* tests in the same order as the header function declarations */ + g_test_init(&argc, &argv, NULL); + g_test_add_func("/throttle/leak_bucket", test_leak_bucket); + g_test_add_func("/throttle/compute_wait", test_compute_wait); + g_test_add_func("/throttle/init", test_init); + g_test_add_func("/throttle/destroy", test_destroy); + g_test_add_func("/throttle/have_timer", test_have_timer); + g_test_add_func("/throttle/config/enabled", test_enabled); + g_test_add_func("/throttle/config/conflicting", test_conflicting_config); + g_test_add_func("/throttle/config/is_valid", test_is_valid); + g_test_add_func("/throttle/config_functions", test_config_functions); + g_test_add_func("/throttle/accounting", test_accounting); + return g_test_run(); +} + diff --git a/util/Makefile.objs b/util/Makefile.objs index dc72ab0721..2bb13a2a59 100644 --- a/util/Makefile.objs +++ b/util/Makefile.objs @@ -11,3 +11,4 @@ util-obj-y += iov.o aes.o qemu-config.o qemu-sockets.o uri.o notify.o util-obj-y += qemu-option.o qemu-progress.o util-obj-y += hexdump.o util-obj-y += crc32c.o +util-obj-y += throttle.o diff --git a/util/throttle.c b/util/throttle.c new file mode 100644 index 0000000000..02e6f15587 --- /dev/null +++ b/util/throttle.c @@ -0,0 +1,396 @@ +/* + * QEMU throttling infrastructure + * + * Copyright (C) Nodalink, SARL. 2013 + * + * Author: + * Benoît Canet + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include "qemu/throttle.h" +#include "qemu/timer.h" + +/* This function make a bucket leak + * + * @bkt: the bucket to make leak + * @delta_ns: the time delta + */ +void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) +{ + double leak; + + /* compute how much to leak */ + leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; + + /* make the bucket leak */ + bkt->level = MAX(bkt->level - leak, 0); +} + +/* Calculate the time delta since last leak and make proportionals leaks + * + * @now: the current timestamp in ns + */ +static void throttle_do_leak(ThrottleState *ts, int64_t now) +{ + /* compute the time elapsed since the last leak */ + int64_t delta_ns = now - ts->previous_leak; + int i; + + ts->previous_leak = now; + + if (delta_ns <= 0) { + return; + } + + /* make each bucket leak */ + for (i = 0; i < BUCKETS_COUNT; i++) { + throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); + } +} + +/* do the real job of computing the time to wait + * + * @limit: the throttling limit + * @extra: the number of operation to delay + * @ret: the time to wait in ns + */ +static int64_t throttle_do_compute_wait(double limit, double extra) +{ + double wait = extra * NANOSECONDS_PER_SECOND; + wait /= limit; + return wait; +} + +/* This function compute the wait time in ns that a leaky bucket should trigger + * + * @bkt: the leaky bucket we operate on + * @ret: the resulting wait time in ns or 0 if the operation can go through + */ +int64_t throttle_compute_wait(LeakyBucket *bkt) +{ + double extra; /* the number of extra units blocking the io */ + + if (!bkt->avg) { + return 0; + } + + extra = bkt->level - bkt->max; + + if (extra <= 0) { + return 0; + } + + return throttle_do_compute_wait(bkt->avg, extra); +} + +/* This function compute the time that must be waited while this IO + * + * @is_write: true if the current IO is a write, false if it's a read + * @ret: time to wait + */ +static int64_t throttle_compute_wait_for(ThrottleState *ts, + bool is_write) +{ + BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, + THROTTLE_OPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_OPS_READ}, + {THROTTLE_BPS_TOTAL, + THROTTLE_OPS_TOTAL, + THROTTLE_BPS_WRITE, + THROTTLE_OPS_WRITE}, }; + int64_t wait, max_wait = 0; + int i; + + for (i = 0; i < 4; i++) { + BucketType index = to_check[is_write][i]; + wait = throttle_compute_wait(&ts->cfg.buckets[index]); + if (wait > max_wait) { + max_wait = wait; + } + } + + return max_wait; +} + +/* compute the timer for this type of operation + * + * @is_write: the type of operation + * @now: the current clock timestamp + * @next_timestamp: the resulting timer + * @ret: true if a timer must be set + */ +bool throttle_compute_timer(ThrottleState *ts, + bool is_write, + int64_t now, + int64_t *next_timestamp) +{ + int64_t wait; + + /* leak proportionally to the time elapsed */ + throttle_do_leak(ts, now); + + /* compute the wait time if any */ + wait = throttle_compute_wait_for(ts, is_write); + + /* if the code must wait compute when the next timer should fire */ + if (wait) { + *next_timestamp = now + wait; + return true; + } + + /* else no need to wait at all */ + *next_timestamp = now; + return false; +} + +/* To be called first on the ThrottleState */ +void throttle_init(ThrottleState *ts, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque) +{ + memset(ts, 0, sizeof(ThrottleState)); + + ts->clock_type = clock_type; + ts->timers[0] = timer_new_ns(clock_type, read_timer_cb, timer_opaque); + ts->timers[1] = timer_new_ns(clock_type, write_timer_cb, timer_opaque); +} + +/* destroy a timer */ +static void throttle_timer_destroy(QEMUTimer **timer) +{ + assert(*timer != NULL); + + timer_del(*timer); + timer_free(*timer); + *timer = NULL; +} + +/* To be called last on the ThrottleState */ +void throttle_destroy(ThrottleState *ts) +{ + int i; + + for (i = 0; i < 2; i++) { + throttle_timer_destroy(&ts->timers[i]); + } +} + +/* is any throttling timer configured */ +bool throttle_have_timer(ThrottleState *ts) +{ + if (ts->timers[0]) { + return true; + } + + return false; +} + +/* Does any throttling must be done + * + * @cfg: the throttling configuration to inspect + * @ret: true if throttling must be done else false + */ +bool throttle_enabled(ThrottleConfig *cfg) +{ + int i; + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].avg > 0) { + return true; + } + } + + return false; +} + +/* return true if any two throttling parameters conflicts + * + * @cfg: the throttling configuration to inspect + * @ret: true if any conflict detected else false + */ +bool throttle_conflicting(ThrottleConfig *cfg) +{ + bool bps_flag, ops_flag; + bool bps_max_flag, ops_max_flag; + + bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && + (cfg->buckets[THROTTLE_BPS_READ].avg || + cfg->buckets[THROTTLE_BPS_WRITE].avg); + + ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && + (cfg->buckets[THROTTLE_OPS_READ].avg || + cfg->buckets[THROTTLE_OPS_WRITE].avg); + + bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && + (cfg->buckets[THROTTLE_BPS_READ].max || + cfg->buckets[THROTTLE_BPS_WRITE].max); + + ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && + (cfg->buckets[THROTTLE_OPS_READ].max || + cfg->buckets[THROTTLE_OPS_WRITE].max); + + return bps_flag || ops_flag || bps_max_flag || ops_max_flag; +} + +/* check if a throttling configuration is valid + * @cfg: the throttling configuration to inspect + * @ret: true if valid else false + */ +bool throttle_is_valid(ThrottleConfig *cfg) +{ + bool invalid = false; + int i; + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].avg < 0) { + invalid = true; + } + } + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].max < 0) { + invalid = true; + } + } + + return !invalid; +} + +/* fix bucket parameters */ +static void throttle_fix_bucket(LeakyBucket *bkt) +{ + double min; + + /* zero bucket level */ + bkt->level = 0; + + /* The following is done to cope with the Linux CFQ block scheduler + * which regroup reads and writes by block of 100ms in the guest. + * When they are two process one making reads and one making writes cfq + * make a pattern looking like the following: + * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR + * Having a max burst value of 100ms of the average will help smooth the + * throttling + */ + min = bkt->avg / 10; + if (bkt->avg && !bkt->max) { + bkt->max = min; + } +} + +/* take care of canceling a timer */ +static void throttle_cancel_timer(QEMUTimer *timer) +{ + assert(timer != NULL); + + timer_del(timer); +} + +/* Used to configure the throttle + * + * @ts: the throttle state we are working on + * @cfg: the config to set + */ +void throttle_config(ThrottleState *ts, ThrottleConfig *cfg) +{ + int i; + + ts->cfg = *cfg; + + for (i = 0; i < BUCKETS_COUNT; i++) { + throttle_fix_bucket(&ts->cfg.buckets[i]); + } + + ts->previous_leak = qemu_clock_get_ns(ts->clock_type); + + for (i = 0; i < 2; i++) { + throttle_cancel_timer(ts->timers[i]); + } +} + +/* used to get config + * + * @ts: the throttle state we are working on + * @cfg: the config to write + */ +void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) +{ + *cfg = ts->cfg; +} + + +/* Schedule the read or write timer if needed + * + * NOTE: this function is not unit tested due to it's usage of timer_mod + * + * @is_write: the type of operation (read/write) + * @ret: true if the timer has been scheduled else false + */ +bool throttle_schedule_timer(ThrottleState *ts, bool is_write) +{ + int64_t now = qemu_clock_get_ns(ts->clock_type); + int64_t next_timestamp; + bool must_wait; + + must_wait = throttle_compute_timer(ts, + is_write, + now, + &next_timestamp); + + /* request not throttled */ + if (!must_wait) { + return false; + } + + /* request throttled and timer pending -> do nothing */ + if (timer_pending(ts->timers[is_write])) { + return true; + } + + /* request throttled and timer not pending -> arm timer */ + timer_mod(ts->timers[is_write], next_timestamp); + return true; +} + +/* do the accounting for this operation + * + * @is_write: the type of operation (read/write) + * @size: the size of the operation + */ +void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) +{ + double units = 1.0; + + /* if cfg.op_size is defined and smaller than size we compute unit count */ + if (ts->cfg.op_size && size > ts->cfg.op_size) { + units = (double) size / ts->cfg.op_size; + } + + ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size; + ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units; + + if (is_write) { + ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size; + ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units; + } else { + ts->cfg.buckets[THROTTLE_BPS_READ].level += size; + ts->cfg.buckets[THROTTLE_OPS_READ].level += units; + } +} +