Merge remote-tracking branch 'stefanha/block' into staging

# By Peter Lieven (5) and others
# Via Stefan Hajnoczi
* stefanha/block:
  block/raw: add .bdrv_get_info
  block: fix bdrv_read_unthrottled()
  cpus: Let vm_stop[_force_state]() always flush block devices
  block-migration: efficiently encode zero blocks
  block/raw: add bdrv_co_write_zeroes
  block: add bdrv_write_zeroes()
  block: fix vvfat error path for enable_write_target
  QEMUBH: make AioContext's bh re-entrant
  dataplane: sync virtio.c and vring.c virtqueue state
  gluster: Add discard support for GlusterFS block driver.
  gluster: Use pkg-config to configure GlusterFS block driver

Message-id: 1374223132-29107-1-git-send-email-stefanha@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
This commit is contained in:
Anthony Liguori 2013-07-22 10:13:34 -05:00
commit 5447a9afc4
16 changed files with 203 additions and 48 deletions

33
async.c
View file

@ -47,11 +47,16 @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
bh->ctx = ctx; bh->ctx = ctx;
bh->cb = cb; bh->cb = cb;
bh->opaque = opaque; bh->opaque = opaque;
qemu_mutex_lock(&ctx->bh_lock);
bh->next = ctx->first_bh; bh->next = ctx->first_bh;
/* Make sure that the members are ready before putting bh into list */
smp_wmb();
ctx->first_bh = bh; ctx->first_bh = bh;
qemu_mutex_unlock(&ctx->bh_lock);
return bh; return bh;
} }
/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx) int aio_bh_poll(AioContext *ctx)
{ {
QEMUBH *bh, **bhp, *next; QEMUBH *bh, **bhp, *next;
@ -61,9 +66,15 @@ int aio_bh_poll(AioContext *ctx)
ret = 0; ret = 0;
for (bh = ctx->first_bh; bh; bh = next) { for (bh = ctx->first_bh; bh; bh = next) {
/* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends();
next = bh->next; next = bh->next;
if (!bh->deleted && bh->scheduled) { if (!bh->deleted && bh->scheduled) {
bh->scheduled = 0; bh->scheduled = 0;
/* Paired with write barrier in bh schedule to ensure reading for
* idle & callbacks coming after bh's scheduling.
*/
smp_rmb();
if (!bh->idle) if (!bh->idle)
ret = 1; ret = 1;
bh->idle = 0; bh->idle = 0;
@ -75,6 +86,7 @@ int aio_bh_poll(AioContext *ctx)
/* remove deleted bhs */ /* remove deleted bhs */
if (!ctx->walking_bh) { if (!ctx->walking_bh) {
qemu_mutex_lock(&ctx->bh_lock);
bhp = &ctx->first_bh; bhp = &ctx->first_bh;
while (*bhp) { while (*bhp) {
bh = *bhp; bh = *bhp;
@ -85,6 +97,7 @@ int aio_bh_poll(AioContext *ctx)
bhp = &bh->next; bhp = &bh->next;
} }
} }
qemu_mutex_unlock(&ctx->bh_lock);
} }
return ret; return ret;
@ -94,24 +107,38 @@ void qemu_bh_schedule_idle(QEMUBH *bh)
{ {
if (bh->scheduled) if (bh->scheduled)
return; return;
bh->scheduled = 1;
bh->idle = 1; bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
} }
void qemu_bh_schedule(QEMUBH *bh) void qemu_bh_schedule(QEMUBH *bh)
{ {
if (bh->scheduled) if (bh->scheduled)
return; return;
bh->scheduled = 1;
bh->idle = 0; bh->idle = 0;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
smp_wmb();
bh->scheduled = 1;
aio_notify(bh->ctx); aio_notify(bh->ctx);
} }
/* This func is async.
*/
void qemu_bh_cancel(QEMUBH *bh) void qemu_bh_cancel(QEMUBH *bh)
{ {
bh->scheduled = 0; bh->scheduled = 0;
} }
/* This func is async. The bottom half will do the delete action at the final
* end.
*/
void qemu_bh_delete(QEMUBH *bh) void qemu_bh_delete(QEMUBH *bh)
{ {
bh->scheduled = 0; bh->scheduled = 0;
@ -176,6 +203,7 @@ aio_ctx_finalize(GSource *source)
thread_pool_free(ctx->thread_pool); thread_pool_free(ctx->thread_pool);
aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL); aio_set_event_notifier(ctx, &ctx->notifier, NULL, NULL);
event_notifier_cleanup(&ctx->notifier); event_notifier_cleanup(&ctx->notifier);
qemu_mutex_destroy(&ctx->bh_lock);
g_array_free(ctx->pollfds, TRUE); g_array_free(ctx->pollfds, TRUE);
} }
@ -211,6 +239,7 @@ AioContext *aio_context_new(void)
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext)); ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD)); ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
ctx->thread_pool = NULL; ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
event_notifier_init(&ctx->notifier, false); event_notifier_init(&ctx->notifier, false);
aio_set_event_notifier(ctx, &ctx->notifier, aio_set_event_notifier(ctx, &ctx->notifier,
(EventNotifierHandler *) (EventNotifierHandler *)

View file

@ -29,6 +29,7 @@
#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01 #define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02 #define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04 #define BLK_MIG_FLAG_PROGRESS 0x04
#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
#define MAX_IS_ALLOCATED_SEARCH 65536 #define MAX_IS_ALLOCATED_SEARCH 65536
@ -80,6 +81,7 @@ typedef struct BlkMigState {
int shared_base; int shared_base;
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum; int64_t total_sector_sum;
bool zero_blocks;
/* Protected by lock. */ /* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
@ -114,16 +116,30 @@ static void blk_mig_unlock(void)
static void blk_send(QEMUFile *f, BlkMigBlock * blk) static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{ {
int len; int len;
uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
if (block_mig_state.zero_blocks &&
buffer_is_zero(blk->buf, BLOCK_SIZE)) {
flags |= BLK_MIG_FLAG_ZERO_BLOCK;
}
/* sector number and flags */ /* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS) qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
| BLK_MIG_FLAG_DEVICE_BLOCK); | flags);
/* device name */ /* device name */
len = strlen(blk->bmds->bs->device_name); len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len); qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len); qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
/* if a block is zero we need to flush here since the network
* bandwidth is now a lot higher than the storage device bandwidth.
* thus if we queue zero blocks we slow down the migration */
if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
qemu_fflush(f);
return;
}
qemu_put_buffer(f, blk->buf, BLOCK_SIZE); qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
} }
@ -344,6 +360,7 @@ static void init_blk_migration(QEMUFile *f)
block_mig_state.total_sector_sum = 0; block_mig_state.total_sector_sum = 0;
block_mig_state.prev_progress = -1; block_mig_state.prev_progress = -1;
block_mig_state.bulk_completed = 0; block_mig_state.bulk_completed = 0;
block_mig_state.zero_blocks = migrate_zero_blocks();
bdrv_iterate(init_blk_migration_it, NULL); bdrv_iterate(init_blk_migration_it, NULL);
} }
@ -762,12 +779,15 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK; nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
} }
buf = g_malloc(BLOCK_SIZE); if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
ret = bdrv_write_zeroes(bs, addr, nr_sectors);
} else {
buf = g_malloc(BLOCK_SIZE);
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
g_free(buf);
}
qemu_get_buffer(f, buf, BLOCK_SIZE);
ret = bdrv_write(bs, addr, buf, nr_sectors);
g_free(buf);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }

29
block.c
View file

@ -2162,6 +2162,7 @@ typedef struct RwCo {
QEMUIOVector *qiov; QEMUIOVector *qiov;
bool is_write; bool is_write;
int ret; int ret;
BdrvRequestFlags flags;
} RwCo; } RwCo;
static void coroutine_fn bdrv_rw_co_entry(void *opaque) static void coroutine_fn bdrv_rw_co_entry(void *opaque)
@ -2170,10 +2171,12 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
if (!rwco->is_write) { if (!rwco->is_write) {
rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num, rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
rwco->nb_sectors, rwco->qiov, 0); rwco->nb_sectors, rwco->qiov,
rwco->flags);
} else { } else {
rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num, rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
rwco->nb_sectors, rwco->qiov, 0); rwco->nb_sectors, rwco->qiov,
rwco->flags);
} }
} }
@ -2181,7 +2184,8 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
* Process a vectored synchronous request using coroutines * Process a vectored synchronous request using coroutines
*/ */
static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num, static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *qiov, bool is_write) QEMUIOVector *qiov, bool is_write,
BdrvRequestFlags flags)
{ {
Coroutine *co; Coroutine *co;
RwCo rwco = { RwCo rwco = {
@ -2191,6 +2195,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
.qiov = qiov, .qiov = qiov,
.is_write = is_write, .is_write = is_write,
.ret = NOT_DONE, .ret = NOT_DONE,
.flags = flags,
}; };
assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0); assert((qiov->size & (BDRV_SECTOR_SIZE - 1)) == 0);
@ -2222,7 +2227,7 @@ static int bdrv_rwv_co(BlockDriverState *bs, int64_t sector_num,
* Process a synchronous request using coroutines * Process a synchronous request using coroutines
*/ */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
int nb_sectors, bool is_write) int nb_sectors, bool is_write, BdrvRequestFlags flags)
{ {
QEMUIOVector qiov; QEMUIOVector qiov;
struct iovec iov = { struct iovec iov = {
@ -2231,14 +2236,14 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
}; };
qemu_iovec_init_external(&qiov, &iov, 1); qemu_iovec_init_external(&qiov, &iov, 1);
return bdrv_rwv_co(bs, sector_num, &qiov, is_write); return bdrv_rwv_co(bs, sector_num, &qiov, is_write, flags);
} }
/* return < 0 if error. See bdrv_write() for the return codes */ /* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num, int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors) uint8_t *buf, int nb_sectors)
{ {
return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false); return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
} }
/* Just like bdrv_read(), but with I/O throttling temporarily disabled */ /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
@ -2250,7 +2255,7 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
enabled = bs->io_limits_enabled; enabled = bs->io_limits_enabled;
bs->io_limits_enabled = false; bs->io_limits_enabled = false;
ret = bdrv_read(bs, 0, buf, 1); ret = bdrv_read(bs, sector_num, buf, nb_sectors);
bs->io_limits_enabled = enabled; bs->io_limits_enabled = enabled;
return ret; return ret;
} }
@ -2264,12 +2269,18 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
int bdrv_write(BlockDriverState *bs, int64_t sector_num, int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors) const uint8_t *buf, int nb_sectors)
{ {
return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true); return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
} }
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov) int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov)
{ {
return bdrv_rwv_co(bs, sector_num, qiov, true); return bdrv_rwv_co(bs, sector_num, qiov, true, 0);
}
/*
 * Write zeroes to nb_sectors sectors starting at sector_num.
 *
 * Issues a write through bdrv_rw_co() with no data buffer and the
 * BDRV_REQ_ZERO_WRITE request flag set.  The return value is whatever
 * bdrv_rw_co() returns.
 */
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    const bool is_write = true;
    uint8_t *no_data = NULL;   /* zero writes carry no payload */

    return bdrv_rw_co(bs, sector_num, no_data, nb_sectors, is_write,
                      BDRV_REQ_ZERO_WRITE);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset, int bdrv_pread(BlockDriverState *bs, int64_t offset,

View file

@ -532,6 +532,39 @@ out:
return NULL; return NULL;
} }
#ifdef CONFIG_GLUSTERFS_DISCARD
/*
 * Submit an asynchronous discard for nb_sectors sectors starting at
 * sector_num via glfs_discard_async().
 *
 * Returns the common AIOCB on successful submission.  If submission fails,
 * the in-flight counter is rolled back, the AIOCB is released and NULL is
 * returned.
 */
static BlockDriverAIOCB *qemu_gluster_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, BlockDriverCompletionFunc *cb,
        void *opaque)
{
    BDRVGlusterState *s = bs->opaque;
    GlusterAIOCB *acb;
    /* Convert the sector-based request into a byte range. */
    off_t offset = sector_num * BDRV_SECTOR_SIZE;
    size_t size = nb_sectors * BDRV_SECTOR_SIZE;

    acb = qemu_aio_get(&gluster_aiocb_info, bs, cb, opaque);
    acb->size = 0;
    acb->ret = 0;
    acb->finished = NULL;

    /* Count the request as in flight before handing it to gluster. */
    s->qemu_aio_count++;

    if (glfs_discard_async(s->fd, offset, size,
                           &gluster_finish_aiocb, acb) < 0) {
        /* Submission failed: undo the accounting and drop the AIOCB. */
        s->qemu_aio_count--;
        qemu_aio_release(acb);
        return NULL;
    }

    return &acb->common;
}
#endif
static int64_t qemu_gluster_getlength(BlockDriverState *bs) static int64_t qemu_gluster_getlength(BlockDriverState *bs)
{ {
BDRVGlusterState *s = bs->opaque; BDRVGlusterState *s = bs->opaque;
@ -602,6 +635,9 @@ static BlockDriver bdrv_gluster = {
.bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init, .bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options, .create_options = qemu_gluster_create_options,
}; };
@ -618,6 +654,9 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init, .bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options, .create_options = qemu_gluster_create_options,
}; };
@ -634,6 +673,9 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init, .bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options, .create_options = qemu_gluster_create_options,
}; };
@ -650,6 +692,9 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_writev = qemu_gluster_aio_writev,
.bdrv_aio_flush = qemu_gluster_aio_flush, .bdrv_aio_flush = qemu_gluster_aio_flush,
.bdrv_has_zero_init = qemu_gluster_has_zero_init, .bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
.bdrv_aio_discard = qemu_gluster_aio_discard,
#endif
.create_options = qemu_gluster_create_options, .create_options = qemu_gluster_create_options,
}; };

View file

@ -42,6 +42,13 @@ static int coroutine_fn raw_co_is_allocated(BlockDriverState *bs,
return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum); return bdrv_co_is_allocated(bs->file, sector_num, nb_sectors, pnum);
} }
/*
 * Forward a write-zeroes request unchanged to the underlying bs->file
 * and return its result.
 */
static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
                                            int64_t sector_num,
                                            int nb_sectors)
{
    int ret = bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors);

    return ret;
}
static int64_t raw_getlength(BlockDriverState *bs) static int64_t raw_getlength(BlockDriverState *bs)
{ {
return bdrv_getlength(bs->file); return bdrv_getlength(bs->file);
@ -114,6 +121,11 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file); return bdrv_has_zero_init(bs->file);
} }
/*
 * Fill @bdi by delegating to bdrv_get_info() on the underlying bs->file;
 * the result of that call is returned unchanged.
 */
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    int ret = bdrv_get_info(bs->file, bdi);

    return ret;
}
static BlockDriver bdrv_raw = { static BlockDriver bdrv_raw = {
.format_name = "raw", .format_name = "raw",
@ -128,10 +140,12 @@ static BlockDriver bdrv_raw = {
.bdrv_co_readv = raw_co_readv, .bdrv_co_readv = raw_co_readv,
.bdrv_co_writev = raw_co_writev, .bdrv_co_writev = raw_co_writev,
.bdrv_co_is_allocated = raw_co_is_allocated, .bdrv_co_is_allocated = raw_co_is_allocated,
.bdrv_co_write_zeroes = raw_co_write_zeroes,
.bdrv_co_discard = raw_co_discard, .bdrv_co_discard = raw_co_discard,
.bdrv_probe = raw_probe, .bdrv_probe = raw_probe,
.bdrv_getlength = raw_getlength, .bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_truncate = raw_truncate, .bdrv_truncate = raw_truncate,
.bdrv_is_inserted = raw_is_inserted, .bdrv_is_inserted = raw_is_inserted,

View file

@ -1164,8 +1164,8 @@ DLOG(if (stderr == NULL) {
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1); s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) { if (qemu_opt_get_bool(opts, "rw", false)) {
if (enable_write_target(s)) { ret = enable_write_target(s);
ret = -EIO; if (ret < 0) {
goto fail; goto fail;
} }
bs->read_only = 0; bs->read_only = 0;
@ -2917,9 +2917,7 @@ static int enable_write_target(BDRVVVFATState *s)
s->qcow_filename = g_malloc(1024); s->qcow_filename = g_malloc(1024);
ret = get_tmp_filename(s->qcow_filename, 1024); ret = get_tmp_filename(s->qcow_filename, 1024);
if (ret < 0) { if (ret < 0) {
g_free(s->qcow_filename); goto err;
s->qcow_filename = NULL;
return ret;
} }
bdrv_qcow = bdrv_find_format("qcow"); bdrv_qcow = bdrv_find_format("qcow");
@ -2927,18 +2925,18 @@ static int enable_write_target(BDRVVVFATState *s)
set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512); set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:"); set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
if (bdrv_create(bdrv_qcow, s->qcow_filename, options) < 0) ret = bdrv_create(bdrv_qcow, s->qcow_filename, options);
return -1; if (ret < 0) {
goto err;
}
s->qcow = bdrv_new(""); s->qcow = bdrv_new("");
if (s->qcow == NULL) {
return -1;
}
ret = bdrv_open(s->qcow, s->qcow_filename, NULL, ret = bdrv_open(s->qcow, s->qcow_filename, NULL,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow); BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow);
if (ret < 0) { if (ret < 0) {
return ret; bdrv_delete(s->qcow);
goto err;
} }
#ifndef _WIN32 #ifndef _WIN32
@ -2951,6 +2949,11 @@ static int enable_write_target(BDRVVVFATState *s)
*(void**)s->bs->backing_hd->opaque = s; *(void**)s->bs->backing_hd->opaque = s;
return 0; return 0;
err:
g_free(s->qcow_filename);
s->qcow_filename = NULL;
return ret;
} }
static void vvfat_close(BlockDriverState *bs) static void vvfat_close(BlockDriverState *bs)

25
configure vendored
View file

@ -237,6 +237,7 @@ libiscsi=""
coroutine="" coroutine=""
seccomp="" seccomp=""
glusterfs="" glusterfs=""
glusterfs_discard="no"
virtio_blk_data_plane="" virtio_blk_data_plane=""
gtk="" gtk=""
gtkabi="2.0" gtkabi="2.0"
@ -2570,23 +2571,21 @@ fi
########################################## ##########################################
# glusterfs probe # glusterfs probe
if test "$glusterfs" != "no" ; then if test "$glusterfs" != "no" ; then
cat > $TMPC <<EOF if $pkg_config --atleast-version=3 glusterfs-api >/dev/null 2>&1; then
#include <glusterfs/api/glfs.h> glusterfs="yes"
int main(void) { glusterfs_cflags=`$pkg_config --cflags glusterfs-api 2>/dev/null`
(void) glfs_new("volume"); glusterfs_libs=`$pkg_config --libs glusterfs-api 2>/dev/null`
return 0; CFLAGS="$CFLAGS $glusterfs_cflags"
}
EOF
glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
if compile_prog "" "$glusterfs_libs" ; then
glusterfs=yes
libs_tools="$glusterfs_libs $libs_tools" libs_tools="$glusterfs_libs $libs_tools"
libs_softmmu="$glusterfs_libs $libs_softmmu" libs_softmmu="$glusterfs_libs $libs_softmmu"
if $pkg_config --atleast-version=5 glusterfs-api >/dev/null 2>&1; then
glusterfs_discard="yes"
fi
else else
if test "$glusterfs" = "yes" ; then if test "$glusterfs" = "yes" ; then
feature_not_found "GlusterFS backend support" feature_not_found "GlusterFS backend support"
fi fi
glusterfs=no glusterfs="no"
fi fi
fi fi
@ -3969,6 +3968,10 @@ if test "$glusterfs" = "yes" ; then
echo "CONFIG_GLUSTERFS=y" >> $config_host_mak echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
fi fi
if test "$glusterfs_discard" = "yes" ; then
echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
fi
if test "$libssh2" = "yes" ; then if test "$libssh2" = "yes" ; then
echo "CONFIG_LIBSSH2=y" >> $config_host_mak echo "CONFIG_LIBSSH2=y" >> $config_host_mak
fi fi

9
cpus.c
View file

@ -443,11 +443,12 @@ static int do_vm_stop(RunState state)
pause_all_vcpus(); pause_all_vcpus();
runstate_set(state); runstate_set(state);
vm_state_notify(0, state); vm_state_notify(0, state);
bdrv_drain_all();
ret = bdrv_flush_all();
monitor_protocol_event(QEVENT_STOP, NULL); monitor_protocol_event(QEVENT_STOP, NULL);
} }
bdrv_drain_all();
ret = bdrv_flush_all();
return ret; return ret;
} }
@ -1126,7 +1127,9 @@ int vm_stop_force_state(RunState state)
return vm_stop(state); return vm_stop(state);
} else { } else {
runstate_set(state); runstate_set(state);
return 0; /* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
return bdrv_flush_all();
} }
} }

View file

@ -537,7 +537,7 @@ void virtio_blk_data_plane_stop(VirtIOBlockDataPlane *s)
/* Clean up guest notifier (irq) */ /* Clean up guest notifier (irq) */
k->set_guest_notifiers(qbus->parent, 1, false); k->set_guest_notifiers(qbus->parent, 1, false);
vring_teardown(&s->vring); vring_teardown(&s->vring, s->vdev, 0);
s->started = false; s->started = false;
s->stopping = false; s->stopping = false;
} }

View file

@ -39,8 +39,8 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096); vring_init(&vring->vr, virtio_queue_get_num(vdev, n), vring_ptr, 4096);
vring->last_avail_idx = 0; vring->last_avail_idx = virtio_queue_get_last_avail_idx(vdev, n);
vring->last_used_idx = 0; vring->last_used_idx = vring->vr.used->idx;
vring->signalled_used = 0; vring->signalled_used = 0;
vring->signalled_used_valid = false; vring->signalled_used_valid = false;
@ -49,8 +49,10 @@ bool vring_setup(Vring *vring, VirtIODevice *vdev, int n)
return true; return true;
} }
void vring_teardown(Vring *vring) void vring_teardown(Vring *vring, VirtIODevice *vdev, int n)
{ {
virtio_queue_set_last_avail_idx(vdev, n, vring->last_avail_idx);
hostmem_finalize(&vring->hostmem); hostmem_finalize(&vring->hostmem);
} }

View file

@ -17,6 +17,7 @@
#include "qemu-common.h" #include "qemu-common.h"
#include "qemu/queue.h" #include "qemu/queue.h"
#include "qemu/event_notifier.h" #include "qemu/event_notifier.h"
#include "qemu/thread.h"
typedef struct BlockDriverAIOCB BlockDriverAIOCB; typedef struct BlockDriverAIOCB BlockDriverAIOCB;
typedef void BlockDriverCompletionFunc(void *opaque, int ret); typedef void BlockDriverCompletionFunc(void *opaque, int ret);
@ -53,6 +54,8 @@ typedef struct AioContext {
*/ */
int walking_handlers; int walking_handlers;
/* lock to protect between bh's adders and deleter */
QemuMutex bh_lock;
/* Anchor of the list of Bottom Halves belonging to the context */ /* Anchor of the list of Bottom Halves belonging to the context */
struct QEMUBH *first_bh; struct QEMUBH *first_bh;
@ -127,6 +130,8 @@ void aio_notify(AioContext *ctx);
* aio_bh_poll: Poll bottom halves for an AioContext. * aio_bh_poll: Poll bottom halves for an AioContext.
* *
* These are internal functions used by the QEMU main loop. * These are internal functions used by the QEMU main loop.
* And notice that multiple occurrences of aio_bh_poll cannot
* be called concurrently
*/ */
int aio_bh_poll(AioContext *ctx); int aio_bh_poll(AioContext *ctx);
@ -163,6 +168,8 @@ void qemu_bh_cancel(QEMUBH *bh);
* Deleting a bottom half frees the memory that was allocated for it by * Deleting a bottom half frees the memory that was allocated for it by
* qemu_bh_new. It also implies canceling the bottom half if it was * qemu_bh_new. It also implies canceling the bottom half if it was
* scheduled. * scheduled.
* This func is async. The bottom half will do the delete action at the final
* end.
* *
* @bh: The bottom half to be deleted. * @bh: The bottom half to be deleted.
*/ */

View file

@ -157,6 +157,8 @@ int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors); uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num, int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors); const uint8_t *buf, int nb_sectors);
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov); int bdrv_writev(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov);
int bdrv_pread(BlockDriverState *bs, int64_t offset, int bdrv_pread(BlockDriverState *bs, int64_t offset,
void *buf, int count); void *buf, int count);

View file

@ -50,7 +50,7 @@ static inline void vring_set_broken(Vring *vring)
} }
bool vring_setup(Vring *vring, VirtIODevice *vdev, int n); bool vring_setup(Vring *vring, VirtIODevice *vdev, int n);
void vring_teardown(Vring *vring); void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
void vring_disable_notification(VirtIODevice *vdev, Vring *vring); void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_enable_notification(VirtIODevice *vdev, Vring *vring); bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
bool vring_should_notify(VirtIODevice *vdev, Vring *vring); bool vring_should_notify(VirtIODevice *vdev, Vring *vring);

View file

@ -124,6 +124,7 @@ void migrate_add_blocker(Error *reason);
void migrate_del_blocker(Error *reason); void migrate_del_blocker(Error *reason);
bool migrate_rdma_pin_all(void); bool migrate_rdma_pin_all(void);
bool migrate_zero_blocks(void);
bool migrate_auto_converge(void); bool migrate_auto_converge(void);

View file

@ -493,6 +493,15 @@ bool migrate_auto_converge(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
} }
/*
 * Report whether the zero-blocks migration capability is enabled in the
 * current migration state.
 */
bool migrate_zero_blocks(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
}
int migrate_use_xbzrle(void) int migrate_use_xbzrle(void)
{ {
MigrationState *s; MigrationState *s;

View file

@ -613,10 +613,16 @@
# Disabled by default. Experimental: may (or may not) be renamed after # Disabled by default. Experimental: may (or may not) be renamed after
# further testing is complete. (since 1.6) # further testing is complete. (since 1.6)
# #
# @zero-blocks: During storage migration encode blocks of zeroes efficiently. This
# essentially saves 1MB of zeroes per block on the wire. Enabling requires
# source and target VM to support this feature. To enable it, it is sufficient
# to enable the capability on the source VM. The feature is disabled by
# default. (since 1.6)
#
# Since: 1.2 # Since: 1.2
## ##
{ 'enum': 'MigrationCapability', { 'enum': 'MigrationCapability',
'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge'] } 'data': ['xbzrle', 'x-rdma-pin-all', 'auto-converge', 'zero-blocks'] }
## ##
# @MigrationCapabilityStatus # @MigrationCapabilityStatus