Block layer patches

- Fix permission update order problems with block graph changes
 - qemu-img convert: Unshare write permission for source
 - vhost-user-blk: Fail gracefully on too large queue size
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmCL26cRHGt3b2xmQHJl
 ZGhhdC5jb20ACgkQfwmycsiPL9Zbsw//YyeNj+dNYX0IWZELgPGdGqDB/7HuApZP
 zUxzqqGjdoyUDsWqi2tfnPX9I2lhBi4NabeQwIDG911RDbX4gSD7hYl7ui6kMqBh
 M14x+vc02rCEyb2g0ReUms+uEOTbLcs44mavxBCFFooefFsY8xavGkcnV23jSipE
 5YUem0DBNq9LXQ8JWPN6HdJ1OPy+vrVyC9wM7r2Lg/+gQHeUD7DOzJq+wQScuu1H
 uS8O1fhx9oaI6Mn+KWdjRJ13/SMYnvqFYqNGASD/02HwoygfxOiCX7yWeXuHZWQr
 7EzRlKtVe4oj03VNWQKpbGVWYvWgkbTqAM8zAys9dJ9vRmnjPVlZCU5oV3+BGvXc
 NHhlqlXNVYpcfKFZXt+N8H2PXkKK5xPrJapkhH3cm+io7b0x5P0OmNpbuPXsEx3D
 S1gm02VDAwP+RnzDpYDrJK6G56glQPTd/k3msUyFD57AaH8bnCgpaJmQwWmxSYMc
 +zo5n2roBvYYtc+fWgT4HqdGsGrCJeKEGRYGilfb6ZjJ5Crn2hxwLRnry9z23ldo
 So4kQXEPHfIOp5yUSADUIgiQN0msE5ZaXLMdUM2Y3AzXEkGglm5/R2OjxYLuE/YY
 +ujOfQsNXScgLEOlLl/NsTZIl62w/youYx5REkhwD2SmpI79bN2Yg+C4HMPQn4ys
 mXDeU81BlKI=
 =xZIY
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

- Fix permission update order problems with block graph changes
- qemu-img convert: Unshare write permission for source
- vhost-user-blk: Fail gracefully on too large queue size

# gpg: Signature made Fri 30 Apr 2021 11:27:51 BST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (39 commits)
  vhost-user-blk: Fail gracefully on too large queue size
  qemu-img convert: Unshare write permission for source
  block: Add BDRV_O_NO_SHARE for blk_new_open()
  block: refactor bdrv_node_check_perm()
  block: rename bdrv_replace_child_safe() to bdrv_replace_child()
  block: refactor bdrv_child_set_perm_safe() transaction action
  block: inline bdrv_replace_child()
  block: inline bdrv_check_perm_common()
  block: drop unused permission update functions
  block: bdrv_reopen_multiple: refresh permissions on updated graph
  block: bdrv_reopen_multiple(): move bdrv_flush to separate pre-prepare
  block: add bdrv_set_backing_noperm() transaction action
  block: make bdrv_refresh_limits() to be a transaction action
  block: make bdrv_unset_inherits_from to be a transaction action
  block: drop ignore_children for permission update functions
  block/backup-top: drop .active
  block: introduce bdrv_drop_filter()
  block: add bdrv_remove_filter_or_cow transaction action
  block: adapt bdrv_append() for inserting filters
  block: split out bdrv_replace_node_noperm()
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2021-04-30 13:46:42 +01:00
commit f38d1ea497
22 changed files with 1293 additions and 693 deletions

View file

@ -2532,6 +2532,12 @@ M: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
S: Maintained
F: scripts/simplebench/
Transactions helper
M: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
S: Maintained
F: include/qemu/transactions.h
F: util/transactions.c
QAPI
M: Markus Armbruster <armbru@redhat.com>
M: Michael Roth <michael.roth@amd.com>

1353
block.c

File diff suppressed because it is too large Load diff

View file

@ -37,7 +37,6 @@
typedef struct BDRVBackupTopState {
BlockCopyState *bcs;
BdrvChild *target;
bool active;
int64_t cluster_size;
} BDRVBackupTopState;
@ -45,12 +44,6 @@ static coroutine_fn int backup_top_co_preadv(
BlockDriverState *bs, uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
{
BDRVBackupTopState *s = bs->opaque;
if (!s->active) {
return -EIO;
}
return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
}
@ -60,10 +53,6 @@ static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
BDRVBackupTopState *s = bs->opaque;
uint64_t off, end;
if (!s->active) {
return -EIO;
}
if (flags & BDRV_REQ_WRITE_UNCHANGED) {
return 0;
}
@ -137,21 +126,6 @@ static void backup_top_child_perm(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
BDRVBackupTopState *s = bs->opaque;
if (!s->active) {
/*
* The filter node may be in process of bdrv_append(), which firstly do
* bdrv_set_backing_hd() and then bdrv_replace_node(). This means that
* we can't unshare BLK_PERM_WRITE during bdrv_append() operation. So,
* let's require nothing during bdrv_append() and refresh permissions
* after it (see bdrv_backup_top_append()).
*/
*nperm = 0;
*nshared = BLK_PERM_ALL;
return;
}
if (!(role & BDRV_CHILD_FILTERED)) {
/*
* Target child
@ -234,7 +208,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
bdrv_drained_begin(source);
bdrv_ref(top);
ret = bdrv_append(top, source, errp);
if (ret < 0) {
error_prepend(errp, "Cannot append backup-top filter: ");
@ -242,17 +215,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
}
appended = true;
/*
* bdrv_append() finished successfully, now we can require permissions
* we want.
*/
state->active = true;
ret = bdrv_child_refresh_perms(top, top->backing, errp);
if (ret < 0) {
error_prepend(errp, "Cannot set permissions for backup-top filter: ");
goto fail;
}
state->cluster_size = cluster_size;
state->bcs = block_copy_state_new(top->backing, state->target,
cluster_size, perf->use_copy_range,
@ -269,7 +231,6 @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
fail:
if (appended) {
state->active = false;
bdrv_backup_top_drop(top);
} else {
bdrv_unref(top);
@ -284,16 +245,9 @@ void bdrv_backup_top_drop(BlockDriverState *bs)
{
BDRVBackupTopState *s = bs->opaque;
bdrv_drained_begin(bs);
bdrv_drop_filter(bs, &error_abort);
block_copy_state_free(s->bcs);
s->active = false;
bdrv_child_refresh_perms(bs, bs->backing, &error_abort);
bdrv_replace_node(bs, bs->backing->bs, &error_abort);
bdrv_set_backing_hd(bs, NULL, &error_abort);
bdrv_drained_end(bs);
bdrv_unref(bs);
}

View file

@ -298,6 +298,13 @@ static void blk_root_detach(BdrvChild *child)
}
}
static AioContext *blk_root_get_parent_aio_context(BdrvChild *c)
{
BlockBackend *blk = c->opaque;
return blk_get_aio_context(blk);
}
static const BdrvChildClass child_root = {
.inherit_options = blk_root_inherit_options,
@ -318,6 +325,8 @@ static const BdrvChildClass child_root = {
.can_set_aio_ctx = blk_root_can_set_aio_ctx,
.set_aio_ctx = blk_root_set_aio_ctx,
.get_parent_aio_context = blk_root_get_parent_aio_context,
};
/*
@ -398,15 +407,19 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
BlockBackend *blk;
BlockDriverState *bs;
uint64_t perm = 0;
uint64_t shared = BLK_PERM_ALL;
/* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a concern because the BDS stays private, so we
* just request permission according to the flags.
/*
* blk_new_open() is mainly used in .bdrv_create implementations and the
* tools where sharing isn't a major concern because the BDS stays private
* and the file is generally not supposed to be used by a second process,
* so we just request permission according to the flags.
*
* The exceptions are xen_disk and blockdev_init(); in these cases, the
* caller of blk_new_open() doesn't make use of the permissions, but they
* shouldn't hurt either. We can still share everything here because the
* guest devices will add their own blockers if they can't share. */
* guest devices will add their own blockers if they can't share.
*/
if ((flags & BDRV_O_NO_IO) == 0) {
perm |= BLK_PERM_CONSISTENT_READ;
if (flags & BDRV_O_RDWR) {
@ -416,8 +429,11 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
if (flags & BDRV_O_RESIZE) {
perm |= BLK_PERM_RESIZE;
}
if (flags & BDRV_O_NO_SHARE) {
shared = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
}
blk = blk_new(qemu_get_aio_context(), perm, BLK_PERM_ALL);
blk = blk_new(qemu_get_aio_context(), perm, shared);
bs = bdrv_open(filename, reference, options, flags, errp);
if (!bs) {
blk_unref(blk);
@ -426,7 +442,7 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
blk->ctx, perm, BLK_PERM_ALL, blk, errp);
perm, shared, blk, errp);
if (!blk->root) {
blk_unref(blk);
return NULL;
@ -840,7 +856,7 @@ int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp)
bdrv_ref(bs);
blk->root = bdrv_root_attach_child(bs, "root", &child_root,
BDRV_CHILD_FILTERED | BDRV_CHILD_PRIMARY,
blk->ctx, blk->perm, blk->shared_perm,
blk->perm, blk->shared_perm,
blk, errp);
if (blk->root == NULL) {
return -EPERM;

View file

@ -312,6 +312,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
commit_top_bs->total_sectors = top->total_sectors;
ret = bdrv_append(commit_top_bs, top, errp);
bdrv_unref(commit_top_bs); /* referenced by new parents or failed */
if (ret < 0) {
commit_top_bs = NULL;
goto fail;

View file

@ -175,7 +175,6 @@ typedef struct BDRVRawState {
} BDRVRawState;
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
bool drop_cache;
bool check_cache_dropped;
@ -1075,7 +1074,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
BDRVRawReopenState *rs;
QemuOpts *opts;
int ret;
Error *local_err = NULL;
assert(state != NULL);
assert(state->bs != NULL);
@ -1101,32 +1099,18 @@ static int raw_reopen_prepare(BDRVReopenState *state,
* bdrv_reopen_prepare() will detect changes and complain. */
qemu_opts_to_qdict(opts, state->options);
rs->fd = raw_reconfigure_getfd(state->bs, state->flags, &rs->open_flags,
state->perm, true, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -1;
goto out;
}
/* Fail already reopen_prepare() if we can't get a working O_DIRECT
* alignment with the new fd. */
if (rs->fd != -1) {
raw_probe_alignment(state->bs, rs->fd, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto out_fd;
}
}
/*
* As part of reopen prepare we also want to create new fd by
* raw_reconfigure_getfd(). But it wants updated "perm", when in
* bdrv_reopen_multiple() .bdrv_reopen_prepare() callback called prior to
* permission update. Happily, permission update is always a part (a seprate
* stage) of bdrv_reopen_multiple() so we can rely on this fact and
* reconfigure fd in raw_check_perm().
*/
s->reopen_state = state;
ret = 0;
out_fd:
if (ret < 0) {
qemu_close(rs->fd);
rs->fd = -1;
}
out:
qemu_opts_del(opts);
return ret;
@ -1140,10 +1124,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
s->drop_cache = rs->drop_cache;
s->check_cache_dropped = rs->check_cache_dropped;
s->open_flags = rs->open_flags;
qemu_close(s->fd);
s->fd = rs->fd;
g_free(state->opaque);
state->opaque = NULL;
@ -1162,10 +1142,6 @@ static void raw_reopen_abort(BDRVReopenState *state)
return;
}
if (rs->fd >= 0) {
qemu_close(rs->fd);
rs->fd = -1;
}
g_free(state->opaque);
state->opaque = NULL;
@ -3073,39 +3049,30 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
Error **errp)
{
BDRVRawState *s = bs->opaque;
BDRVRawReopenState *rs = NULL;
int input_flags = s->reopen_state ? s->reopen_state->flags : bs->open_flags;
int open_flags;
int ret;
if (s->perm_change_fd) {
/*
* In the context of reopen, this function may be called several times
* (directly and recursively while change permissions of the parent).
* This is even true for children that don't inherit from the original
* reopen node, so s->reopen_state is not set.
*
* Ignore all but the first call.
*/
return 0;
}
/* We may need a new fd if auto-read-only switches the mode */
ret = raw_reconfigure_getfd(bs, input_flags, &open_flags, perm,
false, errp);
if (ret < 0) {
return ret;
} else if (ret != s->fd) {
Error *local_err = NULL;
if (s->reopen_state) {
/* We already have a new file descriptor to set permissions for */
assert(s->reopen_state->perm == perm);
assert(s->reopen_state->shared_perm == shared);
rs = s->reopen_state->opaque;
s->perm_change_fd = rs->fd;
s->perm_change_flags = rs->open_flags;
} else {
/* We may need a new fd if auto-read-only switches the mode */
ret = raw_reconfigure_getfd(bs, bs->open_flags, &open_flags, perm,
false, errp);
if (ret < 0) {
return ret;
} else if (ret != s->fd) {
s->perm_change_fd = ret;
s->perm_change_flags = open_flags;
/*
* Fail already check_perm() if we can't get a working O_DIRECT
* alignment with the new fd.
*/
raw_probe_alignment(bs, ret, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
}
s->perm_change_fd = ret;
s->perm_change_flags = open_flags;
}
/* Prepare permissions on old fd to avoid conflicts between old and new,
@ -3127,7 +3094,7 @@ static int raw_check_perm(BlockDriverState *bs, uint64_t perm, uint64_t shared,
return 0;
fail:
if (s->perm_change_fd && !s->reopen_state) {
if (s->perm_change_fd) {
qemu_close(s->perm_change_fd);
}
s->perm_change_fd = 0;
@ -3158,7 +3125,7 @@ static void raw_abort_perm_update(BlockDriverState *bs)
/* For reopen, .bdrv_reopen_abort is called afterwards and will close
* the file descriptor. */
if (s->perm_change_fd && !s->reopen_state) {
if (s->perm_change_fd) {
qemu_close(s->perm_change_fd);
}
s->perm_change_fd = 0;

View file

@ -133,13 +133,40 @@ static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
typedef struct BdrvRefreshLimitsState {
BlockDriverState *bs;
BlockLimits old_bl;
} BdrvRefreshLimitsState;
static void bdrv_refresh_limits_abort(void *opaque)
{
BdrvRefreshLimitsState *s = opaque;
s->bs->bl = s->old_bl;
}
static TransactionActionDrv bdrv_refresh_limits_drv = {
.abort = bdrv_refresh_limits_abort,
.clean = g_free,
};
/* @tran is allowed to be NULL, in this case no rollback is possible. */
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
ERRP_GUARD();
BlockDriver *drv = bs->drv;
BdrvChild *c;
bool have_limits;
if (tran) {
BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
*s = (BdrvRefreshLimitsState) {
.bs = bs,
.old_bl = bs->bl,
};
tran_add(tran, &bdrv_refresh_limits_drv, s);
}
memset(&bs->bl, 0, sizeof(bs->bl));
if (!drv) {
@ -156,7 +183,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
bdrv_refresh_limits(c->bs, errp);
bdrv_refresh_limits(c->bs, tran, errp);
if (*errp) {
return;
}

View file

@ -1630,9 +1630,6 @@ static BlockJob *mirror_start_job(
bs_opaque->is_commit = target_is_backing;
/* bdrv_append takes ownership of the mirror_top_bs reference, need to keep
* it alive until block_job_create() succeeds even if bs has no parent. */
bdrv_ref(mirror_top_bs);
bdrv_drained_begin(bs);
ret = bdrv_append(mirror_top_bs, bs, errp);
bdrv_drained_end(bs);

View file

@ -1576,10 +1576,6 @@ static void external_snapshot_prepare(BlkActionState *common,
goto out;
}
/* This removes our old bs and adds the new bs. This is an operation that
* can fail, so we need to do it in .prepare; undoing it for abort is
* always possible. */
bdrv_ref(state->new_bs);
ret = bdrv_append(state->new_bs, state->old_bs, errp);
if (ret < 0) {
goto out;

View file

@ -163,6 +163,13 @@ static void child_job_set_aio_ctx(BdrvChild *c, AioContext *ctx,
job->job.aio_context = ctx;
}
static AioContext *child_job_get_parent_aio_context(BdrvChild *c)
{
BlockJob *job = c->opaque;
return job->job.aio_context;
}
static const BdrvChildClass child_job = {
.get_parent_desc = child_job_get_parent_desc,
.drained_begin = child_job_drained_begin,
@ -171,6 +178,7 @@ static const BdrvChildClass child_job = {
.can_set_aio_ctx = child_job_can_set_aio_ctx,
.set_aio_ctx = child_job_set_aio_ctx,
.stay_at_node = true,
.get_parent_aio_context = child_job_get_parent_aio_context,
};
void block_job_remove_all_bdrv(BlockJob *job)
@ -221,8 +229,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
aio_context_release(job->job.aio_context);
}
c = bdrv_root_attach_child(bs, name, &child_job, 0,
job->job.aio_context, perm, shared_perm, job,
c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
errp);
if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
aio_context_acquire(job->job.aio_context);

View file

@ -467,6 +467,11 @@ static void vhost_user_blk_device_realize(DeviceState *dev, Error **errp)
error_setg(errp, "vhost-user-blk: queue size must be non-zero");
return;
}
if (s->queue_size > VIRTQUEUE_MAX_SIZE) {
error_setg(errp, "vhost-user-blk: queue size must not exceed %d",
VIRTQUEUE_MAX_SIZE);
return;
}
if (!vhost_user_init(&s->vhost_user, &s->chardev, errp)) {
return;

View file

@ -9,6 +9,7 @@
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
#include "qemu/transactions.h"
/*
* generated_co_wrapper
@ -101,6 +102,7 @@ typedef struct HDGeometry {
uint32_t cylinders;
} HDGeometry;
#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
#define BDRV_O_RDWR 0x0002
#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
@ -207,8 +209,7 @@ typedef struct BDRVReopenState {
BlockdevDetectZeroesOptions detect_zeroes;
bool backing_missing;
bool replace_backing_bs; /* new_backing_bs is ignored if this is false */
BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */
uint64_t perm, shared_perm;
BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
QDict *options;
QDict *explicit_options;
void *opaque;
@ -362,6 +363,7 @@ int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
int flags, Error **errp);
int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
int bdrv_parse_aio(const char *mode, int *flags);
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
@ -387,10 +389,6 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
Error **errp);
int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp);
void bdrv_reopen_commit(BDRVReopenState *reopen_state);
void bdrv_reopen_abort(BDRVReopenState *reopen_state);
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
@ -424,7 +422,7 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp);
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
int bdrv_commit(BlockDriverState *bs);
int bdrv_make_empty(BdrvChild *c, Error **errp);
int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
@ -702,6 +700,8 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp);
bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList **ignore, Error **errp);
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);

View file

@ -789,6 +789,8 @@ struct BdrvChildClass {
bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
GSList **ignore, Error **errp);
void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
AioContext *(*get_parent_aio_context)(BdrvChild *child);
};
extern const BdrvChildClass child_of_bds;
@ -811,11 +813,6 @@ struct BdrvChild {
*/
uint64_t shared_perm;
/* backup of permissions during permission update procedure */
bool has_backup_perm;
uint64_t backup_perm;
uint64_t backup_shared_perm;
/*
* This link is frozen: the child can neither be replaced nor
* detached from the parent.
@ -1306,7 +1303,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
AioContext *ctx,
uint64_t perm, uint64_t shared_perm,
void *opaque, Error **errp);
void bdrv_root_unref_child(BdrvChild *child);

View file

@ -0,0 +1,63 @@
/*
* Simple transactions API
*
* Copyright (c) 2021 Virtuozzo International GmbH.
*
* Author:
* Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* = Generic transaction API =
*
* The intended usage is the following: you create "prepare" functions, which
* represents the actions. They will usually have Transaction* argument, and
* call tran_add() to register finalization callbacks. For finalization
* callbacks, prepare corresponding TransactionActionDrv structures.
*
* Then, when you need to make a transaction, create an empty Transaction by
* tran_create(), call your "prepare" functions on it, and finally call
* tran_abort() or tran_commit() to finalize the transaction by corresponding
* finalization actions in reverse order.
*/
#ifndef QEMU_TRANSACTIONS_H
#define QEMU_TRANSACTIONS_H
#include <gmodule.h>
typedef struct TransactionActionDrv {
void (*abort)(void *opaque);
void (*commit)(void *opaque);
void (*clean)(void *opaque);
} TransactionActionDrv;
typedef struct Transaction Transaction;
Transaction *tran_new(void);
void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque);
void tran_abort(Transaction *tran);
void tran_commit(Transaction *tran);
static inline void tran_finalize(Transaction *tran, int ret)
{
if (ret < 0) {
tran_abort(tran);
} else {
tran_commit(tran);
}
}
#endif /* QEMU_TRANSACTIONS_H */

View file

@ -2146,7 +2146,7 @@ static void set_rate_limit(BlockBackend *blk, int64_t rate_limit)
static int img_convert(int argc, char **argv)
{
int c, bs_i, flags, src_flags = 0;
int c, bs_i, flags, src_flags = BDRV_O_NO_SHARE;
const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
*src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
*out_filename, *out_baseimg_param, *snapshot_name = NULL;

View file

@ -905,7 +905,7 @@ class TestBlockdevReopen(iotests.QMPTestCase):
# We can't reopen hd1 to read-only, as block-stream requires it to be
# read-write
self.reopen(opts['backing'], {'read-only': True},
"Cannot make block node read-only, there is a writer on it")
"Read-only block node 'hd1' cannot support read-write users")
# We can't remove hd2 while the stream job is ongoing
opts['backing']['backing'] = None

View file

@ -5,7 +5,7 @@
{"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}}
{"return": {}}
{"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
{"error": {"class": "GenericError", "desc": "Cannot set permissions for backup-top filter: Conflicts with use by other as 'image', which uses 'write' on base"}}
{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}}
=== backup-top should be gone after job-finalize ===

View file

@ -16,7 +16,7 @@ QMP_VERSION
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
{"error": {"class": "GenericError", "desc": "Conflicts with use by a block device as 'root', which uses 'write' on fmt_base"}}
{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}}
*** done

View file

@ -1478,7 +1478,6 @@ static void test_append_to_drained(void)
g_assert_cmpint(base_s->drain_count, ==, 1);
g_assert_cmpint(base->in_flight, ==, 0);
/* Takes ownership of overlay, so we don't have to unref it later */
bdrv_append(overlay, base, &error_abort);
g_assert_cmpint(base->in_flight, ==, 0);
g_assert_cmpint(overlay->in_flight, ==, 0);
@ -1495,6 +1494,7 @@ static void test_append_to_drained(void)
g_assert_cmpint(overlay->quiesce_counter, ==, 0);
g_assert_cmpint(overlay_s->drain_count, ==, 0);
bdrv_unref(overlay);
bdrv_unref(base);
blk_unref(blk);
}

View file

@ -1,7 +1,7 @@
/*
* Block node graph modifications tests
*
* Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
* Copyright (c) 2019-2021 Virtuozzo International GmbH. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -44,6 +44,21 @@ static BlockDriver bdrv_no_perm = {
.bdrv_child_perm = no_perm_default_perms,
};
static void exclusive_write_perms(BlockDriverState *bs, BdrvChild *c,
BdrvChildRole role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
*nperm = BLK_PERM_WRITE;
*nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE;
}
static BlockDriver bdrv_exclusive_writer = {
.format_name = "exclusive-writer",
.bdrv_child_perm = exclusive_write_perms,
};
static BlockDriverState *no_perm_node(const char *name)
{
return bdrv_new_open_driver(&bdrv_no_perm, name, BDRV_O_RDWR, &error_abort);
@ -55,6 +70,12 @@ static BlockDriverState *pass_through_node(const char *name)
BDRV_O_RDWR, &error_abort);
}
static BlockDriverState *exclusive_writer_node(const char *name)
{
return bdrv_new_open_driver(&bdrv_exclusive_writer, name,
BDRV_O_RDWR, &error_abort);
}
/*
* test_update_perm_tree
*
@ -117,6 +138,7 @@ static void test_update_perm_tree(void)
ret = bdrv_append(filter, bs, NULL);
g_assert_cmpint(ret, <, 0);
bdrv_unref(filter);
blk_unref(root);
}
@ -181,10 +203,189 @@ static void test_should_update_child(void)
bdrv_append(filter, bs, &error_abort);
g_assert(target->backing->bs == bs);
bdrv_unref(filter);
bdrv_unref(bs);
blk_unref(root);
}
/*
* test_parallel_exclusive_write
*
* Check that when we replace node, old permissions of the node being removed
* doesn't break the replacement.
*/
static void test_parallel_exclusive_write(void)
{
BlockDriverState *top = exclusive_writer_node("top");
BlockDriverState *base = no_perm_node("base");
BlockDriverState *fl1 = pass_through_node("fl1");
BlockDriverState *fl2 = pass_through_node("fl2");
/*
* bdrv_attach_child() eats child bs reference, so we need two @base
* references for two filters:
*/
bdrv_ref(base);
bdrv_attach_child(top, fl1, "backing", &child_of_bds, BDRV_CHILD_DATA,
&error_abort);
bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
&error_abort);
bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
&error_abort);
bdrv_replace_node(fl1, fl2, &error_abort);
bdrv_unref(fl2);
bdrv_unref(top);
}
static void write_to_file_perms(BlockDriverState *bs, BdrvChild *c,
BdrvChildRole role,
BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
if (bs->file && c == bs->file) {
*nperm = BLK_PERM_WRITE;
*nshared = BLK_PERM_ALL & ~BLK_PERM_WRITE;
} else {
*nperm = 0;
*nshared = BLK_PERM_ALL;
}
}
static BlockDriver bdrv_write_to_file = {
.format_name = "tricky-perm",
.bdrv_child_perm = write_to_file_perms,
};
/*
* The following test shows that topological-sort order is required for
* permission update, simple DFS is not enough.
*
* Consider the block driver which has two filter children: one active
* with exclusive write access and one inactive with no specific
* permissions.
*
* And, these two children has a common base child, like this:
*
*
* fl2 top
*
*
* w
*
*
* fl1
*
*
* w
*
*
* base
*
*
* So, exclusive write is propagated.
*
* Assume, we want to make fl2 active instead of fl1.
* So, we set some option for top driver and do permission update.
*
* With simple DFS, if permission update goes first through
* top->fl1->base branch it will succeed: it firstly drop exclusive write
* permissions and than apply them for another BdrvChildren.
* But if permission update goes first through top->fl2->base branch it
* will fail, as when we try to update fl2->base child, old not yet
* updated fl1->base child will be in conflict.
*
* With topological-sort order we always update parents before children, so fl1
* and fl2 are both updated when we update base and there is no conflict.
*/
static void test_parallel_perm_update(void)
{
BlockDriverState *top = no_perm_node("top");
BlockDriverState *tricky =
bdrv_new_open_driver(&bdrv_write_to_file, "tricky", BDRV_O_RDWR,
&error_abort);
BlockDriverState *base = no_perm_node("base");
BlockDriverState *fl1 = pass_through_node("fl1");
BlockDriverState *fl2 = pass_through_node("fl2");
BdrvChild *c_fl1, *c_fl2;
/*
* bdrv_attach_child() eats child bs reference, so we need two @base
* references for two filters:
*/
bdrv_ref(base);
bdrv_attach_child(top, tricky, "file", &child_of_bds, BDRV_CHILD_DATA,
&error_abort);
c_fl1 = bdrv_attach_child(tricky, fl1, "first", &child_of_bds,
BDRV_CHILD_FILTERED, &error_abort);
c_fl2 = bdrv_attach_child(tricky, fl2, "second", &child_of_bds,
BDRV_CHILD_FILTERED, &error_abort);
bdrv_attach_child(fl1, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
&error_abort);
bdrv_attach_child(fl2, base, "backing", &child_of_bds, BDRV_CHILD_FILTERED,
&error_abort);
/* Select fl1 as first child to be active */
tricky->file = c_fl1;
bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort);
assert(c_fl1->perm & BLK_PERM_WRITE);
assert(!(c_fl2->perm & BLK_PERM_WRITE));
/* Now, try to switch active child and update permissions */
tricky->file = c_fl2;
bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort);
assert(c_fl2->perm & BLK_PERM_WRITE);
assert(!(c_fl1->perm & BLK_PERM_WRITE));
/* Switch once more, to not care about real child order in the list */
tricky->file = c_fl1;
bdrv_child_refresh_perms(top, top->children.lh_first, &error_abort);
assert(c_fl1->perm & BLK_PERM_WRITE);
assert(!(c_fl2->perm & BLK_PERM_WRITE));
bdrv_unref(top);
}
/*
* It's possible that filter required permissions allows to insert it to backing
* chain, like:
*
* 1. [top] -> [filter] -> [base]
*
* but doesn't allow to add it as a branch:
*
* 2. [filter] --\
* v
* [top] -> [base]
*
* So, inserting such filter should do all graph modifications and only then
* update permissions. If we try to go through intermediate state [2] and update
* permissions on it we'll fail.
*
* Let's check that bdrv_append() can append such a filter.
*/
static void test_append_greedy_filter(void)
{
BlockDriverState *top = exclusive_writer_node("top");
BlockDriverState *base = no_perm_node("base");
BlockDriverState *fl = exclusive_writer_node("fl1");
bdrv_attach_child(top, base, "backing", &child_of_bds, BDRV_CHILD_COW,
&error_abort);
bdrv_append(fl, base, &error_abort);
bdrv_unref(fl);
bdrv_unref(top);
}
int main(int argc, char *argv[])
{
bdrv_init();
@ -195,6 +396,12 @@ int main(int argc, char *argv[])
g_test_add_func("/bdrv-graph-mod/update-perm-tree", test_update_perm_tree);
g_test_add_func("/bdrv-graph-mod/should-update-child",
test_should_update_child);
g_test_add_func("/bdrv-graph-mod/parallel-perm-update",
test_parallel_perm_update);
g_test_add_func("/bdrv-graph-mod/parallel-exclusive-write",
test_parallel_exclusive_write);
g_test_add_func("/bdrv-graph-mod/append-greedy-filter",
test_append_greedy_filter);
return g_test_run();
}

View file

@ -41,6 +41,7 @@ util_ss.add(files('qsp.c'))
util_ss.add(files('range.c'))
util_ss.add(files('stats64.c'))
util_ss.add(files('systemd.c'))
util_ss.add(files('transactions.c'))
util_ss.add(when: 'CONFIG_POSIX', if_true: files('drm.c'))
util_ss.add(files('guest-random.c'))
util_ss.add(files('yank.c'))

96
util/transactions.c Normal file
View file

@ -0,0 +1,96 @@
/*
* Simple transactions API
*
* Copyright (c) 2021 Virtuozzo International GmbH.
*
* Author:
* Sementsov-Ogievskiy Vladimir <vsementsov@virtuozzo.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/transactions.h"
#include "qemu/queue.h"
typedef struct TransactionAction {
TransactionActionDrv *drv;
void *opaque;
QSLIST_ENTRY(TransactionAction) entry;
} TransactionAction;
struct Transaction {
QSLIST_HEAD(, TransactionAction) actions;
};
Transaction *tran_new(void)
{
Transaction *tran = g_new(Transaction, 1);
QSLIST_INIT(&tran->actions);
return tran;
}
void tran_add(Transaction *tran, TransactionActionDrv *drv, void *opaque)
{
TransactionAction *act;
act = g_new(TransactionAction, 1);
*act = (TransactionAction) {
.drv = drv,
.opaque = opaque
};
QSLIST_INSERT_HEAD(&tran->actions, act, entry);
}
void tran_abort(Transaction *tran)
{
TransactionAction *act, *next;
QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
if (act->drv->abort) {
act->drv->abort(act->opaque);
}
if (act->drv->clean) {
act->drv->clean(act->opaque);
}
g_free(act);
}
g_free(tran);
}
void tran_commit(Transaction *tran)
{
TransactionAction *act, *next;
QSLIST_FOREACH_SAFE(act, &tran->actions, entry, next) {
if (act->drv->commit) {
act->drv->commit(act->opaque);
}
if (act->drv->clean) {
act->drv->clean(act->opaque);
}
g_free(act);
}
g_free(tran);
}