Block layer patches

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

- NBD server: Fix crashes related to switching between AioContexts
- file-posix: Workaround for discard/write_zeroes on buggy filesystems
- Follow-up fixes for the reopen vs. permission changes
- quorum: Fix error handling for flush
- block-copy: Refactor copy_range handling
- docs: Describe how to use 'null-co' block driver

# gpg: Signature made Wed 02 Jun 2021 14:44:15 BST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  docs/secure-coding-practices: Describe how to use 'null-co' block driver
  block-copy: refactor copy_range handling
  block-copy: fix block_copy_task_entry() progress update
  nbd/server: Use drained block ops to quiesce the server
  block-backend: add drained_poll
  block: improve permission conflict error message
  block: simplify bdrv_child_user_desc()
  block/vvfat: inherit child_vvfat_qcow from child_of_bds
  block: improve bdrv_child_get_parent_desc()
  block-backend: improve blk_root_get_parent_desc()
  block: document child argument of bdrv_attach_child_common()
  block/file-posix: Try other fallbacks after invalid FALLOC_FL_ZERO_RANGE
  block/file-posix: Fix problem with fallocate(PUNCH_HOLE) on GPFS
  block: drop BlockBackendRootState::read_only
  block: drop BlockDriverState::read_only
  block: consistently use bdrv_is_read_only()
  block/vvfat: fix vvfat_child_perm crash
  block/vvfat: child_vvfat_qcow: add .get_parent_aio_context, fix crash
  qemu-io-cmds: assert that we don't have .perm requested in no-blk case
  block/quorum: Provide .bdrv_co_flush instead of .bdrv_co_flush_to_disk

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Committed by Peter Maydell on 2021-06-02 19:34:03 +01:00
commit 8e6dad2028
24 changed files with 250 additions and 147 deletions

block.c

@ -84,14 +84,6 @@ static BlockDriverState *bdrv_open_inherit(const char *filename,
static void bdrv_replace_child_noperm(BdrvChild *child,
BlockDriverState *new_bs);
static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
BdrvChild **child,
Transaction *tran,
Error **errp);
static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
Transaction *tran);
@ -265,7 +257,7 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
* image is inactivated. */
bool bdrv_is_read_only(BlockDriverState *bs)
{
return bs->read_only;
return !(bs->open_flags & BDRV_O_RDWR);
}
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
@ -317,7 +309,6 @@ int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
goto fail;
}
bs->read_only = true;
bs->open_flags &= ~BDRV_O_RDWR;
return 0;
@ -1158,7 +1149,7 @@ int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
static char *bdrv_child_get_parent_desc(BdrvChild *c)
{
BlockDriverState *parent = c->opaque;
return g_strdup(bdrv_get_device_or_node_name(parent));
return g_strdup_printf("node '%s'", bdrv_get_node_name(parent));
}
static void bdrv_child_cb_drained_begin(BdrvChild *child)
@ -1412,7 +1403,7 @@ static int bdrv_child_cb_update_filename(BdrvChild *c, BlockDriverState *base,
return 0;
}
static AioContext *bdrv_child_cb_get_parent_aio_context(BdrvChild *c)
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c)
{
BlockDriverState *bs = c->opaque;
@ -1432,7 +1423,7 @@ const BdrvChildClass child_of_bds = {
.can_set_aio_ctx = bdrv_child_cb_can_set_aio_ctx,
.set_aio_ctx = bdrv_child_cb_set_aio_ctx,
.update_filename = bdrv_child_cb_update_filename,
.get_parent_aio_context = bdrv_child_cb_get_parent_aio_context,
.get_parent_aio_context = child_of_bds_get_parent_aio_context,
};
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c)
@ -1549,7 +1540,6 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
}
bs->drv = drv;
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
bs->opaque = g_malloc0(drv->instance_size);
if (drv->bdrv_file_open) {
@ -1720,6 +1710,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
QemuOpts *opts;
BlockDriver *drv;
Error *local_err = NULL;
bool ro;
assert(bs->file == NULL);
assert(options != NULL && bs->options != options);
@ -1770,17 +1761,17 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
drv->format_name);
bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
ro = bdrv_is_read_only(bs);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
if (!bs->read_only && bdrv_is_whitelisted(drv, true)) {
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, ro)) {
if (!ro && bdrv_is_whitelisted(drv, true)) {
ret = bdrv_apply_auto_read_only(bs, NULL, NULL);
} else {
ret = -ENOTSUP;
}
if (ret < 0) {
error_setg(errp,
!bs->read_only && bdrv_is_whitelisted(drv, true)
!ro && bdrv_is_whitelisted(drv, true)
? "Driver '%s' can only be used for read-only devices"
: "Driver '%s' is not whitelisted",
drv->format_name);
@ -1792,7 +1783,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockBackend *file,
assert(qatomic_read(&bs->copy_on_read) == 0);
if (bs->open_flags & BDRV_O_COPY_ON_READ) {
if (!bs->read_only) {
if (!ro) {
bdrv_enable_copy_on_read(bs);
} else {
error_setg(errp, "Can't use copy-on-read on read-only device");
@ -2035,27 +2026,38 @@ bool bdrv_is_writable(BlockDriverState *bs)
static char *bdrv_child_user_desc(BdrvChild *c)
{
if (c->klass->get_parent_desc) {
return c->klass->get_parent_desc(c);
}
return g_strdup("another user");
return c->klass->get_parent_desc(c);
}
/*
* Check that @a allows everything that @b needs. @a and @b must reference same
* child node.
*/
static bool bdrv_a_allow_b(BdrvChild *a, BdrvChild *b, Error **errp)
{
g_autofree char *user = NULL;
g_autofree char *perm_names = NULL;
const char *child_bs_name;
g_autofree char *a_user = NULL;
g_autofree char *b_user = NULL;
g_autofree char *perms = NULL;
assert(a->bs);
assert(a->bs == b->bs);
if ((b->perm & a->shared_perm) == b->perm) {
return true;
}
perm_names = bdrv_perm_names(b->perm & ~a->shared_perm);
user = bdrv_child_user_desc(a);
error_setg(errp, "Conflicts with use by %s as '%s', which does not "
"allow '%s' on %s",
user, a->name, perm_names, bdrv_get_node_name(b->bs));
child_bs_name = bdrv_get_node_name(b->bs);
a_user = bdrv_child_user_desc(a);
b_user = bdrv_child_user_desc(b);
perms = bdrv_perm_names(b->perm & ~a->shared_perm);
error_setg(errp, "Permission conflict on node '%s': permissions '%s' are "
"both required by %s (uses node '%s' as '%s' child) and "
"unshared by %s (uses node '%s' as '%s' child).",
child_bs_name, perms,
b_user, child_bs_name, b->name,
a_user, child_bs_name, a->name);
return false;
}
@ -2760,6 +2762,12 @@ static TransactionActionDrv bdrv_attach_child_common_drv = {
/*
* Common part of attaching bdrv child to bs or to blk or to job
*
* Resulting new child is returned through @child.
* At start *@child must be NULL.
* @child is saved to a new entry of @tran, so that *@child could be reverted to
* NULL on abort(). So referenced variable must live at least until transaction
* end.
*/
static int bdrv_attach_child_common(BlockDriverState *child_bs,
const char *child_name,
@ -2775,6 +2783,7 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
assert(child);
assert(*child == NULL);
assert(child_class->get_parent_desc);
new_child = g_new(BdrvChild, 1);
*new_child = (BdrvChild) {
@ -2834,6 +2843,10 @@ static int bdrv_attach_child_common(BlockDriverState *child_bs,
return 0;
}
/*
* Variable referenced by @child must live at least until transaction end.
* (see bdrv_attach_child_common() doc for details)
*/
static int bdrv_attach_child_noperm(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
@ -2916,7 +2929,6 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
child_role, perm, shared_perm, opaque,
&child, tran, errp);
if (ret < 0) {
assert(child == NULL);
goto out;
}
@ -2924,6 +2936,9 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
out:
tran_finalize(tran, ret);
/* child is unset on failure by bdrv_attach_child_common_abort() */
assert((ret < 0) == !child);
bdrv_unref(child_bs);
return child;
}
@ -2963,6 +2978,8 @@ BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
out:
tran_finalize(tran, ret);
/* child is unset on failure by bdrv_attach_child_common_abort() */
assert((ret < 0) == !child);
bdrv_unref(child_bs);
@ -4545,7 +4562,6 @@ static void bdrv_reopen_commit(BDRVReopenState *reopen_state)
bs->explicit_options = reopen_state->explicit_options;
bs->options = reopen_state->options;
bs->open_flags = reopen_state->flags;
bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
bs->detect_zeroes = reopen_state->detect_zeroes;
if (reopen_state->replace_backing_bs) {


@ -141,19 +141,18 @@ static void blk_root_set_aio_ctx(BdrvChild *child, AioContext *ctx,
static char *blk_root_get_parent_desc(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
char *dev_id;
g_autofree char *dev_id = NULL;
if (blk->name) {
return g_strdup(blk->name);
return g_strdup_printf("block device '%s'", blk->name);
}
dev_id = blk_get_attached_dev_id(blk);
if (*dev_id) {
return dev_id;
return g_strdup_printf("block device '%s'", dev_id);
} else {
/* TODO Callback into the BB owner for something more detailed */
g_free(dev_id);
return g_strdup("a block device");
return g_strdup("an unnamed block device");
}
}
@ -1852,7 +1851,7 @@ bool blk_supports_write_perm(BlockBackend *blk)
if (bs) {
return !bdrv_is_read_only(bs);
} else {
return !blk->root_state.read_only;
return blk->root_state.open_flags & BDRV_O_RDWR;
}
}
@ -2269,7 +2268,6 @@ void blk_update_root_state(BlockBackend *blk)
assert(blk->root);
blk->root_state.open_flags = blk->root->bs->open_flags;
blk->root_state.read_only = blk->root->bs->read_only;
blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
}
@ -2288,12 +2286,7 @@ bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
*/
int blk_get_open_flags_from_root_state(BlockBackend *blk)
{
int bs_flags;
bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
return bs_flags;
return blk->root_state.open_flags;
}
BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
@ -2393,8 +2386,13 @@ static void blk_root_drained_begin(BdrvChild *child)
static bool blk_root_drained_poll(BdrvChild *child)
{
BlockBackend *blk = child->opaque;
bool busy = false;
assert(blk->quiesce_counter);
return !!blk->in_flight;
if (blk->dev_ops && blk->dev_ops->drained_poll) {
busy = blk->dev_ops->drained_poll(blk->dev_opaque);
}
return busy || !!blk->in_flight;
}
static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)


@ -65,6 +65,7 @@ typedef struct BlockCopyTask {
int64_t offset;
int64_t bytes;
bool zeroes;
bool copy_range;
QLIST_ENTRY(BlockCopyTask) list;
CoQueue wait_queue; /* coroutines blocked on this task */
} BlockCopyTask;
@ -183,6 +184,7 @@ static BlockCopyTask *block_copy_task_create(BlockCopyState *s,
.call_state = call_state,
.offset = offset,
.bytes = bytes,
.copy_range = s->use_copy_range,
};
qemu_co_queue_init(&task->wait_queue);
QLIST_INSERT_HEAD(&s->tasks, task, list);
@ -342,11 +344,18 @@ static coroutine_fn int block_copy_task_run(AioTaskPool *pool,
*
* No sync here: nor bitmap neighter intersecting requests handling, only copy.
*
* @copy_range is an in-out argument: if *copy_range is false, copy_range is not
* done. If *copy_range is true, copy_range is attempted. If the copy_range
* attempt fails, the function falls back to the usual read+write and
* *copy_range is set to false. *copy_range and zeroes must not be true
* simultaneously.
*
* Returns 0 on success.
*/
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
int64_t offset, int64_t bytes,
bool zeroes, bool *error_is_read)
bool zeroes, bool *copy_range,
bool *error_is_read)
{
int ret;
int64_t nbytes = MIN(offset + bytes, s->len) - offset;
@ -359,6 +368,7 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
assert(offset + bytes <= s->len ||
offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
assert(nbytes < INT_MAX);
assert(!(*copy_range && zeroes));
if (zeroes) {
ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
@ -370,32 +380,15 @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
return ret;
}
if (s->use_copy_range) {
if (*copy_range) {
ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
0, s->write_flags);
if (ret < 0) {
trace_block_copy_copy_range_fail(s, offset, ret);
s->use_copy_range = false;
s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
*copy_range = false;
/* Fallback to read+write with allocated buffer */
} else {
if (s->use_copy_range) {
/*
* Successful copy-range. Now increase copy_size. copy_range
* does not respect max_transfer (it's a TODO), so we factor
* that in here.
*
* Note: we double-check s->use_copy_range for the case when
* parallel block-copy request unsets it during previous
* bdrv_co_copy_range call.
*/
s->copy_size =
MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
s->target),
s->cluster_size));
}
goto out;
return 0;
}
}
@ -431,17 +424,49 @@ out:
return ret;
}
static void block_copy_handle_copy_range_result(BlockCopyState *s,
bool is_success)
{
if (!s->use_copy_range) {
/* already disabled */
return;
}
if (is_success) {
/*
* Successful copy-range. Now increase copy_size. copy_range
* does not respect max_transfer (it's a TODO), so we factor
* that in here.
*/
s->copy_size =
MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
s->target),
s->cluster_size));
} else {
/* Copy-range failed, disable it. */
s->use_copy_range = false;
s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
}
}
static coroutine_fn int block_copy_task_entry(AioTask *task)
{
BlockCopyTask *t = container_of(task, BlockCopyTask, task);
bool error_is_read = false;
bool copy_range = t->copy_range;
int ret;
ret = block_copy_do_copy(t->s, t->offset, t->bytes, t->zeroes,
&error_is_read);
if (ret < 0 && !t->call_state->ret) {
t->call_state->ret = ret;
t->call_state->error_is_read = error_is_read;
&copy_range, &error_is_read);
if (t->copy_range) {
block_copy_handle_copy_range_result(t->s, copy_range);
}
if (ret < 0) {
if (!t->call_state->ret) {
t->call_state->ret = ret;
t->call_state->error_is_read = error_is_read;
}
} else {
progress_work_done(t->s->progress, t->bytes);
}
@ -617,7 +642,10 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
g_free(task);
continue;
}
task->zeroes = ret & BDRV_BLOCK_ZERO;
if (ret & BDRV_BLOCK_ZERO) {
task->zeroes = true;
task->copy_range = false;
}
if (s->speed) {
if (!call_state->ignore_ratelimit) {


@ -453,7 +453,7 @@ int bdrv_commit(BlockDriverState *bs)
return -EBUSY;
}
ro = backing_file_bs->read_only;
ro = bdrv_is_read_only(backing_file_bs);
if (ro) {
if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {


@ -1625,17 +1625,17 @@ static int handle_aiocb_write_zeroes(void *opaque)
if (s->has_write_zeroes) {
int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
aiocb->aio_offset, aiocb->aio_nbytes);
if (ret == -EINVAL) {
/*
* Allow falling back to pwrite for file systems that
* do not support fallocate() for an unaligned byte range.
*/
return -ENOTSUP;
}
if (ret == 0 || ret != -ENOTSUP) {
if (ret == -ENOTSUP) {
s->has_write_zeroes = false;
} else if (ret == 0 || ret != -EINVAL) {
return ret;
}
s->has_write_zeroes = false;
/*
* Note: Some file systems do not like unaligned byte ranges, and
* return EINVAL in such a case, though they should not do it according
* to the man-page of fallocate(). Thus we simply ignore this return
* value and try the other fallbacks instead.
*/
}
#endif
@ -1650,6 +1650,17 @@ static int handle_aiocb_write_zeroes(void *opaque)
return ret;
}
s->has_fallocate = false;
} else if (ret == -EINVAL) {
/*
* Some file systems like older versions of GPFS do not like un-
* aligned byte ranges, and return EINVAL in such a case, though
* they should not do it according to the man-page of fallocate().
* Warn about the bad filesystem and try the final fallback instead.
*/
warn_report_once("Your file system is misbehaving: "
"fallocate(FALLOC_FL_PUNCH_HOLE) returned EINVAL. "
"Please report this bug to your file sytem "
"vendor.");
} else if (ret != -ENOTSUP) {
return ret;
} else {


@ -1973,7 +1973,7 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
bdrv_check_request(offset, bytes, &error_abort);
if (bs->read_only) {
if (bdrv_is_read_only(bs)) {
return -EPERM;
}
@ -3406,7 +3406,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
if (new_bytes) {
bdrv_make_request_serialising(&req, 1);
}
if (bs->read_only) {
if (bdrv_is_read_only(bs)) {
error_setg(errp, "Image is read-only");
ret = -EACCES;
goto out;


@ -59,7 +59,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info = g_malloc0(sizeof(*info));
info->file = g_strdup(bs->filename);
info->ro = bs->read_only;
info->ro = bdrv_is_read_only(bs);
info->drv = g_strdup(bs->drv->format_name);
info->encrypted = bs->encrypted;


@ -1026,7 +1026,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
int new_l1_bytes;
int ret;
assert(bs->read_only);
assert(bdrv_is_read_only(bs));
/* Search the snapshot */
snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);


@ -1723,8 +1723,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
/* Clear unknown autoclear feature bits */
update_header |= s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK;
update_header =
update_header && !bs->read_only && !(flags & BDRV_O_INACTIVE);
update_header = update_header && bdrv_is_writable(bs);
if (update_header) {
s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
}
@ -1811,7 +1810,7 @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
bs->supported_truncate_flags = BDRV_REQ_ZERO_WRITE;
/* Repair image if dirty */
if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
if (!(flags & BDRV_O_CHECK) && bdrv_is_writable(bs) &&
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
BdrvCheckResult result = {0};


@ -1279,7 +1279,7 @@ static BlockDriver bdrv_quorum = {
.bdrv_dirname = quorum_dirname,
.bdrv_co_block_status = quorum_co_block_status,
.bdrv_co_flush_to_disk = quorum_co_flush,
.bdrv_co_flush = quorum_co_flush,
.bdrv_getlength = quorum_getlength,


@ -415,7 +415,7 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
error_setg(errp, "snapshot_id and name are both NULL");
return -EINVAL;
}
if (!bs->read_only) {
if (!bdrv_is_read_only(bs)) {
error_setg(errp, "Device is not readonly");
return -EINVAL;
}


@ -801,7 +801,7 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
}
if (logs.valid) {
if (bs->read_only) {
if (bdrv_is_read_only(bs)) {
bdrv_refresh_filename(bs);
ret = -EPERM;
error_setg(errp,


@ -3127,10 +3127,7 @@ static void vvfat_qcow_options(BdrvChildRole role, bool parent_is_format,
qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
}
static const BdrvChildClass child_vvfat_qcow = {
.parent_is_bds = true,
.inherit_options = vvfat_qcow_options,
};
static BdrvChildClass child_vvfat_qcow;
static int enable_write_target(BlockDriverState *bs, Error **errp)
{
@ -3208,15 +3205,12 @@ static void vvfat_child_perm(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared)
{
BDRVVVFATState *s = bs->opaque;
assert(c == s->qcow || (role & BDRV_CHILD_COW));
if (c == s->qcow) {
if (role & BDRV_CHILD_DATA) {
/* This is a private node, nobody should try to attach to it */
*nperm = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
*nshared = BLK_PERM_WRITE_UNCHANGED;
} else {
assert(role & BDRV_CHILD_COW);
/* The backing file is there so 'commit' can use it. vvfat doesn't
* access it in any way. */
*nperm = 0;
@ -3270,6 +3264,8 @@ static BlockDriver bdrv_vvfat = {
static void bdrv_vvfat_init(void)
{
child_vvfat_qcow = child_of_bds;
child_vvfat_qcow.inherit_options = vvfat_qcow_options;
bdrv_register(&bdrv_vvfat);
}


@ -583,8 +583,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
blk = blk_new(qemu_get_aio_context(), 0, BLK_PERM_ALL);
blk_rs = blk_get_root_state(blk);
blk_rs->open_flags = bdrv_flags;
blk_rs->read_only = read_only;
blk_rs->open_flags = bdrv_flags | (read_only ? 0 : BDRV_O_RDWR);
blk_rs->detect_zeroes = detect_zeroes;
qobject_unref(bs_opts);


@ -104,3 +104,12 @@ structures and only process the local copy. This prevents
time-of-check-to-time-of-use (TOCTOU) race conditions that could cause QEMU to
crash when a vCPU thread modifies guest RAM while device emulation is
processing it.
Use of null-co block drivers
----------------------------
The ``null-co`` block driver is designed for performance: its read accesses are
not initialized by default. In case this driver has to be used for security
research, it must be used with the ``read-zeroes=on`` option which fills read
buffers with zeroes. Security issues reported with the default
(``read-zeroes=off``) will be discarded.
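
For illustration only (this example is not part of the patch; the command line
is a sketch, though ``driver=null-co`` and ``read-zeroes=on`` are exactly the
options described above), such a node can be exercised with qemu-io and the
zero fill verified with a pattern read:

    # the trailing argument is the --image-opts option string for the node
    qemu-io --image-opts -c 'read -P 0 0 64k' driver=null-co,read-zeroes=on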


@ -701,6 +701,7 @@ bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList **ignore, Error **errp);
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);


@ -843,7 +843,6 @@ struct BlockDriverState {
* locking needed during I/O...
*/
int open_flags; /* flags used to open the file, re-used for re-open */
bool read_only; /* if true, the media is read only */
bool encrypted; /* if true, the media is encrypted */
bool sg; /* if true, the device is a /dev/sg* */
bool probed; /* if true, format was probed rather than specified */
@ -1008,7 +1007,6 @@ struct BlockDriverState {
struct BlockBackendRootState {
int open_flags;
bool read_only;
BlockdevDetectZeroesOptions detect_zeroes;
};


@ -66,6 +66,10 @@ typedef struct BlockDevOps {
* Runs when the backend's last drain request ends.
*/
void (*drained_end)(void *opaque);
/*
* Is the device still busy?
*/
bool (*drained_poll)(void *opaque);
} BlockDevOps;
/* This struct is embedded in (the private) BlockBackend struct and contains


@ -1513,6 +1513,11 @@ static void nbd_request_put(NBDRequestData *req)
g_free(req);
client->nb_requests--;
if (client->quiescing && client->nb_requests == 0) {
aio_wait_kick();
}
nbd_client_receive_next_request(client);
nbd_client_put(client);
@ -1530,51 +1535,70 @@ static void blk_aio_attached(AioContext *ctx, void *opaque)
QTAILQ_FOREACH(client, &exp->clients, next) {
qio_channel_attach_aio_context(client->ioc, ctx);
assert(client->nb_requests == 0);
assert(client->recv_coroutine == NULL);
assert(client->send_coroutine == NULL);
if (client->quiescing) {
client->quiescing = false;
nbd_client_receive_next_request(client);
}
}
}
static void nbd_aio_detach_bh(void *opaque)
{
NBDExport *exp = opaque;
NBDClient *client;
QTAILQ_FOREACH(client, &exp->clients, next) {
qio_channel_detach_aio_context(client->ioc);
client->quiescing = true;
if (client->recv_coroutine) {
if (client->read_yielding) {
qemu_aio_coroutine_enter(exp->common.ctx,
client->recv_coroutine);
} else {
AIO_WAIT_WHILE(exp->common.ctx, client->recv_coroutine != NULL);
}
}
if (client->send_coroutine) {
AIO_WAIT_WHILE(exp->common.ctx, client->send_coroutine != NULL);
}
}
}
static void blk_aio_detach(void *opaque)
{
NBDExport *exp = opaque;
NBDClient *client;
trace_nbd_blk_aio_detach(exp->name, exp->common.ctx);
aio_wait_bh_oneshot(exp->common.ctx, nbd_aio_detach_bh, exp);
QTAILQ_FOREACH(client, &exp->clients, next) {
qio_channel_detach_aio_context(client->ioc);
}
exp->common.ctx = NULL;
}
static void nbd_drained_begin(void *opaque)
{
NBDExport *exp = opaque;
NBDClient *client;
QTAILQ_FOREACH(client, &exp->clients, next) {
client->quiescing = true;
}
}
static void nbd_drained_end(void *opaque)
{
NBDExport *exp = opaque;
NBDClient *client;
QTAILQ_FOREACH(client, &exp->clients, next) {
client->quiescing = false;
nbd_client_receive_next_request(client);
}
}
static bool nbd_drained_poll(void *opaque)
{
NBDExport *exp = opaque;
NBDClient *client;
QTAILQ_FOREACH(client, &exp->clients, next) {
if (client->nb_requests != 0) {
/*
* If there's a coroutine waiting for a request on nbd_read_eof()
* enter it here so we don't depend on the client to wake it up.
*/
if (client->recv_coroutine != NULL && client->read_yielding) {
qemu_aio_coroutine_enter(exp->common.ctx,
client->recv_coroutine);
}
return true;
}
}
return false;
}
static void nbd_eject_notifier(Notifier *n, void *data)
{
NBDExport *exp = container_of(n, NBDExport, eject_notifier);
@ -1594,6 +1618,12 @@ void nbd_export_set_on_eject_blk(BlockExport *exp, BlockBackend *blk)
blk_add_remove_bs_notifier(blk, &nbd_exp->eject_notifier);
}
static const BlockDevOps nbd_block_ops = {
.drained_begin = nbd_drained_begin,
.drained_end = nbd_drained_end,
.drained_poll = nbd_drained_poll,
};
static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
Error **errp)
{
@ -1715,8 +1745,17 @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
exp->allocation_depth = arg->allocation_depth;
/*
* We need to inhibit request queuing in the block layer to ensure we can
* be properly quiesced when entering a drained section, as our coroutines
* servicing pending requests might enter blk_pread().
*/
blk_set_disable_request_queuing(blk, true);
blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
blk_set_dev_ops(blk, &nbd_block_ops, exp);
QTAILQ_INSERT_TAIL(&exports, exp, next);
return 0;
@ -1788,6 +1827,7 @@ static void nbd_export_delete(BlockExport *blk_exp)
}
blk_remove_aio_context_notifier(exp->common.blk, blk_aio_attached,
blk_aio_detach, exp);
blk_set_disable_request_queuing(exp->common.blk, false);
}
for (i = 0; i < exp->nr_export_bitmaps; i++) {


@ -92,9 +92,19 @@ static int command(BlockBackend *blk, const cmdinfo_t *ct, int argc,
return -EINVAL;
}
/* Request additional permissions if necessary for this command. The caller
/*
* Request additional permissions if necessary for this command. The caller
* is responsible for restoring the original permissions afterwards if this
* is what it wants. */
* is what it wants.
*
* Coverity thinks that blk may be NULL in the following if condition. It's
* not so: in init_check_command() we fail if blk is NULL for command with
* both CMD_FLAG_GLOBAL and CMD_NOFILE_OK flags unset. And in
* qemuio_add_command() we assert that command with non-zero .perm field
* doesn't set this flags. So, the following assertion is to silence
* Coverity:
*/
assert(blk || !ct->perm);
if (ct->perm && blk_is_available(blk)) {
uint64_t orig_perm, orig_shared_perm;
blk_get_perm(blk, &orig_perm, &orig_shared_perm);


@ -5,7 +5,7 @@
{"execute": "blockdev-add", "arguments": {"driver": "blkdebug", "image": "base", "node-name": "other", "take-child-perms": ["write"]}}
{"return": {}}
{"execute": "blockdev-backup", "arguments": {"device": "source", "sync": "full", "target": "target"}}
{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Conflicts with use by source as 'image', which does not allow 'write' on base"}}
{"error": {"class": "GenericError", "desc": "Cannot append backup-top filter: Permission conflict on node 'base': permissions 'write' are both required by node 'other' (uses node 'base' as 'image' child) and unshared by node 'source' (uses node 'base' as 'image' child)."}}
=== backup-top should be gone after job-finalize ===


@ -53,7 +53,7 @@ exports available: 1
=== Add a writable export ===
{"execute": "block-export-add", "arguments": {"description": "This is the writable second export", "id": "export1", "name": "export1", "node-name": "fmt", "type": "nbd", "writable": true, "writethrough": true}}
{"error": {"class": "GenericError", "desc": "Conflicts with use by sda as 'root', which does not allow 'write' on fmt"}}
{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt': permissions 'write' are both required by an unnamed block device (uses node 'fmt' as 'root' child) and unshared by block device 'sda' (uses node 'fmt' as 'root' child)."}}
{"execute": "device_del", "arguments": {"id": "sda"}}
{"return": {}}
{"data": {"device": "sda", "path": "/machine/peripheral/sda"}, "event": "DEVICE_DELETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}


@ -16,7 +16,7 @@ QMP_VERSION
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "job0"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "job0"}}
{"error": {"class": "GenericError", "desc": "Conflicts with use by stream job 'job0' as 'intermediate node', which does not allow 'write' on fmt_base"}}
{"error": {"class": "GenericError", "desc": "Permission conflict on node 'fmt_base': permissions 'write' are both required by an unnamed block device (uses node 'fmt_base' as 'root' child) and unshared by stream job 'job0' (uses node 'fmt_base' as 'intermediate node' child)."}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_EXPORT_DELETED", "data": {"id": "export1"}}
*** done


@ -194,13 +194,11 @@ static void test_sync_op_truncate(BdrvChild *c)
g_assert_cmpint(ret, ==, -EINVAL);
/* Error: Read-only image */
c->bs->read_only = true;
c->bs->open_flags &= ~BDRV_O_RDWR;
ret = bdrv_truncate(c, 65536, false, PREALLOC_MODE_OFF, 0, NULL);
g_assert_cmpint(ret, ==, -EACCES);
c->bs->read_only = false;
c->bs->open_flags |= BDRV_O_RDWR;
}
@ -236,13 +234,11 @@ static void test_sync_op_flush(BdrvChild *c)
g_assert_cmpint(ret, ==, 0);
/* Early success: Read-only image */
c->bs->read_only = true;
c->bs->open_flags &= ~BDRV_O_RDWR;
ret = bdrv_flush(c->bs);
g_assert_cmpint(ret, ==, 0);
c->bs->read_only = false;
c->bs->open_flags |= BDRV_O_RDWR;
}
@ -256,13 +252,11 @@ static void test_sync_op_blk_flush(BlockBackend *blk)
g_assert_cmpint(ret, ==, 0);
/* Early success: Read-only image */
bs->read_only = true;
bs->open_flags &= ~BDRV_O_RDWR;
ret = blk_flush(blk);
g_assert_cmpint(ret, ==, 0);
bs->read_only = false;
bs->open_flags |= BDRV_O_RDWR;
}