From ac5de4984df282d64feb4af33b92e0a75652e2b6 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Tue, 19 Jun 2018 15:39:18 -0500 Subject: [PATCH 1/9] tests: Simplify .gitignore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 0bcc8e5b was yet another instance of 'git status' reporting dirty files after an in-tree build, thanks to the new binary tests/check-block-qdict. Instead of piecemeal exemptions of each new binary as they are added, let's use git's negative globbing feature to exempt ALL files that have a 'test-' or 'check-' prefix, except for the ones ending in '.c' or '.sh'. We still have a couple of generated files that then need (re-)exclusion, but the overall list is a LOT shorter, and less prone to needing future edits. Signed-off-by: Eric Blake Message-Id: <20180619203918.65450-1-eblake@redhat.com> Reviewed-by: Philippe Mathieu-Daudé --- tests/.gitignore | 93 +++--------------------------------------------- 1 file changed, 5 insertions(+), 88 deletions(-) diff --git a/tests/.gitignore b/tests/.gitignore index 2bc61a9a58..08e2df1ce1 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -2,101 +2,18 @@ atomic_add-bench benchmark-crypto-cipher benchmark-crypto-hash benchmark-crypto-hmac -check-qdict -check-qnum -check-qjson -check-qlist -check-qlit -check-qnull -check-qobject -check-qstring -check-qom-interface -check-qom-proplist +check-* +!check-*.c +!check-*.sh qht-bench rcutorture -test-aio -test-aio-multithread -test-arm-mptimer -test-base64 -test-bdrv-drain -test-bitops -test-bitcnt -test-block-backend -test-blockjob -test-blockjob-txn -test-bufferiszero -test-char -test-clone-visitor -test-coroutine -test-crypto-afsplit -test-crypto-block -test-crypto-cipher -test-crypto-hash -test-crypto-hmac -test-crypto-ivgen -test-crypto-pbkdf -test-crypto-secret -test-crypto-tlscredsx509 -test-crypto-tlscredsx509-work/ -test-crypto-tlscredsx509-certs/ -test-crypto-tlssession -test-crypto-tlssession-work/ 
-test-crypto-tlssession-client/ -test-crypto-tlssession-server/ -test-crypto-xts -test-cutils -test-hbitmap -test-hmp -test-int128 -test-iov -test-io-channel-buffer -test-io-channel-command -test-io-channel-command.fifo -test-io-channel-file -test-io-channel-file.txt -test-io-channel-socket -test-io-channel-tls -test-io-task -test-keyval -test-logging -test-mul64 -test-opts-visitor +test-* +!test-*.c test-qapi-commands.[ch] test-qapi-events.[ch] test-qapi-types.[ch] -test-qapi-util test-qapi-visit.[ch] -test-qdev-global-props -test-qemu-opts -test-qdist -test-qga -test-qht -test-qht-par -test-qmp-cmds -test-qmp-event -test-qobject-input-strict -test-qobject-input-visitor test-qapi-introspect.[ch] -test-qobject-output-visitor -test-rcu-list -test-replication -test-shift128 -test-string-input-visitor -test-string-output-visitor -test-thread-pool -test-throttle -test-timed-average -test-uuid -test-util-sockets -test-visitor-serialization -test-vmstate -test-write-threshold -test-x86-cpuid -test-x86-cpuid-compat -test-xbzrle -test-netfilter -test-filter-mirror -test-filter-redirector *-test qapi-schema/*.test.* vm/*.img From d8b20291cba6aa9bb295885a34f2b5f05d59d1b2 Mon Sep 17 00:00:00 2001 From: Eric Blake Date: Thu, 21 Jun 2018 07:49:37 -0500 Subject: [PATCH 2/9] nbd/server: Reject 0-length block status request The NBD spec says that behavior is unspecified if the client requests 0 length for block status; but since the structured reply is documented as returning a non-zero length, it's easier to just diagnose this with an EINVAL error than to figure out what to return. 
CC: qemu-stable@nongnu.org Signed-off-by: Eric Blake Message-Id: <20180621124937.166549-1-eblake@redhat.com> Reviewed-by: Vladimir Sementsov-Ogievskiy --- nbd/server.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nbd/server.c b/nbd/server.c index 9e1f227178..493a926e06 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -2007,6 +2007,10 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, "discard failed", errp); case NBD_CMD_BLOCK_STATUS: + if (!request->len) { + return nbd_send_generic_reply(client, request->handle, -EINVAL, + "need non-zero length", errp); + } if (client->export_meta.valid && client->export_meta.base_allocation) { return nbd_co_send_block_status(client, request->handle, blk_bs(exp->blk), request->from, From dbb8b396bb46388cee92e9094c563297d04c43ed Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Sat, 9 Jun 2018 18:17:53 +0300 Subject: [PATCH 3/9] nbd/server: fix trace Return code = 1 doesn't mean that we parsed base:allocation. Use correct traces in both -parsed and -skipped cases. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180609151758.17343-2-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: comment tweaks] Signed-off-by: Eric Blake --- nbd/server.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 493a926e06..942c016c2a 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -736,12 +736,16 @@ static int nbd_negotiate_send_meta_context(NBDClient *client, /* nbd_meta_base_query * - * Handle query to 'base' namespace. For now, only base:allocation context is - * available in it. 'len' is the amount of text remaining to be read from + * Handle queries to 'base' namespace. For now, only the base:allocation + * context is available. 'len' is the amount of text remaining to be read from * the current name, after the 'base:' portion has been stripped. 
* * Return -errno on I/O error, 0 if option was completely handled by - * sending a reply about inconsistent lengths, or 1 on success. */ + * sending a reply about inconsistent lengths, or 1 on success. + * + * Note: return code = 1 doesn't mean that we've parsed the "base:allocation" + * namespace. It only means that there are no errors. + */ static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, uint32_t len, Error **errp) { @@ -768,10 +772,12 @@ static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, } if (strncmp(query, "allocation", alen) == 0) { + trace_nbd_negotiate_meta_query_parse("base:allocation"); meta->base_allocation = true; + } else { + trace_nbd_negotiate_meta_query_skip("not base:allocation"); } - trace_nbd_negotiate_meta_query_parse("base:allocation"); return 1; } From af736e546717d832168dd332a328bfcf74a0ab3d Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Sat, 9 Jun 2018 18:17:54 +0300 Subject: [PATCH 4/9] nbd/server: refactor NBDExportMetaContexts Use NBDExport pointer instead of just export name: there is no need to store a duplicated name in the struct; moreover, NBDExport will be used further. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180609151758.17343-3-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: commit message grammar tweak] Signed-off-by: Eric Blake --- nbd/server.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 942c016c2a..26cc41ad75 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -88,7 +88,7 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); * as selected by NBD_OPT_SET_META_CONTEXT. Also used for * NBD_OPT_LIST_META_CONTEXT. 
*/ typedef struct NBDExportMetaContexts { - char export_name[NBD_MAX_NAME_SIZE + 1]; + NBDExport *exp; bool valid; /* means that negotiation of the option finished without errors */ bool base_allocation; /* export base:allocation context (block status) */ @@ -399,10 +399,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, Error **errp) return nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); } -static void nbd_check_meta_export_name(NBDClient *client) +static void nbd_check_meta_export(NBDClient *client) { - client->export_meta.valid &= !strcmp(client->exp->name, - client->export_meta.export_name); + client->export_meta.valid &= client->exp == client->export_meta.exp; } /* Send a reply to NBD_OPT_EXPORT_NAME. @@ -456,7 +455,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client, QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); - nbd_check_meta_export_name(client); + nbd_check_meta_export(client); return 0; } @@ -650,7 +649,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags, client->exp = exp; QTAILQ_INSERT_TAIL(&client->exp->clients, client, next); nbd_export_get(client->exp); - nbd_check_meta_export_name(client); + nbd_check_meta_export(client); rc = 1; } return rc; @@ -835,7 +834,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client, NBDExportMetaContexts *meta, Error **errp) { int ret; - NBDExport *exp; + char export_name[NBD_MAX_NAME_SIZE + 1]; NBDExportMetaContexts local_meta; uint32_t nb_queries; int i; @@ -854,15 +853,15 @@ static int nbd_negotiate_meta_queries(NBDClient *client, memset(meta, 0, sizeof(*meta)); - ret = nbd_opt_read_name(client, meta->export_name, NULL, errp); + ret = nbd_opt_read_name(client, export_name, NULL, errp); if (ret <= 0) { return ret; } - exp = nbd_export_find(meta->export_name); - if (exp == NULL) { + meta->exp = nbd_export_find(export_name); + if (meta->exp == NULL) { return nbd_opt_drop(client, NBD_REP_ERR_UNKNOWN, errp, - "export '%s' 
not present", meta->export_name); + "export '%s' not present", export_name); } ret = nbd_opt_read(client, &nb_queries, sizeof(nb_queries), errp); @@ -871,7 +870,7 @@ static int nbd_negotiate_meta_queries(NBDClient *client, } cpu_to_be32s(&nb_queries); trace_nbd_negotiate_meta_context(nbd_opt_lookup(client->opt), - meta->export_name, nb_queries); + export_name, nb_queries); if (client->opt == NBD_OPT_LIST_META_CONTEXT && !nb_queries) { /* enable all known contexts */ From b0769d8f8df0b51881f1f15c9e29722cf6191a43 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 19 Jun 2018 16:55:09 -0500 Subject: [PATCH 5/9] nbd/server: add nbd_meta_empty_or_pattern helper Add nbd_meta_pattern() and nbd_meta_empty_or_pattern() helpers for metadata query parsing. nbd_meta_pattern() will be reused for the "qemu" namespace in following patches. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180609151758.17343-4-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: comment tweaks] Signed-off-by: Eric Blake --- nbd/server.c | 101 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 33 deletions(-) diff --git a/nbd/server.c b/nbd/server.c index 26cc41ad75..9171cd4168 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -733,6 +733,71 @@ static int nbd_negotiate_send_meta_context(NBDClient *client, return qio_channel_writev_all(client->ioc, iov, 2, errp) < 0 ? -EIO : 0; } +/* Read strlen(@pattern) bytes, and set @match to true if they match @pattern. + * @match is never set to false. + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. + * + * Note: return code = 1 doesn't mean that we've read exactly @pattern. + * It only means that there are no errors. 
+ */ +static int nbd_meta_pattern(NBDClient *client, const char *pattern, bool *match, + Error **errp) +{ + int ret; + char *query; + size_t len = strlen(pattern); + + assert(len); + + query = g_malloc(len); + ret = nbd_opt_read(client, query, len, errp); + if (ret <= 0) { + g_free(query); + return ret; + } + + if (strncmp(query, pattern, len) == 0) { + trace_nbd_negotiate_meta_query_parse(pattern); + *match = true; + } else { + trace_nbd_negotiate_meta_query_skip("pattern not matched"); + } + g_free(query); + + return 1; +} + +/* + * Read @len bytes, and set @match to true if they match @pattern, or if @len + * is 0 and the client is performing _LIST_. @match is never set to false. + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. + * + * Note: return code = 1 doesn't mean that we've read exactly @pattern. + * It only means that there are no errors. + */ +static int nbd_meta_empty_or_pattern(NBDClient *client, const char *pattern, + uint32_t len, bool *match, Error **errp) +{ + if (len == 0) { + if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + *match = true; + } + trace_nbd_negotiate_meta_query_parse("empty"); + return 1; + } + + if (len != strlen(pattern)) { + trace_nbd_negotiate_meta_query_skip("different lengths"); + return nbd_opt_skip(client, len, errp); + } + + return nbd_meta_pattern(client, pattern, match, errp); +} + /* nbd_meta_base_query * * Handle queries to 'base' namespace. For now, only the base:allocation @@ -741,43 +806,12 @@ static int nbd_negotiate_send_meta_context(NBDClient *client, * * Return -errno on I/O error, 0 if option was completely handled by * sending a reply about inconsistent lengths, or 1 on success. - * - * Note: return code = 1 doesn't mean that we've parsed the "base:allocation" - * namespace. It only means that there are no errors. 
*/ static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, uint32_t len, Error **errp) { - int ret; - char query[sizeof("allocation") - 1]; - size_t alen = strlen("allocation"); - - if (len == 0) { - if (client->opt == NBD_OPT_LIST_META_CONTEXT) { - meta->base_allocation = true; - } - trace_nbd_negotiate_meta_query_parse("base:"); - return 1; - } - - if (len != alen) { - trace_nbd_negotiate_meta_query_skip("not base:allocation"); - return nbd_opt_skip(client, len, errp); - } - - ret = nbd_opt_read(client, query, len, errp); - if (ret <= 0) { - return ret; - } - - if (strncmp(query, "allocation", alen) == 0) { - trace_nbd_negotiate_meta_query_parse("base:allocation"); - meta->base_allocation = true; - } else { - trace_nbd_negotiate_meta_query_skip("not base:allocation"); - } - - return 1; + return nbd_meta_empty_or_pattern(client, "allocation", len, + &meta->base_allocation, errp); } /* nbd_negotiate_meta_query @@ -823,6 +857,7 @@ static int nbd_negotiate_meta_query(NBDClient *client, return nbd_opt_skip(client, len, errp); } + trace_nbd_negotiate_meta_query_parse("base:"); return nbd_meta_base_query(client, meta, len, errp); } From 3d068aff16d6dbf066328977c5152847a62f2a0a Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Sat, 9 Jun 2018 18:17:56 +0300 Subject: [PATCH 6/9] nbd/server: implement dirty bitmap export Handle a new NBD meta namespace: "qemu", and corresponding queries: "qemu:dirty-bitmap:". With the new metadata context negotiated, BLOCK_STATUS query will reply with dirty-bitmap data, converted to extents. The new public function nbd_export_bitmap selects which bitmap to export. For now, only one bitmap may be exported. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180609151758.17343-5-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: wording tweaks, minor cleanups, additional tracing] Signed-off-by: Eric Blake --- include/block/nbd.h | 8 +- nbd/server.c | 278 ++++++++++++++++++++++++++++++++++++++++---- nbd/trace-events | 1 + 3 files changed, 262 insertions(+), 25 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index fcdcd54502..8bb9606c39 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -229,11 +229,13 @@ enum { #define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1) #define NBD_REPLY_TYPE_ERROR_OFFSET NBD_REPLY_ERR(2) -/* Flags for extents (NBDExtent.flags) of NBD_REPLY_TYPE_BLOCK_STATUS, - * for base:allocation meta context */ +/* Extent flags for base:allocation in NBD_REPLY_TYPE_BLOCK_STATUS */ #define NBD_STATE_HOLE (1 << 0) #define NBD_STATE_ZERO (1 << 1) +/* Extent flags for qemu:dirty-bitmap in NBD_REPLY_TYPE_BLOCK_STATUS */ +#define NBD_STATE_DIRTY (1 << 0) + static inline bool nbd_reply_type_is_error(int type) { return type & (1 << 15); @@ -315,6 +317,8 @@ void nbd_client_put(NBDClient *client); void nbd_server_start(SocketAddress *addr, const char *tls_creds, Error **errp); +void nbd_export_bitmap(NBDExport *exp, const char *bitmap, + const char *bitmap_export_name, Error **errp); /* nbd_read * Reads @size bytes from @ioc. Returns 0 on success. diff --git a/nbd/server.c b/nbd/server.c index 9171cd4168..2c2d62c636 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -23,6 +23,13 @@ #include "nbd-internal.h" #define NBD_META_ID_BASE_ALLOCATION 0 +#define NBD_META_ID_DIRTY_BITMAP 1 + +/* NBD_MAX_BITMAP_EXTENTS: 1 mb of extents data. An empirical + * constant. If an increase is needed, note that the NBD protocol + * recommends no larger than 32 mb, so that the client won't consider + * the reply as a denial of service attack. 
*/ +#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8) static int system_errno_to_nbd_errno(int err) { @@ -80,6 +87,9 @@ struct NBDExport { BlockBackend *eject_notifier_blk; Notifier eject_notifier; + + BdrvDirtyBitmap *export_bitmap; + char *export_bitmap_context; }; static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports); @@ -92,6 +102,7 @@ typedef struct NBDExportMetaContexts { bool valid; /* means that negotiation of the option finished without errors */ bool base_allocation; /* export base:allocation context (block status) */ + bool bitmap; /* export qemu:dirty-bitmap: */ } NBDExportMetaContexts; struct NBDClient { @@ -814,6 +825,56 @@ static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, &meta->base_allocation, errp); } +/* nbd_meta_bitmap_query + * + * Handle query to 'qemu:' namespace. + * @len is the amount of text remaining to be read from the current name, after + * the 'qemu:' portion has been stripped. + * + * Return -errno on I/O error, 0 if option was completely handled by + * sending a reply about inconsistent lengths, or 1 on success. 
*/ +static int nbd_meta_qemu_query(NBDClient *client, NBDExportMetaContexts *meta, + uint32_t len, Error **errp) +{ + bool dirty_bitmap = false; + size_t dirty_bitmap_len = strlen("dirty-bitmap:"); + int ret; + + if (!meta->exp->export_bitmap) { + trace_nbd_negotiate_meta_query_skip("no dirty-bitmap exported"); + return nbd_opt_skip(client, len, errp); + } + + if (len == 0) { + if (client->opt == NBD_OPT_LIST_META_CONTEXT) { + meta->bitmap = true; + } + trace_nbd_negotiate_meta_query_parse("empty"); + return 1; + } + + if (len < dirty_bitmap_len) { + trace_nbd_negotiate_meta_query_skip("not dirty-bitmap:"); + return nbd_opt_skip(client, len, errp); + } + + len -= dirty_bitmap_len; + ret = nbd_meta_pattern(client, "dirty-bitmap:", &dirty_bitmap, errp); + if (ret <= 0) { + return ret; + } + if (!dirty_bitmap) { + trace_nbd_negotiate_meta_query_skip("not dirty-bitmap:"); + return nbd_opt_skip(client, len, errp); + } + + trace_nbd_negotiate_meta_query_parse("dirty-bitmap:"); + + return nbd_meta_empty_or_pattern( + client, meta->exp->export_bitmap_context + + strlen("qemu:dirty_bitmap:"), len, &meta->bitmap, errp); +} + /* nbd_negotiate_meta_query * * Parse namespace name and call corresponding function to parse body of the @@ -829,9 +890,14 @@ static int nbd_meta_base_query(NBDClient *client, NBDExportMetaContexts *meta, static int nbd_negotiate_meta_query(NBDClient *client, NBDExportMetaContexts *meta, Error **errp) { + /* + * Both 'qemu' and 'base' namespaces have length = 5 including a + * colon. If another length namespace is later introduced, this + * should certainly be refactored. + */ int ret; - char query[sizeof("base:") - 1]; - size_t baselen = strlen("base:"); + size_t ns_len = 5; + char ns[5]; uint32_t len; ret = nbd_opt_read(client, &len, sizeof(len), errp); @@ -840,25 +906,27 @@ static int nbd_negotiate_meta_query(NBDClient *client, } cpu_to_be32s(&len); - /* The only supported namespace for now is 'base'. So query should start - * with 'base:'. 
Otherwise, we can ignore it and skip the remainder. */ - if (len < baselen) { + if (len < ns_len) { trace_nbd_negotiate_meta_query_skip("length too short"); return nbd_opt_skip(client, len, errp); } - len -= baselen; - ret = nbd_opt_read(client, query, baselen, errp); + len -= ns_len; + ret = nbd_opt_read(client, ns, ns_len, errp); if (ret <= 0) { return ret; } - if (strncmp(query, "base:", baselen) != 0) { - trace_nbd_negotiate_meta_query_skip("not for base: namespace"); - return nbd_opt_skip(client, len, errp); + + if (!strncmp(ns, "base:", ns_len)) { + trace_nbd_negotiate_meta_query_parse("base:"); + return nbd_meta_base_query(client, meta, len, errp); + } else if (!strncmp(ns, "qemu:", ns_len)) { + trace_nbd_negotiate_meta_query_parse("qemu:"); + return nbd_meta_qemu_query(client, meta, len, errp); } - trace_nbd_negotiate_meta_query_parse("base:"); - return nbd_meta_base_query(client, meta, len, errp); + trace_nbd_negotiate_meta_query_skip("unknown namespace"); + return nbd_opt_skip(client, len, errp); } /* nbd_negotiate_meta_queries @@ -928,6 +996,16 @@ static int nbd_negotiate_meta_queries(NBDClient *client, } } + if (meta->bitmap) { + ret = nbd_negotiate_send_meta_context(client, + meta->exp->export_bitmap_context, + NBD_META_ID_DIRTY_BITMAP, + errp); + if (ret < 0) { + return ret; + } + } + ret = nbd_negotiate_send_rep(client, NBD_REP_ACK, errp); if (ret == 0) { meta->valid = true; @@ -1556,6 +1634,11 @@ void nbd_export_put(NBDExport *exp) exp->blk = NULL; } + if (exp->export_bitmap) { + bdrv_dirty_bitmap_set_qmp_locked(exp->export_bitmap, false); + g_free(exp->export_bitmap_context); + } + g_free(exp); } } @@ -1797,9 +1880,15 @@ static int blockstatus_to_extent_be(BlockDriverState *bs, uint64_t offset, } /* nbd_co_send_extents - * @extents should be in big-endian */ + * + * @length is only for tracing purposes (and may be smaller or larger + * than the client's original request). @last controls whether + * NBD_REPLY_FLAG_DONE is sent. 
@extents should already be in + * big-endian format. + */ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, - NBDExtent *extents, unsigned nb_extents, + NBDExtent *extents, unsigned int nb_extents, + uint64_t length, bool last, uint32_t context_id, Error **errp) { NBDStructuredMeta chunk; @@ -1809,7 +1898,9 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, {.iov_base = extents, .iov_len = nb_extents * sizeof(extents[0])} }; - set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS, + trace_nbd_co_send_extents(handle, nb_extents, context_id, length, last); + set_be_chunk(&chunk.h, last ? NBD_REPLY_FLAG_DONE : 0, + NBD_REPLY_TYPE_BLOCK_STATUS, handle, sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len); stl_be_p(&chunk.context_id, context_id); @@ -1819,8 +1910,8 @@ static int nbd_co_send_extents(NBDClient *client, uint64_t handle, /* Get block status from the exported device and send it to the client */ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, BlockDriverState *bs, uint64_t offset, - uint64_t length, uint32_t context_id, - Error **errp) + uint64_t length, bool last, + uint32_t context_id, Error **errp) { int ret; NBDExtent extent; @@ -1831,7 +1922,84 @@ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle, client, handle, -ret, "can't get block status", errp); } - return nbd_co_send_extents(client, handle, &extent, 1, context_id, errp); + return nbd_co_send_extents(client, handle, &extent, 1, length, last, + context_id, errp); +} + +/* + * Populate @extents from a dirty bitmap. Unless @dont_fragment, the + * final extent may exceed the original @length. Store in @length the + * byte length encoded (which may be smaller or larger than the + * original), and return the number of extents used. 
+ */ +static unsigned int bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset, + uint64_t *length, NBDExtent *extents, + unsigned int nb_extents, + bool dont_fragment) +{ + uint64_t begin = offset, end; + uint64_t overall_end = offset + *length; + unsigned int i = 0; + BdrvDirtyBitmapIter *it; + bool dirty; + + bdrv_dirty_bitmap_lock(bitmap); + + it = bdrv_dirty_iter_new(bitmap); + dirty = bdrv_get_dirty_locked(NULL, bitmap, offset); + + assert(begin < overall_end && nb_extents); + while (begin < overall_end && i < nb_extents) { + if (dirty) { + end = bdrv_dirty_bitmap_next_zero(bitmap, begin); + } else { + bdrv_set_dirty_iter(it, begin); + end = bdrv_dirty_iter_next(it); + } + if (end == -1 || end - begin > UINT32_MAX) { + /* Cap to an aligned value < 4G beyond begin. */ + end = MIN(bdrv_dirty_bitmap_size(bitmap), + begin + UINT32_MAX + 1 - + bdrv_dirty_bitmap_granularity(bitmap)); + } + if (dont_fragment && end > overall_end) { + end = overall_end; + } + + extents[i].length = cpu_to_be32(end - begin); + extents[i].flags = cpu_to_be32(dirty ? NBD_STATE_DIRTY : 0); + i++; + begin = end; + dirty = !dirty; + } + + bdrv_dirty_iter_free(it); + + bdrv_dirty_bitmap_unlock(bitmap); + + *length = end - offset; + return i; +} + +static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle, + BdrvDirtyBitmap *bitmap, uint64_t offset, + uint32_t length, bool dont_fragment, bool last, + uint32_t context_id, Error **errp) +{ + int ret; + unsigned int nb_extents = dont_fragment ? 
1 : NBD_MAX_BITMAP_EXTENTS; + NBDExtent *extents = g_new(NBDExtent, nb_extents); + uint64_t final_length = length; + + nb_extents = bitmap_to_extents(bitmap, offset, &final_length, extents, + nb_extents, dont_fragment); + + ret = nbd_co_send_extents(client, handle, extents, nb_extents, + final_length, last, context_id, errp); + + g_free(extents); + + return ret; } /* nbd_co_receive_request @@ -2051,11 +2219,34 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, return nbd_send_generic_reply(client, request->handle, -EINVAL, "need non-zero length", errp); } - if (client->export_meta.valid && client->export_meta.base_allocation) { - return nbd_co_send_block_status(client, request->handle, - blk_bs(exp->blk), request->from, - request->len, - NBD_META_ID_BASE_ALLOCATION, errp); + if (client->export_meta.valid && + (client->export_meta.base_allocation || + client->export_meta.bitmap)) + { + if (client->export_meta.base_allocation) { + ret = nbd_co_send_block_status(client, request->handle, + blk_bs(exp->blk), request->from, + request->len, + !client->export_meta.bitmap, + NBD_META_ID_BASE_ALLOCATION, + errp); + if (ret < 0) { + return ret; + } + } + + if (client->export_meta.bitmap) { + ret = nbd_co_send_bitmap(client, request->handle, + client->exp->export_bitmap, + request->from, request->len, + request->flags & NBD_CMD_FLAG_REQ_ONE, + true, NBD_META_ID_DIRTY_BITMAP, errp); + if (ret < 0) { + return ret; + } + } + + return ret; } else { return nbd_send_generic_reply(client, request->handle, -EINVAL, "CMD_BLOCK_STATUS not negotiated", @@ -2207,3 +2398,44 @@ void nbd_client_new(NBDExport *exp, co = qemu_coroutine_create(nbd_co_client_start, client); qemu_coroutine_enter(co); } + +void nbd_export_bitmap(NBDExport *exp, const char *bitmap, + const char *bitmap_export_name, Error **errp) +{ + BdrvDirtyBitmap *bm = NULL; + BlockDriverState *bs = blk_bs(exp->blk); + + if (exp->export_bitmap) { + error_setg(errp, "Export bitmap is already set"); + return; + } + + 
while (true) { + bm = bdrv_find_dirty_bitmap(bs, bitmap); + if (bm != NULL || bs->backing == NULL) { + break; + } + + bs = bs->backing->bs; + } + + if (bm == NULL) { + error_setg(errp, "Bitmap '%s' is not found", bitmap); + return; + } + + if (bdrv_dirty_bitmap_enabled(bm)) { + error_setg(errp, "Bitmap '%s' is enabled", bitmap); + return; + } + + if (bdrv_dirty_bitmap_qmp_locked(bm)) { + error_setg(errp, "Bitmap '%s' is locked", bitmap); + return; + } + + bdrv_dirty_bitmap_set_qmp_locked(bm, true); + exp->export_bitmap = bm; + exp->export_bitmap_context = + g_strdup_printf("qemu:dirty-bitmap:%s", bitmap_export_name); +} diff --git a/nbd/trace-events b/nbd/trace-events index dee081e775..5e1d4afe8e 100644 --- a/nbd/trace-events +++ b/nbd/trace-events @@ -64,6 +64,7 @@ nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, i nbd_co_send_structured_done(uint64_t handle) "Send structured reply done: handle = %" PRIu64 nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = %" PRIu64 ", data = %p, len = %zu" nbd_co_send_structured_read_hole(uint64_t handle, uint64_t offset, size_t size) "Send structured read hole reply: handle = %" PRIu64 ", offset = %" PRIu64 ", len = %zu" +nbd_co_send_extents(uint64_t handle, unsigned int extents, uint32_t id, uint64_t length, int last) "Send block status reply: handle = %" PRIu64 ", extents = %u, context = %d (extents cover %" PRIu64 " bytes, last chunk = %d)" nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d (%s), msg = '%s'" nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char *name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)" nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) "Payload received: handle = %" PRIu64 ", len = %" PRIu32 From 
767f0c7d6cddedbc97ad700bd1e0229cc2ce5eb5 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Sat, 9 Jun 2018 18:17:57 +0300 Subject: [PATCH 7/9] qapi: new qmp command nbd-server-add-bitmap For now, the actual command is x-nbd-server-add-bitmap, reflecting the fact that we are still working on libvirt code that proves the command works as needed, and also the fact that we may remove bitmap-export-name (and just require that the exported name be the bitmap name). Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180609151758.17343-6-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: make the command experimental by adding x- prefix] Signed-off-by: Eric Blake --- blockdev-nbd.c | 23 +++++++++++++++++++++++ qapi/block.json | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/blockdev-nbd.c b/blockdev-nbd.c index 65a84739ed..1ef11041a7 100644 --- a/blockdev-nbd.c +++ b/blockdev-nbd.c @@ -220,3 +220,26 @@ void qmp_nbd_server_stop(Error **errp) nbd_server_free(nbd_server); nbd_server = NULL; } + +void qmp_x_nbd_server_add_bitmap(const char *name, const char *bitmap, + bool has_bitmap_export_name, + const char *bitmap_export_name, + Error **errp) +{ + NBDExport *exp; + + if (!nbd_server) { + error_setg(errp, "NBD server not running"); + return; + } + + exp = nbd_export_find(name); + if (exp == NULL) { + error_setg(errp, "Export '%s' is not found", name); + return; + } + + nbd_export_bitmap(exp, bitmap, + has_bitmap_export_name ? bitmap_export_name : bitmap, + errp); +} diff --git a/qapi/block.json b/qapi/block.json index c694524002..ca807f176a 100644 --- a/qapi/block.json +++ b/qapi/block.json @@ -268,6 +268,29 @@ { 'command': 'nbd-server-remove', 'data': {'name': 'str', '*mode': 'NbdServerRemoveMode'} } +## +# @x-nbd-server-add-bitmap: +# +# Expose a dirty bitmap associated with the selected export. The bitmap search +# starts at the device attached to the export, and includes all backing files. 
+# The exported bitmap is then locked until the NBD export is removed. +# +# @name: Export name. +# +# @bitmap: Bitmap name to search for. +# +# @bitmap-export-name: How the bitmap will be seen by nbd clients +# (default @bitmap) +# +# Note: the client must use NBD_OPT_SET_META_CONTEXT with a query of +# "qemu:dirty-bitmap:NAME" (where NAME matches @bitmap-export-name) to access +# the exposed bitmap. +# +# Since: 3.0 +## + { 'command': 'x-nbd-server-add-bitmap', + 'data': {'name': 'str', 'bitmap': 'str', '*bitmap-export-name': 'str'} } + ## # @nbd-server-stop: # From 3229a835a3c574a8ebc605e007785c4e01c61623 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Sat, 9 Jun 2018 18:17:58 +0300 Subject: [PATCH 8/9] docs/interop: add nbd.txt Describe new metadata namespace: "qemu". Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180609151758.17343-7-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: grammar tweaks] Signed-off-by: Eric Blake --- MAINTAINERS | 1 + docs/interop/nbd.txt | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 docs/interop/nbd.txt diff --git a/MAINTAINERS b/MAINTAINERS index da91501c7a..efb17e6ac0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1972,6 +1972,7 @@ F: nbd/ F: include/block/nbd* F: qemu-nbd.* F: blockdev-nbd.c +F: docs/interop/nbd.txt T: git git://repo.or.cz/qemu/ericb.git nbd NFS diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt new file mode 100644 index 0000000000..77b5f45911 --- /dev/null +++ b/docs/interop/nbd.txt @@ -0,0 +1,38 @@ +Qemu supports the NBD protocol, and has an internal NBD client (see +block/nbd.c), an internal NBD server (see blockdev-nbd.c), and an +external NBD server tool (see qemu-nbd.c). The common code is placed +in nbd/*. + +The NBD protocol is specified here: +https://github.com/NetworkBlockDevice/nbd/blob/master/doc/proto.md + +The following paragraphs describe some specific properties of NBD +protocol realization in Qemu. 
+ += Metadata namespaces = + +Qemu supports the "base:allocation" metadata context as defined in the +NBD protocol specification, and also defines an additional metadata +namespace "qemu". + + +== "qemu" namespace == + +The "qemu" namespace currently contains only one type of context, +related to exposing the contents of a dirty bitmap alongside the +associated disk contents. That context has the following form: + + qemu:dirty-bitmap:<dirty-bitmap-export-name> + +Each dirty-bitmap metadata context defines only one flag for extents +in reply for NBD_CMD_BLOCK_STATUS: + + bit 0: NBD_STATE_DIRTY, means that the extent is "dirty" + +For NBD_OPT_LIST_META_CONTEXT the following queries are supported +in addition to "qemu:dirty-bitmap:<dirty-bitmap-export-name>": + +* "qemu:" - returns list of all available metadata contexts in the + namespace. +* "qemu:dirty-bitmap:" - returns list of all available dirty-bitmap + metadata contexts. From bc37b06a5cde24fb24c2a2cc44dd86756034ba9d Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Fri, 13 Apr 2018 17:31:56 +0300 Subject: [PATCH 9/9] nbd/server: introduce NBD_CMD_CACHE Handle nbd CACHE command. Just do read, without sending read data back. Cache mechanism should be done by exported node driver chain. 
Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20180413143156.11409-1-vsementsov@virtuozzo.com> Reviewed-by: Eric Blake [eblake: fix two missing case labels in switch statements] Signed-off-by: Eric Blake --- include/block/nbd.h | 3 ++- nbd/common.c | 2 ++ nbd/server.c | 11 +++++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/block/nbd.h b/include/block/nbd.h index 8bb9606c39..daaeae61bf 100644 --- a/include/block/nbd.h +++ b/include/block/nbd.h @@ -135,6 +135,7 @@ typedef struct NBDExtent { #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ #define NBD_FLAG_SEND_DF (1 << 7) /* Send DF (Do not Fragment) */ +#define NBD_FLAG_SEND_CACHE (1 << 8) /* Send CACHE (prefetch) */ /* New-style handshake (global) flags, sent from server to client, and control what will happen during handshake phase. */ @@ -195,7 +196,7 @@ enum { NBD_CMD_DISC = 2, NBD_CMD_FLUSH = 3, NBD_CMD_TRIM = 4, - /* 5 reserved for failed experiment NBD_CMD_CACHE */ + NBD_CMD_CACHE = 5, NBD_CMD_WRITE_ZEROES = 6, NBD_CMD_BLOCK_STATUS = 7, }; diff --git a/nbd/common.c b/nbd/common.c index 8c95c1d606..41f5ed8d9f 100644 --- a/nbd/common.c +++ b/nbd/common.c @@ -148,6 +148,8 @@ const char *nbd_cmd_lookup(uint16_t cmd) return "flush"; case NBD_CMD_TRIM: return "trim"; + case NBD_CMD_CACHE: + return "cache"; case NBD_CMD_WRITE_ZEROES: return "write zeroes"; case NBD_CMD_BLOCK_STATUS: diff --git a/nbd/server.c b/nbd/server.c index 2c2d62c636..274604609f 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -1252,7 +1252,7 @@ static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp) int ret; const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | - NBD_FLAG_SEND_WRITE_ZEROES); + NBD_FLAG_SEND_WRITE_ZEROES | NBD_FLAG_SEND_CACHE); bool oldStyle; /* Old style negotiation header, no room for options @@ -2034,7 +2034,9 @@ static int 
nbd_co_receive_request(NBDRequestData *req, NBDRequest *request, return -EIO; } - if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) { + if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE || + request->type == NBD_CMD_CACHE) + { if (request->len > NBD_MAX_BUFFER_SIZE) { error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)", request->len, NBD_MAX_BUFFER_SIZE); @@ -2119,7 +2121,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, int ret; NBDExport *exp = client->exp; - assert(request->type == NBD_CMD_READ); + assert(request->type == NBD_CMD_READ || request->type == NBD_CMD_CACHE); /* XXX: NBD Protocol only documents use of FUA with WRITE */ if (request->flags & NBD_CMD_FLAG_FUA) { @@ -2138,7 +2140,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient *client, NBDRequest *request, ret = blk_pread(exp->blk, request->from + exp->dev_offset, data, request->len); - if (ret < 0) { + if (ret < 0 || request->type == NBD_CMD_CACHE) { return nbd_send_generic_reply(client, request->handle, ret, "reading from file failed", errp); } @@ -2171,6 +2173,7 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, switch (request->type) { case NBD_CMD_READ: + case NBD_CMD_CACHE: return nbd_do_cmd_read(client, request, data, errp); case NBD_CMD_WRITE: