* NBD bugfix (Changlong)

* NBD write zeroes support (Eric)
 * Memory backend fixes (Haozhong)
 * Atomics fix (Alex)
 * New AVX512 features (Luwei)
 * "make check" logging fix (Paolo)
 * Chardev refactoring fallout (Paolo)
 * Small checkpatch improvements (Paolo, Jeff)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2
 
 iQExBAABCAAbBQJYGaRPFBxwYm9uemluaUByZWRoYXQuY29tAAoJEL/70l94x66D
 XKgH/RgNtosBTqJsmphkS7wACFAFOf7Uq46ajoKfB66Pt1J/++pFQg4TApPYkb7j
 KlKeKmXa7hb6+Jg8325H4zGkGno4kn2dE+OnznaB1xPKwiZVAMQVzQsagsEVqpno
 k/5PBVRptIiuHQKyU29Go0CxbWJBTH0O14S7rDK4YDF0YMnuT280HQOI3jdu1igV
 G/Q+CMgfk+yXf6GWHE8Z9sNq7n0ha8qgruA/X3NC7+pAvEsUcAP065zwLp9weYuK
 W1MU68L7Ub4tRo0SVf1HFkDUNdMv4T4hg+wpGe1GwthJWexHu9x0YAQBy60ykJb6
 NtHwjLwCUWtm7AiZD/btsOJPmjk=
 =+Dt/
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

* NBD bugfix (Changlong)
* NBD write zeroes support (Eric)
* Memory backend fixes (Haozhong)
* Atomics fix (Alex)
* New AVX512 features (Luwei)
* "make check" logging fix (Paolo)
* Chardev refactoring fallout (Paolo)
* Small checkpatch improvements (Paolo, Jeff)

# gpg: Signature made Wed 02 Nov 2016 08:31:11 AM GMT
# gpg:                using RSA key 0xBFFBD25F78C7AE83
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>"
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>"
# Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4  E2F7 7E15 100C CD36 69B1
#      Subkey fingerprint: F133 3857 4B66 2389 866C  7682 BFFB D25F 78C7 AE83

* remotes/bonzini/tags/for-upstream: (30 commits)
  main-loop: Suppress I/O thread warning under qtest
  docs/rcu.txt: Fix minor typo
  vl: exit qemu on guest panic if -no-shutdown is not set
  checkpatch: allow spaces before parenthesis for 'coroutine_fn'
  x86: add AVX512_4VNNIW and AVX512_4FMAPS features
  slirp: fix CharDriver breakage
  qemu-char: do not forward events through the mux until QEMU has started
  nbd: Implement NBD_CMD_WRITE_ZEROES on client
  nbd: Implement NBD_CMD_WRITE_ZEROES on server
  nbd: Improve server handling of shutdown requests
  nbd: Refactor conversion to errno to silence checkpatch
  nbd: Support shorter handshake
  nbd: Less allocation during NBD_OPT_LIST
  nbd: Let client skip portions of server reply
  nbd: Let server know when client gives up negotiation
  nbd: Share common option-sending code in client
  nbd: Send message along with server NBD_REP_ERR errors
  nbd: Share common reply-sending code in server
  nbd: Rename struct nbd_request and nbd_reply
  nbd: Rename NbdClientSession to NBDClientSession
  ...

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Stefan Hajnoczi 2016-11-03 16:32:30 +00:00
commit 199a5bde46
27 changed files with 767 additions and 439 deletions

View file

@ -1,6 +1,7 @@
/*
* QEMU Block driver for NBD
*
* Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2008 Bull S.A.S.
* Author: Laurent Vivier <Laurent.Vivier@bull.net>
*
@ -32,7 +33,7 @@
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
{
int i;
@ -45,7 +46,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
static void nbd_teardown_connection(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
NBDClientSession *client = nbd_get_client_session(bs);
if (!client->ioc) { /* Already closed */
return;
@ -67,7 +68,7 @@ static void nbd_teardown_connection(BlockDriverState *bs)
static void nbd_reply_ready(void *opaque)
{
BlockDriverState *bs = opaque;
NbdClientSession *s = nbd_get_client_session(bs);
NBDClientSession *s = nbd_get_client_session(bs);
uint64_t i;
int ret;
@ -115,10 +116,10 @@ static void nbd_restart_write(void *opaque)
}
static int nbd_co_send_request(BlockDriverState *bs,
struct nbd_request *request,
NBDRequest *request,
QEMUIOVector *qiov)
{
NbdClientSession *s = nbd_get_client_session(bs);
NBDClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
int rc, ret, i;
@ -166,9 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
return rc;
}
static void nbd_co_receive_reply(NbdClientSession *s,
struct nbd_request *request,
struct nbd_reply *reply,
static void nbd_co_receive_reply(NBDClientSession *s,
NBDRequest *request,
NBDReply *reply,
QEMUIOVector *qiov)
{
int ret;
@ -194,13 +195,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
}
}
static void nbd_coroutine_start(NbdClientSession *s,
struct nbd_request *request)
static void nbd_coroutine_start(NBDClientSession *s,
NBDRequest *request)
{
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
qemu_co_mutex_lock(&s->free_sema);
if (s->in_flight == MAX_NBD_REQUESTS) {
qemu_co_queue_wait(&s->free_sema);
assert(s->in_flight < MAX_NBD_REQUESTS);
}
s->in_flight++;
@ -208,26 +209,26 @@ static void nbd_coroutine_start(NbdClientSession *s,
/* s->recv_coroutine[i] is set as soon as we get the send_lock. */
}
static void nbd_coroutine_end(NbdClientSession *s,
struct nbd_request *request)
static void nbd_coroutine_end(NBDClientSession *s,
NBDRequest *request)
{
int i = HANDLE_TO_INDEX(s, request->handle);
s->recv_coroutine[i] = NULL;
if (s->in_flight-- == MAX_NBD_REQUESTS) {
qemu_co_mutex_unlock(&s->free_sema);
qemu_co_queue_next(&s->free_sema);
}
}
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = {
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_READ,
.from = offset,
.len = bytes,
};
struct nbd_reply reply;
NBDReply reply;
ssize_t ret;
assert(bytes <= NBD_MAX_BUFFER_SIZE);
@ -247,18 +248,18 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = {
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_WRITE,
.from = offset,
.len = bytes,
};
struct nbd_reply reply;
NBDReply reply;
ssize_t ret;
if (flags & BDRV_REQ_FUA) {
assert(client->nbdflags & NBD_FLAG_SEND_FUA);
request.type |= NBD_CMD_FLAG_FUA;
request.flags |= NBD_CMD_FLAG_FUA;
}
assert(bytes <= NBD_MAX_BUFFER_SIZE);
@ -274,11 +275,46 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
return -reply.error;
}
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags)
{
ssize_t ret;
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_WRITE_ZEROES,
.from = offset,
.len = count,
};
NBDReply reply;
if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) {
return -ENOTSUP;
}
if (flags & BDRV_REQ_FUA) {
assert(client->nbdflags & NBD_FLAG_SEND_FUA);
request.flags |= NBD_CMD_FLAG_FUA;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
request.flags |= NBD_CMD_FLAG_NO_HOLE;
}
nbd_coroutine_start(client, &request);
ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
int nbd_client_co_flush(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_FLUSH };
struct nbd_reply reply;
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = { .type = NBD_CMD_FLUSH };
NBDReply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
@ -301,13 +337,13 @@ int nbd_client_co_flush(BlockDriverState *bs)
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = {
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = {
.type = NBD_CMD_TRIM,
.from = offset,
.len = count,
};
struct nbd_reply reply;
NBDReply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
@ -342,12 +378,8 @@ void nbd_client_attach_aio_context(BlockDriverState *bs,
void nbd_client_close(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = {
.type = NBD_CMD_DISC,
.from = 0,
.len = 0
};
NBDClientSession *client = nbd_get_client_session(bs);
NBDRequest request = { .type = NBD_CMD_DISC };
if (client->ioc == NULL) {
return;
@ -365,7 +397,7 @@ int nbd_client_init(BlockDriverState *bs,
const char *hostname,
Error **errp)
{
NbdClientSession *client = nbd_get_client_session(bs);
NBDClientSession *client = nbd_get_client_session(bs);
int ret;
/* NBD handshake */
@ -386,7 +418,7 @@ int nbd_client_init(BlockDriverState *bs,
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
qemu_co_queue_init(&client->free_sema);
client->sioc = sioc;
object_ref(OBJECT(client->sioc));

View file

@ -17,24 +17,24 @@
#define MAX_NBD_REQUESTS 16
typedef struct NbdClientSession {
typedef struct NBDClientSession {
QIOChannelSocket *sioc; /* The master data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint16_t nbdflags;
off_t size;
CoMutex send_mutex;
CoMutex free_sema;
CoQueue free_sema;
Coroutine *send_coroutine;
int in_flight;
Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
struct nbd_reply reply;
NBDReply reply;
bool is_unix;
} NbdClientSession;
} NBDClientSession;
NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
NBDClientSession *nbd_get_client_session(BlockDriverState *bs);
int nbd_client_init(BlockDriverState *bs,
QIOChannelSocket *sock,
@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
int count, BdrvRequestFlags flags);
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
uint64_t bytes, QEMUIOVector *qiov, int flags);

View file

@ -44,7 +44,7 @@
#define EN_OPTSTR ":exportname="
typedef struct BDRVNBDState {
NbdClientSession client;
NBDClientSession client;
/* For nbd_refresh_filename() */
SocketAddress *saddr;
@ -294,7 +294,7 @@ done:
return saddr;
}
NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
NBDClientSession *nbd_get_client_session(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
return &s->client;
@ -466,6 +466,7 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE;
bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}
@ -558,6 +559,7 @@ static BlockDriver bdrv_nbd = {
.bdrv_file_open = nbd_open,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
@ -576,6 +578,7 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_file_open = nbd_open,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,
@ -594,6 +597,7 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_file_open = nbd_open,
.bdrv_co_preadv = nbd_client_co_preadv,
.bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_pdiscard = nbd_client_co_pdiscard,

View file

@ -145,7 +145,7 @@ The core RCU API is small:
and then read from there.
RCU read-side critical sections must use atomic_rcu_read() to
read data, unless concurrent writes are presented by another
read data, unless concurrent writes are prevented by another
synchronization mechanism.
Furthermore, RCU read-side critical sections should traverse the

33
exec.c
View file

@ -493,7 +493,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
hwaddr *xlat, hwaddr *plen)
{
MemoryRegionSection *section;
AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
section = address_space_translate_internal(d, addr, xlat, plen, false);
@ -1231,6 +1231,15 @@ void qemu_mutex_unlock_ramlist(void)
}
#ifdef __linux__
static int64_t get_file_size(int fd)
{
int64_t size = lseek(fd, 0, SEEK_END);
if (size < 0) {
return -errno;
}
return size;
}
static void *file_ram_alloc(RAMBlock *block,
ram_addr_t memory,
const char *path,
@ -1242,6 +1251,7 @@ static void *file_ram_alloc(RAMBlock *block,
char *c;
void *area = MAP_FAILED;
int fd = -1;
int64_t file_size;
if (kvm_enabled() && !kvm_has_sync_mmu()) {
error_setg(errp,
@ -1304,6 +1314,8 @@ static void *file_ram_alloc(RAMBlock *block,
}
#endif
file_size = get_file_size(fd);
if (memory < block->page_size) {
error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
"or larger than page size 0x%zx",
@ -1311,6 +1323,13 @@ static void *file_ram_alloc(RAMBlock *block,
goto error;
}
if (file_size > 0 && file_size < memory) {
error_setg(errp, "backing store %s size 0x%" PRIx64
" does not match 'size' option 0x" RAM_ADDR_FMT,
path, file_size, memory);
goto error;
}
memory = ROUND_UP(memory, block->page_size);
/*
@ -1318,8 +1337,16 @@ static void *file_ram_alloc(RAMBlock *block,
* hosts, so don't bother bailing out on errors.
* If anything goes wrong with it under other filesystems,
* mmap will fail.
*
* Do not truncate the non-empty backend file to avoid corrupting
* the existing data in the file. Disabling shrinking is not
* enough. For example, the current vNVDIMM implementation stores
* the guest NVDIMM labels at the end of the backend file. If the
* backend file is later extended, QEMU will not be able to find
* those labels. Therefore, extending the non-empty backend file
* is disabled as well.
*/
if (ftruncate(fd, memory)) {
if (!file_size && ftruncate(fd, memory)) {
perror("ftruncate");
}
@ -2378,7 +2405,7 @@ static void tcg_commit(MemoryListener *listener)
* may have split the RCU critical section.
*/
d = atomic_rcu_read(&cpuas->as->dispatch);
cpuas->memory_dispatch = d;
atomic_rcu_set(&cpuas->memory_dispatch, d);
tlb_flush(cpuas->cpu, 1);
}

View file

@ -1,4 +1,5 @@
/*
* Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device
@ -25,52 +26,89 @@
#include "io/channel-socket.h"
#include "crypto/tlscreds.h"
/* Note: these are _NOT_ the same as the network representation of an NBD
/* Handshake phase structs - this struct is passed on the wire */
struct nbd_option {
uint64_t magic; /* NBD_OPTS_MAGIC */
uint32_t option; /* NBD_OPT_* */
uint32_t length;
} QEMU_PACKED;
typedef struct nbd_option nbd_option;
struct nbd_opt_reply {
uint64_t magic; /* NBD_REP_MAGIC */
uint32_t option; /* NBD_OPT_* */
uint32_t type; /* NBD_REP_* */
uint32_t length;
} QEMU_PACKED;
typedef struct nbd_opt_reply nbd_opt_reply;
/* Transmission phase structs
*
* Note: these are _NOT_ the same as the network representation of an NBD
* request and reply!
*/
struct nbd_request {
struct NBDRequest {
uint64_t handle;
uint64_t from;
uint32_t len;
uint32_t type;
uint16_t flags; /* NBD_CMD_FLAG_* */
uint16_t type; /* NBD_CMD_* */
};
typedef struct NBDRequest NBDRequest;
struct nbd_reply {
struct NBDReply {
uint64_t handle;
uint32_t error;
};
typedef struct NBDReply NBDReply;
/* Transmission (export) flags: sent from server to client during handshake,
but describe what will happen during transmission */
#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */
#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */
#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */
#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */
#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
/* New-style global flags. */
#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
/* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
#define NBD_FLAG_NO_ZEROES (1 << 1) /* End handshake without zeroes. */
/* New-style client flags. */
#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
/* New-style client flags, sent from client to server to control what happens
during handshake phase. */
#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */
#define NBD_FLAG_C_NO_ZEROES (1 << 1) /* End handshake without zeroes. */
/* Reply types. */
#define NBD_REP_ERR(value) ((UINT32_C(1) << 31) | (value))
#define NBD_REP_ACK (1) /* Data sending finished. */
#define NBD_REP_SERVER (2) /* Export description. */
#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */
#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */
#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */
#define NBD_REP_ERR_UNSUP NBD_REP_ERR(1) /* Unknown option */
#define NBD_REP_ERR_POLICY NBD_REP_ERR(2) /* Server denied */
#define NBD_REP_ERR_INVALID NBD_REP_ERR(3) /* Invalid length */
#define NBD_REP_ERR_PLATFORM NBD_REP_ERR(4) /* Not compiled in */
#define NBD_REP_ERR_TLS_REQD NBD_REP_ERR(5) /* TLS required */
#define NBD_REP_ERR_SHUTDOWN NBD_REP_ERR(7) /* Server shutting down */
#define NBD_CMD_MASK_COMMAND 0x0000ffff
#define NBD_CMD_FLAG_FUA (1 << 16)
/* Request flags, sent from client to server during transmission phase */
#define NBD_CMD_FLAG_FUA (1 << 0) /* 'force unit access' during write */
#define NBD_CMD_FLAG_NO_HOLE (1 << 1) /* don't punch hole on zero run */
/* Supported request types */
enum {
NBD_CMD_READ = 0,
NBD_CMD_WRITE = 1,
NBD_CMD_DISC = 2,
NBD_CMD_FLUSH = 3,
NBD_CMD_TRIM = 4
NBD_CMD_TRIM = 4,
/* 5 reserved for failed experiment NBD_CMD_CACHE */
NBD_CMD_WRITE_ZEROES = 6,
};
#define NBD_DEFAULT_PORT 10809
@ -95,8 +133,8 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
QIOChannel **outioc,
off_t *size, Error **errp);
int nbd_init(int fd, QIOChannelSocket *sioc, uint16_t flags, off_t size);
ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request);
ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply);
ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request);
ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply);
int nbd_client(int fd);
int nbd_disconnect(int fd);
@ -115,6 +153,7 @@ BlockBackend *nbd_export_get_blockdev(NBDExport *exp);
NBDExport *nbd_export_find(const char *name);
void nbd_export_set_name(NBDExport *exp, const char *name);
void nbd_export_set_description(NBDExport *exp, const char *description);
void nbd_export_close_all(void);
void nbd_client_new(NBDExport *exp,

View file

@ -315,4 +315,17 @@ static inline void g_source_set_name_by_id(guint tag, const char *name)
}
#endif
#if !GLIB_CHECK_VERSION(2, 36, 0)
/* Always fail. This will not include error_report output in the test log,
* sending it instead to stderr.
*/
#define g_test_initialized() (0)
#endif
#if !GLIB_CHECK_VERSION(2, 38, 0)
#ifdef CONFIG_HAS_GLIB_SUBPROCESS_TESTS
#error schizophrenic detection of glib subprocess testing
#endif
#define g_test_subprocess() (0)
#endif
#endif

View file

@ -32,6 +32,7 @@ void loc_set_file(const char *fname, int lno);
void error_vprintf(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
void error_printf(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
void error_vprintf_unless_qmp(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);
void error_printf_unless_qmp(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
void error_set_progname(const char *argv0);
void error_vreport(const char *fmt, va_list ap) GCC_FMT_ATTR(1, 0);

View file

@ -128,6 +128,9 @@ extern int daemon(int, int);
#if !defined(EMEDIUMTYPE)
#define EMEDIUMTYPE 4098
#endif
#if !defined(ESHUTDOWN)
#define ESHUTDOWN 4099
#endif
#ifndef TIME_MAX
#define TIME_MAX LONG_MAX
#endif

View file

@ -234,7 +234,7 @@ static int os_host_main_loop_wait(int64_t timeout)
if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
static bool notified;
if (!notified && !qtest_driver()) {
if (!notified && !qtest_enabled() && !qtest_driver()) {
fprintf(stderr,
"main-loop: WARNING: I/O thread spun for %d iterations\n",
MAX_MAIN_LOOP_SPIN);

View file

@ -3955,6 +3955,27 @@ static void monitor_readline_flush(void *opaque)
monitor_flush(opaque);
}
/*
* Print to current monitor if we have one, else to stderr.
* TODO should return int, so callers can calculate width, but that
* requires surgery to monitor_vprintf(). Left for another day.
*/
void error_vprintf(const char *fmt, va_list ap)
{
if (cur_mon && !monitor_cur_is_qmp()) {
monitor_vprintf(cur_mon, fmt, ap);
} else {
vfprintf(stderr, fmt, ap);
}
}
void error_vprintf_unless_qmp(const char *fmt, va_list ap)
{
if (cur_mon && !monitor_cur_is_qmp()) {
monitor_vprintf(cur_mon, fmt, ap);
}
}
static void __attribute__((constructor)) monitor_lock_init(void)
{
qemu_mutex_init(&monitor_lock);

View file

@ -1,4 +1,5 @@
/*
* Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device Client Side
@ -22,23 +23,34 @@
static int nbd_errno_to_system_errno(int err)
{
int ret;
switch (err) {
case NBD_SUCCESS:
return 0;
ret = 0;
break;
case NBD_EPERM:
return EPERM;
ret = EPERM;
break;
case NBD_EIO:
return EIO;
ret = EIO;
break;
case NBD_ENOMEM:
return ENOMEM;
ret = ENOMEM;
break;
case NBD_ENOSPC:
return ENOSPC;
ret = ENOSPC;
break;
case NBD_ESHUTDOWN:
ret = ESHUTDOWN;
break;
default:
TRACE("Squashing unexpected error %d to EINVAL", err);
/* fallthrough */
case NBD_EINVAL:
return EINVAL;
ret = EINVAL;
break;
}
return ret;
}
/* Definitions for opaque data types */
@ -74,64 +86,180 @@ static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
*/
/* Discard length bytes from channel. Return -errno on failure, or
* the amount of bytes consumed. */
static ssize_t drop_sync(QIOChannel *ioc, size_t size)
{
ssize_t ret, dropped = size;
char small[1024];
char *buffer;
/* If type represents success, return 1 without further action.
* If type represents an error reply, consume the rest of the packet on ioc.
* Then return 0 for unsupported (so the client can fall back to
* other approaches), or -1 with errp set for other errors.
buffer = sizeof(small) < size ? small : g_malloc(MIN(65536, size));
while (size > 0) {
ret = read_sync(ioc, buffer, MIN(65536, size));
if (ret < 0) {
goto cleanup;
}
assert(ret <= size);
size -= ret;
}
ret = dropped;
cleanup:
if (buffer != small) {
g_free(buffer);
}
return ret;
}
/* Send an option request.
*
* The request is for option @opt, with @data containing @len bytes of
* additional payload for the request (@len may be -1 to treat @data as
* a C string; and @data may be NULL if @len is 0).
* Return 0 if successful, -1 with errp set if it is impossible to
* continue. */
static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
uint32_t len, const char *data,
Error **errp)
{
nbd_option req;
QEMU_BUILD_BUG_ON(sizeof(req) != 16);
if (len == -1) {
req.length = len = strlen(data);
}
TRACE("Sending option request %" PRIu32", len %" PRIu32, opt, len);
stq_be_p(&req.magic, NBD_OPTS_MAGIC);
stl_be_p(&req.option, opt);
stl_be_p(&req.length, len);
if (write_sync(ioc, &req, sizeof(req)) != sizeof(req)) {
error_setg(errp, "Failed to send option request header");
return -1;
}
if (len && write_sync(ioc, (char *) data, len) != len) {
error_setg(errp, "Failed to send option request data");
return -1;
}
return 0;
}
/* Send NBD_OPT_ABORT as a courtesy to let the server know that we are
* not going to attempt further negotiation. */
static void nbd_send_opt_abort(QIOChannel *ioc)
{
/* Technically, a compliant server is supposed to reply to us; but
* older servers disconnected instead. At any rate, we're allowed
* to disconnect without waiting for the server reply, so we don't
* even care if the request makes it to the server, let alone
* waiting around for whether the server replies. */
nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
}
/* Receive the header of an option reply, which should match the given
* opt. Read through the length field, but NOT the length bytes of
* payload. Return 0 if successful, -1 with errp set if it is
* impossible to continue. */
static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
nbd_opt_reply *reply, Error **errp)
{
QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
if (read_sync(ioc, reply, sizeof(*reply)) != sizeof(*reply)) {
error_setg(errp, "failed to read option reply");
nbd_send_opt_abort(ioc);
return -1;
}
be64_to_cpus(&reply->magic);
be32_to_cpus(&reply->option);
be32_to_cpus(&reply->type);
be32_to_cpus(&reply->length);
TRACE("Received option reply %" PRIx32", type %" PRIx32", len %" PRIu32,
reply->option, reply->type, reply->length);
if (reply->magic != NBD_REP_MAGIC) {
error_setg(errp, "Unexpected option reply magic");
nbd_send_opt_abort(ioc);
return -1;
}
if (reply->option != opt) {
error_setg(errp, "Unexpected option type %x expected %x",
reply->option, opt);
nbd_send_opt_abort(ioc);
return -1;
}
return 0;
}
/* If reply represents success, return 1 without further action.
* If reply represents an error, consume the optional payload of
* the packet on ioc. Then return 0 for unsupported (so the client
* can fall back to other approaches), or -1 with errp set for other
* errors.
*/
static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
Error **errp)
{
uint32_t len;
char *msg = NULL;
int result = -1;
if (!(type & (1 << 31))) {
if (!(reply->type & (1 << 31))) {
return 1;
}
if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
error_setg(errp, "failed to read option length");
return -1;
}
len = be32_to_cpu(len);
if (len) {
if (len > NBD_MAX_BUFFER_SIZE) {
if (reply->length) {
if (reply->length > NBD_MAX_BUFFER_SIZE) {
error_setg(errp, "server's error message is too long");
goto cleanup;
}
msg = g_malloc(len + 1);
if (read_sync(ioc, msg, len) != len) {
msg = g_malloc(reply->length + 1);
if (read_sync(ioc, msg, reply->length) != reply->length) {
error_setg(errp, "failed to read option error message");
goto cleanup;
}
msg[len] = '\0';
msg[reply->length] = '\0';
}
switch (type) {
switch (reply->type) {
case NBD_REP_ERR_UNSUP:
TRACE("server doesn't understand request %" PRIx32
", attempting fallback", opt);
", attempting fallback", reply->option);
result = 0;
goto cleanup;
case NBD_REP_ERR_POLICY:
error_setg(errp, "Denied by server for option %" PRIx32, opt);
error_setg(errp, "Denied by server for option %" PRIx32,
reply->option);
break;
case NBD_REP_ERR_INVALID:
error_setg(errp, "Invalid data length for option %" PRIx32, opt);
error_setg(errp, "Invalid data length for option %" PRIx32,
reply->option);
break;
case NBD_REP_ERR_PLATFORM:
error_setg(errp, "Server lacks support for option %" PRIx32,
reply->option);
break;
case NBD_REP_ERR_TLS_REQD:
error_setg(errp, "TLS negotiation required before option %" PRIx32,
opt);
reply->option);
break;
case NBD_REP_ERR_SHUTDOWN:
error_setg(errp, "Server shutting down before option %" PRIx32,
reply->option);
break;
default:
error_setg(errp, "Unknown error code when asking for option %" PRIx32,
opt);
reply->option);
break;
}
@ -141,244 +269,160 @@ static int nbd_handle_reply_err(QIOChannel *ioc, uint32_t opt, uint32_t type,
cleanup:
g_free(msg);
if (result < 0) {
nbd_send_opt_abort(ioc);
}
return result;
}
static int nbd_receive_list(QIOChannel *ioc, char **name, Error **errp)
/* Process another portion of the NBD_OPT_LIST reply. Set *@match if
* the current reply matches @want or if the server does not support
* NBD_OPT_LIST, otherwise leave @match alone. Return 0 if iteration
* is complete, positive if more replies are expected, or negative
* with @errp set if an unrecoverable error occurred. */
static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
Error **errp)
{
uint64_t magic;
uint32_t opt;
uint32_t type;
nbd_opt_reply reply;
uint32_t len;
uint32_t namelen;
char name[NBD_MAX_NAME_SIZE + 1];
int error;
*name = NULL;
if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
error_setg(errp, "failed to read list option magic");
if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
return -1;
}
magic = be64_to_cpu(magic);
if (magic != NBD_REP_MAGIC) {
error_setg(errp, "Unexpected option list magic");
return -1;
}
if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
error_setg(errp, "failed to read list option");
return -1;
}
opt = be32_to_cpu(opt);
if (opt != NBD_OPT_LIST) {
error_setg(errp, "Unexpected option type %" PRIx32 " expected %x",
opt, NBD_OPT_LIST);
return -1;
}
if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
error_setg(errp, "failed to read list option type");
return -1;
}
type = be32_to_cpu(type);
error = nbd_handle_reply_err(ioc, opt, type, errp);
error = nbd_handle_reply_err(ioc, &reply, errp);
if (error <= 0) {
/* The server did not support NBD_OPT_LIST, so set *match on
* the assumption that any name will be accepted. */
*match = true;
return error;
}
len = reply.length;
if (read_sync(ioc, &len, sizeof(len)) != sizeof(len)) {
error_setg(errp, "failed to read option length");
return -1;
}
len = be32_to_cpu(len);
if (type == NBD_REP_ACK) {
if (reply.type == NBD_REP_ACK) {
if (len != 0) {
error_setg(errp, "length too long for option end");
nbd_send_opt_abort(ioc);
return -1;
}
} else if (type == NBD_REP_SERVER) {
if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
error_setg(errp, "incorrect option length");
return -1;
}
if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
error_setg(errp, "failed to read option name length");
return -1;
}
namelen = be32_to_cpu(namelen);
len -= sizeof(namelen);
if (len < namelen) {
error_setg(errp, "incorrect option name length");
return -1;
}
if (namelen > NBD_MAX_NAME_SIZE) {
error_setg(errp, "export name length too long %" PRIu32, namelen);
return -1;
}
*name = g_new0(char, namelen + 1);
if (read_sync(ioc, *name, namelen) != namelen) {
error_setg(errp, "failed to read export name");
g_free(*name);
*name = NULL;
return -1;
}
(*name)[namelen] = '\0';
len -= namelen;
if (len) {
char *buf = g_malloc(len + 1);
if (read_sync(ioc, buf, len) != len) {
error_setg(errp, "failed to read export description");
g_free(*name);
g_free(buf);
*name = NULL;
return -1;
}
buf[len] = '\0';
TRACE("Ignoring export description: %s", buf);
g_free(buf);
}
} else {
return 0;
} else if (reply.type != NBD_REP_SERVER) {
error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
type, NBD_REP_SERVER);
reply.type, NBD_REP_SERVER);
nbd_send_opt_abort(ioc);
return -1;
}
if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
error_setg(errp, "incorrect option length %" PRIu32, len);
nbd_send_opt_abort(ioc);
return -1;
}
if (read_sync(ioc, &namelen, sizeof(namelen)) != sizeof(namelen)) {
error_setg(errp, "failed to read option name length");
nbd_send_opt_abort(ioc);
return -1;
}
namelen = be32_to_cpu(namelen);
len -= sizeof(namelen);
if (len < namelen) {
error_setg(errp, "incorrect option name length");
nbd_send_opt_abort(ioc);
return -1;
}
if (namelen != strlen(want)) {
if (drop_sync(ioc, len) != len) {
error_setg(errp, "failed to skip export name with wrong length");
nbd_send_opt_abort(ioc);
return -1;
}
return 1;
}
assert(namelen < sizeof(name));
if (read_sync(ioc, name, namelen) != namelen) {
error_setg(errp, "failed to read export name");
nbd_send_opt_abort(ioc);
return -1;
}
name[namelen] = '\0';
len -= namelen;
if (drop_sync(ioc, len) != len) {
error_setg(errp, "failed to read export description");
nbd_send_opt_abort(ioc);
return -1;
}
if (!strcmp(name, want)) {
*match = true;
}
return 1;
}
/* Return -1 on failure, 0 if wantname is an available export. */
static int nbd_receive_query_exports(QIOChannel *ioc,
const char *wantname,
Error **errp)
{
uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
uint32_t opt = cpu_to_be32(NBD_OPT_LIST);
uint32_t length = 0;
bool foundExport = false;
TRACE("Querying export list");
if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
error_setg(errp, "Failed to send list option magic");
return -1;
}
if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
error_setg(errp, "Failed to send list option number");
return -1;
}
if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
error_setg(errp, "Failed to send list option length");
TRACE("Querying export list for '%s'", wantname);
if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
return -1;
}
TRACE("Reading available export names");
while (1) {
char *name = NULL;
int ret = nbd_receive_list(ioc, &name, errp);
int ret = nbd_receive_list(ioc, wantname, &foundExport, errp);
if (ret < 0) {
g_free(name);
name = NULL;
/* Server gave unexpected reply */
return -1;
} else if (ret == 0) {
/* Done iterating. */
if (!foundExport) {
error_setg(errp, "No export with name '%s' available",
wantname);
nbd_send_opt_abort(ioc);
return -1;
}
TRACE("Found desired export name '%s'", wantname);
return 0;
}
if (ret == 0) {
/* Server doesn't support export listing, so
* we will just assume an export with our
* wanted name exists */
foundExport = true;
break;
}
if (name == NULL) {
TRACE("End of export name list");
break;
}
if (g_str_equal(name, wantname)) {
foundExport = true;
TRACE("Found desired export name '%s'", name);
} else {
TRACE("Ignored export name '%s'", name);
}
g_free(name);
}
if (!foundExport) {
error_setg(errp, "No export with name '%s' available", wantname);
return -1;
}
return 0;
}
static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
QCryptoTLSCreds *tlscreds,
const char *hostname, Error **errp)
{
uint64_t magic = cpu_to_be64(NBD_OPTS_MAGIC);
uint32_t opt = cpu_to_be32(NBD_OPT_STARTTLS);
uint32_t length = 0;
uint32_t type;
nbd_opt_reply reply;
QIOChannelTLS *tioc;
struct NBDTLSHandshakeData data = { 0 };
TRACE("Requesting TLS from server");
if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
error_setg(errp, "Failed to send option magic");
return NULL;
}
if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
error_setg(errp, "Failed to send option number");
return NULL;
}
if (write_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
error_setg(errp, "Failed to send option length");
return NULL;
}
TRACE("Getting TLS reply from server1");
if (read_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
error_setg(errp, "failed to read option magic");
return NULL;
}
magic = be64_to_cpu(magic);
if (magic != NBD_REP_MAGIC) {
error_setg(errp, "Unexpected option magic");
return NULL;
}
TRACE("Getting TLS reply from server2");
if (read_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
error_setg(errp, "failed to read option");
return NULL;
}
opt = be32_to_cpu(opt);
if (opt != NBD_OPT_STARTTLS) {
error_setg(errp, "Unexpected option type %" PRIx32 " expected %x",
opt, NBD_OPT_STARTTLS);
if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
return NULL;
}
TRACE("Getting TLS reply from server");
if (read_sync(ioc, &type, sizeof(type)) != sizeof(type)) {
error_setg(errp, "failed to read option type");
if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
return NULL;
}
type = be32_to_cpu(type);
if (type != NBD_REP_ACK) {
if (reply.type != NBD_REP_ACK) {
error_setg(errp, "Server rejected request to start TLS %" PRIx32,
type);
reply.type);
nbd_send_opt_abort(ioc);
return NULL;
}
TRACE("Getting TLS reply from server");
if (read_sync(ioc, &length, sizeof(length)) != sizeof(length)) {
error_setg(errp, "failed to read option length");
return NULL;
}
length = be32_to_cpu(length);
if (length != 0) {
if (reply.length != 0) {
error_setg(errp, "Start TLS response was not zero %" PRIu32,
length);
reply.length);
nbd_send_opt_abort(ioc);
return NULL;
}
@ -417,6 +461,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
char buf[256];
uint64_t magic, s;
int rc;
bool zeroes = true;
TRACE("Receiving negotiation tlscreds=%p hostname=%s.",
tlscreds, hostname ? hostname : "<null>");
@ -466,8 +511,6 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
if (magic == NBD_OPTS_MAGIC) {
uint32_t clientflags = 0;
uint32_t opt;
uint32_t namesize;
uint16_t globalflags;
bool fixedNewStyle = false;
@ -483,6 +526,11 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
TRACE("Server supports fixed new style");
clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
}
if (globalflags & NBD_FLAG_NO_ZEROES) {
zeroes = false;
TRACE("Server supports no zeroes");
clientflags |= NBD_FLAG_C_NO_ZEROES;
}
/* client requested flags */
clientflags = cpu_to_be32(clientflags);
if (write_sync(ioc, &clientflags, sizeof(clientflags)) !=
@ -517,28 +565,13 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
goto fail;
}
}
/* write the export name */
magic = cpu_to_be64(magic);
if (write_sync(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
error_setg(errp, "Failed to send export name magic");
goto fail;
}
opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
if (write_sync(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
error_setg(errp, "Failed to send export name option number");
goto fail;
}
namesize = cpu_to_be32(strlen(name));
if (write_sync(ioc, &namesize, sizeof(namesize)) !=
sizeof(namesize)) {
error_setg(errp, "Failed to send export name length");
goto fail;
}
if (write_sync(ioc, (char *)name, strlen(name)) != strlen(name)) {
error_setg(errp, "Failed to send export name");
/* write the export name request */
if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name,
errp) < 0) {
goto fail;
}
/* Read the response */
if (read_sync(ioc, &s, sizeof(s)) != sizeof(s)) {
error_setg(errp, "Failed to read export length");
goto fail;
@ -585,7 +618,7 @@ int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint16_t *flags,
}
TRACE("Size is %" PRIu64 ", export flags %" PRIx16, *size, *flags);
if (read_sync(ioc, &buf, 124) != 124) {
if (zeroes && drop_sync(ioc, 124) != 124) {
error_setg(errp, "Failed to read reserved block");
goto fail;
}
@ -707,18 +740,20 @@ int nbd_disconnect(int fd)
}
#endif
ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
ssize_t ret;
TRACE("Sending request to server: "
"{ .from = %" PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64
", .type=%" PRIu32 " }",
request->from, request->len, request->handle, request->type);
", .flags = %" PRIx16 ", .type = %" PRIu16 " }",
request->from, request->len, request->handle,
request->flags, request->type);
stl_be_p(buf, NBD_REQUEST_MAGIC);
stl_be_p(buf + 4, request->type);
stw_be_p(buf + 4, request->flags);
stw_be_p(buf + 6, request->type);
stq_be_p(buf + 8, request->handle);
stq_be_p(buf + 16, request->from);
stl_be_p(buf + 24, request->len);
@ -735,7 +770,7 @@ ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request)
return 0;
}
ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply)
{
uint8_t buf[NBD_REPLY_SIZE];
uint32_t magic;
@ -763,6 +798,11 @@ ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply)
reply->error = nbd_errno_to_system_errno(reply->error);
if (reply->error == ESHUTDOWN) {
/* This works even on mingw which lacks a native ESHUTDOWN */
LOG("server shutting down");
return -EINVAL;
}
TRACE("Got reply: { magic = 0x%" PRIx32 ", .error = % " PRId32
", handle = %" PRIu64" }",
magic, reply->error, reply->handle);

View file

@ -53,16 +53,16 @@
/* This is all part of the "official" NBD API.
*
* The most up-to-date documentation is available at:
* https://github.com/yoe/nbd/blob/master/doc/proto.txt
* https://github.com/yoe/nbd/blob/master/doc/proto.md
*/
#define NBD_REQUEST_SIZE (4 + 4 + 8 + 8 + 4)
#define NBD_REQUEST_SIZE (4 + 2 + 2 + 8 + 8 + 4)
#define NBD_REPLY_SIZE (4 + 4 + 8)
#define NBD_REQUEST_MAGIC 0x25609513
#define NBD_REPLY_MAGIC 0x67446698
#define NBD_OPTS_MAGIC 0x49484156454F5054LL
#define NBD_CLIENT_MAGIC 0x0000420281861253LL
#define NBD_REP_MAGIC 0x3e889045565a9LL
#define NBD_REP_MAGIC 0x0003e889045565a9LL
#define NBD_SET_SOCK _IO(0xab, 0)
#define NBD_SET_BLKSIZE _IO(0xab, 1)
@ -92,6 +92,7 @@
#define NBD_ENOMEM 12
#define NBD_EINVAL 22
#define NBD_ENOSPC 28
#define NBD_ESHUTDOWN 108
static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
{
@ -104,9 +105,10 @@ static inline ssize_t read_sync(QIOChannel *ioc, void *buffer, size_t size)
return nbd_wr_syncv(ioc, &iov, 1, size, true);
}
static inline ssize_t write_sync(QIOChannel *ioc, void *buffer, size_t size)
static inline ssize_t write_sync(QIOChannel *ioc, const void *buffer,
size_t size)
{
struct iovec iov = { .iov_base = buffer, .iov_len = size };
struct iovec iov = { .iov_base = (void *) buffer, .iov_len = size };
return nbd_wr_syncv(ioc, &iov, 1, size, false);
}

View file

@ -1,4 +1,5 @@
/*
* Copyright (C) 2016 Red Hat, Inc.
* Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
*
* Network Block Device Server Side
@ -38,6 +39,8 @@ static int system_errno_to_nbd_errno(int err)
case EFBIG:
case ENOSPC:
return NBD_ENOSPC;
case ESHUTDOWN:
return NBD_ESHUTDOWN;
case EINVAL:
default:
return NBD_EINVAL;
@ -46,10 +49,10 @@ static int system_errno_to_nbd_errno(int err)
/* Definitions for opaque data types */
typedef struct NBDRequest NBDRequest;
typedef struct NBDRequestData NBDRequestData;
struct NBDRequest {
QSIMPLEQ_ENTRY(NBDRequest) entry;
struct NBDRequestData {
QSIMPLEQ_ENTRY(NBDRequestData) entry;
NBDClient *client;
uint8_t *data;
bool complete;
@ -61,6 +64,7 @@ struct NBDExport {
BlockBackend *blk;
char *name;
char *description;
off_t dev_offset;
off_t size;
uint16_t nbdflags;
@ -79,6 +83,7 @@ struct NBDClient {
int refcount;
void (*close)(NBDClient *client);
bool no_zeroes;
NBDExport *exp;
QCryptoTLSCreds *tlscreds;
char *tlsaclname;
@ -129,7 +134,8 @@ static ssize_t nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
}
static ssize_t nbd_negotiate_write(QIOChannel *ioc, void *buffer, size_t size)
static ssize_t nbd_negotiate_write(QIOChannel *ioc, const void *buffer,
size_t size)
{
ssize_t ret;
guint watch;
@ -193,12 +199,15 @@ static ssize_t nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
*/
static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
/* Send a reply header, including length, but no payload.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
uint32_t opt, uint32_t len)
{
uint64_t magic;
uint32_t len;
TRACE("Reply opt=%" PRIx32 " type=%" PRIx32, type, opt);
TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32,
type, opt, len);
magic = cpu_to_be64(NBD_REP_MAGIC);
if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
@ -215,7 +224,7 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
LOG("write failed (rep type)");
return -EINVAL;
}
len = cpu_to_be32(0);
len = cpu_to_be32(len);
if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
LOG("write failed (rep data length)");
return -EINVAL;
@ -223,45 +232,82 @@ static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
return 0;
}
/* Send a reply header with default 0 length.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
{
return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
}
/* Send an error reply.
* Return -errno on error, 0 on success. */
static int GCC_FMT_ATTR(4, 5)
nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
uint32_t opt, const char *fmt, ...)
{
va_list va;
char *msg;
int ret;
size_t len;
va_start(va, fmt);
msg = g_strdup_vprintf(fmt, va);
va_end(va);
len = strlen(msg);
assert(len < 4096);
TRACE("sending error message \"%s\"", msg);
ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
if (ret < 0) {
goto out;
}
if (nbd_negotiate_write(ioc, msg, len) != len) {
LOG("write failed (error message)");
ret = -EIO;
} else {
ret = 0;
}
out:
g_free(msg);
return ret;
}
/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
{
uint64_t magic, name_len;
uint32_t opt, type, len;
size_t name_len, desc_len;
uint32_t len;
const char *name = exp->name ? exp->name : "";
const char *desc = exp->description ? exp->description : "";
int rc;
TRACE("Advertising export name '%s'", exp->name ? exp->name : "");
name_len = strlen(exp->name);
magic = cpu_to_be64(NBD_REP_MAGIC);
if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) != sizeof(magic)) {
LOG("write failed (magic)");
return -EINVAL;
}
opt = cpu_to_be32(NBD_OPT_LIST);
if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) != sizeof(opt)) {
LOG("write failed (opt)");
return -EINVAL;
}
type = cpu_to_be32(NBD_REP_SERVER);
if (nbd_negotiate_write(ioc, &type, sizeof(type)) != sizeof(type)) {
LOG("write failed (reply type)");
return -EINVAL;
}
len = cpu_to_be32(name_len + sizeof(len));
if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
LOG("write failed (length)");
return -EINVAL;
TRACE("Advertising export name '%s' description '%s'", name, desc);
name_len = strlen(name);
desc_len = strlen(desc);
len = name_len + desc_len + sizeof(len);
rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
if (rc < 0) {
return rc;
}
len = cpu_to_be32(name_len);
if (nbd_negotiate_write(ioc, &len, sizeof(len)) != sizeof(len)) {
LOG("write failed (length)");
LOG("write failed (name length)");
return -EINVAL;
}
if (nbd_negotiate_write(ioc, exp->name, name_len) != name_len) {
LOG("write failed (buffer)");
if (nbd_negotiate_write(ioc, name, name_len) != name_len) {
LOG("write failed (name buffer)");
return -EINVAL;
}
if (nbd_negotiate_write(ioc, desc, desc_len) != desc_len) {
LOG("write failed (description buffer)");
return -EINVAL;
}
return 0;
}
/* Process the NBD_OPT_LIST command, with a potential series of replies.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
{
NBDExport *exp;
@ -270,8 +316,9 @@ static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
return nbd_negotiate_send_rep(client->ioc,
NBD_REP_ERR_INVALID, NBD_OPT_LIST);
return nbd_negotiate_send_rep_err(client->ioc,
NBD_REP_ERR_INVALID, NBD_OPT_LIST,
"OPT_LIST should not have length");
}
/* For each export, send a NBD_REP_SERVER reply. */
@ -318,7 +365,8 @@ fail:
return rc;
}
/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
* new channel for all further (now-encrypted) communication. */
static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
uint32_t length)
{
@ -332,7 +380,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
if (nbd_negotiate_drop_sync(ioc, length) != length) {
return NULL;
}
nbd_negotiate_send_rep(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS);
nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
"OPT_STARTTLS should not have length");
return NULL;
}
@ -371,6 +420,8 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
}
/* Process all NBD_OPT_* client option commands.
* Return -errno on error, 0 on success. */
static int nbd_negotiate_options(NBDClient *client)
{
uint32_t flags;
@ -402,6 +453,11 @@ static int nbd_negotiate_options(NBDClient *client)
fixedNewstyle = true;
flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
}
if (flags & NBD_FLAG_C_NO_ZEROES) {
TRACE("Client supports no zeroes at handshake end");
client->no_zeroes = true;
flags &= ~NBD_FLAG_C_NO_ZEROES;
}
if (flags != 0) {
TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
return -EIO;
@ -461,16 +517,22 @@ static int nbd_negotiate_options(NBDClient *client)
return -EINVAL;
default:
TRACE("Option 0x%" PRIx32 " not permitted before TLS",
clientflags);
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_TLS_REQD,
clientflags);
ret = nbd_negotiate_send_rep_err(client->ioc,
NBD_REP_ERR_TLS_REQD,
clientflags,
"Option 0x%" PRIx32
"not permitted before TLS",
clientflags);
if (ret < 0) {
return ret;
}
/* Let the client keep trying, unless they asked to quit */
if (clientflags == NBD_OPT_ABORT) {
return -EINVAL;
}
break;
}
} else if (fixedNewstyle) {
@ -483,6 +545,10 @@ static int nbd_negotiate_options(NBDClient *client)
break;
case NBD_OPT_ABORT:
/* NBD spec says we must try to reply before
* disconnecting, but that we must also tolerate
* guests that don't wait for our reply. */
nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags);
return -EINVAL;
case NBD_OPT_EXPORT_NAME:
@ -493,27 +559,30 @@ static int nbd_negotiate_options(NBDClient *client)
return -EIO;
}
if (client->tlscreds) {
TRACE("TLS already enabled");
ret = nbd_negotiate_send_rep(client->ioc,
NBD_REP_ERR_INVALID,
clientflags);
ret = nbd_negotiate_send_rep_err(client->ioc,
NBD_REP_ERR_INVALID,
clientflags,
"TLS already enabled");
} else {
TRACE("TLS not configured");
ret = nbd_negotiate_send_rep(client->ioc,
NBD_REP_ERR_POLICY,
clientflags);
ret = nbd_negotiate_send_rep_err(client->ioc,
NBD_REP_ERR_POLICY,
clientflags,
"TLS not configured");
}
if (ret < 0) {
return ret;
}
break;
default:
TRACE("Unsupported option 0x%" PRIx32, clientflags);
if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
return -EIO;
}
ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ERR_UNSUP,
clientflags);
ret = nbd_negotiate_send_rep_err(client->ioc,
NBD_REP_ERR_UNSUP,
clientflags,
"Unsupported option 0x%"
PRIx32,
clientflags);
if (ret < 0) {
return ret;
}
@ -547,8 +616,10 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
char buf[8 + 8 + 8 + 128];
int rc;
const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
NBD_FLAG_SEND_WRITE_ZEROES);
bool oldStyle;
size_t len;
/* Old style negotiation header without options
[ 0 .. 7] passwd ("NBDMAGIC")
@ -565,7 +636,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
....options sent....
[18 .. 25] size
[26 .. 27] export flags
[28 .. 151] reserved (0)
[28 .. 151] reserved (0, omit if no_zeroes)
*/
qio_channel_set_blocking(client->ioc, false, NULL);
@ -584,7 +655,7 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
} else {
stq_be_p(buf + 8, NBD_OPTS_MAGIC);
stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE);
stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
}
if (oldStyle) {
@ -611,8 +682,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
client->exp->size, client->exp->nbdflags | myflags);
stq_be_p(buf + 18, client->exp->size);
stw_be_p(buf + 26, client->exp->nbdflags | myflags);
if (nbd_negotiate_write(client->ioc, buf + 18, sizeof(buf) - 18) !=
sizeof(buf) - 18) {
len = client->no_zeroes ? 10 : sizeof(buf) - 18;
if (nbd_negotiate_write(client->ioc, buf + 18, len) != len) {
LOG("write failed");
goto fail;
}
@ -624,7 +695,7 @@ fail:
return rc;
}
static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
{
uint8_t buf[NBD_REQUEST_SIZE];
uint32_t magic;
@ -642,21 +713,23 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
/* Request
[ 0 .. 3] magic (NBD_REQUEST_MAGIC)
[ 4 .. 7] type (0 == READ, 1 == WRITE)
[ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
[ 6 .. 7] type (NBD_CMD_READ, ...)
[ 8 .. 15] handle
[16 .. 23] from
[24 .. 27] len
*/
magic = ldl_be_p(buf);
request->type = ldl_be_p(buf + 4);
request->flags = lduw_be_p(buf + 4);
request->type = lduw_be_p(buf + 6);
request->handle = ldq_be_p(buf + 8);
request->from = ldq_be_p(buf + 16);
request->len = ldl_be_p(buf + 24);
TRACE("Got request: { magic = 0x%" PRIx32 ", .type = %" PRIx32
", from = %" PRIu64 " , len = %" PRIu32 " }",
magic, request->type, request->from, request->len);
TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16
", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }",
magic, request->flags, request->type, request->from, request->len);
if (magic != NBD_REQUEST_MAGIC) {
LOG("invalid magic (got 0x%" PRIx32 ")", magic);
@ -665,7 +738,7 @@ static ssize_t nbd_receive_request(QIOChannel *ioc, struct nbd_request *request)
return 0;
}
static ssize_t nbd_send_reply(QIOChannel *ioc, struct nbd_reply *reply)
static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
{
uint8_t buf[NBD_REPLY_SIZE];
ssize_t ret;
@ -747,21 +820,21 @@ static void client_close(NBDClient *client)
}
}
static NBDRequest *nbd_request_get(NBDClient *client)
static NBDRequestData *nbd_request_get(NBDClient *client)
{
NBDRequest *req;
NBDRequestData *req;
assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
client->nb_requests++;
nbd_update_can_read(client);
req = g_new0(NBDRequest, 1);
req = g_new0(NBDRequestData, 1);
nbd_client_get(client);
req->client = client;
return req;
}
static void nbd_request_put(NBDRequest *req)
static void nbd_request_put(NBDRequestData *req)
{
NBDClient *client = req->client;
@ -894,6 +967,12 @@ void nbd_export_set_name(NBDExport *exp, const char *name)
nbd_export_put(exp);
}
void nbd_export_set_description(NBDExport *exp, const char *description)
{
g_free(exp->description);
exp->description = g_strdup(description);
}
void nbd_export_close(NBDExport *exp)
{
NBDClient *client, *next;
@ -903,6 +982,7 @@ void nbd_export_close(NBDExport *exp)
client_close(client);
}
nbd_export_set_name(exp, NULL);
nbd_export_set_description(exp, NULL);
nbd_export_put(exp);
}
@ -921,6 +1001,7 @@ void nbd_export_put(NBDExport *exp)
if (--exp->refcount == 0) {
assert(exp->name == NULL);
assert(exp->description == NULL);
if (exp->close) {
exp->close(exp);
@ -955,7 +1036,7 @@ void nbd_export_close_all(void)
}
}
static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
int len)
{
NBDClient *client = req->client;
@ -991,11 +1072,10 @@ static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
* and any other negative value to report an error to the client
* (although the caller may still need to disconnect after reporting
* the error). */
static ssize_t nbd_co_receive_request(NBDRequest *req,
struct nbd_request *request)
static ssize_t nbd_co_receive_request(NBDRequestData *req,
NBDRequest *request)
{
NBDClient *client = req->client;
uint32_t command;
ssize_t rc;
g_assert(qemu_in_coroutine());
@ -1012,13 +1092,12 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
TRACE("Decoding type");
command = request->type & NBD_CMD_MASK_COMMAND;
if (command != NBD_CMD_WRITE) {
if (request->type != NBD_CMD_WRITE) {
/* No payload, we are ready to read the next request. */
req->complete = true;
}
if (command == NBD_CMD_DISC) {
if (request->type == NBD_CMD_DISC) {
/* Special case: we're going to disconnect without a reply,
* whether or not flags, from, or len are bogus */
TRACE("Request type is DISCONNECT");
@ -1035,7 +1114,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
goto out;
}
if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
if (request->len > NBD_MAX_BUFFER_SIZE) {
LOG("len (%" PRIu32" ) is larger than max len (%u)",
request->len, NBD_MAX_BUFFER_SIZE);
@ -1049,7 +1128,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
goto out;
}
}
if (command == NBD_CMD_WRITE) {
if (request->type == NBD_CMD_WRITE) {
TRACE("Reading %" PRIu32 " byte(s)", request->len);
if (read_sync(client->ioc, req->data, request->len) != request->len) {
@ -1065,12 +1144,17 @@ static ssize_t nbd_co_receive_request(NBDRequest *req,
LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
", Size: %" PRIu64, request->from, request->len,
(uint64_t)client->exp->size);
rc = command == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
goto out;
}
if (request->type & ~NBD_CMD_MASK_COMMAND & ~NBD_CMD_FLAG_FUA) {
LOG("unsupported flags (got 0x%x)",
request->type & ~NBD_CMD_MASK_COMMAND);
if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
LOG("unsupported flags (got 0x%x)", request->flags);
rc = -EINVAL;
goto out;
}
if (request->type != NBD_CMD_WRITE_ZEROES &&
(request->flags & NBD_CMD_FLAG_NO_HOLE)) {
LOG("unexpected flags (got 0x%x)", request->flags);
rc = -EINVAL;
goto out;
}
@ -1088,11 +1172,10 @@ static void nbd_trip(void *opaque)
{
NBDClient *client = opaque;
NBDExport *exp = client->exp;
NBDRequest *req;
struct nbd_request request;
struct nbd_reply reply;
NBDRequestData *req;
NBDRequest request;
NBDReply reply;
ssize_t ret;
uint32_t command;
int flags;
TRACE("Reading request.");
@ -1116,7 +1199,6 @@ static void nbd_trip(void *opaque)
reply.error = -ret;
goto error_reply;
}
command = request.type & NBD_CMD_MASK_COMMAND;
if (client->closing) {
/*
@ -1126,11 +1208,12 @@ static void nbd_trip(void *opaque)
goto done;
}
switch (command) {
switch (request.type) {
case NBD_CMD_READ:
TRACE("Request type is READ");
if (request.type & NBD_CMD_FLAG_FUA) {
/* XXX: NBD Protocol only documents use of FUA with WRITE */
if (request.flags & NBD_CMD_FLAG_FUA) {
ret = blk_co_flush(exp->blk);
if (ret < 0) {
LOG("flush failed");
@ -1163,7 +1246,7 @@ static void nbd_trip(void *opaque)
TRACE("Writing to device");
flags = 0;
if (request.type & NBD_CMD_FLAG_FUA) {
if (request.flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
@ -1179,6 +1262,37 @@ static void nbd_trip(void *opaque)
}
break;
case NBD_CMD_WRITE_ZEROES:
TRACE("Request type is WRITE_ZEROES");
if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
TRACE("Server is read-only, return error");
reply.error = EROFS;
goto error_reply;
}
TRACE("Writing to device");
flags = 0;
if (request.flags & NBD_CMD_FLAG_FUA) {
flags |= BDRV_REQ_FUA;
}
if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
flags |= BDRV_REQ_MAY_UNMAP;
}
ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
request.len, flags);
if (ret < 0) {
LOG("writing to file failed");
reply.error = -ret;
goto error_reply;
}
if (nbd_co_send_reply(req, &reply, 0) < 0) {
goto out;
}
break;
case NBD_CMD_DISC:
/* unreachable, thanks to special case in nbd_co_receive_request() */
abort();

View file

@ -763,8 +763,7 @@ static int slirp_guestfwd(SlirpState *s, const char *config_str,
return -1;
}
if (slirp_add_exec(s->slirp, 3, qemu_chr_fe_get_driver(&fwd->hd),
&server, port) < 0) {
if (slirp_add_exec(s->slirp, 3, &fwd->hd, &server, port) < 0) {
error_report("conflicting/invalid host:port in guest forwarding "
"rule '%s'", config_str);
g_free(fwd);

View file

@ -4621,10 +4621,10 @@
#
# @pause: system pauses
#
# Since: 2.1
# Since: 2.1 (poweroff since 2.8)
##
{ 'enum': 'GuestPanicAction',
'data': [ 'pause' ] }
'data': [ 'pause', 'poweroff' ] }
##
# @rtc-reset-reinjection

View file

@ -735,19 +735,23 @@ static void mux_chr_read(void *opaque, const uint8_t *buf, int size)
}
}
static bool muxes_realized;
static void mux_chr_event(void *opaque, int event)
{
CharDriverState *chr = opaque;
MuxDriver *d = chr->opaque;
int i;
if (!muxes_realized) {
return;
}
/* Send the event to all registered listeners */
for (i = 0; i < d->mux_cnt; i++)
mux_chr_send_event(d, i, event);
}
static bool muxes_realized;
/**
* Called after processing of default and command-line-specified
* chardevs to deliver CHR_EVENT_OPENED events to any FEs attached

View file

@ -83,6 +83,7 @@ static void usage(const char *name)
" -t, --persistent don't exit on the last connection\n"
" -v, --verbose display extra debugging information\n"
" -x, --export-name=NAME expose export by name\n"
" -D, --description=TEXT with -x, also export a human-readable description\n"
"\n"
"Exposing part of the image:\n"
" -o, --offset=OFFSET offset into the image\n"
@ -477,7 +478,7 @@ int main(int argc, char **argv)
off_t fd_size;
QemuOpts *sn_opts = NULL;
const char *sn_id_or_name = NULL;
const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:";
const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:";
struct option lopt[] = {
{ "help", no_argument, NULL, 'h' },
{ "version", no_argument, NULL, 'V' },
@ -503,6 +504,7 @@ int main(int argc, char **argv)
{ "verbose", no_argument, NULL, 'v' },
{ "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
{ "export-name", required_argument, NULL, 'x' },
{ "description", required_argument, NULL, 'D' },
{ "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
{ "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
{ "trace", required_argument, NULL, 'T' },
@ -524,6 +526,7 @@ int main(int argc, char **argv)
BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
QDict *options = NULL;
const char *export_name = NULL;
const char *export_description = NULL;
const char *tlscredsid = NULL;
bool imageOpts = false;
bool writethrough = true;
@ -689,6 +692,9 @@ int main(int argc, char **argv)
case 'x':
export_name = optarg;
break;
case 'D':
export_description = optarg;
break;
case 'v':
verbose = 1;
break;
@ -937,7 +943,11 @@ int main(int argc, char **argv)
}
if (export_name) {
nbd_export_set_name(exp, export_name);
nbd_export_set_description(exp, export_description);
newproto = true;
} else if (export_description) {
error_report("Export description requires an export name");
exit(EXIT_FAILURE);
}
server_ioc = qio_channel_socket_new();

View file

@ -79,9 +79,12 @@ Disconnect the device @var{dev}
Allow up to @var{num} clients to share the device (default @samp{1})
@item -t, --persistent
Don't exit on the last connection
@item -x NAME, --export-name=NAME
@item -x, --export-name=@var{name}
Set the NBD volume export name. This switches the server to use
the new style NBD protocol negotiation
@item -D, --description=@var{description}
Set the NBD volume export description, as a human-readable
string. Requires the use of @option{-x}
@item --tls-creds=ID
Enable mandatory TLS encryption for the server by setting the ID
of the TLS credentials object previously created with the --object

View file

@ -1754,7 +1754,7 @@ sub process {
# Ignore those directives where spaces _are_ permitted.
if ($name =~ /^(?:
if|for|while|switch|return|case|
volatile|__volatile__|
volatile|__volatile__|coroutine_fn|
__attribute__|format|__extension__|
asm|__asm__)$/x)
{
@ -2498,8 +2498,8 @@ sub process {
VMStateDescription|
VMStateInfo}x;
if ($line !~ /\bconst\b/ &&
$line =~ /\b($struct_ops)\b/) {
ERROR("struct $1 should normally be const\n" .
$line =~ /\b($struct_ops)\b.*=/) {
ERROR("initializer for struct $1 should normally be const\n" .
$herecurr);
}

View file

@ -9,6 +9,7 @@ stub-obj-y += clock-warp.o
stub-obj-y += cpu-get-clock.o
stub-obj-y += cpu-get-icount.o
stub-obj-y += dump.o
stub-obj-y += error-printf.o
stub-obj-y += fdset-add-fd.o
stub-obj-y += fdset-find-fd.o
stub-obj-y += fdset-get-fd.o
@ -23,7 +24,6 @@ stub-obj-y += is-daemonized.o
stub-obj-y += machine-init-done.o
stub-obj-y += migr-blocker.o
stub-obj-y += mon-is-qmp.o
stub-obj-y += mon-printf.o
stub-obj-y += monitor-init.o
stub-obj-y += notify-event.o
stub-obj-y += qtest.o

19
stubs/error-printf.c Normal file
View file

@ -0,0 +1,19 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
void error_vprintf(const char *fmt, va_list ap)
{
if (g_test_initialized() && !g_test_subprocess()) {
char *msg = g_strdup_vprintf(fmt, ap);
g_test_message("%s", msg);
g_free(msg);
} else {
vfprintf(stderr, fmt, ap);
}
}
void error_vprintf_unless_qmp(const char *fmt, va_list ap)
{
error_vprintf(fmt, ap);
}

View file

@ -1,11 +0,0 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "monitor/monitor.h"
void monitor_printf(Monitor *mon, const char *fmt, ...)
{
}
void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
{
}

View file

@ -239,6 +239,7 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM,
CPUID_7_0_EBX_RDSEED */
#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | CPUID_7_0_ECX_OSPKE)
#define TCG_7_0_EDX_FEATURES 0
#define TCG_APM_FEATURES 0
#define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT
#define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1)
@ -444,6 +445,22 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
.cpuid_reg = R_ECX,
.tcg_features = TCG_7_0_ECX_FEATURES,
},
[FEAT_7_0_EDX] = {
.feat_names = {
NULL, NULL, "avx512-4vnniw", "avx512-4fmaps",
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
},
.cpuid_eax = 7,
.cpuid_needs_ecx = true, .cpuid_ecx = 0,
.cpuid_reg = R_EDX,
.tcg_features = TCG_7_0_EDX_FEATURES,
},
[FEAT_8000_0007_EDX] = {
.feat_names = {
NULL, NULL, NULL, NULL,
@ -2560,7 +2577,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) {
*ecx |= CPUID_7_0_ECX_OSPKE;
}
*edx = 0; /* Reserved */
*edx = env->features[FEAT_7_0_EDX]; /* Feature flags */
} else {
*eax = 0;
*ebx = 0;

View file

@ -443,6 +443,7 @@ typedef enum FeatureWord {
FEAT_1_ECX, /* CPUID[1].ECX */
FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */
FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */
FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */
FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */
FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */
FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */
@ -629,6 +630,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_ECX_OSPKE (1U << 4)
#define CPUID_7_0_ECX_RDPID (1U << 22)
#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */
#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */
#define CPUID_XSAVE_XSAVEOPT (1U << 0)
#define CPUID_XSAVE_XSAVEC (1U << 1)
#define CPUID_XSAVE_XGETBV1 (1U << 2)

View file

@ -14,24 +14,6 @@
#include "monitor/monitor.h"
#include "qemu/error-report.h"
/*
* Print to current monitor if we have one, else to stderr.
* TODO should return int, so callers can calculate width, but that
* requires surgery to monitor_vprintf(). Left for another day.
*/
void error_vprintf(const char *fmt, va_list ap)
{
if (cur_mon && !monitor_cur_is_qmp()) {
monitor_vprintf(cur_mon, fmt, ap);
} else {
vfprintf(stderr, fmt, ap);
}
}
/*
* Print to current monitor if we have one, else to stderr.
* TODO just like error_vprintf()
*/
void error_printf(const char *fmt, ...)
{
va_list ap;
@ -45,11 +27,9 @@ void error_printf_unless_qmp(const char *fmt, ...)
{
va_list ap;
if (!monitor_cur_is_qmp()) {
va_start(ap, fmt);
error_vprintf(fmt, ap);
va_end(ap);
}
va_start(ap, fmt);
error_vprintf_unless_qmp(fmt, ap);
va_end(ap);
}
static Location std_loc = {

5
vl.c
View file

@ -1792,6 +1792,11 @@ void qemu_system_guest_panicked(void)
}
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
vm_stop(RUN_STATE_GUEST_PANICKED);
if (!no_shutdown) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF,
&error_abort);
qemu_system_shutdown_request();
}
}
void qemu_system_reset_request(void)