diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h index c757c8099f..f2f7772c86 100644 --- a/fsdev/file-op-9p.h +++ b/fsdev/file-op-9p.h @@ -59,6 +59,11 @@ typedef struct ExtendedOps { #define V9FS_RDONLY 0x00000040 #define V9FS_PROXY_SOCK_FD 0x00000080 #define V9FS_PROXY_SOCK_NAME 0x00000100 +/* + * multidevs option (either one of the two applies exclusively) + */ +#define V9FS_REMAP_INODES 0x00000200 +#define V9FS_FORBID_MULTIDEVS 0x00000400 #define V9FS_SEC_MASK 0x0000003C diff --git a/fsdev/qemu-fsdev-opts.c b/fsdev/qemu-fsdev-opts.c index 7c31ffffaf..07a18c6e48 100644 --- a/fsdev/qemu-fsdev-opts.c +++ b/fsdev/qemu-fsdev-opts.c @@ -31,7 +31,9 @@ static QemuOptsList qemu_fsdev_opts = { }, { .name = "readonly", .type = QEMU_OPT_BOOL, - + }, { + .name = "multidevs", + .type = QEMU_OPT_STRING, }, { .name = "socket", .type = QEMU_OPT_STRING, @@ -75,6 +77,9 @@ static QemuOptsList qemu_virtfs_opts = { }, { .name = "readonly", .type = QEMU_OPT_BOOL, + }, { + .name = "multidevs", + .type = QEMU_OPT_STRING, }, { .name = "socket", .type = QEMU_OPT_STRING, diff --git a/fsdev/qemu-fsdev.c b/fsdev/qemu-fsdev.c index 077a8c4e2b..a9e069c0c7 100644 --- a/fsdev/qemu-fsdev.c +++ b/fsdev/qemu-fsdev.c @@ -58,6 +58,7 @@ static FsDriverTable FsDrivers[] = { "writeout", "fmode", "dmode", + "multidevs", "throttling.bps-total", "throttling.bps-read", "throttling.bps-write", diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c index 5c7f4cd9c7..4708c0bd89 100644 --- a/hw/9pfs/9p-local.c +++ b/hw/9pfs/9p-local.c @@ -1483,6 +1483,7 @@ static int local_parse_opts(QemuOpts *opts, FsDriverEntry *fse, Error **errp) { const char *sec_model = qemu_opt_get(opts, "security_model"); const char *path = qemu_opt_get(opts, "path"); + const char *multidevs = qemu_opt_get(opts, "multidevs"); Error *local_err = NULL; if (!sec_model) { @@ -1506,6 +1507,26 @@ static int local_parse_opts(QemuOpts *opts, FsDriverEntry *fse, Error **errp) return -1; } + if (multidevs) { + if (!strcmp(multidevs, "remap")) { + fse->export_flags &= ~V9FS_FORBID_MULTIDEVS; + fse->export_flags |= V9FS_REMAP_INODES; + } else if (!strcmp(multidevs, "forbid")) { + fse->export_flags &= ~V9FS_REMAP_INODES; + fse->export_flags |= V9FS_FORBID_MULTIDEVS; + } else if (!strcmp(multidevs, "warn")) { + fse->export_flags &= ~V9FS_FORBID_MULTIDEVS; + fse->export_flags &= ~V9FS_REMAP_INODES; + } else { + error_setg(&local_err, "invalid multidevs property '%s'", + multidevs); + error_append_hint(&local_err, "Valid options are: multidevs=" + "[remap|forbid|warn]\n"); + error_propagate(errp, local_err); + return -1; + } + } + if (!path) { error_setg(errp, "path property not set"); return -1; diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c index 5a895ae0bb..8eb89c5c7d 100644 --- a/hw/9pfs/9p.c +++ b/hw/9pfs/9p.c @@ -26,6 +26,7 @@ #include "trace.h" #include "migration/blocker.h" #include "sysemu/qtest.h" +#include "qemu/xxhash.h" int open_fd_hw; int total_open_fd; @@ -572,23 +573,117 @@ static void coroutine_fn virtfs_reset(V9fsPDU *pdu) P9_STAT_MODE_NAMED_PIPE | \ P9_STAT_MODE_SOCKET) -/* This is the algorithm from ufs in spfs */ -static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) +/* creative abuse of tb_hash_func7, which is based on xxhash */ +static uint32_t qpp_hash(QppEntry e) { - size_t size; + return qemu_xxhash7(e.ino_prefix, e.dev, 0, 0, 0); +} - if (pdu->s->dev_id != stbuf->st_dev) { - warn_report_once( - "9p: Multiple devices detected in same VirtFS export, " - "which might lead to file ID collisions and severe " - "misbehaviours on guest! You should use a separate " - "export for each device shared from host." - ); +static bool qpp_lookup_func(const void *obj, const void *userp) +{ + const QppEntry *e1 = obj, *e2 = userp; + return e1->dev == e2->dev && e1->ino_prefix == e2->ino_prefix; +} + +static void qpp_table_remove(void *p, uint32_t h, void *up) +{ + g_free(p); +} + +static void qpp_table_destroy(struct qht *ht) +{ + if (!ht || !ht->map) { + return; + } + qht_iter(ht, qpp_table_remove, NULL); + qht_destroy(ht); +} + +static void qpp_table_init(struct qht *ht) +{ + qht_init(ht, qpp_lookup_func, 1, QHT_MODE_AUTO_RESIZE); +} + +/* + * stat_to_qid needs to map inode number (64 bits) and device id (32 bits) + * to a unique QID path (64 bits). To avoid having to map and keep track + * of up to 2^64 objects, we map only the 16 highest bits of the inode plus + * the device id to the 16 highest bits of the QID path. The 48 lowest bits + * of the QID path equal to the lowest bits of the inode number. + * + * This takes advantage of the fact that inode number are usually not + * random but allocated sequentially, so we have fewer items to keep + * track of. + */ +static int qid_path_prefixmap(V9fsPDU *pdu, const struct stat *stbuf, + uint64_t *path) +{ + QppEntry lookup = { + .dev = stbuf->st_dev, + .ino_prefix = (uint16_t) (stbuf->st_ino >> 48) + }, *val; + uint32_t hash = qpp_hash(lookup); + + val = qht_lookup(&pdu->s->qpp_table, &lookup, hash); + + if (!val) { + if (pdu->s->qp_prefix_next == 0) { + /* we ran out of prefixes */ + error_report_once( + "9p: No more prefixes available for remapping inodes from " + "host to guest." + ); + return -ENFILE; + } + + val = g_malloc0(sizeof(QppEntry)); + *val = lookup; + + /* new unique inode prefix and device combo */ + val->qp_prefix = pdu->s->qp_prefix_next++; + qht_insert(&pdu->s->qpp_table, val, hash, NULL); + } + + *path = ((uint64_t)val->qp_prefix << 48) | (stbuf->st_ino & QPATH_INO_MASK); + return 0; +} + +static int stat_to_qid(V9fsPDU *pdu, const struct stat *stbuf, V9fsQID *qidp) +{ + int err; + size_t size; + + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { + /* map inode+device to qid path (fast path) */ + err = qid_path_prefixmap(pdu, stbuf, &qidp->path); + if (err) { + return err; + } + } else { + if (pdu->s->dev_id != stbuf->st_dev) { + if (pdu->s->ctx.export_flags & V9FS_FORBID_MULTIDEVS) { + error_report_once( + "9p: Multiple devices detected in same VirtFS export. " + "Access of guest to additional devices is (partly) " + "denied due to virtfs option 'multidevs=forbid' being " + "effective." + ); + return -ENODEV; + } else { + warn_report_once( + "9p: Multiple devices detected in same VirtFS export, " + "which might lead to file ID collisions and severe " + "misbehaviours on guest! You should either use a " + "separate export for each device shared from host or " + "use virtfs option 'multidevs=remap'!" + ); + } + } + memset(&qidp->path, 0, sizeof(qidp->path)); + size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); + memcpy(&qidp->path, &stbuf->st_ino, size); } - memset(&qidp->path, 0, sizeof(qidp->path)); - size = MIN(sizeof(stbuf->st_ino), sizeof(qidp->path)); - memcpy(&qidp->path, &stbuf->st_ino, size); qidp->version = stbuf->st_mtime ^ (stbuf->st_size << 8); qidp->type = 0; if (S_ISDIR(stbuf->st_mode)) { @@ -618,6 +713,30 @@ static int coroutine_fn fid_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, return 0; } +static int coroutine_fn dirent_to_qid(V9fsPDU *pdu, V9fsFidState *fidp, + struct dirent *dent, V9fsQID *qidp) +{ + struct stat stbuf; + V9fsPath path; + int err; + + v9fs_path_init(&path); + + err = v9fs_co_name_to_path(pdu, &fidp->path, dent->d_name, &path); + if (err < 0) { + goto out; + } + err = v9fs_co_lstat(pdu, &path, &stbuf); + if (err < 0) { + goto out; + } + err = stat_to_qid(pdu, &stbuf, qidp); + +out: + v9fs_path_free(&path); + return err; +} + V9fsPDU *pdu_alloc(V9fsState *s) { V9fsPDU *pdu = NULL; @@ -1966,16 +2085,39 @@ static int coroutine_fn v9fs_do_readdir(V9fsPDU *pdu, V9fsFidState *fidp, v9fs_string_free(&name); return count; } - /* - * Fill up just the path field of qid because the client uses - * only that. To fill the entire qid structure we will have - * to stat each dirent found, which is expensive - */ - size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); - memcpy(&qid.path, &dent->d_ino, size); - /* Fill the other fields with dummy values */ - qid.type = 0; - qid.version = 0; + + if (pdu->s->ctx.export_flags & V9FS_REMAP_INODES) { + /* + * dirent_to_qid() implies expensive stat call for each entry, + * we must do that here though since inode remapping requires + * the device id, which in turn might be different for + * different entries; we cannot make any assumption to avoid + * that here. + */ + err = dirent_to_qid(pdu, fidp, dent, &qid); + if (err < 0) { + v9fs_readdir_unlock(&fidp->fs.dir); + v9fs_co_seekdir(pdu, fidp, saved_dir_pos); + v9fs_string_free(&name); + return err; + } + } else { + /* + * Fill up just the path field of qid because the client uses + * only that. To fill the entire qid structure we will have + * to stat each dirent found, which is expensive. For the + * latter reason we don't call dirent_to_qid() here. Only drawback + * is that no multi-device export detection of stat_to_qid() + * would be done and provided as error to the user here. But + * user would get that error anyway when accessing those + * files/dirs through other ways. + */ + size = MIN(sizeof(dent->d_ino), sizeof(qid.path)); + memcpy(&qid.path, &dent->d_ino, size); + /* Fill the other fields with dummy values */ + qid.type = 0; + qid.version = 0; + } /* 11 = 7 + 4 (7 = start offset, 4 = space for storing count) */ len = pdu_marshal(pdu, 11 + count, "Qqbs", @@ -3676,6 +3818,9 @@ int v9fs_device_realize_common(V9fsState *s, const V9fsTransport *t, s->dev_id = stat.st_dev; + qpp_table_init(&s->qpp_table); + s->qp_prefix_next = 1; /* reserve 0 to detect overflow */ + s->ctx.fst = &fse->fst; fsdev_throttle_init(s->ctx.fst); @@ -3697,6 +3842,7 @@ void v9fs_device_unrealize_common(V9fsState *s, Error **errp) fsdev_throttle_cleanup(s->ctx.fst); } g_free(s->tag); + qpp_table_destroy(&s->qpp_table); g_free(s->ctx.fs_root); } diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h index 5e316178d5..7262fe80aa 100644 --- a/hw/9pfs/9p.h +++ b/hw/9pfs/9p.h @@ -8,6 +8,7 @@ #include "fsdev/9p-iov-marshal.h" #include "qemu/thread.h" #include "qemu/coroutine.h" +#include "qemu/qht.h" enum { P9_TLERROR = 6, @@ -235,6 +236,15 @@ struct V9fsFidState V9fsFidState *rclm_lst; }; +#define QPATH_INO_MASK ((1ULL << 48) - 1) + +/* QID path prefix entry, see stat_to_qid */ +typedef struct { + dev_t dev; + uint16_t ino_prefix; + uint16_t qp_prefix; +} QppEntry; + struct V9fsState { QLIST_HEAD(, V9fsPDU) free_list; @@ -257,6 +267,8 @@ struct V9fsState V9fsConf fsconf; V9fsQID root_qid; dev_t dev_id; + struct qht qpp_table; + uint16_t qp_prefix_next; }; /* 9p2000.L open flags */ diff --git a/qemu-options.hx b/qemu-options.hx index 2a04ca6ac5..793d70ff93 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -1339,7 +1339,7 @@ ETEXI DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, "-virtfs local,path=path,mount_tag=tag,security_model=mapped-xattr|mapped-file|passthrough|none\n" - " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode]\n" + " [,id=id][,writeout=immediate][,readonly][,fmode=fmode][,dmode=dmode][,multidevs=remap|forbid|warn]\n" "-virtfs proxy,mount_tag=tag,socket=socket[,id=id][,writeout=immediate][,readonly]\n" "-virtfs proxy,mount_tag=tag,sock_fd=sock_fd[,id=id][,writeout=immediate][,readonly]\n" "-virtfs synth,mount_tag=tag[,id=id][,readonly]\n", @@ -1347,7 +1347,7 @@ DEF("virtfs", HAS_ARG, QEMU_OPTION_virtfs, STEXI -@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}] +@item -virtfs local,path=@var{path},mount_tag=@var{mount_tag} ,security_model=@var{security_model}[,writeout=@var{writeout}][,readonly] [,fmode=@var{fmode}][,dmode=@var{dmode}][,multidevs=@var{multidevs}] @itemx -virtfs proxy,socket=@var{socket},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] @itemx -virtfs proxy,sock_fd=@var{sock_fd},mount_tag=@var{mount_tag} [,writeout=@var{writeout}][,readonly] @itemx -virtfs synth,mount_tag=@var{mount_tag} @@ -1403,6 +1403,28 @@ Specifies the default mode for newly created directories on the host. Works only with security models "mapped-xattr" and "mapped-file". @item mount_tag=@var{mount_tag} Specifies the tag name to be used by the guest to mount this export point. +@item multidevs=@var{multidevs} +Specifies how to deal with multiple devices being shared with a 9p export. +Supported behaviours are either "remap", "forbid" or "warn". The latter is +the default behaviour on which virtfs 9p expects only one device to be +shared with the same export, and if more than one device is shared and +accessed via the same 9p export then only a warning message is logged +(once) by qemu on host side. In order to avoid file ID collisions on guest +you should either create a separate virtfs export for each device to be +shared with guests (recommended way) or you might use "remap" instead which +allows you to share multiple devices with only one export instead, which is +achieved by remapping the original inode numbers from host to guest in a +way that would prevent such collisions. Remapping inodes in such use cases +is required because the original device IDs from host are never passed and +exposed on guest. Instead all files of an export shared with virtfs always +share the same device id on guest. So two files with identical inode +numbers but from actually different devices on host would otherwise cause a +file ID collision and hence potential misbehaviours on guest. "forbid" on +the other hand assumes like "warn" that only one device is shared by the +same export, however it will not only log a warning message but also +deny access to additional devices on guest. Note though that "forbid" does +currently not block all possible file access operations (e.g. readdir() +would still return entries from other devices). @end table ETEXI diff --git a/vl.c b/vl.c index 002bf4919e..0a295e5d77 100644 --- a/vl.c +++ b/vl.c @@ -3335,7 +3335,8 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_virtfs: { QemuOpts *fsdev; QemuOpts *device; - const char *writeout, *sock_fd, *socket, *path, *security_model; + const char *writeout, *sock_fd, *socket, *path, *security_model, + *multidevs; olist = qemu_find_opts("virtfs"); if (!olist) { @@ -3395,6 +3396,10 @@ int main(int argc, char **argv, char **envp) qemu_opt_set_bool(fsdev, "readonly", qemu_opt_get_bool(opts, "readonly", 0), &error_abort); + multidevs = qemu_opt_get(opts, "multidevs"); + if (multidevs) { + qemu_opt_set(fsdev, "multidevs", multidevs, &error_abort); + } device = qemu_opts_create(qemu_find_opts("device"), NULL, 0, &error_abort); qemu_opt_set(device, "driver", "virtio-9p-pci", &error_abort);