From 1bfa316ce776d1f90ac96c59b4b69910db19ed6d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 5 Feb 2016 11:43:11 +0100 Subject: [PATCH 01/13] virtio-net: use the backend cross-endian capabilities When running a fully emulated device in cross-endian conditions, including a virtio 1.0 device offered to a big endian guest, we need to fix the vnet headers. This is currently handled by the virtio_net_hdr_swap() function in the core virtio-net code but it should actually be handled by the net backend. With this patch, virtio-net now tries to configure the backend to do the endian fixing when the device starts (i.e. the driver sets the CONFIG_OK bit). If the backend cannot support the requested endianness, we have to fall back onto virtio_net_hdr_swap(): this is recorded in the needs_vnet_hdr_swap flag, to be used in the TX and RX paths. Note that we reset the backend to the default behaviour (guest native endianness) when the device stops (i.e. device status had CONFIG_OK bit and driver unsets it). This is needed, with the linux tap backend at least, otherwise the guest may lose network connectivity if rebooted into a different endianness. The current vhost-net code also tries to configure net backends. This will no longer be needed and will be reverted in a subsequent patch. Reviewed-by: Cornelia Huck Reviewed-by: Laurent Vivier Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Laurent Vivier --- hw/net/virtio-net.c | 68 ++++++++++++++++++++++++++++++- include/hw/virtio/virtio-access.h | 9 ---- include/hw/virtio/virtio-net.h | 1 + 3 files changed, 67 insertions(+), 11 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index de696e8dd0..5798f87d8e 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -129,6 +129,13 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) if (!n->vhost_started) { int r, i; + if (n->needs_vnet_hdr_swap) { + error_report("backend does not support %s vnet headers; " + "falling back on userspace virtio", + virtio_is_big_endian(vdev) ? "BE" : "LE"); + return; + } + /* Any packets outstanding? Purge them to avoid touching rings * when vhost is running. */ @@ -153,6 +160,59 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) } } +static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev, + NetClientState *peer, + bool enable) +{ + if (virtio_is_big_endian(vdev)) { + return qemu_set_vnet_be(peer, enable); + } else { + return qemu_set_vnet_le(peer, enable); + } +} + +static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs, + int queues, bool enable) +{ + int i; + + for (i = 0; i < queues; i++) { + if (virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, enable) < 0 && + enable) { + while (--i >= 0) { + virtio_net_set_vnet_endian_one(vdev, ncs[i].peer, false); + } + + return true; + } + } + + return false; +} + +static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + int queues = n->multiqueue ? n->max_queues : 1; + + if (virtio_net_started(n, status)) { + /* Before using the device, we tell the network backend about the + * endianness to use when parsing vnet headers. If the backend + * can't do it, we fallback onto fixing the headers in the core + * virtio-net code. 
+ */ + n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs, + queues, true); + } else if (virtio_net_started(n, vdev->status)) { + /* After using the device, we need to reset the network backend to + * the default (guest native endianness), otherwise the guest may + * lose network connectivity if it is rebooted into a different + * endianness. + */ + virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false); + } +} + static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) { VirtIONet *n = VIRTIO_NET(vdev); @@ -160,6 +220,7 @@ static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status) int i; uint8_t queue_status; + virtio_net_vnet_endian_status(n, status); virtio_net_vhost_status(n, status); for (i = 0; i < n->max_queues; i++) { @@ -963,7 +1024,10 @@ static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt, void *wbuf = (void *)buf; work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len, size - n->host_hdr_len); - virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); + + if (n->needs_vnet_hdr_swap) { + virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf); + } iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr)); } else { struct virtio_net_hdr hdr = { @@ -1184,7 +1248,7 @@ static int32_t virtio_net_flush_tx(VirtIONetQueue *q) error_report("virtio-net header incorrect"); exit(1); } - if (virtio_needs_swap(vdev)) { + if (n->needs_vnet_hdr_swap) { virtio_net_hdr_swap(vdev, (void *) &mhdr); sg2[0].iov_base = &mhdr; sg2[0].iov_len = n->guest_hdr_len; diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h index 8aec843c8f..a01fff2e51 100644 --- a/include/hw/virtio/virtio-access.h +++ b/include/hw/virtio/virtio-access.h @@ -143,15 +143,6 @@ static inline uint64_t virtio_ldq_p(VirtIODevice *vdev, const void *ptr) } } -static inline bool virtio_needs_swap(VirtIODevice *vdev) -{ -#ifdef HOST_WORDS_BIGENDIAN - return virtio_access_is_big_endian(vdev) ? 
false : true; -#else - return virtio_access_is_big_endian(vdev) ? true : false; -#endif -} - static inline uint16_t virtio_tswap16(VirtIODevice *vdev, uint16_t s) { #ifdef HOST_WORDS_BIGENDIAN diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 2ce3b03bd4..0cabdb6822 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -94,6 +94,7 @@ typedef struct VirtIONet { uint64_t curr_guest_offloads; QEMUTimer *announce_timer; int announce_counter; + bool needs_vnet_hdr_swap; } VirtIONet; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, From 3154d1e426e8a82730119984b1b25b830830c1d2 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 5 Feb 2016 11:45:26 +0100 Subject: [PATCH 02/13] vhost-net: revert support of cross-endian vnet headers Cross-endian is now handled by the core virtio-net code. This patch reverts: commit 5be7d9f1b1452613b95c6ba70b8d7ad3d0797991 vhost-net: tell tap backend about the vnet endianness and commit cf0a628f6e81bfc9b7a944fa0b80c3594836df56 net: set endianness on all backend devices Reviewed-by: Laurent Vivier Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Laurent Vivier --- hw/net/vhost_net.c | 41 +++++------------------------------------ 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index 3940a04b65..b2428324b3 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -36,7 +36,6 @@ #include "standard-headers/linux/virtio_ring.h" #include "hw/virtio/vhost.h" #include "hw/virtio/virtio-bus.h" -#include "hw/virtio/virtio-access.h" struct vhost_net { struct vhost_dev dev; @@ -197,27 +196,6 @@ static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index) net->dev.vq_index = vq_index; } -static int vhost_net_set_vnet_endian(VirtIODevice *dev, NetClientState *peer, - bool set) -{ - int r = 0; - - if (virtio_vdev_has_feature(dev, VIRTIO_F_VERSION_1) || - (virtio_legacy_is_cross_endian(dev) && !virtio_is_big_endian(dev))) { - r = qemu_set_vnet_le(peer, set); - if (r) { - error_report("backend does not support LE vnet headers"); - } - } else if (virtio_legacy_is_cross_endian(dev)) { - r = qemu_set_vnet_be(peer, set); - if (r) { - error_report("backend does not support BE vnet headers"); - } - } - - return r; -} - static int vhost_net_start_one(struct vhost_net *net, VirtIODevice *dev) { @@ -298,25 +276,21 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(dev))); VirtioBusState *vbus = VIRTIO_BUS(qbus); VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus); - int r, e, i, j; + int r, e, i; if (!k->set_guest_notifiers) { error_report("binding does not support guest notifiers"); return -ENOSYS; } - for (j = 0; j < total_queues; j++) { - r = vhost_net_set_vnet_endian(dev, ncs[j].peer, true); - if (r < 0) { - goto err_endian; - } - vhost_net_set_vq_index(get_vhost_net(ncs[j].peer), j * 2); + for (i = 0; i < total_queues; i++) { + vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2); } r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); if (r < 0) { error_report("Error 
binding guest notifier: %d", -r); - goto err_endian; + goto err; } for (i = 0; i < total_queues; i++) { @@ -338,10 +312,7 @@ err_start: fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e); fflush(stderr); } -err_endian: - while (--j >= 0) { - vhost_net_set_vnet_endian(dev, ncs[j].peer, false); - } +err: return r; } @@ -363,8 +334,6 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, fflush(stderr); } assert(r >= 0); - - assert(vhost_net_set_vnet_endian(dev, ncs[0].peer, false) >= 0); } void vhost_net_cleanup(struct vhost_net *net) From a122ab24725c0392d4f53105aff343e703119fbe Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 5 Feb 2016 11:45:40 +0100 Subject: [PATCH 03/13] virtio: move cross-endian helper to vhost If target is bi-endian (ppc64, arm), the virtio_legacy_is_cross_endian() indeed returns the runtime state of the virtio device. However, it returns false unconditionally in the general case. This sounds a bit strange given the name of the function. This helper is only useful for vhost actually, where indeed non bi-endian targets don't have to deal with cross-endian issues. This patch moves the helper to vhost.c and gives it a more appropriate name. Reviewed-by: Cornelia Huck Reviewed-by: Laurent Vivier Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Laurent Vivier --- hw/virtio/vhost.c | 17 +++++++++++++++-- include/hw/virtio/virtio-access.h | 13 ------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 7dff75547d..6fea07c8ae 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -749,6 +749,19 @@ static void vhost_log_stop(MemoryListener *listener, /* FIXME: implement */ } +static inline bool vhost_needs_vring_endian(VirtIODevice *vdev) +{ +#ifdef TARGET_IS_BIENDIAN +#ifdef HOST_WORDS_BIGENDIAN + return !virtio_is_big_endian(vdev); +#else + return virtio_is_big_endian(vdev); +#endif +#else + return false; +#endif +} + static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev, bool is_big_endian, int vhost_vq_index) @@ -800,7 +813,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, } if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && - virtio_legacy_is_cross_endian(vdev)) { + vhost_needs_vring_endian(vdev)) { r = vhost_virtqueue_set_vring_endian_legacy(dev, virtio_is_big_endian(vdev), vhost_vq_index); @@ -897,7 +910,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, * native as legacy devices expect so by default. 
*/ if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && - virtio_legacy_is_cross_endian(vdev)) { + vhost_needs_vring_endian(vdev)) { r = vhost_virtqueue_set_vring_endian_legacy(dev, !virtio_is_big_endian(vdev), vhost_vq_index); diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h index a01fff2e51..f1f12afe90 100644 --- a/include/hw/virtio/virtio-access.h +++ b/include/hw/virtio/virtio-access.h @@ -32,19 +32,6 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) #endif } -static inline bool virtio_legacy_is_cross_endian(VirtIODevice *vdev) -{ -#ifdef TARGET_IS_BIENDIAN -#ifdef HOST_WORDS_BIGENDIAN - return !virtio_is_big_endian(vdev); -#else - return virtio_is_big_endian(vdev); -#endif -#else - return false; -#endif -} - static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa) { if (virtio_access_is_big_endian(vdev)) { From e58481234ef9c132554cc529d9981ebd78fb6903 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 5 Feb 2016 11:45:49 +0100 Subject: [PATCH 04/13] vhost: move virtio 1.0 check to cross-endian helper Indeed vhost doesn't need to ask for vring endian fixing if the device is virtio 1.0, since it is already handled by the in-kernel vhost driver. This patch simply consolidates the logic into the existing helper. Reviewed-by: Cornelia Huck Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Laurent Vivier --- hw/virtio/vhost.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 6fea07c8ae..bb17177f5e 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -751,6 +751,9 @@ static void vhost_log_stop(MemoryListener *listener, static inline bool vhost_needs_vring_endian(VirtIODevice *vdev) { + if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { + return false; + } #ifdef TARGET_IS_BIENDIAN #ifdef HOST_WORDS_BIGENDIAN return !virtio_is_big_endian(vdev); @@ -812,8 +815,7 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, return -errno; } - if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && - vhost_needs_vring_endian(vdev)) { + if (vhost_needs_vring_endian(vdev)) { r = vhost_virtqueue_set_vring_endian_legacy(dev, virtio_is_big_endian(vdev), vhost_vq_index); @@ -909,8 +911,7 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev, /* In the cross-endian case, we need to reset the vring endianness to * native as legacy devices expect so by default. */ - if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) && - vhost_needs_vring_endian(vdev)) { + if (vhost_needs_vring_endian(vdev)) { r = vhost_virtqueue_set_vring_endian_legacy(dev, !virtio_is_big_endian(vdev), vhost_vq_index); From 46f70ff148ae01e2dc96e64c393e295a0092555d Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 5 Feb 2016 11:46:04 +0100 Subject: [PATCH 05/13] vhost: simplify vhost_needs_vring_endian() After the call to virtio_vdev_has_feature(), we only care for legacy devices, so we don't need the extra check in virtio_is_big_endian(). Also the device_endian field is always set (VIRTIO_DEVICE_ENDIAN_UNKNOWN may only happen on a virtio_load() path that cannot lead here), so we don't need the assert() either. This open codes the device_endian checking in vhost_needs_vring_endian(). 
It also adds a comment to explain the logic, as recent reviews showed the cross-endian tweaks aren't that obvious. Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Laurent Vivier Reviewed-by: Cornelia Huck --- hw/virtio/vhost.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index bb17177f5e..9f8ac38ccf 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -749,6 +749,11 @@ static void vhost_log_stop(MemoryListener *listener, /* FIXME: implement */ } +/* The vhost driver natively knows how to handle the vrings of non + * cross-endian legacy devices and modern devices. Only legacy devices + * exposed to a bi-endian guest may require the vhost driver to use a + * specific endianness. + */ static inline bool vhost_needs_vring_endian(VirtIODevice *vdev) { if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { @@ -756,9 +761,9 @@ static inline bool vhost_needs_vring_endian(VirtIODevice *vdev) } #ifdef TARGET_IS_BIENDIAN #ifdef HOST_WORDS_BIGENDIAN - return !virtio_is_big_endian(vdev); + return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_LITTLE; #else - return virtio_is_big_endian(vdev); + return vdev->device_endian == VIRTIO_DEVICE_ENDIAN_BIG; #endif #else return false; From e5157e313cd9c2d57f28873318d0bb29c77a9b1a Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 5 Feb 2016 11:46:18 +0100 Subject: [PATCH 06/13] virtio: optimize virtio_access_is_big_endian() for little-endian targets When adding cross-endian support, we introduced the TARGET_IS_BIENDIAN macro and the virtio_access_is_big_endian() helper to have a branchless fast path in the virtio memory accessors for targets that don't switch endian. This was considered as a strong requirement at the time. Now we have added a runtime check for virtio 1.0, which ruins the benefit of the virtio_access_is_big_endian() helper for always little-endian targets. 
With this patch, always little-endian targets stop checking for virtio 1.0, since the result is little-endian in all cases. Reviewed-by: Cornelia Huck Reviewed-by: Laurent Vivier Signed-off-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Laurent Vivier --- include/hw/virtio/virtio-access.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h index f1f12afe90..8dc84f5203 100644 --- a/include/hw/virtio/virtio-access.h +++ b/include/hw/virtio/virtio-access.h @@ -19,13 +19,13 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev) { +#if defined(TARGET_IS_BIENDIAN) + return virtio_is_big_endian(vdev); +#elif defined(TARGET_WORDS_BIGENDIAN) if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) { /* Devices conforming to VIRTIO 1.0 or later are always LE. */ return false; } -#if defined(TARGET_IS_BIENDIAN) - return virtio_is_big_endian(vdev); -#elif defined(TARGET_WORDS_BIGENDIAN) return true; #else return false; From e1e4bf225236a2aea99bedee5a5f7e764841616a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 13 Feb 2016 20:50:50 +0200 Subject: [PATCH 07/13] msix: fix msix_vector_masked commit 428c3ece97179557f2753071fb0ca97a03437267 ("fix MSI injection on Xen") inadvertently enabled the xen-specific logic unconditionally. Limit it to only when xen is enabled. Additionally, msix data should be read with pci_get_long since the format is pci little-endian. Reported-by: "Daniel P. Berrange" Cc: Stefano Stabellini Signed-off-by: Michael S. 
Tsirkin --- hw/pci/msix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/pci/msix.c b/hw/pci/msix.c index eb4ef113d1..537fdba747 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -80,10 +80,10 @@ static void msix_clr_pending(PCIDevice *dev, int vector) static bool msix_vector_masked(PCIDevice *dev, unsigned int vector, bool fmask) { unsigned offset = vector * PCI_MSIX_ENTRY_SIZE; - uint32_t *data = (uint32_t *)&dev->msix_table[offset + PCI_MSIX_ENTRY_DATA]; + uint8_t *data = &dev->msix_table[offset + PCI_MSIX_ENTRY_DATA]; /* MSIs on Xen can be remapped into pirqs. In those cases, masking * and unmasking go through the PV evtchn path. */ - if (xen_is_pirq_msi(*data)) { + if (xen_enabled() && xen_is_pirq_msi(pci_get_long(data))) { return false; } return fmask || dev->msix_table[offset + PCI_MSIX_ENTRY_VECTOR_CTRL] & From 4e082566a9cd17e5f00d0df85a67dc7a086d30b0 Mon Sep 17 00:00:00 2001 From: Victor Kaplansky Date: Sun, 14 Feb 2016 18:59:27 +0200 Subject: [PATCH 08/13] tests: add pxe e1000 and virtio-pci tests The test is based on bios-tables-test.c. It creates a file with the boot sector image and loads it into a guest using PXE and TFTP functionality. Cc: Jason Wang Signed-off-by: Victor Kaplansky Suggested-by: Michael S. Tsirkin Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/Makefile | 5 +- tests/bios-tables-test.c | 72 ++--------------------- tests/boot-sector.c | 119 +++++++++++++++++++++++++++++++++++++++ tests/boot-sector.h | 26 +++++++++ tests/pxe-test.c | 69 +++++++++++++++++++++++ 5 files changed, 224 insertions(+), 67 deletions(-) create mode 100644 tests/boot-sector.c create mode 100644 tests/boot-sector.h create mode 100644 tests/pxe-test.c diff --git a/tests/Makefile b/tests/Makefile index 650e654ec2..fc042f7309 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -173,6 +173,7 @@ check-qtest-i386-y += tests/hd-geo-test$(EXESUF) gcov-files-i386-y += hw/block/hd-geometry.c check-qtest-i386-y += tests/boot-order-test$(EXESUF) check-qtest-i386-y += tests/bios-tables-test$(EXESUF) +check-qtest-i386-y += tests/pxe-test$(EXESUF) check-qtest-i386-y += tests/rtc-test$(EXESUF) check-qtest-i386-y += tests/ipmi-kcs-test$(EXESUF) check-qtest-i386-y += tests/ipmi-bt-test$(EXESUF) @@ -518,7 +519,9 @@ tests/ipmi-kcs-test$(EXESUF): tests/ipmi-kcs-test.o tests/ipmi-bt-test$(EXESUF): tests/ipmi-bt-test.o tests/hd-geo-test$(EXESUF): tests/hd-geo-test.o tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) -tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o $(libqos-obj-y) +tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o \ + tests/boot-sector.o $(libqos-obj-y) +tests/pxe-test$(EXESUF): tests/pxe-test.o tests/boot-sector.o $(libqos-obj-y) tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/ds1338-test$(EXESUF): tests/ds1338-test.o $(libqos-imx-obj-y) tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index 75ec330ceb..3488058a18 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -20,6 +20,7 @@ #include "hw/acpi/acpi-defs.h" #include "hw/smbios/smbios.h" #include "qemu/bitmap.h" +#include "boot-sector.h" #define MACHINE_PC "pc" #define MACHINE_Q35 "q35" @@ -53,13 +54,6 @@ 
typedef struct { struct smbios_21_entry_point smbios_ep_table; } test_data; -#define LOW(x) ((x) & 0xff) -#define HIGH(x) ((x) >> 8) - -#define SIGNATURE 0xdead -#define SIGNATURE_OFFSET 0x10 -#define BOOT_SECTOR_ADDRESS 0x7c00 - #define ACPI_READ_FIELD(field, addr) \ do { \ switch (sizeof(field)) { \ @@ -119,35 +113,6 @@ typedef struct { g_assert_cmpstr(ACPI_ASSERT_CMP_str, ==, expected); \ } while (0) -/* Boot sector code: write SIGNATURE into memory, - * then halt. - * Q35 machine requires a minimum 0x7e000 bytes disk. - * (bug or feature?) - */ -static uint8_t boot_sector[0x7e000] = { - /* 7c00: mov $0xdead,%ax */ - [0x00] = 0xb8, - [0x01] = LOW(SIGNATURE), - [0x02] = HIGH(SIGNATURE), - /* 7c03: mov %ax,0x7c10 */ - [0x03] = 0xa3, - [0x04] = LOW(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET), - [0x05] = HIGH(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET), - /* 7c06: cli */ - [0x06] = 0xfa, - /* 7c07: hlt */ - [0x07] = 0xf4, - /* 7c08: jmp 0x7c07=0x7c0a-3 */ - [0x08] = 0xeb, - [0x09] = LOW(-3), - /* We mov 0xdead here: set value to make debugging easier */ - [SIGNATURE_OFFSET] = LOW(0xface), - [SIGNATURE_OFFSET + 1] = HIGH(0xface), - /* End of boot sector marker */ - [0x1FE] = 0x55, - [0x1FF] = 0xAA, -}; - static const char *disk = "tests/acpi-test-disk.raw"; static const char *data_dir = "tests/acpi-test-data"; #ifdef CONFIG_IASL @@ -739,10 +704,6 @@ static void test_smbios_structs(test_data *data) static void test_acpi_one(const char *params, test_data *data) { char *args; - uint8_t signature_low; - uint8_t signature_high; - uint16_t signature; - int i; args = g_strdup_printf("-net none -display none %s " "-drive id=hd0,if=none,file=%s,format=raw " @@ -751,24 +712,7 @@ static void test_acpi_one(const char *params, test_data *data) qtest_start(args); - /* Wait at most 1 minute */ -#define TEST_DELAY (1 * G_USEC_PER_SEC / 10) -#define TEST_CYCLES MAX((60 * G_USEC_PER_SEC / TEST_DELAY), 1) - - /* Poll until code has run and modified memory. 
Once it has we know BIOS - * initialization is done. TODO: check that IP reached the halt - * instruction. - */ - for (i = 0; i < TEST_CYCLES; ++i) { - signature_low = readb(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET); - signature_high = readb(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET + 1); - signature = (signature_high << 8) | signature_low; - if (signature == SIGNATURE) { - break; - } - g_usleep(TEST_DELAY); - } - g_assert_cmphex(signature, ==, SIGNATURE); + boot_sector_test(); test_acpi_rsdp_address(data); test_acpi_rsdp_table(data); @@ -842,15 +786,11 @@ static void test_acpi_q35_tcg_bridge(void) int main(int argc, char *argv[]) { const char *arch = qtest_get_arch(); - FILE *f = fopen(disk, "w"); int ret; - if (!f) { - fprintf(stderr, "Couldn't open \"%s\": %s", disk, strerror(errno)); - return 1; - } - fwrite(boot_sector, 1, sizeof boot_sector, f); - fclose(f); + ret = boot_sector_init(disk); + if(ret) + return ret; g_test_init(&argc, &argv, NULL); @@ -861,6 +801,6 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/tcg/bridge", test_acpi_q35_tcg_bridge); } ret = g_test_run(); - unlink(disk); + boot_sector_cleanup(disk); return ret; } diff --git a/tests/boot-sector.c b/tests/boot-sector.c new file mode 100644 index 0000000000..0b48bb34bf --- /dev/null +++ b/tests/boot-sector.c @@ -0,0 +1,119 @@ +/* + * QEMU boot sector testing helpers. + * + * Copyright (c) 2016 Red Hat Inc. + * + * Authors: + * Michael S. Tsirkin + * Victor Kaplansky + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ +#include "boot-sector.h" +#include +#include +#include "qemu-common.h" +#include "libqtest.h" + +#define LOW(x) ((x) & 0xff) +#define HIGH(x) ((x) >> 8) + +#define SIGNATURE 0xdead +#define SIGNATURE_OFFSET 0x10 +#define BOOT_SECTOR_ADDRESS 0x7c00 + +/* Boot sector code: write SIGNATURE into memory, + * then halt. + * Q35 machine requires a minimum 0x7e000 bytes disk. + * (bug or feature?) 
+ */ +static uint8_t boot_sector[0x7e000] = { + /* The first sector will be placed at RAM address 00007C00, and + * the BIOS transfers control to 00007C00 + */ + + /* Data Segment register should be initialized, since pxe + * boot loader can leave it dirty. + */ + + /* 7c00: mov $0000,%ax */ + [0x00] = 0xb8, + [0x01] = 0x00, + [0x02] = 0x00, + /* 7c03: mov %ax,%ds */ + [0x03] = 0x8e, + [0x04] = 0xd8, + + /* 7c05: mov $0xdead,%ax */ + [0x05] = 0xb8, + [0x06] = LOW(SIGNATURE), + [0x07] = HIGH(SIGNATURE), + /* 7c08: mov %ax,0x7c10 */ + [0x08] = 0xa3, + [0x09] = LOW(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET), + [0x0a] = HIGH(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET), + + /* 7c0b: cli */ + [0x0b] = 0xfa, + /* 7c0c: hlt */ + [0x0c] = 0xf4, + /* 7c0d: jmp 0x7c0c=0x7c0f-3 */ + [0x0d] = 0xeb, + [0x0e] = LOW(-3), + /* We mov 0xdead here: set value to make debugging easier */ + [SIGNATURE_OFFSET] = LOW(0xface), + [SIGNATURE_OFFSET + 1] = HIGH(0xface), + /* End of boot sector marker */ + [0x1FE] = 0x55, + [0x1FF] = 0xAA, +}; + +/* Create boot disk file. */ +int boot_sector_init(const char *fname) +{ + FILE *f = fopen(fname, "w"); + + if (!f) { + fprintf(stderr, "Couldn't open \"%s\": %s", fname, strerror(errno)); + return 1; + } + fwrite(boot_sector, 1, sizeof boot_sector, f); + fclose(f); + return 0; +} + +/* Loop until signature in memory is OK. */ +void boot_sector_test(void) +{ + uint8_t signature_low; + uint8_t signature_high; + uint16_t signature; + int i; + + /* Wait at most 1 minute */ +#define TEST_DELAY (1 * G_USEC_PER_SEC / 10) +#define TEST_CYCLES MAX((60 * G_USEC_PER_SEC / TEST_DELAY), 1) + + /* Poll until code has run and modified memory. Once it has we know BIOS + * initialization is done. TODO: check that IP reached the halt + * instruction. 
+ */ + for (i = 0; i < TEST_CYCLES; ++i) { + signature_low = readb(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET); + signature_high = readb(BOOT_SECTOR_ADDRESS + SIGNATURE_OFFSET + 1); + signature = (signature_high << 8) | signature_low; + if (signature == SIGNATURE) { + break; + } + g_usleep(TEST_DELAY); + } + + g_assert_cmphex(signature, ==, SIGNATURE); +} + +/* unlink boot disk file. */ +void boot_sector_cleanup(const char *fname) +{ + unlink(fname); +} diff --git a/tests/boot-sector.h b/tests/boot-sector.h new file mode 100644 index 0000000000..38be0290e3 --- /dev/null +++ b/tests/boot-sector.h @@ -0,0 +1,26 @@ +/* + * QEMU boot sector testing helpers. + * + * Copyright (c) 2016 Red Hat Inc. + * + * Authors: + * Michael S. Tsirkin + * Victor Kaplansky + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef TEST_BOOT_SECTOR +#define TEST_BOOT_SECTOR + +/* Create boot disk file. */ +int boot_sector_init(const char *fname); + +/* Loop until signature in memory is OK. */ +void boot_sector_test(void); + +/* unlink boot disk file. */ +void boot_sector_cleanup(const char *fname); + +#endif /* TEST_BOOT_SECTOR */ diff --git a/tests/pxe-test.c b/tests/pxe-test.c new file mode 100644 index 0000000000..fa430958ea --- /dev/null +++ b/tests/pxe-test.c @@ -0,0 +1,69 @@ +/* + * PXE test cases. + * + * Copyright (c) 2016 Red Hat Inc. + * + * Authors: + * Michael S. Tsirkin , + * Victor Kaplansky + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include +#include +#include +#include +#include "qemu-common.h" +#include "libqtest.h" +#include "boot-sector.h" + +#define NETNAME "net0" + +static const char *disk = "tests/pxe-test-disk.raw"; + +static void test_pxe_one(const char *params) +{ + char *args; + + args = g_strdup_printf("-machine accel=tcg " + "-netdev user,id=" NETNAME ",tftp=./,bootfile=%s " + "%s ", + disk, params); + + qtest_start(args); + boot_sector_test(); + qtest_quit(global_qtest); + g_free(args); +} + +static void test_pxe_e1000(void) +{ + test_pxe_one("-device e1000,netdev=" NETNAME); +} + +static void test_pxe_virtio_pci(void) +{ + test_pxe_one("-device virtio-net-pci,netdev=" NETNAME); +} + +int main(int argc, char *argv[]) +{ + int ret; + const char *arch = qtest_get_arch(); + + ret = boot_sector_init(disk); + if(ret) + return ret; + + g_test_init(&argc, &argv, NULL); + + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { + qtest_add_func("pxe/e1000", test_pxe_e1000); + qtest_add_func("pxe/virtio", test_pxe_virtio_pci); + } + ret = g_test_run(); + boot_sector_cleanup(disk); + return ret; +} From 33c28f3bde02e6dd69da58f9a8b7e4a50c616849 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Fri, 15 Jan 2016 10:23:31 +0800 Subject: [PATCH 09/13] dec: convert to realize() Also because pci_bridge_initfn() can`t fail. Signed-off-by: Cao jin Reviewed-by: Markus Armbruster Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci-bridge/dec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/pci-bridge/dec.c b/hw/pci-bridge/dec.c index 40ff0d0632..840c96198a 100644 --- a/hw/pci-bridge/dec.c +++ b/hw/pci-bridge/dec.c @@ -52,9 +52,9 @@ static int dec_map_irq(PCIDevice *pci_dev, int irq_num) return irq_num; } -static int dec_pci_bridge_initfn(PCIDevice *pci_dev) +static void dec_pci_bridge_realize(PCIDevice *pci_dev, Error **errp) { - return pci_bridge_initfn(pci_dev, TYPE_PCI_BUS); + pci_bridge_initfn(pci_dev, TYPE_PCI_BUS); } static void dec_21154_pci_bridge_class_init(ObjectClass *klass, void *data) @@ -62,7 +62,7 @@ static void dec_21154_pci_bridge_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->init = dec_pci_bridge_initfn; + k->realize = dec_pci_bridge_realize; k->exit = pci_bridge_exitfn; k->vendor_id = PCI_VENDOR_ID_DEC; k->device_id = PCI_DEVICE_ID_DEC_21154; From 9cfaa0079f5053683c6a632070244c35fa319549 Mon Sep 17 00:00:00 2001 From: Cao jin Date: Fri, 15 Jan 2016 10:23:32 +0800 Subject: [PATCH 10/13] change type of pci_bridge_initfn() to void Since it can`t fail. Also modify the callers. Signed-off-by: Cao jin Reviewed-by: Markus Armbruster Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Marcel Apfelbaum --- hw/pci-bridge/i82801b11.c | 5 +---- hw/pci-bridge/ioh3420.c | 6 +----- hw/pci-bridge/pci_bridge_dev.c | 8 +++----- hw/pci-bridge/xio3130_downstream.c | 6 +----- hw/pci-bridge/xio3130_upstream.c | 6 +----- hw/pci-host/apb.c | 7 +------ hw/pci/pci_bridge.c | 3 +-- include/hw/pci/pci_bridge.h | 2 +- 8 files changed, 10 insertions(+), 33 deletions(-) diff --git a/hw/pci-bridge/i82801b11.c b/hw/pci-bridge/i82801b11.c index b143f8cb79..5c40708ba8 100644 --- a/hw/pci-bridge/i82801b11.c +++ b/hw/pci-bridge/i82801b11.c @@ -62,10 +62,7 @@ static int i82801b11_bridge_initfn(PCIDevice *d) { int rc; - rc = pci_bridge_initfn(d, TYPE_PCI_BUS); - if (rc < 0) { - return rc; - } + pci_bridge_initfn(d, TYPE_PCI_BUS); rc = pci_bridge_ssvid_init(d, I82801ba_SSVID_OFFSET, I82801ba_SSVID_SVID, I82801ba_SSVID_SSID); diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c index 8ac4240bb1..9e048ebe35 100644 --- a/hw/pci-bridge/ioh3420.c +++ b/hw/pci-bridge/ioh3420.c @@ -98,11 +98,7 @@ static int ioh3420_initfn(PCIDevice *d) PCIESlot *s = PCIE_SLOT(d); int rc; - rc = pci_bridge_initfn(d, TYPE_PCIE_BUS); - if (rc < 0) { - return rc; - } - + pci_bridge_initfn(d, TYPE_PCIE_BUS); pcie_port_init_reg(d); rc = pci_bridge_ssvid_init(d, IOH_EP_SSVID_OFFSET, diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c index c9a7e2b2a3..100bb5ebf6 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -53,10 +53,8 @@ static int pci_bridge_dev_initfn(PCIDevice *dev) PCIBridgeDev *bridge_dev = PCI_BRIDGE_DEV(dev); int err; - err = pci_bridge_initfn(dev, TYPE_PCI_BUS); - if (err) { - goto bridge_error; - } + pci_bridge_initfn(dev, TYPE_PCI_BUS); + if (bridge_dev->flags & (1 << PCI_BRIDGE_DEV_F_SHPC_REQ)) { dev->config[PCI_INTERRUPT_PIN] = 0x1; memory_region_init(&bridge_dev->bar, OBJECT(dev), "shpc-bar", @@ -95,7 +93,7 @@ slotid_error: } shpc_error: pci_bridge_exitfn(dev); -bridge_error: + return err; } diff 
--git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c index 9eb3d8895f..c32f2712c8 100644 --- a/hw/pci-bridge/xio3130_downstream.c +++ b/hw/pci-bridge/xio3130_downstream.c @@ -61,11 +61,7 @@ static int xio3130_downstream_initfn(PCIDevice *d) PCIESlot *s = PCIE_SLOT(d); int rc; - rc = pci_bridge_initfn(d, TYPE_PCIE_BUS); - if (rc < 0) { - return rc; - } - + pci_bridge_initfn(d, TYPE_PCIE_BUS); pcie_port_init_reg(d); rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR, diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c index 7d255a6d52..19798c09a8 100644 --- a/hw/pci-bridge/xio3130_upstream.c +++ b/hw/pci-bridge/xio3130_upstream.c @@ -57,11 +57,7 @@ static int xio3130_upstream_initfn(PCIDevice *d) PCIEPort *p = PCIE_PORT(d); int rc; - rc = pci_bridge_initfn(d, TYPE_PCIE_BUS); - if (rc < 0) { - return rc; - } - + pci_bridge_initfn(d, TYPE_PCIE_BUS); pcie_port_init_reg(d); rc = msi_init(d, XIO3130_MSI_OFFSET, XIO3130_MSI_NR_VECTOR, diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c index 75dee50bc3..aaef7bb3a1 100644 --- a/hw/pci-host/apb.c +++ b/hw/pci-host/apb.c @@ -635,12 +635,7 @@ static void pci_apb_set_irq(void *opaque, int irq_num, int level) static int apb_pci_bridge_initfn(PCIDevice *dev) { - int rc; - - rc = pci_bridge_initfn(dev, TYPE_PCI_BUS); - if (rc < 0) { - return rc; - } + pci_bridge_initfn(dev, TYPE_PCI_BUS); /* * command register: diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index 7eab9d57c5..3cf30bd334 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -333,7 +333,7 @@ void pci_bridge_reset(DeviceState *qdev) } /* default qdev initialization function for PCI-to-PCI bridge */ -int pci_bridge_initfn(PCIDevice *dev, const char *typename) +void pci_bridge_initfn(PCIDevice *dev, const char *typename) { PCIBus *parent = dev->bus; PCIBridge *br = PCI_BRIDGE(dev); @@ -379,7 +379,6 @@ int pci_bridge_initfn(PCIDevice *dev, const char *typename) br->windows = 
pci_bridge_region_init(br); QLIST_INIT(&sec_bus->child); QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling); - return 0; } /* default qdev clean up function for PCI-to-PCI bridge */ diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 93b621cef3..ed4aff6cd2 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -48,7 +48,7 @@ void pci_bridge_disable_base_limit(PCIDevice *dev); void pci_bridge_reset_reg(PCIDevice *dev); void pci_bridge_reset(DeviceState *qdev); -int pci_bridge_initfn(PCIDevice *pci_dev, const char *typename); +void pci_bridge_initfn(PCIDevice *pci_dev, const char *typename); void pci_bridge_exitfn(PCIDevice *pci_dev); From cefa2bbd6ad516a8cde9425a35597487d9a4becb Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 17 Feb 2016 16:59:36 +0200 Subject: [PATCH 11/13] rules: filter out irrelevant files It's often handy to make executables depend on each other, e.g. make a test depend on a helper. This doesn't work now, as linker will attempt to use the helper as an object. To fix, filter only relevant file types before linking an executable. Signed-off-by: Michael S. Tsirkin --- rules.mak | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rules.mak b/rules.mak index fc5ee41e47..d1ff311254 100644 --- a/rules.mak +++ b/rules.mak @@ -102,7 +102,7 @@ LD_REL := $(CC) -nostdlib -Wl,-r modules: %$(EXESUF): %.o - $(call LINK,$^) + $(call LINK,$(filter %.o %.a %.mo, $^)) %.a: $(call quiet-command,rm -f $@ && $(AR) rcs $@ $^," AR $(TARGET_DIR)$@") From 5669655aafdb88a8797c74a989dd0c0ebb1349fa Mon Sep 17 00:00:00 2001 From: Victor Kaplansky Date: Thu, 18 Feb 2016 16:12:23 +0200 Subject: [PATCH 12/13] vhost-user interrupt management fixes Since guest_notifier_mask cannot be used in vhost-user mode due to the buffering implied by the unix control socket, clear use_guest_notifier_mask on virtio devices of vhost-user interfaces, and send the correct callfd to the guest at vhost start.
Using the guest_notifier_mask function in the vhost-user case may break the interrupt mask paradigm, because mask/unmask is not really done when returning from the guest_notifier_mask call; instead a message is posted in a unix socket and processed later. Add an optional boolean flag 'use_guest_notifier_mask' to disable the use of guest_notifier_mask in virtio pci. Signed-off-by: Didier Pallard Signed-off-by: Victor Kaplansky Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/vhost_net.c | 15 +++++++++++++-- hw/virtio/vhost.c | 9 +++++++++ hw/virtio/virtio-pci.c | 14 ++++++++------ hw/virtio/virtio.c | 1 + include/hw/virtio/virtio.h | 1 + 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c index b2428324b3..6e1032fc18 100644 --- a/hw/net/vhost_net.c +++ b/hw/net/vhost_net.c @@ -284,8 +284,19 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, } for (i = 0; i < total_queues; i++) { - vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2); - } + struct vhost_net *net; + + net = get_vhost_net(ncs[i].peer); + vhost_net_set_vq_index(net, i * 2); + + /* Suppress the masking guest notifiers on vhost user + * because vhost user doesn't interrupt masking/unmasking + * properly. + */ + if (net->nc->info->type == NET_CLIENT_OPTIONS_KIND_VHOST_USER) { + dev->use_guest_notifier_mask = false; + } + } r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true); if (r < 0) { diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 9f8ac38ccf..72d0c9e9ae 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -875,6 +875,14 @@ static int vhost_virtqueue_start(struct vhost_dev *dev, /* Clear and discard previous events if any. */ event_notifier_test_and_clear(&vq->masked_notifier); + /* Init vring in unmasked state, unless guest_notifier_mask + * will do it later. + */ + if (!vdev->use_guest_notifier_mask) { + /* TODO: check and handle errors.
*/ + vhost_virtqueue_mask(dev, vdev, idx, false); + } + return 0; fail_kick: @@ -1167,6 +1175,7 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, struct vhost_vring_file file; if (mask) { + assert(vdev->use_guest_notifier_mask); file.fd = event_notifier_get_fd(&hdev->vqs[index].masked_notifier); } else { file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq)); diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 5494ff4a49..440776c06c 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -806,7 +806,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) /* If guest supports masking, set up irqfd now. * Otherwise, delay until unmasked in the frontend. */ - if (k->guest_notifier_mask) { + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); if (ret < 0) { kvm_virtio_pci_vq_vector_release(proxy, vector); @@ -822,7 +822,7 @@ undo: if (vector >= msix_nr_vectors_allocated(dev)) { continue; } - if (k->guest_notifier_mask) { + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); } kvm_virtio_pci_vq_vector_release(proxy, vector); @@ -849,7 +849,7 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) /* If guest supports masking, clean up irqfd now. * Otherwise, it was cleaned when masked in the frontend. */ - if (k->guest_notifier_mask) { + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); } kvm_virtio_pci_vq_vector_release(proxy, vector); @@ -882,7 +882,7 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, /* If guest supports masking, irqfd is already setup, unmask it. * Otherwise, set it up now. 
*/ - if (k->guest_notifier_mask) { + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { k->guest_notifier_mask(vdev, queue_no, false); /* Test after unmasking to avoid losing events. */ if (k->guest_notifier_pending && @@ -905,7 +905,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, /* If guest supports masking, keep irqfd but mask it. * Otherwise, clean it up now. */ - if (k->guest_notifier_mask) { + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { k->guest_notifier_mask(vdev, queue_no, true); } else { kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); @@ -1022,7 +1022,9 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, event_notifier_cleanup(notifier); } - if (!msix_enabled(&proxy->pci_dev) && vdc->guest_notifier_mask) { + if (!msix_enabled(&proxy->pci_dev) && + vdev->use_guest_notifier_mask && + vdc->guest_notifier_mask) { vdc->guest_notifier_mask(vdev, n, !assign); } diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 90f25451d0..e365960bd7 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -1677,6 +1677,7 @@ void virtio_init(VirtIODevice *vdev, const char *name, vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change, vdev); vdev->device_endian = virtio_default_endian(); + vdev->use_guest_notifier_mask = true; } hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 108cdb0f48..c38a2fef04 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -90,6 +90,7 @@ struct VirtIODevice VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian; + bool use_guest_notifier_mask; QLIST_HEAD(, VirtQueue) *vector_queues; }; From a28c393cc261afeb4863b05d7c33c2a5fc55ef38 Mon Sep 17 00:00:00 2001 From: Victor Kaplansky Date: Thu, 18 Feb 2016 16:45:05 +0200 Subject: [PATCH 13/13] tests/vhost-user-bridge: add scattering of incoming packets This patch adds to the vubr 
test the scattering of incoming packets to the chain of RX buffer. Also, this patch corrects the size of the header preceding the packet in RX buffers. Note that this patch doesn't add the support for mergeable buffers. Signed-off-by: Victor Kaplansky Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/vhost-user-bridge.c | 90 ++++++++++++++++++++++++++------------- 1 file changed, 60 insertions(+), 30 deletions(-) diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c index 9fb09f1df4..f5030b247b 100644 --- a/tests/vhost-user-bridge.c +++ b/tests/vhost-user-bridge.c @@ -47,6 +47,7 @@ #include #include #include +#include #include @@ -186,6 +187,8 @@ typedef struct VubrVirtq { #define VHOST_MEMORY_MAX_NREGIONS 8 #define VHOST_USER_F_PROTOCOL_FEATURES 30 +/* v1.0 compliant. */ +#define VIRTIO_F_VERSION_1 32 #define VHOST_LOG_PAGE 4096 @@ -294,6 +297,7 @@ typedef struct VubrDev { struct sockaddr_in backend_udp_dest; int ready; uint64_t features; + int hdrlen; } VubrDev; static const char *vubr_request_str[] = { @@ -484,7 +488,8 @@ vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen) static void vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len) { - int hdrlen = sizeof(struct virtio_net_hdr_v1); + int hdrlen = dev->hdrlen; + DPRINT(" hdrlen = %d\n", dev->hdrlen); if (VHOST_USER_BRIDGE_DEBUG) { print_buffer(buf, len); @@ -546,6 +551,7 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) struct vring_avail *avail = vq->avail; struct vring_used *used = vq->used; uint64_t log_guest_addr = vq->log_guest_addr; + int32_t remaining_len = len; unsigned int size = vq->size; @@ -560,36 +566,49 @@ vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len) uint16_t d_index = avail->ring[a_index]; int i = d_index; + uint32_t written_len = 0; - DPRINT("Post packet to guest on vq:\n"); - DPRINT(" size = %d\n", vq->size); - DPRINT(" last_avail_index = %d\n", vq->last_avail_index); - 
DPRINT(" last_used_index = %d\n", vq->last_used_index); - DPRINT(" a_index = %d\n", a_index); - DPRINT(" u_index = %d\n", u_index); - DPRINT(" d_index = %d\n", d_index); - DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr); - DPRINT(" desc[%d].len = %d\n", i, desc[i].len); - DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags); - DPRINT(" avail->idx = %d\n", avail_index); - DPRINT(" used->idx = %d\n", used->idx); + do { + DPRINT("Post packet to guest on vq:\n"); + DPRINT(" size = %d\n", vq->size); + DPRINT(" last_avail_index = %d\n", vq->last_avail_index); + DPRINT(" last_used_index = %d\n", vq->last_used_index); + DPRINT(" a_index = %d\n", a_index); + DPRINT(" u_index = %d\n", u_index); + DPRINT(" d_index = %d\n", d_index); + DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr); + DPRINT(" desc[%d].len = %d\n", i, desc[i].len); + DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags); + DPRINT(" avail->idx = %d\n", avail_index); + DPRINT(" used->idx = %d\n", used->idx); - if (!(desc[i].flags & VRING_DESC_F_WRITE)) { - /* FIXME: we should find writable descriptor. */ - fprintf(stderr, "Error: descriptor is not writable. Exiting.\n"); - exit(1); - } + if (!(desc[i].flags & VRING_DESC_F_WRITE)) { + /* FIXME: we should find writable descriptor. */ + fprintf(stderr, "Error: descriptor is not writable. Exiting.\n"); + exit(1); + } - void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); - uint32_t chunk_len = desc[i].len; + void *chunk_start = (void *)gpa_to_va(dev, desc[i].addr); + uint32_t chunk_len = desc[i].len; + uint32_t chunk_write_len = MIN(remaining_len, chunk_len); - if (len <= chunk_len) { - memcpy(chunk_start, buf, len); - vubr_log_write(dev, desc[i].addr, len); - } else { - fprintf(stderr, - "Received too long packet from the backend. 
Dropping...\n"); - return; + memcpy(chunk_start, buf + written_len, chunk_write_len); + vubr_log_write(dev, desc[i].addr, chunk_write_len); + remaining_len -= chunk_write_len; + written_len += chunk_write_len; + + if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) { + break; + } + + i = desc[i].next; + } while (1); + + if (remaining_len > 0) { + fprintf(stderr, + "Too long packet for RX, remaining_len = %d, Dropping...\n", + remaining_len); + return; } /* Add descriptor to the used ring. */ @@ -697,7 +716,7 @@ vubr_backend_recv_cb(int sock, void *ctx) VubrVirtq *rx_vq = &dev->vq[0]; uint8_t buf[4096]; struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf; - int hdrlen = sizeof(struct virtio_net_hdr_v1); + int hdrlen = dev->hdrlen; int buflen = sizeof(buf); int len; @@ -706,6 +725,7 @@ vubr_backend_recv_cb(int sock, void *ctx) } DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n"); + DPRINT(" hdrlen = %d\n", hdrlen); uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx); @@ -717,10 +737,12 @@ vubr_backend_recv_cb(int sock, void *ctx) return; } + memset(buf, 0, hdrlen); + /* TODO: support mergeable buffers. */ + if (hdrlen == 12) + hdr->num_buffers = 1; len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen); - *hdr = (struct virtio_net_hdr_v1) { }; - hdr->num_buffers = 1; vubr_post_buffer(dev, rx_vq, buf, len + hdrlen); } @@ -768,7 +790,15 @@ static int vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg) { DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64); + dev->features = vmsg->payload.u64; + if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) || + (dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) { + dev->hdrlen = 12; + } else { + dev->hdrlen = 10; + } + return 0; }