From 6a084ea39aec84d352dbd3de0f523daaaaac8c7d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 15 Jun 2015 16:20:21 +0200
Subject: [PATCH 01/11] vhost-user: part of virtio

vhost user is related to virtio, add it to the relevant entry.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 0463696dd3..7a13d68a36 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -694,6 +694,7 @@ virtio
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
 F: hw/*/virtio*
+F: net/vhost-user.c
 
 virtio-9p
 M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>

From 332f64073bddc9240cd572f64682a44572b67049 Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2015 15:23:29 +0200
Subject: [PATCH 02/11] linux-headers: sync vhost.h

This patch brings the cross-endian vhost API to QEMU.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 linux-headers/linux/vhost.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
index c656f61cfc..ead86db91a 100644
--- a/linux-headers/linux/vhost.h
+++ b/linux-headers/linux/vhost.h
@@ -103,6 +103,20 @@ struct vhost_memory {
 /* Get accessor: reads index, writes value in num */
 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
 
+/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN
+ * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL).
+ * The byte order cannot be changed while the device is active: trying to do so
+ * returns -EBUSY.
+ * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is
+ * set.
+ * Not all kernel configurations support this ioctl, but all configurations that
+ * support SET also support GET.
+ */
+#define VHOST_VRING_LITTLE_ENDIAN 0
+#define VHOST_VRING_BIG_ENDIAN 1
+#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
+#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+
 /* The following ioctls use eventfd file descriptors to signal and poll
  * for events. */
 

From 41d283bdab08868a244b9c19dce507fdf15a8990 Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2015 15:23:34 +0200
Subject: [PATCH 03/11] virtio: introduce virtio_legacy_is_cross_endian()

This helper will be used by vhost and tap to detect cross-endianness in
the legacy virtio case.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/hw/virtio/virtio-access.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/hw/virtio/virtio-access.h b/include/hw/virtio/virtio-access.h
index ee28c21fef..cee5dd70c9 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -32,6 +32,19 @@ static inline bool virtio_access_is_big_endian(VirtIODevice *vdev)
 #endif
 }
 
+static inline bool virtio_legacy_is_cross_endian(VirtIODevice *vdev)
+{
+#ifdef TARGET_IS_BIENDIAN
+#ifdef HOST_WORDS_BIGENDIAN
+    return !virtio_is_big_endian(vdev);
+#else
+    return virtio_is_big_endian(vdev);
+#endif
+#else
+    return false;
+#endif
+}
+
 static inline uint16_t virtio_lduw_phys(VirtIODevice *vdev, hwaddr pa)
 {
     if (virtio_access_is_big_endian(vdev)) {

From 04b7a1523d65bb5c78832098cf3108a1aadcaf8a Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2015 15:23:39 +0200
Subject: [PATCH 04/11] vhost: set vring endianness for legacy virtio

Legacy virtio is native endian: if the guest and host endianness differ,
we have to tell vhost so it can swap bytes where appropriate. This is
done through a vhost ring ioctl.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/virtio/vhost.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 7908255503..7ea45b391c 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -17,9 +17,11 @@
 #include "hw/hw.h"
 #include "qemu/atomic.h"
 #include "qemu/range.h"
+#include "qemu/error-report.h"
 #include <linux/vhost.h>
 #include "exec/address-spaces.h"
 #include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
 #include "migration/migration.h"
 
 static struct vhost_log *vhost_log;
@@ -686,6 +688,27 @@ static void vhost_log_stop(MemoryListener *listener,
     /* FIXME: implement */
 }
 
+static int vhost_virtqueue_set_vring_endian_legacy(struct vhost_dev *dev,
+                                                   bool is_big_endian,
+                                                   int vhost_vq_index)
+{
+    struct vhost_vring_state s = {
+        .index = vhost_vq_index,
+        .num = is_big_endian
+    };
+
+    if (!dev->vhost_ops->vhost_call(dev, VHOST_SET_VRING_ENDIAN, &s)) {
+        return 0;
+    }
+
+    if (errno == ENOTTY) {
+        error_report("vhost does not support cross-endian");
+        return -ENOSYS;
+    }
+
+    return -errno;
+}
+
 static int vhost_virtqueue_start(struct vhost_dev *dev,
                                 struct VirtIODevice *vdev,
                                 struct vhost_virtqueue *vq,
@@ -716,6 +739,16 @@ static int vhost_virtqueue_start(struct vhost_dev *dev,
         return -errno;
     }
 
+    if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+        virtio_legacy_is_cross_endian(vdev)) {
+        r = vhost_virtqueue_set_vring_endian_legacy(dev,
+                                                    virtio_is_big_endian(vdev),
+                                                    vhost_vq_index);
+        if (r) {
+            return -errno;
+        }
+    }
+
     s = l = virtio_queue_get_desc_size(vdev, idx);
     a = virtio_queue_get_desc_addr(vdev, idx);
     vq->desc = cpu_physical_memory_map(a, &l, 0);
@@ -786,8 +819,9 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
                                     struct vhost_virtqueue *vq,
                                     unsigned idx)
 {
+    int vhost_vq_index = idx - dev->vq_index;
     struct vhost_vring_state state = {
-        .index = idx - dev->vq_index
+        .index = vhost_vq_index,
     };
     int r;
     assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
@@ -798,6 +832,20 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
     }
     virtio_queue_set_last_avail_idx(vdev, idx, state.num);
     virtio_queue_invalidate_signalled_used(vdev, idx);
+
+    /* In the cross-endian case, we need to reset the vring endianness to
+     * native as legacy devices expect so by default.
+     */
+    if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1) &&
+        virtio_legacy_is_cross_endian(vdev)) {
+        r = vhost_virtqueue_set_vring_endian_legacy(dev,
+                                                    !virtio_is_big_endian(vdev),
+                                                    vhost_vq_index);
+        if (r < 0) {
+            error_report("failed to reset vring endianness");
+        }
+    }
+
     assert (r >= 0);
     cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
                               0, virtio_queue_get_ring_size(vdev, idx));

From c80cd6bb9c20ef518c56319ce44d2971171e677d Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2015 15:23:44 +0200
Subject: [PATCH 05/11] tap: add VNET_LE/VNET_BE operations

The linux tap and macvtap backends can be told to parse vnet headers
according to little or big endian. This is done through the TUNSETVNETLE
and TUNSETVNETBE ioctls.

This patch brings all the plumbing for QEMU to use these APIs.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/net/net.h |  6 ++++++
 net/net.c         | 18 ++++++++++++++++++
 net/tap-linux.c   | 34 ++++++++++++++++++++++++++++++++++
 net/tap-linux.h   |  2 ++
 net/tap.c         | 16 ++++++++++++++++
 net/tap_int.h     |  2 ++
 6 files changed, 78 insertions(+)

diff --git a/include/net/net.h b/include/net/net.h
index e66ca03bf3..4306252b97 100644
--- a/include/net/net.h
+++ b/include/net/net.h
@@ -55,6 +55,8 @@ typedef bool (HasVnetHdrLen)(NetClientState *, int);
 typedef void (UsingVnetHdr)(NetClientState *, bool);
 typedef void (SetOffload)(NetClientState *, int, int, int, int, int);
 typedef void (SetVnetHdrLen)(NetClientState *, int);
+typedef int (SetVnetLE)(NetClientState *, bool);
+typedef int (SetVnetBE)(NetClientState *, bool);
 
 typedef struct NetClientInfo {
     NetClientOptionsKind type;
@@ -73,6 +75,8 @@ typedef struct NetClientInfo {
     UsingVnetHdr *using_vnet_hdr;
     SetOffload *set_offload;
     SetVnetHdrLen *set_vnet_hdr_len;
+    SetVnetLE *set_vnet_le;
+    SetVnetBE *set_vnet_be;
 } NetClientInfo;
 
 struct NetClientState {
@@ -139,6 +143,8 @@ void qemu_using_vnet_hdr(NetClientState *nc, bool enable);
 void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6,
                       int ecn, int ufo);
 void qemu_set_vnet_hdr_len(NetClientState *nc, int len);
+int qemu_set_vnet_le(NetClientState *nc, bool is_le);
+int qemu_set_vnet_be(NetClientState *nc, bool is_be);
 void qemu_macaddr_default_if_unset(MACAddr *macaddr);
 int qemu_show_nic_models(const char *arg, const char *const *models);
 void qemu_check_nic_model(NICInfo *nd, const char *model);
diff --git a/net/net.c b/net/net.c
index db6be12a1e..5148aacf81 100644
--- a/net/net.c
+++ b/net/net.c
@@ -510,6 +510,24 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
     nc->info->set_vnet_hdr_len(nc, len);
 }
 
+int qemu_set_vnet_le(NetClientState *nc, bool is_le)
+{
+    if (!nc || !nc->info->set_vnet_le) {
+        return -ENOSYS;
+    }
+
+    return nc->info->set_vnet_le(nc, is_le);
+}
+
+int qemu_set_vnet_be(NetClientState *nc, bool is_be)
+{
+    if (!nc || !nc->info->set_vnet_be) {
+        return -ENOSYS;
+    }
+
+    return nc->info->set_vnet_be(nc, is_be);
+}
+
 int qemu_can_send_packet(NetClientState *sender)
 {
     int vm_running = runstate_is_running();
diff --git a/net/tap-linux.c b/net/tap-linux.c
index 6c3caef21e..394f2a646f 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -198,6 +198,40 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
     }
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    int arg = is_le ? 1 : 0;
+
+    if (!ioctl(fd, TUNSETVNETLE, &arg)) {
+        return 0;
+    }
+
+    /* Check if our kernel supports TUNSETVNETLE */
+    if (errno == EINVAL) {
+        return -errno;
+    }
+
+    error_report("TUNSETVNETLE ioctl() failed: %s.\n", strerror(errno));
+    abort();
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    int arg = is_be ? 1 : 0;
+
+    if (!ioctl(fd, TUNSETVNETBE, &arg)) {
+        return 0;
+    }
+
+    /* Check if our kernel supports TUNSETVNETBE */
+    if (errno == EINVAL) {
+        return -errno;
+    }
+
+    error_report("TUNSETVNETBE ioctl() failed: %s.\n", strerror(errno));
+    abort();
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 1cf35d41bd..01dc6f8a2d 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -30,6 +30,8 @@
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
 #define TUNSETQUEUE  _IOW('T', 217, int)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNSETVNETBE _IOW('T', 222, int)
 
 #endif
 
diff --git a/net/tap.c b/net/tap.c
index d1ca314dcf..ec12dfd1d8 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -275,6 +275,20 @@ static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
     s->using_vnet_hdr = using_vnet_hdr;
 }
 
+static int tap_set_vnet_le(NetClientState *nc, bool is_le)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    return tap_fd_set_vnet_le(s->fd, is_le);
+}
+
+static int tap_set_vnet_be(NetClientState *nc, bool is_be)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+
+    return tap_fd_set_vnet_be(s->fd, is_be);
+}
+
 static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                      int tso6, int ecn, int ufo)
 {
@@ -341,6 +355,8 @@ static NetClientInfo net_tap_info = {
     .using_vnet_hdr = tap_using_vnet_hdr,
     .set_offload = tap_set_offload,
     .set_vnet_hdr_len = tap_set_vnet_hdr_len,
+    .set_vnet_le = tap_set_vnet_le,
+    .set_vnet_be = tap_set_vnet_be,
 };
 
 static TAPState *net_tap_fd_init(NetClientState *peer,
diff --git a/net/tap_int.h b/net/tap_int.h
index d12a409967..2378021c45 100644
--- a/net/tap_int.h
+++ b/net/tap_int.h
@@ -40,6 +40,8 @@ int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+int tap_fd_set_vnet_le(int fd, int vnet_is_le);
+int tap_fd_set_vnet_be(int fd, int vnet_is_be);
 int tap_fd_enable(int fd);
 int tap_fd_disable(int fd);
 int tap_fd_get_ifname(int fd, char *ifname);

From 4ee9b43be9a6e4ae161a1e6322bfef90818589f6 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 18 Jun 2015 16:52:23 +0200
Subject: [PATCH 06/11] tap: fix non-linux build

tap_fd_set_vnet_le/tap_fd_set_vnet_be was missing,
fix it up.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
---
 net/tap-aix.c     | 10 ++++++++++
 net/tap-bsd.c     | 10 ++++++++++
 net/tap-haiku.c   | 10 ++++++++++
 net/tap-solaris.c | 10 ++++++++++
 net/tap-win32.c   | 10 ++++++++++
 5 files changed, 50 insertions(+)

diff --git a/net/tap-aix.c b/net/tap-aix.c
index 18fdbf3b21..e84fc39136 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -55,6 +55,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index 5889920eac..7028d9be95 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -196,6 +196,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index d18590c636..2e738ec6a3 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -55,6 +55,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index 90b2fd12f1..0f60f78dd0 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -223,6 +223,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
diff --git a/net/tap-win32.c b/net/tap-win32.c
index f6fc9610a7..625d53c64b 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -688,6 +688,16 @@ void tap_fd_set_vnet_hdr_len(int fd, int len)
 {
 }
 
+int tap_fd_set_vnet_le(int fd, int is_le)
+{
+    return -EINVAL;
+}
+
+int tap_fd_set_vnet_be(int fd, int is_be)
+{
+    return -EINVAL;
+}
+
 static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
 {
 }

From 5be7d9f1b1452613b95c6ba70b8d7ad3d0797991 Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Wed, 17 Jun 2015 15:23:49 +0200
Subject: [PATCH 07/11] vhost-net: tell tap backend about the vnet endianness

The default behaviour for TAP/MACVTAP is to consider vnet as native endian.

This patch handles the cases when this is not true:
- virtio 1.0: always little-endian
- legacy cross-endian

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/vhost_net.c | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 1c55517e36..8cbb2f618c 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -38,6 +38,7 @@
 #include "standard-headers/linux/virtio_ring.h"
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/virtio-bus.h"
+#include "hw/virtio/virtio-access.h"
 
 struct vhost_net {
     struct vhost_dev dev;
@@ -197,6 +198,27 @@ static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index)
     net->dev.vq_index = vq_index;
 }
 
+static int vhost_net_set_vnet_endian(VirtIODevice *dev, NetClientState *peer,
+                                     bool set)
+{
+    int r = 0;
+
+    if (virtio_has_feature(dev, VIRTIO_F_VERSION_1) ||
+        (virtio_legacy_is_cross_endian(dev) && !virtio_is_big_endian(dev))) {
+        r = qemu_set_vnet_le(peer, set);
+        if (r) {
+            error_report("backend does not support LE vnet headers");
+        }
+    } else if (virtio_legacy_is_cross_endian(dev)) {
+        r = qemu_set_vnet_be(peer, set);
+        if (r) {
+            error_report("backend does not support BE vnet headers");
+        }
+    }
+
+    return r;
+}
+
 static int vhost_net_start_one(struct vhost_net *net,
                                VirtIODevice *dev)
 {
@@ -314,6 +336,11 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
         goto err;
     }
 
+    r = vhost_net_set_vnet_endian(dev, ncs[0].peer, true);
+    if (r < 0) {
+        goto err;
+    }
+
     for (i = 0; i < total_queues; i++) {
         vhost_net_set_vq_index(get_vhost_net(ncs[i].peer), i * 2);
     }
@@ -321,7 +348,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
     if (r < 0) {
         error_report("Error binding guest notifier: %d", -r);
-        goto err;
+        goto err_endian;
     }
 
     for (i = 0; i < total_queues; i++) {
@@ -343,6 +370,8 @@ err_start:
         fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", e);
         fflush(stderr);
     }
+err_endian:
+    vhost_net_set_vnet_endian(dev, ncs[0].peer, false);
 err:
     return r;
 }
@@ -365,6 +394,8 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
         fflush(stderr);
     }
     assert(r >= 0);
+
+    assert(vhost_net_set_vnet_endian(dev, ncs[0].peer, false) >= 0);
 }
 
 void vhost_net_cleanup(struct vhost_net *net)

From 1717388645670336c48aa05d19b0acd07687a821 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@fr.ibm.com>
Date: Wed, 17 Jun 2015 15:23:54 +0200
Subject: [PATCH 08/11] vhost_net: re-enable when cross endian
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cross-endianness is now checked by the core vhost code.

revert 371df9f5e0f1 "vhost-net: disable when cross-endian"

Signed-off-by: Cédric Le Goater <clg@fr.ibm.com>
[ added commit message, Greg Kurz <gkurz@linux.vnet.ibm.com> ]
Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/net/vhost_net.c | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 8cbb2f618c..f505c91d42 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -303,19 +303,6 @@ static void vhost_net_stop_one(struct vhost_net *net,
     vhost_dev_disable_notifiers(&net->dev, dev);
 }
 
-static bool vhost_net_device_endian_ok(VirtIODevice *vdev)
-{
-#ifdef TARGET_IS_BIENDIAN
-#ifdef HOST_WORDS_BIGENDIAN
-    return virtio_is_big_endian(vdev);
-#else
-    return !virtio_is_big_endian(vdev);
-#endif
-#else
-    return true;
-#endif
-}
-
 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
                     int total_queues)
 {
@@ -324,12 +311,6 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
     int r, e, i;
 
-    if (!vhost_net_device_endian_ok(dev)) {
-        error_report("vhost-net does not support cross-endian");
-        r = -ENOSYS;
-        goto err;
-    }
-
     if (!k->set_guest_notifiers) {
         error_report("binding does not support guest notifiers");
         r = -ENOSYS;

From 5ba03e2dd785362026917e4cc8a1fd2c64e8e62c Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Wed, 17 Jun 2015 14:45:03 +0200
Subject: [PATCH 09/11] hw/core: rebase sysbus_get_fw_dev_path() to
 g_strdup_printf()

This is done mainly for improving readability, and in preparation for the
next patch, but Markus pointed out another bonus for the string being
returned:

"No arbitrary length limit. Before the patch, it's 39 characters, and the
code breaks catastrophically when qdev_fw_name() is longer: the second
snprintf() is called with its first argument pointing beyond path[], and
its second argument underflowing to a huge size."

Cc: qemu-stable@nongnu.org
Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Marcel Apfelbaum <marcel@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/core/sysbus.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/hw/core/sysbus.c b/hw/core/sysbus.c
index b53c351aa4..92eced9424 100644
--- a/hw/core/sysbus.c
+++ b/hw/core/sysbus.c
@@ -281,19 +281,15 @@ static void sysbus_dev_print(Monitor *mon, DeviceState *dev, int indent)
 static char *sysbus_get_fw_dev_path(DeviceState *dev)
 {
     SysBusDevice *s = SYS_BUS_DEVICE(dev);
-    char path[40];
-    int off;
-
-    off = snprintf(path, sizeof(path), "%s", qdev_fw_name(dev));
 
     if (s->num_mmio) {
-        snprintf(path + off, sizeof(path) - off, "@"TARGET_FMT_plx,
-                 s->mmio[0].addr);
-    } else if (s->num_pio) {
-        snprintf(path + off, sizeof(path) - off, "@i%04x", s->pio[0]);
+        return g_strdup_printf("%s@" TARGET_FMT_plx, qdev_fw_name(dev),
+                               s->mmio[0].addr);
     }
-
-    return g_strdup(path);
+    if (s->num_pio) {
+        return g_strdup_printf("%s@i%04x", qdev_fw_name(dev), s->pio[0]);
+    }
+    return g_strdup(qdev_fw_name(dev));
 }
 
 void sysbus_add_io(SysBusDevice *dev, hwaddr addr,

From 74de5504fd063019433ec0746105da774ede790d Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
Date: Tue, 16 Jun 2015 11:24:39 +0300
Subject: [PATCH 10/11] pci: Don't register a specialized 'config_write' if
 default behavior is intended

Few devices have their specialized 'config_write' methods which simply
call 'pci_default_write_config' followed by a 'msix_write_config' or
'msi_write_config' calls, using exact same arguments.

This is unnecessary as 'pci_default_write_config' already invokes
'msi_write_config' and 'msix_write_config'.

Also, since 'pci_default_write_config' is the default 'config_write'
handler, we can simply avoid the registration of these specialized
versions.

Cc: Leonid Shatz <leonid.shatz@ravellosystems.com>
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@ravellosystems.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 hw/misc/ivshmem.c    | 1 -
 hw/net/vmxnet3.c     | 9 ---------
 hw/scsi/megasas.c    | 8 --------
 hw/scsi/vmw_pvscsi.c | 8 --------
 4 files changed, 26 deletions(-)

diff --git a/hw/misc/ivshmem.c b/hw/misc/ivshmem.c
index 5d272c84e9..231c35fdef 100644
--- a/hw/misc/ivshmem.c
+++ b/hw/misc/ivshmem.c
@@ -698,7 +698,6 @@ static void ivshmem_write_config(PCIDevice *pci_dev, uint32_t address,
 				 uint32_t val, int len)
 {
     pci_default_write_config(pci_dev, address, val, len);
-    msix_write_config(pci_dev, address, val, len);
 }
 
 static int pci_ivshmem_init(PCIDevice *dev)
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index dfb328debd..34ffafd5a6 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -2481,14 +2481,6 @@ static const VMStateDescription vmstate_vmxnet3 = {
     }
 };
 
-static void
-vmxnet3_write_config(PCIDevice *pci_dev, uint32_t addr, uint32_t val, int len)
-{
-    pci_default_write_config(pci_dev, addr, val, len);
-    msix_write_config(pci_dev, addr, val, len);
-    msi_write_config(pci_dev, addr, val, len);
-}
-
 static Property vmxnet3_properties[] = {
     DEFINE_NIC_PROPERTIES(VMXNET3State, conf),
     DEFINE_PROP_END_OF_LIST(),
@@ -2507,7 +2499,6 @@ static void vmxnet3_class_init(ObjectClass *class, void *data)
     c->class_id = PCI_CLASS_NETWORK_ETHERNET;
     c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
     c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
-    c->config_write = vmxnet3_write_config,
     dc->desc = "VMWare Paravirtualized Ethernet v3";
     dc->reset = vmxnet3_qdev_reset;
     dc->vmsd = &vmstate_vmxnet3;
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index 91a5d97c73..51ba9e0e6e 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -2407,13 +2407,6 @@ static void megasas_scsi_realize(PCIDevice *dev, Error **errp)
     }
 }
 
-static void
-megasas_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
-{
-    pci_default_write_config(pci, addr, val, len);
-    msi_write_config(pci, addr, val, len);
-}
-
 static Property megasas_properties_gen1[] = {
     DEFINE_PROP_UINT32("max_sge", MegasasState, fw_sge,
                        MEGASAS_DEFAULT_SGE),
@@ -2516,7 +2509,6 @@ static void megasas_class_init(ObjectClass *oc, void *data)
     dc->vmsd = info->vmsd;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
     dc->desc = info->desc;
-    pc->config_write = megasas_write_config;
 }
 
 static const TypeInfo megasas_info = {
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index c6148d380e..9c71f31fe2 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -1174,13 +1174,6 @@ static const VMStateDescription vmstate_pvscsi = {
     }
 };
 
-static void
-pvscsi_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
-{
-    pci_default_write_config(pci, addr, val, len);
-    msi_write_config(pci, addr, val, len);
-}
-
 static Property pvscsi_properties[] = {
     DEFINE_PROP_UINT8("use_msg", PVSCSIState, use_msg, 1),
     DEFINE_PROP_END_OF_LIST(),
@@ -1202,7 +1195,6 @@ static void pvscsi_class_init(ObjectClass *klass, void *data)
     dc->vmsd = &vmstate_pvscsi;
     dc->props = pvscsi_properties;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-    k->config_write = pvscsi_write_config;
     hc->unplug = pvscsi_hot_unplug;
     hc->plug = pvscsi_hotplug;
 }

From 1e7398a140f7a6bd9f5a438e7ad0f1ef50990e25 Mon Sep 17 00:00:00 2001
From: Pankaj Gupta <pagupta@redhat.com>
Date: Tue, 16 Jun 2015 13:48:59 +0530
Subject: [PATCH 11/11] vhost: enable vhost without without MSI-X

We use vhostforce to enable vhost even if Guests don't have MSI-X
support and we fall back to QEMU virtio-net.

This gives a very small performance gain, but the disadvantage
is that guest now controls which virtio code is running
(qemu or vhost) so our attack surface is doubled.

This patch will enable vhost unconditionally whenever it's requested.
For compatibility, enable vhost when vhostforce is set, as well.

Signed-off-by: Pankaj Gupta <pagupta@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/vhost_net.c        | 12 +-----------
 hw/net/virtio-net.c       |  4 ----
 hw/scsi/vhost-scsi.c      |  2 +-
 hw/virtio/vhost.c         | 14 +-------------
 include/hw/virtio/vhost.h |  3 +--
 include/net/vhost_net.h   |  2 --
 net/tap.c                 |  1 -
 net/vhost-user.c          |  1 -
 8 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index f505c91d42..9bd360bd17 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -163,7 +163,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
     net->dev.vq_index = net->nc->queue_index;
 
     r = vhost_dev_init(&net->dev, options->opaque,
-                       options->backend_type, options->force);
+                       options->backend_type);
     if (r < 0) {
         goto fail;
     }
@@ -188,11 +188,6 @@ fail:
     return NULL;
 }
 
-bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
-{
-    return vhost_dev_query(&net->dev, dev);
-}
-
 static void vhost_net_set_vq_index(struct vhost_net *net, int vq_index)
 {
     net->dev.vq_index = vq_index;
@@ -424,11 +419,6 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options)
     return NULL;
 }
 
-bool vhost_net_query(VHostNetState *net, VirtIODevice *dev)
-{
-    return false;
-}
-
 int vhost_net_start(VirtIODevice *dev,
                     NetClientState *ncs,
                     int total_queues)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 9281aa107c..d7282335de 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -128,10 +128,6 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
     if (!n->vhost_started) {
         int r, i;
 
-        if (!vhost_net_query(get_vhost_net(nc->peer), vdev)) {
-            return;
-        }
-
         /* Any packets outstanding? Purge them to avoid touching rings
          * when vhost is running.
          */
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 1941aa15c0..1c389c4d7e 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -246,7 +246,7 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
     s->dev.backend_features = 0;
 
     ret = vhost_dev_init(&s->dev, (void *)(uintptr_t)vhostfd,
-                         VHOST_BACKEND_TYPE_KERNEL, true);
+                         VHOST_BACKEND_TYPE_KERNEL);
     if (ret < 0) {
         error_setg(errp, "vhost-scsi: vhost initialization failed: %s",
                    strerror(-ret));
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 7ea45b391c..b84d21cf7e 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -898,7 +898,7 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
 }
 
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
-                   VhostBackendType backend_type, bool force)
+                   VhostBackendType backend_type)
 {
     uint64_t features;
     int i, r;
@@ -961,7 +961,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
     hdev->started = false;
     hdev->memory_changed = false;
     memory_listener_register(&hdev->memory_listener, &address_space_memory);
-    hdev->force = force;
     return 0;
 fail_vq:
     while (--i >= 0) {
@@ -989,17 +988,6 @@ void vhost_dev_cleanup(struct vhost_dev *hdev)
     hdev->vhost_ops->vhost_backend_cleanup(hdev);
 }
 
-bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev)
-{
-    BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev)));
-    VirtioBusState *vbus = VIRTIO_BUS(qbus);
-    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(vbus);
-
-    return !k->query_guest_notifiers ||
-           k->query_guest_notifiers(qbus->parent) ||
-           hdev->force;
-}
-
 /* Stop processing guest IO notifications in qemu.
  * Start processing them in vhost in kernel.
  */
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 84f170e789..dd510509eb 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -51,7 +51,6 @@ struct vhost_dev {
     bool log_enabled;
     unsigned long long log_size;
     Error *migration_blocker;
-    bool force;
     bool memory_changed;
     hwaddr mem_changed_start_addr;
     hwaddr mem_changed_end_addr;
@@ -61,7 +60,7 @@ struct vhost_dev {
 };
 
 int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
-                   VhostBackendType backend_type, bool force);
+                   VhostBackendType backend_type);
 void vhost_dev_cleanup(struct vhost_dev *hdev);
 bool vhost_dev_query(struct vhost_dev *hdev, VirtIODevice *vdev);
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
index 9eb493efc3..840d4b16e2 100644
--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
@@ -11,12 +11,10 @@ typedef struct VhostNetOptions {
     VhostBackendType backend_type;
     NetClientState *net_backend;
     void *opaque;
-    bool force;
 } VhostNetOptions;
 
 struct vhost_net *vhost_net_init(VhostNetOptions *options);
 
-bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
 int vhost_net_start(VirtIODevice *dev, NetClientState *ncs, int total_queues);
 void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, int total_queues);
 
diff --git a/net/tap.c b/net/tap.c
index ec12dfd1d8..3a3f522eac 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -671,7 +671,6 @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
 
         options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
         options.net_backend = &s->nc;
-        options.force = tap->has_vhostforce && tap->vhostforce;
 
         if (tap->has_vhostfd || tap->has_vhostfds) {
             vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
diff --git a/net/vhost-user.c b/net/vhost-user.c
index 8d2672846f..f1df26df95 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -50,7 +50,6 @@ static int vhost_user_start(VhostUserState *s)
     options.backend_type = VHOST_BACKEND_TYPE_USER;
     options.net_backend = &s->nc;
     options.opaque = s->chr;
-    options.force = true;
 
     s->vhost_net = vhost_net_init(&options);