From 987a9b4800003567b1a47a379255e886a77d57ea Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 Aug 2012 16:45:55 +0200
Subject: [PATCH 01/13] net: notify iothread after flushing queue

virtio-net has code to flush the queue and notify the iothread
whenever new receive buffers are added by the guest.  That is
fine, and indeed we need to do the same in all other drivers.
However, notifying the iothread should be work for the network
subsystem.  And since we are at it we can add a little smartness:
if some of the queued packets already could not be delivered,
there is no need to notify the iothread.

Reported-by: Luigi Rizzo <rizzo@iet.unipi.it>
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Cc: Jan Kiszka <jan.kiszka@siemens.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/virtio-net.c | 4 ----
 net.c           | 7 ++++++-
 net/queue.c     | 5 +++--
 net/queue.h     | 2 +-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index b1998b27d3..6490743290 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -447,10 +447,6 @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
     VirtIONet *n = to_virtio_net(vdev);
 
     qemu_flush_queued_packets(&n->nic->nc);
-
-    /* We now have RX buffers, signal to the IO thread to break out of the
-     * select to re-poll the tap file descriptor */
-    qemu_notify_event();
 }
 
 static int virtio_net_can_receive(NetClientState *nc)
diff --git a/net.c b/net.c
index e5d25d4b6d..d9ba1e5b74 100644
--- a/net.c
+++ b/net.c
@@ -357,7 +357,12 @@ void qemu_flush_queued_packets(NetClientState *nc)
 {
     nc->receive_disabled = 0;
 
-    qemu_net_queue_flush(nc->send_queue);
+    if (qemu_net_queue_flush(nc->send_queue)) {
+        /* We emptied the queue successfully, signal to the IO thread to repoll
+         * the file descriptor (for tap, for example).
+         */
+        qemu_notify_event();
+    }
 }
 
 static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
diff --git a/net/queue.c b/net/queue.c
index e8030aafe4..6e64091463 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -228,7 +228,7 @@ void qemu_net_queue_purge(NetQueue *queue, NetClientState *from)
     }
 }
 
-void qemu_net_queue_flush(NetQueue *queue)
+bool qemu_net_queue_flush(NetQueue *queue)
 {
     while (!QTAILQ_EMPTY(&queue->packets)) {
         NetPacket *packet;
@@ -244,7 +244,7 @@ void qemu_net_queue_flush(NetQueue *queue)
                                      packet->size);
         if (ret == 0) {
             QTAILQ_INSERT_HEAD(&queue->packets, packet, entry);
-            break;
+            return false;
         }
 
         if (packet->sent_cb) {
@@ -253,4 +253,5 @@ void qemu_net_queue_flush(NetQueue *queue)
 
         g_free(packet);
     }
+    return true;
 }
diff --git a/net/queue.h b/net/queue.h
index 9d44a9b3b8..fc02b33915 100644
--- a/net/queue.h
+++ b/net/queue.h
@@ -53,6 +53,6 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
                                 NetPacketSent *sent_cb);
 
 void qemu_net_queue_purge(NetQueue *queue, NetClientState *from);
-void qemu_net_queue_flush(NetQueue *queue);
+bool qemu_net_queue_flush(NetQueue *queue);
 
 #endif /* QEMU_NET_QUEUE_H */

From e8b4c680b41bd960ecccd9ff076b7b058e0afcd4 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 Aug 2012 16:45:56 +0200
Subject: [PATCH 02/13] e1000: flush queue whenever can_receive can go from
 false to true

When the guests replenish the receive ring buffer, the network device
should flush its queue of pending packets.  This is done with
qemu_flush_queued_packets.

e1000's can_receive can go from false to true when RCTL or RDT are
modified.

Reported-by: Luigi Rizzo <rizzo@iet.unipi.it>
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Cc: Jan Kiszka <jan.kiszka@siemens.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/e1000.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/hw/e1000.c b/hw/e1000.c
index ae8a6c5523..ec3a7c4ecc 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -295,6 +295,7 @@ set_rx_control(E1000State *s, int index, uint32_t val)
     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
            s->mac_reg[RCTL]);
+    qemu_flush_queued_packets(&s->nic->nc);
 }
 
 static void
@@ -926,6 +927,9 @@ set_rdt(E1000State *s, int index, uint32_t val)
 {
     s->check_rxov = 0;
     s->mac_reg[index] = val & 0xffff;
+    if (e1000_has_rxbufs(s, 1)) {
+        qemu_flush_queued_packets(&s->nic->nc);
+    }
 }
 
 static void

From a98b140223d3a627eab7ee3ddec645bab630d756 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 Aug 2012 16:45:57 +0200
Subject: [PATCH 03/13] xen: flush queue when getting an event

xen does not have a register that, when written, will cause can_receive
to go from false to true.  However, flushing the queue can be attempted
whenever the front-end raises its side of the Xen event channel.  There
is a single event channel for tx and rx.

Cc: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Amos Kong <akong@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/xen_nic.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/xen_nic.c b/hw/xen_nic.c
index 8b79bfb73e..cf7d5591b3 100644
--- a/hw/xen_nic.c
+++ b/hw/xen_nic.c
@@ -415,6 +415,7 @@ static void net_event(struct XenDevice *xendev)
 {
     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
     net_tx_packets(netdev);
+    qemu_flush_queued_packets(&netdev->nic->nc);
 }
 
 static int net_free(struct XenDevice *xendev)

From 1069985fb132cd4324fc02d371f1e61492a1823f Mon Sep 17 00:00:00 2001
From: Bo Yang <boyang@suse.com>
Date: Wed, 29 Aug 2012 19:26:11 +0800
Subject: [PATCH 04/13] eepro100: Fix network hang when rx buffers run out

This is reported by QA. When installing os with pxe, after the initial
kernel and initrd are loaded, the procedure tries to copy files from install
server to local harddisk, the network becomes stall because of running out of
receive descriptor.

[Whitespace fixes and removed qemu_notify_event() because Paolo's
earlier net patches have moved it into qemu_flush_queued_packets().

Additional info:

I can reproduce the network hang with a tap device doing a iPXE HTTP
boot as follows:

  $ qemu -enable-kvm -m 1024 \
    -netdev tap,id=netdev0,script=no,downscript=no \
    -device i82559er,netdev=netdev0,romfile=80861209.rom \
    -drive if=virtio,cache=none,file=test.img
  iPXE> ifopen net0
  iPXE> config # set static network configuration
  iPXE> kernel http://mirror.bytemark.co.uk/fedora/linux/releases/17/Fedora/x86_64/os/images/pxeboot/vmlinuz

I needed a vanilla iPXE ROM to get to the iPXE prompt.  I think the boot
prompt has been disabled in the ROMs that ship with QEMU to reduce boot
time.

During the vmlinuz HTTP download there is a network hang.  hw/eepro100.c
has reached the end of the rx descriptor list.  When the iPXE driver
replenishes the rx descriptor list we don't kick the QEMU net subsystem
and event loop, thereby leaving the tap netdev without its file
descriptor in select(2).

Stefan Hajnoczi <stefanha@gmail.com>]

Signed-off-by: Bo Yang <boyang@suse.com>
Signed-off-by: Stefan Hajnoczi <stefanha@gmail.com>
---
 hw/eepro100.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/eepro100.c b/hw/eepro100.c
index 50d117e35e..5b231163d8 100644
--- a/hw/eepro100.c
+++ b/hw/eepro100.c
@@ -1036,6 +1036,7 @@ static void eepro100_ru_command(EEPRO100State * s, uint8_t val)
         }
         set_ru_state(s, ru_ready);
         s->ru_offset = e100_read_reg4(s, SCBPointer);
+        qemu_flush_queued_packets(&s->nic->nc);
         TRACE(OTHER, logout("val=0x%02x (rx start)\n", val));
         break;
     case RX_RESUME:
@@ -1770,7 +1771,8 @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
     if (rfd_command & COMMAND_EL) {
         /* EL bit is set, so this was the last frame. */
         logout("receive: Running out of frames\n");
-        set_ru_state(s, ru_suspended);
+        set_ru_state(s, ru_no_resources);
+        eepro100_rnr_interrupt(s);
     }
     if (rfd_command & COMMAND_S) {
         /* S bit is set. */

From c67f5dc10573687497f0f5c3aec19b15c35c63d7 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Fri, 17 Aug 2012 21:16:42 +0100
Subject: [PATCH 05/13] net: add receive_disabled logic to iov delivery path

This patch adds the missing NetClient->receive_disabled logic in the
sendv delivery code path.  It seems that commit
893379efd0e1b84ceb0c42a713293f3dbd27b1bd ("net: disable receiving if
client returns zero") only added the logic to qemu_deliver_packet() and
not qemu_deliver_packet_iov().

The receive_disabled flag should be automatically set when .receive(),
.receive_raw(), or .receive_iov() return 0.  No further packets will be
delivered to the NetClient until the receive_disabled flag is cleared
again by calling qemu_flush_queued_packets().

Typically the NetClient will wait until its file descriptor becomes
writable and then invoke qemu_flush_queued_packets() to resume
transmission.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 net.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/net.c b/net.c
index d9ba1e5b74..a187a7b3db 100644
--- a/net.c
+++ b/net.c
@@ -423,16 +423,27 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
                                 void *opaque)
 {
     NetClientState *nc = opaque;
+    int ret;
 
     if (nc->link_down) {
         return iov_size(iov, iovcnt);
     }
 
-    if (nc->info->receive_iov) {
-        return nc->info->receive_iov(nc, iov, iovcnt);
-    } else {
-        return nc_sendv_compat(nc, iov, iovcnt);
+    if (nc->receive_disabled) {
+        return 0;
     }
+
+    if (nc->info->receive_iov) {
+        ret = nc->info->receive_iov(nc, iov, iovcnt);
+    } else {
+        ret = nc_sendv_compat(nc, iov, iovcnt);
+    }
+
+    if (ret == 0) {
+        nc->receive_disabled = 1;
+    }
+
+    return ret;
 }
 
 ssize_t qemu_sendv_packet_async(NetClientState *sender,

From 06b5f36d052b540a59b52150582d65674199b2ce Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Mon, 20 Aug 2012 13:35:23 +0100
Subject: [PATCH 06/13] net: do not report queued packets as sent

Net send functions have a return value where 0 means the packet has not
been sent and will be queued.  A non-zero value means the packet was
sent or an error caused the packet to be dropped.

This patch fixes two instances where packets are queued but we return
their size.  This causes callers to believe the packets were sent.  When
the caller uses the async send interface this creates a real problem
because the callback will be invoked for a packet that the caller
believed to be already sent.  This bug can cause double-frees in the
caller.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 net/queue.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/net/queue.c b/net/queue.c
index 6e64091463..254f28013a 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -83,12 +83,12 @@ void qemu_del_net_queue(NetQueue *queue)
     g_free(queue);
 }
 
-static ssize_t qemu_net_queue_append(NetQueue *queue,
-                                     NetClientState *sender,
-                                     unsigned flags,
-                                     const uint8_t *buf,
-                                     size_t size,
-                                     NetPacketSent *sent_cb)
+static void qemu_net_queue_append(NetQueue *queue,
+                                  NetClientState *sender,
+                                  unsigned flags,
+                                  const uint8_t *buf,
+                                  size_t size,
+                                  NetPacketSent *sent_cb)
 {
     NetPacket *packet;
 
@@ -100,16 +100,14 @@ static ssize_t qemu_net_queue_append(NetQueue *queue,
     memcpy(packet->data, buf, size);
 
     QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
-
-    return size;
 }
 
-static ssize_t qemu_net_queue_append_iov(NetQueue *queue,
-                                         NetClientState *sender,
-                                         unsigned flags,
-                                         const struct iovec *iov,
-                                         int iovcnt,
-                                         NetPacketSent *sent_cb)
+static void qemu_net_queue_append_iov(NetQueue *queue,
+                                      NetClientState *sender,
+                                      unsigned flags,
+                                      const struct iovec *iov,
+                                      int iovcnt,
+                                      NetPacketSent *sent_cb)
 {
     NetPacket *packet;
     size_t max_len = 0;
@@ -133,8 +131,6 @@ static ssize_t qemu_net_queue_append_iov(NetQueue *queue,
     }
 
     QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
-
-    return packet->size;
 }
 
 static ssize_t qemu_net_queue_deliver(NetQueue *queue,
@@ -177,7 +173,8 @@ ssize_t qemu_net_queue_send(NetQueue *queue,
     ssize_t ret;
 
     if (queue->delivering || !qemu_can_send_packet(sender)) {
-        return qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
+        qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
+        return 0;
     }
 
     ret = qemu_net_queue_deliver(queue, sender, flags, data, size);
@@ -201,8 +198,8 @@ ssize_t qemu_net_queue_send_iov(NetQueue *queue,
     ssize_t ret;
 
     if (queue->delivering || !qemu_can_send_packet(sender)) {
-        return qemu_net_queue_append_iov(queue, sender, flags,
-                                         iov, iovcnt, sent_cb);
+        qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb);
+        return 0;
     }
 
     ret = qemu_net_queue_deliver_iov(queue, sender, flags, iov, iovcnt);

From 08d12022c7f1aba6acccc75150659c6e4c9dff23 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Tue, 14 Aug 2012 14:14:27 +0100
Subject: [PATCH 07/13] net: add -netdev options to man page

Document the -netdev syntax which supercedes the older -net syntax.
This patch is a first step to making -netdev prominent in the QEMU
manual.

Reported-by: Anatoly Techtonik <techtonik@gmail.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 qemu-options.hx | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/qemu-options.hx b/qemu-options.hx
index 804a2d1739..0977b3f783 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1357,6 +1357,7 @@ Valid values for @var{type} are
 Not all devices are supported on all targets.  Use -net nic,model=?
 for a list of available devices for your target.
 
+@item -netdev user,id=@var{id}[,@var{option}][,@var{option}][,...]
 @item -net user[,@var{option}][,@var{option}][,...]
 Use the user mode network stack which requires no administrator
 privilege to run. Valid options are:
@@ -1365,6 +1366,7 @@ privilege to run. Valid options are:
 @item vlan=@var{n}
 Connect user mode stack to VLAN @var{n} (@var{n} = 0 is the default).
 
+@item id=@var{id}
 @item name=@var{name}
 Assign symbolic name for use in monitor commands.
 
@@ -1490,6 +1492,7 @@ processed and applied to -net user. Mixing them with the new configuration
 syntax gives undefined results. Their use for new applications is discouraged
 as they will be removed from future versions.
 
+@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
 @item -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
 Connect the host TAP network interface @var{name} to VLAN @var{n}.
 
@@ -1529,6 +1532,7 @@ qemu-system-i386 linux.img \
                  -net nic -net tap,"helper=/usr/local/libexec/qemu-bridge-helper"
 @end example
 
+@item -netdev bridge,id=@var{id}[,br=@var{bridge}][,helper=@var{helper}]
 @item -net bridge[,vlan=@var{n}][,name=@var{name}][,br=@var{bridge}][,helper=@var{helper}]
 Connect a host TAP network interface to a host bridge device.
 
@@ -1551,6 +1555,7 @@ qemu-system-i386 linux.img -net bridge -net nic,model=virtio
 qemu-system-i386 linux.img -net bridge,br=qemubr0 -net nic,model=virtio
 @end example
 
+@item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}]
 @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}] [,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}]
 
 Connect the VLAN @var{n} to a remote VLAN in another QEMU virtual
@@ -1573,6 +1578,7 @@ qemu-system-i386 linux.img \
                  -net socket,connect=127.0.0.1:1234
 @end example
 
+@item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]]
 @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]]
 
 Create a VLAN @var{n} shared with another QEMU virtual
@@ -1624,6 +1630,7 @@ qemu-system-i386 linux.img \
                  -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
 @end example
 
+@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
 listening for incoming connections on @var{socketpath}. Use GROUP @var{groupname}

From f237ddbb89142c6948a2257c459e49dee7500a7c Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Fri, 24 Aug 2012 13:32:16 +0100
Subject: [PATCH 08/13] net: clean up usbnet_receive()

The USB network interface has two code paths depending on whether or not
RNDIS mode is enabled.  Refactor usbnet_receive() so that there is a
common path throughout the function instead of duplicating everything
across if (is_rndis(s)) ... else ... code paths.

Clean up coding style and 80 character line wrap along the way.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/usb/dev-network.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index c84892c98d..0b5cb71f98 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1250,20 +1250,27 @@ static int usb_net_handle_data(USBDevice *dev, USBPacket *p)
 static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
-    struct rndis_packet_msg_type *msg;
+    uint8_t *in_buf = s->in_buf;
+    size_t total_size = size;
 
     if (is_rndis(s)) {
-        msg = (struct rndis_packet_msg_type *) s->in_buf;
         if (s->rndis_state != RNDIS_DATA_INITIALIZED) {
             return -1;
         }
-        if (size + sizeof(struct rndis_packet_msg_type) > sizeof(s->in_buf))
-            return -1;
+        total_size += sizeof(struct rndis_packet_msg_type);
+    }
+    if (total_size > sizeof(s->in_buf)) {
+        return -1;
+    }
 
+    if (is_rndis(s)) {
+        struct rndis_packet_msg_type *msg;
+
+        msg = (struct rndis_packet_msg_type *)in_buf;
         memset(msg, 0, sizeof(struct rndis_packet_msg_type));
         msg->MessageType = cpu_to_le32(RNDIS_PACKET_MSG);
-        msg->MessageLength = cpu_to_le32(size + sizeof(struct rndis_packet_msg_type));
-        msg->DataOffset = cpu_to_le32(sizeof(struct rndis_packet_msg_type) - 8);
+        msg->MessageLength = cpu_to_le32(size + sizeof(*msg));
+        msg->DataOffset = cpu_to_le32(sizeof(*msg) - 8);
         msg->DataLength = cpu_to_le32(size);
         /* msg->OOBDataOffset;
          * msg->OOBDataLength;
@@ -1273,14 +1280,11 @@ static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t siz
          * msg->VcHandle;
          * msg->Reserved;
          */
-        memcpy(msg + 1, buf, size);
-        s->in_len = size + sizeof(struct rndis_packet_msg_type);
-    } else {
-        if (size > sizeof(s->in_buf))
-            return -1;
-        memcpy(s->in_buf, buf, size);
-        s->in_len = size;
+        in_buf += sizeof(*msg);
     }
+
+    memcpy(in_buf, buf, size);
+    s->in_len = total_size;
     s->in_ptr = 0;
     return size;
 }

From 190563f9a90c9df8ad32fc7f3e4b166deda949a6 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Fri, 24 Aug 2012 13:37:29 +0100
Subject: [PATCH 09/13] net: fix usbnet_receive() packet drops

The USB network interface has a single buffer which the guest reads
from.  This patch prevents multiple calls to usbnet_receive() from
clobbering the input buffer.  Instead we queue packets until buffer
space becomes available again.

This is inspired by virtio-net and e1000 rxbuf handling.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 hw/usb/dev-network.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index 0b5cb71f98..e4a43599b5 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1001,6 +1001,13 @@ static int rndis_keepalive_response(USBNetState *s,
     return 0;
 }
 
+/* Prepare to receive the next packet */
+static void usb_net_reset_in_buf(USBNetState *s)
+{
+    s->in_ptr = s->in_len = 0;
+    qemu_flush_queued_packets(&s->nic->nc);
+}
+
 static int rndis_parse(USBNetState *s, uint8_t *data, int length)
 {
     uint32_t msg_type;
@@ -1025,7 +1032,8 @@ static int rndis_parse(USBNetState *s, uint8_t *data, int length)
 
     case RNDIS_RESET_MSG:
         rndis_clear_responsequeue(s);
-        s->out_ptr = s->in_ptr = s->in_len = 0;
+        s->out_ptr = 0;
+        usb_net_reset_in_buf(s);
         return rndis_reset_response(s, (rndis_reset_msg_type *) data);
 
     case RNDIS_KEEPALIVE_MSG:
@@ -1135,7 +1143,7 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p)
     int ret = USB_RET_NAK;
 
     if (s->in_ptr > s->in_len) {
-        s->in_ptr = s->in_len = 0;
+        usb_net_reset_in_buf(s);
         ret = USB_RET_NAK;
         return ret;
     }
@@ -1152,7 +1160,7 @@ static int usb_net_handle_datain(USBNetState *s, USBPacket *p)
     if (s->in_ptr >= s->in_len &&
                     (is_rndis(s) || (s->in_len & (64 - 1)) || !ret)) {
         /* no short packet necessary */
-        s->in_ptr = s->in_len = 0;
+        usb_net_reset_in_buf(s);
     }
 
 #ifdef TRAFFIC_DEBUG
@@ -1263,6 +1271,11 @@ static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t siz
         return -1;
     }
 
+    /* Only accept packet if input buffer is empty */
+    if (s->in_len > 0) {
+        return 0;
+    }
+
     if (is_rndis(s)) {
         struct rndis_packet_msg_type *msg;
 

From 61518a74ca98870e8ff132f91dd5dda252e31f58 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Fri, 24 Aug 2012 13:50:30 +0100
Subject: [PATCH 10/13] net: broadcast hub packets if at least one port can
 receive

In commit 60c07d933c66c4b30a83b7ccbc8a0cb3df1b2d0e ("net: fix
qemu_can_send_packet logic") the "VLAN" broadcast behavior was changed
to queue packets if any net client cannot receive.  It turns out that
this was not actually the right fix and just hides the real bug that
hw/usb/dev-network.c:usbnet_receive() clobbers its receive buffer when
called multiple times in a row.  The commit also introduced a new bug
that "VLAN" packets would not be sent if one of multiple net clients was
down.

The hw/usb/dev-network.c bug has since been fixed, so this patch reverts
broadcast behavior to send packets as long as one net client can
receive.  Packets simply get queued for the net clients that are
temporarily unable to receive.

Reported-by: Roy.Li <rongqing.li@windriver.com>
Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 net/hub.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/hub.c b/net/hub.c
index ac157e32ee..650a8b4a40 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -97,12 +97,12 @@ static int net_hub_port_can_receive(NetClientState *nc)
             continue;
         }
 
-        if (!qemu_can_send_packet(&port->nc)) {
-            return 0;
+        if (qemu_can_send_packet(&port->nc)) {
+            return 1;
         }
     }
 
-    return 1;
+    return 0;
 }
 
 static ssize_t net_hub_port_receive(NetClientState *nc,

From 863f678fba4191f3b695620f41056cb7c124425d Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Mon, 20 Aug 2012 10:21:54 +0100
Subject: [PATCH 11/13] net: asynchronous send/receive infrastructure for
 net/socket.c

The net/socket.c net client is not truly asynchronous.  This patch
borrows the qemu_set_fd_handler2() code from net/tap.c as the basis for
proper asynchronous send/receive.

Only read packets from the socket when the peer is able to receive.
This avoids needless queuing.

Later patches implement asynchronous send.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 net/socket.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 7c602e4c3a..7bff5367ac 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -42,9 +42,51 @@ typedef struct NetSocketState {
     unsigned int packet_len;
     uint8_t buf[4096];
     struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */
+    IOHandler *send_fn;           /* differs between SOCK_STREAM/SOCK_DGRAM */
+    bool read_poll;               /* waiting to receive data? */
+    bool write_poll;              /* waiting to transmit data? */
 } NetSocketState;
 
 static void net_socket_accept(void *opaque);
+static void net_socket_writable(void *opaque);
+
+/* Only read packets from socket when peer can receive them */
+static int net_socket_can_send(void *opaque)
+{
+    NetSocketState *s = opaque;
+
+    return qemu_can_send_packet(&s->nc);
+}
+
+static void net_socket_update_fd_handler(NetSocketState *s)
+{
+    qemu_set_fd_handler2(s->fd,
+                         s->read_poll  ? net_socket_can_send : NULL,
+                         s->read_poll  ? s->send_fn : NULL,
+                         s->write_poll ? net_socket_writable : NULL,
+                         s);
+}
+
+static void net_socket_read_poll(NetSocketState *s, bool enable)
+{
+    s->read_poll = enable;
+    net_socket_update_fd_handler(s);
+}
+
+static void net_socket_write_poll(NetSocketState *s, bool enable)
+{
+    s->write_poll = enable;
+    net_socket_update_fd_handler(s);
+}
+
+static void net_socket_writable(void *opaque)
+{
+    NetSocketState *s = opaque;
+
+    net_socket_write_poll(s, false);
+
+    qemu_flush_queued_packets(&s->nc);
+}
 
 /* XXX: we consider we can send the whole packet without blocking */
 static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size)
@@ -81,7 +123,8 @@ static void net_socket_send(void *opaque)
     } else if (size == 0) {
         /* end of connection */
     eoc:
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        net_socket_read_poll(s, false);
+        net_socket_write_poll(s, false);
         if (s->listen_fd != -1) {
             qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
         }
@@ -152,7 +195,8 @@ static void net_socket_send_dgram(void *opaque)
         return;
     if (size == 0) {
         /* end of connection */
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        net_socket_read_poll(s, false);
+        net_socket_write_poll(s, false);
         return;
     }
     qemu_send_packet(&s->nc, s->buf, size);
@@ -243,7 +287,8 @@ static void net_socket_cleanup(NetClientState *nc)
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
     if (s->fd != -1) {
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        net_socket_read_poll(s, false);
+        net_socket_write_poll(s, false);
         close(s->fd);
         s->fd = -1;
     }
@@ -314,8 +359,8 @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
 
     s->fd = fd;
     s->listen_fd = -1;
-
-    qemu_set_fd_handler(s->fd, net_socket_send_dgram, NULL, s);
+    s->send_fn = net_socket_send_dgram;
+    net_socket_read_poll(s, true);
 
     /* mcast: save bound address as dst */
     if (is_connected) {
@@ -332,7 +377,8 @@ err:
 static void net_socket_connect(void *opaque)
 {
     NetSocketState *s = opaque;
-    qemu_set_fd_handler(s->fd, net_socket_send, NULL, s);
+    s->send_fn = net_socket_send;
+    net_socket_read_poll(s, true);
 }
 
 static NetClientInfo net_socket_info = {

From 213fd5087e2e4e2da10ad266df0ba950cf7618bf Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Mon, 20 Aug 2012 10:28:53 +0100
Subject: [PATCH 12/13] net: EAGAIN handling for net/socket.c UDP

Implement asynchronous send for UDP (or other SOCK_DGRAM) sockets.  If
send fails with EAGAIN we wait for the socket to become writable again.

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 net/socket.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 7bff5367ac..aabf0a48c5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -102,9 +102,19 @@ static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t
 static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
+    ssize_t ret;
 
-    return sendto(s->fd, (const void *)buf, size, 0,
-                  (struct sockaddr *)&s->dgram_dst, sizeof(s->dgram_dst));
+    do {
+        ret = sendto(s->fd, buf, size, 0,
+                     (struct sockaddr *)&s->dgram_dst,
+                     sizeof(s->dgram_dst));
+    } while (ret == -1 && errno == EINTR);
+
+    if (ret == -1 && errno == EAGAIN) {
+        net_socket_write_poll(s, true);
+        return 0;
+    }
+    return ret;
 }
 
 static void net_socket_send(void *opaque)

From 45a7f54a8bb3928ffa58d522e0d61acaee8277bb Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Date: Mon, 20 Aug 2012 10:14:35 +0100
Subject: [PATCH 13/13] net: EAGAIN handling for net/socket.c TCP

Replace spinning send_all() with a proper non-blocking send.  When the
socket write buffer limit is reached, we should stop trying to send and
wait for the socket to become writable again.

Non-blocking TCP sockets can return in two different ways when the write
buffer limit is reached:

1. ret = -1 and errno = EAGAIN/EWOULDBLOCK.  No data has been written.

2. ret < total_size.  Short write, only part of the message was
   transmitted.

Handle both cases and keep track of how many bytes have been written in
s->send_index.  (This includes the 'length' header before the actual
payload buffer.)

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
---
 net/socket.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index aabf0a48c5..5e0c92e062 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -32,6 +32,7 @@
 #include "qemu-error.h"
 #include "qemu-option.h"
 #include "qemu_socket.h"
+#include "iov.h"
 
 typedef struct NetSocketState {
     NetClientState nc;
@@ -40,6 +41,7 @@ typedef struct NetSocketState {
     int state; /* 0 = getting length, 1 = getting data */
     unsigned int index;
     unsigned int packet_len;
+    unsigned int send_index;      /* number of bytes sent (only SOCK_STREAM) */
     uint8_t buf[4096];
     struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */
     IOHandler *send_fn;           /* differs between SOCK_STREAM/SOCK_DGRAM */
@@ -88,15 +90,39 @@ static void net_socket_writable(void *opaque)
     qemu_flush_queued_packets(&s->nc);
 }
 
-/* XXX: we consider we can send the whole packet without blocking */
 static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
-    uint32_t len;
-    len = htonl(size);
+    uint32_t len = htonl(size);
+    struct iovec iov[] = {
+        {
+            .iov_base = &len,
+            .iov_len  = sizeof(len),
+        }, {
+            .iov_base = (void *)buf,
+            .iov_len  = size,
+        },
+    };
+    size_t remaining;
+    ssize_t ret;
 
-    send_all(s->fd, (const uint8_t *)&len, sizeof(len));
-    return send_all(s->fd, buf, size);
+    remaining = iov_size(iov, 2) - s->send_index;
+    ret = iov_send(s->fd, iov, 2, s->send_index, remaining);
+
+    if (ret == -1 && errno == EAGAIN) {
+        ret = 0; /* handled further down */
+    }
+    if (ret == -1) {
+        s->send_index = 0;
+        return -errno;
+    }
+    if (ret < (ssize_t)remaining) {
+        s->send_index += ret;
+        net_socket_write_poll(s, true);
+        return 0;
+    }
+    s->send_index = 0;
+    return size;
 }
 
 static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)