From 590790297c0dd2c8e817c7b33daf66862b0ee8ef Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:28 +0300 Subject: [PATCH 01/33] virtio-net: implement RSS configuration command Optionally report RSS feature. Handle RSS configuration command and keep RSS parameters in virtio-net device context. Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- hw/net/trace-events | 3 + hw/net/virtio-net.c | 169 +++++++++++++++++++++++++++++++-- include/hw/virtio/virtio-net.h | 13 +++ 3 files changed, 175 insertions(+), 10 deletions(-) diff --git a/hw/net/trace-events b/hw/net/trace-events index 26700dad99..e6875c4c0f 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -381,6 +381,9 @@ virtio_net_announce_notify(void) "" virtio_net_announce_timer(int round) "%d" virtio_net_handle_announce(int round) "%d" virtio_net_post_load_device(void) +virtio_net_rss_disable(void) +virtio_net_rss_error(const char *msg, uint32_t value) "%s, value 0x%08x" +virtio_net_rss_enable(uint32_t p1, uint16_t p2, uint8_t p3) "hashes 0x%x, table of %d, key of %d" # tulip.c tulip_reg_write(uint64_t addr, const char *name, int size, uint64_t val) "addr 0x%02"PRIx64" (%s) size %d value 0x%08"PRIx64 diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index b7f3d1b2eb..e803b0a26f 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -77,6 +77,16 @@ tso/gso/gro 'off'. 
*/ #define VIRTIO_NET_RSC_DEFAULT_INTERVAL 300000 +#define VIRTIO_NET_RSS_SUPPORTED_HASHES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \ + VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \ + VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \ + VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \ + VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \ + VIRTIO_NET_RSS_HASH_TYPE_UDPv6 | \ + VIRTIO_NET_RSS_HASH_TYPE_IP_EX | \ + VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ + VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) + /* temporary until standard header include it */ #if !defined(VIRTIO_NET_HDR_F_RSC_INFO) @@ -108,6 +118,8 @@ static VirtIOFeature feature_sizes[] = { .end = endof(struct virtio_net_config, mtu)}, {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, .end = endof(struct virtio_net_config, duplex)}, + {.flags = 1ULL << VIRTIO_NET_F_RSS, + .end = endof(struct virtio_net_config, supported_hash_types)}, {} }; @@ -138,6 +150,11 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) memcpy(netcfg.mac, n->mac, ETH_ALEN); virtio_stl_p(vdev, &netcfg.speed, n->net_conf.speed); netcfg.duplex = n->net_conf.duplex; + netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; + virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, + VIRTIO_NET_RSS_MAX_TABLE_LEN); + virtio_stl_p(vdev, &netcfg.supported_hash_types, + VIRTIO_NET_RSS_SUPPORTED_HASHES); memcpy(config, &netcfg, n->config_size); } @@ -701,6 +718,7 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, return features; } + virtio_clear_feature(&features, VIRTIO_NET_F_RSS); features = vhost_net_get_features(get_vhost_net(nc->peer), features); vdev->backend_features = features; @@ -860,6 +878,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) } virtio_net_set_multiqueue(n, + virtio_has_feature(features, VIRTIO_NET_F_RSS) || virtio_has_feature(features, VIRTIO_NET_F_MQ)); virtio_net_set_mrg_rx_bufs(n, @@ -1136,25 +1155,152 @@ static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd, } } +static void virtio_net_disable_rss(VirtIONet *n) 
+{ + if (n->rss_data.enabled) { + trace_virtio_net_rss_disable(); + } + n->rss_data.enabled = false; +} + +static uint16_t virtio_net_handle_rss(VirtIONet *n, + struct iovec *iov, unsigned int iov_cnt) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct virtio_net_rss_config cfg; + size_t s, offset = 0, size_get; + uint16_t queues, i; + struct { + uint16_t us; + uint8_t b; + } QEMU_PACKED temp; + const char *err_msg = ""; + uint32_t err_value = 0; + + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { + err_msg = "RSS is not negotiated"; + goto error; + } + size_get = offsetof(struct virtio_net_rss_config, indirection_table); + s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); + if (s != size_get) { + err_msg = "Short command buffer"; + err_value = (uint32_t)s; + goto error; + } + n->rss_data.hash_types = virtio_ldl_p(vdev, &cfg.hash_types); + n->rss_data.indirections_len = + virtio_lduw_p(vdev, &cfg.indirection_table_mask); + n->rss_data.indirections_len++; + if (!is_power_of_2(n->rss_data.indirections_len)) { + err_msg = "Invalid size of indirection table"; + err_value = n->rss_data.indirections_len; + goto error; + } + if (n->rss_data.indirections_len > VIRTIO_NET_RSS_MAX_TABLE_LEN) { + err_msg = "Too large indirection table"; + err_value = n->rss_data.indirections_len; + goto error; + } + n->rss_data.default_queue = + virtio_lduw_p(vdev, &cfg.unclassified_queue); + if (n->rss_data.default_queue >= n->max_queues) { + err_msg = "Invalid default queue"; + err_value = n->rss_data.default_queue; + goto error; + } + offset += size_get; + size_get = sizeof(uint16_t) * n->rss_data.indirections_len; + g_free(n->rss_data.indirections_table); + n->rss_data.indirections_table = g_malloc(size_get); + if (!n->rss_data.indirections_table) { + err_msg = "Can't allocate indirections table"; + err_value = n->rss_data.indirections_len; + goto error; + } + s = iov_to_buf(iov, iov_cnt, offset, + n->rss_data.indirections_table, size_get); + if (s != size_get) { + err_msg = 
"Short indirection table buffer"; + err_value = (uint32_t)s; + goto error; + } + for (i = 0; i < n->rss_data.indirections_len; ++i) { + uint16_t val = n->rss_data.indirections_table[i]; + n->rss_data.indirections_table[i] = virtio_lduw_p(vdev, &val); + } + offset += size_get; + size_get = sizeof(temp); + s = iov_to_buf(iov, iov_cnt, offset, &temp, size_get); + if (s != size_get) { + err_msg = "Can't get queues"; + err_value = (uint32_t)s; + goto error; + } + queues = virtio_lduw_p(vdev, &temp.us); + if (queues == 0 || queues > n->max_queues) { + err_msg = "Invalid number of queues"; + err_value = queues; + goto error; + } + if (temp.b > VIRTIO_NET_RSS_MAX_KEY_SIZE) { + err_msg = "Invalid key size"; + err_value = temp.b; + goto error; + } + if (!temp.b && n->rss_data.hash_types) { + err_msg = "No key provided"; + err_value = 0; + goto error; + } + if (!temp.b && !n->rss_data.hash_types) { + virtio_net_disable_rss(n); + return queues; + } + offset += size_get; + size_get = temp.b; + s = iov_to_buf(iov, iov_cnt, offset, n->rss_data.key, size_get); + if (s != size_get) { + err_msg = "Can get key buffer"; + err_value = (uint32_t)s; + goto error; + } + n->rss_data.enabled = true; + trace_virtio_net_rss_enable(n->rss_data.hash_types, + n->rss_data.indirections_len, + temp.b); + return queues; +error: + trace_virtio_net_rss_error(err_msg, err_value); + virtio_net_disable_rss(n); + return 0; +} + static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, struct iovec *iov, unsigned int iov_cnt) { VirtIODevice *vdev = VIRTIO_DEVICE(n); - struct virtio_net_ctrl_mq mq; - size_t s; uint16_t queues; - s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); - if (s != sizeof(mq)) { + virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { + queues = virtio_net_handle_rss(n, iov, iov_cnt); + } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + struct virtio_net_ctrl_mq mq; + size_t s; + if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ)) { + return VIRTIO_NET_ERR; + 
} + s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)); + if (s != sizeof(mq)) { + return VIRTIO_NET_ERR; + } + queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); + + } else { return VIRTIO_NET_ERR; } - if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { - return VIRTIO_NET_ERR; - } - - queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs); - if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || queues > n->max_queues || @@ -3111,6 +3257,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) g_free(n->vqs); qemu_del_nic(n->nic); virtio_net_rsc_cleanup(n); + g_free(n->rss_data.indirections_table); virtio_cleanup(vdev); } @@ -3212,6 +3359,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_BIT64("ctrl_guest_offloads", VirtIONet, host_features, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true), DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), + DEFINE_PROP_BIT64("rss", VirtIONet, host_features, + VIRTIO_NET_F_RSS, false), DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, VIRTIO_NET_F_RSC_EXT, false), DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 96c68d4a92..d3fad7c8f3 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -126,6 +126,18 @@ typedef struct VirtioNetRscChain { /* Maximum packet size we can receive from tap device: header + 64k */ #define VIRTIO_NET_MAX_BUFSIZE (sizeof(struct virtio_net_hdr) + (64 * KiB)) +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 + +typedef struct VirtioNetRssData { + bool enabled; + uint32_t hash_types; + uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; + uint16_t indirections_len; + uint16_t *indirections_table; + uint16_t default_queue; +} VirtioNetRssData; + typedef struct VirtIONetQueue { VirtQueue *rx_vq; VirtQueue *tx_vq; @@ -199,6 +211,7 @@ struct VirtIONet { bool failover; DeviceListener primary_listener; 
Notifier migration_state; + VirtioNetRssData rss_data; }; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, From 4474e37a5b3a616803f4570b542e8eede91e50d2 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:29 +0300 Subject: [PATCH 02/33] virtio-net: implement RX RSS processing If VIRTIO_NET_F_RSS negotiated and RSS is enabled, process incoming packets, calculate packet's hash and place the packet into respective RX virtqueue. Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- hw/net/Makefile.objs | 1 + hw/net/virtio-net.c | 88 +++++++++++++++++++++++++++++++++- include/hw/virtio/virtio-net.h | 1 + 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/hw/net/Makefile.objs b/hw/net/Makefile.objs index f2b73983ee..7ccbf72ea7 100644 --- a/hw/net/Makefile.objs +++ b/hw/net/Makefile.objs @@ -41,6 +41,7 @@ obj-$(CONFIG_MILKYMIST) += milkymist-minimac2.o obj-$(CONFIG_PSERIES) += spapr_llan.o obj-$(CONFIG_XILINX_ETHLITE) += xilinx_ethlite.o +common-obj-$(CONFIG_VIRTIO_NET) += net_rx_pkt.o obj-$(CONFIG_VIRTIO_NET) += virtio-net.o common-obj-$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET)) += vhost_net.o common-obj-$(call lnot,$(call land,$(CONFIG_VIRTIO_NET),$(CONFIG_VHOST_NET))) += vhost_net-stub.o diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index e803b0a26f..556f221669 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -42,6 +42,7 @@ #include "trace.h" #include "monitor/qdev.h" #include "hw/pci/pci.h" +#include "net_rx_pkt.h" #define VIRTIO_NET_VM_VERSION 11 @@ -1533,8 +1534,80 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) return 0; } +static uint8_t virtio_net_get_hash_type(bool isip4, + bool isip6, + bool isudp, + bool istcp, + uint32_t types) +{ + if (isip4) { + if (istcp && (types & VIRTIO_NET_RSS_HASH_TYPE_TCPv4)) { + return NetPktRssIpV4Tcp; + } + if (isudp && (types & VIRTIO_NET_RSS_HASH_TYPE_UDPv4)) { + return NetPktRssIpV4Udp; + } + if (types & 
VIRTIO_NET_RSS_HASH_TYPE_IPv4) { + return NetPktRssIpV4; + } + } else if (isip6) { + uint32_t mask = VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | + VIRTIO_NET_RSS_HASH_TYPE_TCPv6; + + if (istcp && (types & mask)) { + return (types & VIRTIO_NET_RSS_HASH_TYPE_TCP_EX) ? + NetPktRssIpV6TcpEx : NetPktRssIpV6Tcp; + } + mask = VIRTIO_NET_RSS_HASH_TYPE_UDP_EX | VIRTIO_NET_RSS_HASH_TYPE_UDPv6; + if (isudp && (types & mask)) { + return (types & VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) ? + NetPktRssIpV6UdpEx : NetPktRssIpV6Udp; + } + mask = VIRTIO_NET_RSS_HASH_TYPE_IP_EX | VIRTIO_NET_RSS_HASH_TYPE_IPv6; + if (types & mask) { + return (types & VIRTIO_NET_RSS_HASH_TYPE_IP_EX) ? + NetPktRssIpV6Ex : NetPktRssIpV6; + } + } + return 0xff; +} + +static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, + size_t size) +{ + VirtIONet *n = qemu_get_nic_opaque(nc); + unsigned int index = nc->queue_index, new_index; + struct NetRxPkt *pkt = n->rx_pkt; + uint8_t net_hash_type; + uint32_t hash; + bool isip4, isip6, isudp, istcp; + + net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, + size - n->host_hdr_len); + net_rx_pkt_get_protocols(pkt, &isip4, &isip6, &isudp, &istcp); + if (isip4 && (net_rx_pkt_get_ip4_info(pkt)->fragment)) { + istcp = isudp = false; + } + if (isip6 && (net_rx_pkt_get_ip6_info(pkt)->fragment)) { + istcp = isudp = false; + } + net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, + n->rss_data.hash_types); + if (net_hash_type > NetPktRssIpV6UdpEx) { + return n->rss_data.default_queue; + } + + hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); + new_index = hash & (n->rss_data.indirections_len - 1); + new_index = n->rss_data.indirections_table[new_index]; + if (index == new_index) { + return -1; + } + return new_index; +} + static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, - size_t size) + size_t size, bool no_rss) { VirtIONet *n = qemu_get_nic_opaque(nc); VirtIONetQueue *q = virtio_net_get_subqueue(nc); @@ 
-1548,6 +1621,14 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, return -1; } + if (!no_rss && n->rss_data.enabled) { + int index = virtio_net_process_rss(nc, buf, size); + if (index >= 0) { + NetClientState *nc2 = qemu_get_subqueue(n->nic, index); + return virtio_net_receive_rcu(nc2, buf, size, true); + } + } + /* hdr_len refers to the header we supply to the guest */ if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) { return 0; @@ -1642,7 +1723,7 @@ static ssize_t virtio_net_do_receive(NetClientState *nc, const uint8_t *buf, { RCU_READ_LOCK_GUARD(); - return virtio_net_receive_rcu(nc, buf, size); + return virtio_net_receive_rcu(nc, buf, size, false); } static void virtio_net_rsc_extract_unit4(VirtioNetRscChain *chain, @@ -3221,6 +3302,8 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) QTAILQ_INIT(&n->rsc_chains); n->qdev = dev; + + net_rx_pkt_init(&n->rx_pkt, false); } static void virtio_net_device_unrealize(DeviceState *dev) @@ -3258,6 +3341,7 @@ static void virtio_net_device_unrealize(DeviceState *dev) qemu_del_nic(n->nic); virtio_net_rsc_cleanup(n); g_free(n->rss_data.indirections_table); + net_rx_pkt_uninit(n->rx_pkt); virtio_cleanup(vdev); } diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index d3fad7c8f3..5081f3c52a 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -212,6 +212,7 @@ struct VirtIONet { DeviceListener primary_listener; Notifier migration_state; VirtioNetRssData rss_data; + struct NetRxPkt *rx_pkt; }; void virtio_net_set_netclient_name(VirtIONet *n, const char *name, From fbbdbddec018723d9f863f01cdec172dda9df12b Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:30 +0300 Subject: [PATCH 03/33] tap: allow extended virtio header with hash info Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- net/tap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/net/tap.c b/net/tap.c index 6207f61f84..ca48f2a285 100644 --- a/net/tap.c +++ b/net/tap.c @@ -254,7 +254,8 @@ static void tap_set_vnet_hdr_len(NetClientState *nc, int len) assert(nc->info->type == NET_CLIENT_DRIVER_TAP); assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || - len == sizeof(struct virtio_net_hdr)); + len == sizeof(struct virtio_net_hdr) || + len == sizeof(struct virtio_net_hdr_v1_hash)); tap_fd_set_vnet_hdr_len(s->fd, len); s->host_vnet_hdr_len = len; From e22f0603fb2fc274920a9e3a1d1306260b9a4cc4 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:31 +0300 Subject: [PATCH 04/33] virtio-net: reference implementation of hash report Suggest VIRTIO_NET_F_HASH_REPORT if specified in device parameters. If the VIRTIO_NET_F_HASH_REPORT is set, the device extends configuration space. If the feature is negotiated, the packet layout is extended to accommodate the hash information. In this case deliver packet's hash value and report type in virtio header extension. Use for configuration the same procedure as already used for RSS. We add two fields in rss_data that control what the device does with the calculated hash if rss_data.enabled is set. If field 'populate' is set the hash is set in the packet, if field 'redirect' is set the hash is used to decide the queue to place the packet to. 
Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 99 +++++++++++++++++++++++++++------- include/hw/virtio/virtio-net.h | 2 + 2 files changed, 81 insertions(+), 20 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 556f221669..6ff3cc35c5 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -119,7 +119,7 @@ static VirtIOFeature feature_sizes[] = { .end = endof(struct virtio_net_config, mtu)}, {.flags = 1ULL << VIRTIO_NET_F_SPEED_DUPLEX, .end = endof(struct virtio_net_config, duplex)}, - {.flags = 1ULL << VIRTIO_NET_F_RSS, + {.flags = (1ULL << VIRTIO_NET_F_RSS) | (1ULL << VIRTIO_NET_F_HASH_REPORT), .end = endof(struct virtio_net_config, supported_hash_types)}, {} }; @@ -153,7 +153,8 @@ static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config) netcfg.duplex = n->net_conf.duplex; netcfg.rss_max_key_size = VIRTIO_NET_RSS_MAX_KEY_SIZE; virtio_stw_p(vdev, &netcfg.rss_max_indirection_table_length, - VIRTIO_NET_RSS_MAX_TABLE_LEN); + virtio_host_has_feature(vdev, VIRTIO_NET_F_RSS) ? + VIRTIO_NET_RSS_MAX_TABLE_LEN : 1); virtio_stl_p(vdev, &netcfg.supported_hash_types, VIRTIO_NET_RSS_SUPPORTED_HASHES); memcpy(config, &netcfg, n->config_size); @@ -579,7 +580,7 @@ static int peer_has_ufo(VirtIONet *n) } static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, - int version_1) + int version_1, int hash_report) { int i; NetClientState *nc; @@ -587,7 +588,10 @@ static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, n->mergeable_rx_bufs = mergeable_rx_bufs; if (version_1) { - n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + n->guest_hdr_len = hash_report ? + sizeof(struct virtio_net_hdr_v1_hash) : + sizeof(struct virtio_net_hdr_mrg_rxbuf); + n->rss_data.populate_hash = !!hash_report; } else { n->guest_hdr_len = n->mergeable_rx_bufs ? 
sizeof(struct virtio_net_hdr_mrg_rxbuf) : @@ -708,6 +712,8 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); + + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); } if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { @@ -720,6 +726,7 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, } virtio_clear_feature(&features, VIRTIO_NET_F_RSS); + virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); features = vhost_net_get_features(get_vhost_net(nc->peer), features); vdev->backend_features = features; @@ -886,12 +893,15 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) virtio_has_feature(features, VIRTIO_NET_F_MRG_RXBUF), virtio_has_feature(features, - VIRTIO_F_VERSION_1)); + VIRTIO_F_VERSION_1), + virtio_has_feature(features, + VIRTIO_NET_F_HASH_REPORT)); n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); + n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); if (n->has_vnet_hdr) { n->curr_guest_offloads = @@ -1165,7 +1175,9 @@ static void virtio_net_disable_rss(VirtIONet *n) } static uint16_t virtio_net_handle_rss(VirtIONet *n, - struct iovec *iov, unsigned int iov_cnt) + struct iovec *iov, + unsigned int iov_cnt, + bool do_rss) { VirtIODevice *vdev = VIRTIO_DEVICE(n); struct virtio_net_rss_config cfg; @@ -1178,10 +1190,14 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, const char *err_msg = ""; uint32_t err_value = 0; - if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { + if (do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_RSS)) { err_msg = "RSS is not negotiated"; 
goto error; } + if (!do_rss && !virtio_vdev_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) { + err_msg = "Hash report is not negotiated"; + goto error; + } size_get = offsetof(struct virtio_net_rss_config, indirection_table); s = iov_to_buf(iov, iov_cnt, offset, &cfg, size_get); if (s != size_get) { @@ -1193,6 +1209,9 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, n->rss_data.indirections_len = virtio_lduw_p(vdev, &cfg.indirection_table_mask); n->rss_data.indirections_len++; + if (!do_rss) { + n->rss_data.indirections_len = 1; + } if (!is_power_of_2(n->rss_data.indirections_len)) { err_msg = "Invalid size of indirection table"; err_value = n->rss_data.indirections_len; @@ -1203,8 +1222,8 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = n->rss_data.indirections_len; goto error; } - n->rss_data.default_queue = - virtio_lduw_p(vdev, &cfg.unclassified_queue); + n->rss_data.default_queue = do_rss ? + virtio_lduw_p(vdev, &cfg.unclassified_queue) : 0; if (n->rss_data.default_queue >= n->max_queues) { err_msg = "Invalid default queue"; err_value = n->rss_data.default_queue; @@ -1238,7 +1257,7 @@ static uint16_t virtio_net_handle_rss(VirtIONet *n, err_value = (uint32_t)s; goto error; } - queues = virtio_lduw_p(vdev, &temp.us); + queues = do_rss ? virtio_lduw_p(vdev, &temp.us) : n->curr_queues; if (queues == 0 || queues > n->max_queues) { err_msg = "Invalid number of queues"; err_value = queues; @@ -1284,8 +1303,12 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, uint16_t queues; virtio_net_disable_rss(n); + if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { + queues = virtio_net_handle_rss(n, iov, iov_cnt, false); + return queues ? 
VIRTIO_NET_OK : VIRTIO_NET_ERR; + } if (cmd == VIRTIO_NET_CTRL_MQ_RSS_CONFIG) { - queues = virtio_net_handle_rss(n, iov, iov_cnt); + queues = virtio_net_handle_rss(n, iov, iov_cnt, true); } else if (cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { struct virtio_net_ctrl_mq mq; size_t s; @@ -1572,15 +1595,34 @@ static uint8_t virtio_net_get_hash_type(bool isip4, return 0xff; } +static void virtio_set_packet_hash(const uint8_t *buf, uint8_t report, + uint32_t hash) +{ + struct virtio_net_hdr_v1_hash *hdr = (void *)buf; + hdr->hash_value = hash; + hdr->hash_report = report; +} + static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, size_t size) { VirtIONet *n = qemu_get_nic_opaque(nc); - unsigned int index = nc->queue_index, new_index; + unsigned int index = nc->queue_index, new_index = index; struct NetRxPkt *pkt = n->rx_pkt; uint8_t net_hash_type; uint32_t hash; bool isip4, isip6, isudp, istcp; + static const uint8_t reports[NetPktRssIpV6UdpEx + 1] = { + VIRTIO_NET_HASH_REPORT_IPv4, + VIRTIO_NET_HASH_REPORT_TCPv4, + VIRTIO_NET_HASH_REPORT_TCPv6, + VIRTIO_NET_HASH_REPORT_IPv6, + VIRTIO_NET_HASH_REPORT_IPv6_EX, + VIRTIO_NET_HASH_REPORT_TCPv6_EX, + VIRTIO_NET_HASH_REPORT_UDPv4, + VIRTIO_NET_HASH_REPORT_UDPv6, + VIRTIO_NET_HASH_REPORT_UDPv6_EX + }; net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, size - n->host_hdr_len); @@ -1594,16 +1636,24 @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, net_hash_type = virtio_net_get_hash_type(isip4, isip6, isudp, istcp, n->rss_data.hash_types); if (net_hash_type > NetPktRssIpV6UdpEx) { - return n->rss_data.default_queue; + if (n->rss_data.populate_hash) { + virtio_set_packet_hash(buf, VIRTIO_NET_HASH_REPORT_NONE, 0); + } + return n->rss_data.redirect ? 
n->rss_data.default_queue : -1; } hash = net_rx_pkt_calc_rss_hash(pkt, net_hash_type, n->rss_data.key); - new_index = hash & (n->rss_data.indirections_len - 1); - new_index = n->rss_data.indirections_table[new_index]; - if (index == new_index) { - return -1; + + if (n->rss_data.populate_hash) { + virtio_set_packet_hash(buf, reports[net_hash_type], hash); } - return new_index; + + if (n->rss_data.redirect) { + new_index = hash & (n->rss_data.indirections_len - 1); + new_index = n->rss_data.indirections_table[new_index]; + } + + return (index == new_index) ? -1 : new_index; } static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, @@ -1679,6 +1729,11 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, } receive_header(n, sg, elem->in_num, buf, size); + if (n->rss_data.populate_hash) { + offset = sizeof(mhdr); + iov_from_buf(sg, elem->in_num, offset, + buf + offset, n->host_hdr_len - sizeof(mhdr)); + } offset = n->host_hdr_len; total += n->guest_hdr_len; guest_offset = n->guest_hdr_len; @@ -2671,7 +2726,9 @@ static int virtio_net_post_load_device(void *opaque, int version_id) trace_virtio_net_post_load_device(); virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, virtio_vdev_has_feature(vdev, - VIRTIO_F_VERSION_1)); + VIRTIO_F_VERSION_1), + virtio_vdev_has_feature(vdev, + VIRTIO_NET_F_HASH_REPORT)); /* MAC_TABLE_ENTRIES may be different from the saved image */ if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { @@ -3290,7 +3347,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) n->vqs[0].tx_waiting = 0; n->tx_burst = n->net_conf.txburst; - virtio_net_set_mrg_rx_bufs(n, 0, 0); + virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); n->promisc = 1; /* for compatibility */ n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); @@ -3445,6 +3502,8 @@ static Property virtio_net_properties[] = { DEFINE_PROP_BIT64("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false), DEFINE_PROP_BIT64("rss", VirtIONet, 
host_features, VIRTIO_NET_F_RSS, false), + DEFINE_PROP_BIT64("hash", VirtIONet, host_features, + VIRTIO_NET_F_HASH_REPORT, false), DEFINE_PROP_BIT64("guest_rsc_ext", VirtIONet, host_features, VIRTIO_NET_F_RSC_EXT, false), DEFINE_PROP_UINT32("rsc_interval", VirtIONet, rsc_timeout, diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h index 5081f3c52a..a45ef8278e 100644 --- a/include/hw/virtio/virtio-net.h +++ b/include/hw/virtio/virtio-net.h @@ -131,6 +131,8 @@ typedef struct VirtioNetRscChain { typedef struct VirtioNetRssData { bool enabled; + bool redirect; + bool populate_hash; uint32_t hash_types; uint8_t key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; uint16_t indirections_len; From ff4e6d545d924ad9bfe135a90c05b4f9960773d6 Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:32 +0300 Subject: [PATCH 05/33] vmstate.h: provide VMSTATE_VARRAY_UINT16_ALLOC macro Similar to VMSTATE_VARRAY_UINT32_ALLOC, but the size is 16-bit field. Signed-off-by: Michael S. 
Tsirkin Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- include/migration/vmstate.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index eafa39f560..f68ed7db13 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -432,6 +432,16 @@ extern const VMStateInfo vmstate_info_qlist; .offset = vmstate_offset_pointer(_state, _field, _type), \ } +#define VMSTATE_VARRAY_UINT16_ALLOC(_field, _state, _field_num, _version, _info, _type) {\ + .name = (stringify(_field)), \ + .version_id = (_version), \ + .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),\ + .info = &(_info), \ + .size = sizeof(_type), \ + .flags = VMS_VARRAY_UINT16 | VMS_POINTER | VMS_ALLOC, \ + .offset = vmstate_offset_pointer(_state, _field, _type), \ +} + #define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, _info, _type) {\ .name = (stringify(_field)), \ .version_id = (_version), \ From e41b711485e5b2dcf747ef27cf252a940e09247f Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:33 +0300 Subject: [PATCH 06/33] virtio-net: add migration support for RSS and hash report Save and restore RSS/hash report configuration. 
Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 6ff3cc35c5..2a5da2985a 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -2777,6 +2777,13 @@ static int virtio_net_post_load_device(void *opaque, int version_id) } } + if (n->rss_data.enabled) { + trace_virtio_net_rss_enable(n->rss_data.hash_types, + n->rss_data.indirections_len, + sizeof(n->rss_data.key)); + } else { + trace_virtio_net_rss_disable(); + } return 0; } @@ -2954,6 +2961,32 @@ static const VMStateDescription vmstate_virtio_net_has_vnet = { }, }; +static bool virtio_net_rss_needed(void *opaque) +{ + return VIRTIO_NET(opaque)->rss_data.enabled; +} + +static const VMStateDescription vmstate_virtio_net_rss = { + .name = "virtio-net-device/rss", + .version_id = 1, + .minimum_version_id = 1, + .needed = virtio_net_rss_needed, + .fields = (VMStateField[]) { + VMSTATE_BOOL(rss_data.enabled, VirtIONet), + VMSTATE_BOOL(rss_data.redirect, VirtIONet), + VMSTATE_BOOL(rss_data.populate_hash, VirtIONet), + VMSTATE_UINT32(rss_data.hash_types, VirtIONet), + VMSTATE_UINT16(rss_data.indirections_len, VirtIONet), + VMSTATE_UINT16(rss_data.default_queue, VirtIONet), + VMSTATE_UINT8_ARRAY(rss_data.key, VirtIONet, + VIRTIO_NET_RSS_MAX_KEY_SIZE), + VMSTATE_VARRAY_UINT16_ALLOC(rss_data.indirections_table, VirtIONet, + rss_data.indirections_len, 0, + vmstate_info_uint16, uint16_t), + VMSTATE_END_OF_LIST() + }, +}; + static const VMStateDescription vmstate_virtio_net_device = { .name = "virtio-net-device", .version_id = VIRTIO_NET_VM_VERSION, @@ -3004,6 +3037,10 @@ static const VMStateDescription vmstate_virtio_net_device = { has_ctrl_guest_offloads), VMSTATE_END_OF_LIST() }, + .subsections = (const VMStateDescription * []) { + &vmstate_virtio_net_rss, + NULL + } }; static NetClientInfo net_virtio_info = { From dd3d85e89123c907be7628957457af3d03e3b85b 
Mon Sep 17 00:00:00 2001 From: Yuri Benditovich Date: Fri, 8 May 2020 15:59:34 +0300 Subject: [PATCH 07/33] virtio-net: align RSC fields with updated virtio-net header Removal of duplicated RSC definitions. Changing names of the fields to ones defined in the Linux header. Signed-off-by: Yuri Benditovich Signed-off-by: Jason Wang --- hw/net/virtio-net.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 2a5da2985a..aff67a92df 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -88,26 +88,6 @@ VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) -/* temporary until standard header include it */ -#if !defined(VIRTIO_NET_HDR_F_RSC_INFO) - -#define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc_ext data in csum_ fields */ -#define VIRTIO_NET_F_RSC_EXT 61 - -#endif - -static inline __virtio16 *virtio_net_rsc_ext_num_packets( - struct virtio_net_hdr *hdr) -{ - return &hdr->csum_start; -} - -static inline __virtio16 *virtio_net_rsc_ext_num_dupacks( - struct virtio_net_hdr *hdr) -{ - return &hdr->csum_offset; -} - static VirtIOFeature feature_sizes[] = { {.flags = 1ULL << VIRTIO_NET_F_MAC, .end = endof(struct virtio_net_config, mac)}, @@ -1821,15 +1801,15 @@ static size_t virtio_net_rsc_drain_seg(VirtioNetRscChain *chain, VirtioNetRscSeg *seg) { int ret; - struct virtio_net_hdr *h; + struct virtio_net_hdr_v1 *h; - h = (struct virtio_net_hdr *)seg->buf; + h = (struct virtio_net_hdr_v1 *)seg->buf; h->flags = 0; h->gso_type = VIRTIO_NET_HDR_GSO_NONE; if (seg->is_coalesced) { - *virtio_net_rsc_ext_num_packets(h) = seg->packets; - *virtio_net_rsc_ext_num_dupacks(h) = seg->dup_ack; + h->rsc.segments = seg->packets; + h->rsc.dup_acks = seg->dup_ack; h->flags = VIRTIO_NET_HDR_F_RSC_INFO; if (chain->proto == ETH_P_IP) { h->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; From d9b69640391618045949f7c500b87fc129f862ed Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 26 Apr 2020 
12:55:39 +0200 Subject: [PATCH 08/33] Fix tulip breakage The tulip network driver in a qemu-system-hppa emulation is broken in the sense that larger network packets aren't received any longer and thus even running e.g. "apt update" inside the VM fails. The breakage was introduced by commit 8ffb7265af ("check frame size and r/w data length") which added checks to prevent accesses outside of the rx/tx buffers. But the new checks were implemented incorrectly. The variable rx_frame_len counts backwards, from rx_frame_size down to zero, and the variable len is never bigger than rx_frame_len, so such accesses just can't happen and the checks are unnecessary. On the contrary, the checks now prevent larger packets from being moved into the rx buffers. This patch reverts the wrong checks and was successfully tested with a qemu-system-hppa emulation. Fixes: 8ffb7265af ("check frame size and r/w data length") Buglink: https://bugs.launchpad.net/bugs/1874539 Signed-off-by: Helge Deller Signed-off-by: Jason Wang --- hw/net/tulip.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 6cefc0add2..57ecbe2161 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -171,9 +171,6 @@ static void tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) len = s->rx_frame_len; } - if (s->rx_frame_len + len > sizeof(s->rx_frame)) { - return; - } pci_dma_write(&s->dev, desc->buf_addr1, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; @@ -186,9 +183,6 @@ static void tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) len = s->rx_frame_len; } - if (s->rx_frame_len + len > sizeof(s->rx_frame)) { - return; - } pci_dma_write(&s->dev, desc->buf_addr2, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; From 171ce2e27906a59e243baf8c67ad1c1193cec4be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 24 Apr 2020 01:16:42 +0200 Subject: [PATCH 09/33]
hw/net/tulip: Fix 'Descriptor Error' definition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bit #14 is "DE" for 'Descriptor Error': When set, indicates a frame truncation caused by a frame that does not fit within the current descriptor buffers, and that the 21143 does not own the next descriptor. [Table 4-1. RDES0 Bit Fields Description] Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- hw/net/tulip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/tulip.h b/hw/net/tulip.h index 97521b21db..5271aad8d5 100644 --- a/hw/net/tulip.h +++ b/hw/net/tulip.h @@ -211,7 +211,7 @@ #define RDES0_RF BIT(11) #define RDES0_DT_SHIFT 12 #define RDES0_DT_MASK 3 -#define RDES0_LE BIT(14) +#define RDES0_DE BIT(14) #define RDES0_ES BIT(15) #define RDES0_FL_SHIFT 16 #define RDES0_FL_MASK 0x3fff From 97d7fb5a792b8b289ca6a981d9fe3369fa8b8c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Fri, 24 Apr 2020 01:16:43 +0200 Subject: [PATCH 10/33] hw/net/tulip: Log descriptor overflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Log with GUEST_ERROR what the guest is doing wrong. 
Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- hw/net/tulip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hw/net/tulip.c b/hw/net/tulip.c index 57ecbe2161..4487fd61cf 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -578,6 +578,9 @@ static int tulip_copy_tx_buffers(TULIPState *s, struct tulip_descriptor *desc) int len2 = (desc->control >> TDES1_BUF2_SIZE_SHIFT) & TDES1_BUF2_SIZE_MASK; if (s->tx_frame_len + len1 > sizeof(s->tx_frame)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: descriptor overflow (ofs: %u, len:%d, size:%zu)\n", + __func__, s->tx_frame_len, len1, sizeof(s->tx_frame)); return -1; } if (len1) { @@ -587,6 +590,9 @@ static int tulip_copy_tx_buffers(TULIPState *s, struct tulip_descriptor *desc) } if (s->tx_frame_len + len2 > sizeof(s->tx_frame)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: descriptor overflow (ofs: %u, len:%d, size:%zu)\n", + __func__, s->tx_frame_len, len2, sizeof(s->tx_frame)); return -1; } if (len2) { From 6fe7661d3dc16d605e0ef283b5c77b5256125c2a Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:43 +0530 Subject: [PATCH 11/33] net: cadence_gem: Fix debug statements Enabling debug breaks the build. Fix the debug statements and make them always compilable. Fix a few statements to use sized integer casting. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index e8f9cc7f1e..2e273dca18 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -35,14 +35,13 @@ #include "sysemu/dma.h" #include "net/checksum.h" -#ifdef CADENCE_GEM_ERR_DEBUG -#define DB_PRINT(...) do { \ - fprintf(stderr, ": %s: ", __func__); \ - fprintf(stderr, ## __VA_ARGS__); \ - } while (0) -#else - #define DB_PRINT(...) -#endif +#define CADENCE_GEM_ERR_DEBUG 0 +#define DB_PRINT(...)
do {\ + if (CADENCE_GEM_ERR_DEBUG) { \ + qemu_log(": %s: ", __func__); \ + qemu_log(__VA_ARGS__); \ + } \ +} while (0) #define GEM_NWCTRL (0x00000000/4) /* Network Control reg */ #define GEM_NWCFG (0x00000004/4) /* Network Config reg */ @@ -979,7 +978,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) size += 4; } - DB_PRINT("config bufsize: %d packet size: %ld\n", rxbufsize, size); + DB_PRINT("config bufsize: %u packet size: %zd\n", rxbufsize, size); /* Find which queue we are targeting */ q = get_queue_from_screen(s, rxbuf_ptr, rxbufsize); @@ -992,9 +991,9 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) return -1; } - DB_PRINT("copy %u bytes to 0x%" PRIx64 "\n", - MIN(bytes_to_copy, rxbufsize), - rx_desc_get_buffer(s, s->rx_desc[q])); + DB_PRINT("copy %" PRIu32 " bytes to 0x%" PRIx64 "\n", + MIN(bytes_to_copy, rxbufsize), + rx_desc_get_buffer(s, s->rx_desc[q])); /* Copy packet data to emulated DMA buffer */ address_space_write(&s->dma_as, rx_desc_get_buffer(s, s->rx_desc[q]) + @@ -1160,8 +1159,8 @@ static void gem_transmit(CadenceGEMState *s) */ if ((tx_desc_get_buffer(s, desc) == 0) || (tx_desc_get_length(desc) == 0)) { - DB_PRINT("Invalid TX descriptor @ 0x%x\n", - (unsigned)packet_desc_addr); + DB_PRINT("Invalid TX descriptor @ 0x%" HWADDR_PRIx "\n", + packet_desc_addr); break; } From 96ea126a8deef30cc81519fb3cb50bf3d65eb6aa Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:44 +0530 Subject: [PATCH 12/33] net: cadence_gem: Fix the queue address update during wrap around During wrap around and reset, queues are pointing to initial base address of queue 0, irrespective of what queue we are dealing with. Fix it by assigning proper base address every time. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. 
Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 37 +++++++++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 2e273dca18..fd3e4a8cd6 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -845,6 +845,35 @@ static int get_queue_from_screen(CadenceGEMState *s, uint8_t *rxbuf_ptr, return 0; } +static uint32_t gem_get_queue_base_addr(CadenceGEMState *s, bool tx, int q) +{ + uint32_t base_addr = 0; + + switch (q) { + case 0: + base_addr = s->regs[tx ? GEM_TXQBASE : GEM_RXQBASE]; + break; + case 1 ... (MAX_PRIORITY_QUEUES - 1): + base_addr = s->regs[(tx ? GEM_TRANSMIT_Q1_PTR : + GEM_RECEIVE_Q1_PTR) + q - 1]; + break; + default: + g_assert_not_reached(); + }; + + return base_addr; +} + +static inline uint32_t gem_get_tx_queue_base_addr(CadenceGEMState *s, int q) +{ + return gem_get_queue_base_addr(s, true, q); +} + +static inline uint32_t gem_get_rx_queue_base_addr(CadenceGEMState *s, int q) +{ + return gem_get_queue_base_addr(s, false, q); +} + static hwaddr gem_get_desc_addr(CadenceGEMState *s, bool tx, int q) { hwaddr desc_addr = 0; @@ -1043,7 +1072,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) /* Next descriptor */ if (rx_desc_get_wrap(s->rx_desc[q])) { DB_PRINT("wrapping RX descriptor list\n"); - s->rx_desc_addr[q] = s->regs[GEM_RXQBASE]; + s->rx_desc_addr[q] = gem_get_rx_queue_base_addr(s, q); } else { DB_PRINT("incrementing RX descriptor list\n"); s->rx_desc_addr[q] += 4 * gem_get_desc_len(s, true); @@ -1199,7 +1228,7 @@ static void gem_transmit(CadenceGEMState *s) sizeof(desc_first)); /* Advance the hardware current descriptor past this packet */ if (tx_desc_get_wrap(desc)) { - s->tx_desc_addr[q] = s->regs[GEM_TXQBASE]; + s->tx_desc_addr[q] = gem_get_tx_queue_base_addr(s, q); } else { s->tx_desc_addr[q] = packet_desc_addr + 4 * gem_get_desc_len(s, false); @@ -1251,7 +1280,7 @@ static void 
gem_transmit(CadenceGEMState *s) } else { packet_desc_addr = 0; } - packet_desc_addr |= s->regs[GEM_TXQBASE]; + packet_desc_addr |= gem_get_tx_queue_base_addr(s, q); } else { packet_desc_addr += 4 * gem_get_desc_len(s, false); } @@ -1457,7 +1486,7 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, if (!(val & GEM_NWCTRL_TXENA)) { /* Reset to start of Q when transmit disabled. */ for (i = 0; i < s->num_priority_queues; i++) { - s->tx_desc_addr[i] = s->regs[GEM_TXQBASE]; + s->tx_desc_addr[i] = gem_get_tx_queue_base_addr(s, i); } } if (gem_can_receive(qemu_get_queue(s->nic))) { From 86a29d4c72e42130e08bae3335c25575d4af0b4d Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:45 +0530 Subject: [PATCH 13/33] net: cadence_gem: Fix irq update w.r.t queue Set irq's specific to a queue, present implementation is setting q1 irq based on q0 status. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index fd3e4a8cd6..4ad6c8e3c9 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -554,29 +554,10 @@ static void gem_update_int_status(CadenceGEMState *s) { int i; - if (!s->regs[GEM_ISR]) { - /* ISR isn't set, clear all the interrupts */ - for (i = 0; i < s->num_priority_queues; ++i) { - qemu_set_irq(s->irq[i], 0); - } - return; - } + qemu_set_irq(s->irq[0], !!s->regs[GEM_ISR]); - /* If we get here we know s->regs[GEM_ISR] is set, so we don't need to - * check it again. - */ - if (s->num_priority_queues == 1) { - /* No priority queues, just trigger the interrupt */ - DB_PRINT("asserting int.\n"); - qemu_set_irq(s->irq[0], 1); - return; - } - - for (i = 0; i < s->num_priority_queues; ++i) { - if (s->regs[GEM_INT_Q1_STATUS + i]) { - DB_PRINT("asserting int. 
(q=%d)\n", i); - qemu_set_irq(s->irq[i], 1); - } + for (i = 1; i < s->num_priority_queues; ++i) { + qemu_set_irq(s->irq[i], !!s->regs[GEM_INT_Q1_STATUS + i - 1]); } } From 4c70e32f05fc7903185a4e9d01987ee3de2052f6 Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:46 +0530 Subject: [PATCH 14/33] net: cadence_gem: Define access permission for interrupt registers Q1 to Q7 ISR's are clear-on-read, IER/IDR registers are write-only, mask reg are read-only. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 4ad6c8e3c9..72e7cf99d7 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -458,6 +458,7 @@ static const uint8_t broadcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; */ static void gem_init_register_masks(CadenceGEMState *s) { + unsigned int i; /* Mask of register bits which are read only */ memset(&s->regs_ro[0], 0, sizeof(s->regs_ro)); s->regs_ro[GEM_NWCTRL] = 0xFFF80000; @@ -470,10 +471,19 @@ static void gem_init_register_masks(CadenceGEMState *s) s->regs_ro[GEM_ISR] = 0xFFFFFFFF; s->regs_ro[GEM_IMR] = 0xFFFFFFFF; s->regs_ro[GEM_MODID] = 0xFFFFFFFF; + for (i = 0; i < s->num_priority_queues; i++) { + s->regs_ro[GEM_INT_Q1_STATUS + i] = 0xFFFFFFFF; + s->regs_ro[GEM_INT_Q1_ENABLE + i] = 0xFFFFF319; + s->regs_ro[GEM_INT_Q1_DISABLE + i] = 0xFFFFF319; + s->regs_ro[GEM_INT_Q1_MASK + i] = 0xFFFFFFFF; + } /* Mask of register bits which are clear on read */ memset(&s->regs_rtc[0], 0, sizeof(s->regs_rtc)); s->regs_rtc[GEM_ISR] = 0xFFFFFFFF; + for (i = 0; i < s->num_priority_queues; i++) { + s->regs_rtc[GEM_INT_Q1_STATUS + i] = 0x00000CE6; + } /* Mask of register bits which are write 1 to clear */ memset(&s->regs_w1c[0], 0, sizeof(s->regs_w1c)); @@ -485,6 +495,10 @@ static void gem_init_register_masks(CadenceGEMState *s) s->regs_wo[GEM_NWCTRL] = 0x00073E60; 
s->regs_wo[GEM_IER] = 0x07FFFFFF; s->regs_wo[GEM_IDR] = 0x07FFFFFF; + for (i = 0; i < s->num_priority_queues; i++) { + s->regs_wo[GEM_INT_Q1_ENABLE + i] = 0x00000CE6; + s->regs_wo[GEM_INT_Q1_DISABLE + i] = 0x00000CE6; + } } /* From 68dbee3bf95173d73f103d1a82ad9b14e5cde354 Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:47 +0530 Subject: [PATCH 15/33] net: cadence_gem: Set ISR according to queue in use Set ISR according to queue in use, added interrupt support for all queues. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 72e7cf99d7..2e183b5976 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -451,6 +451,16 @@ static inline void rx_desc_set_sar(uint32_t *desc, int sar_idx) /* The broadcast MAC address: 0xFFFFFFFFFFFF */ static const uint8_t broadcast_addr[] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static void gem_set_isr(CadenceGEMState *s, int q, uint32_t flag) +{ + if (q == 0) { + s->regs[GEM_ISR] |= flag & ~(s->regs[GEM_IMR]); + } else { + s->regs[GEM_INT_Q1_STATUS + q - 1] |= flag & + ~(s->regs[GEM_INT_Q1_MASK + q - 1]); + } +} + /* * gem_init_register_masks: * One time initialization. 
@@ -906,7 +916,7 @@ static void gem_get_rx_desc(CadenceGEMState *s, int q) if (rx_desc_get_ownership(s->rx_desc[q]) == 1) { DB_PRINT("descriptor 0x%" HWADDR_PRIx " owned by sw.\n", desc_addr); s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_NOBUF; - s->regs[GEM_ISR] |= GEM_INT_RXUSED & ~(s->regs[GEM_IMR]); + gem_set_isr(s, q, GEM_INT_RXUSED); /* Handle interrupt consequences */ gem_update_int_status(s); } @@ -1080,7 +1090,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) gem_receive_updatestats(s, buf, size); s->regs[GEM_RXSTATUS] |= GEM_RXSTATUS_FRMRCVD; - s->regs[GEM_ISR] |= GEM_INT_RXCMPL & ~(s->regs[GEM_IMR]); + gem_set_isr(s, q, GEM_INT_RXCMPL); /* Handle interrupt consequences */ gem_update_int_status(s); @@ -1231,13 +1241,7 @@ static void gem_transmit(CadenceGEMState *s) DB_PRINT("TX descriptor next: 0x%08x\n", s->tx_desc_addr[q]); s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_TXCMPL; - s->regs[GEM_ISR] |= GEM_INT_TXCMPL & ~(s->regs[GEM_IMR]); - - /* Update queue interrupt status */ - if (s->num_priority_queues > 1) { - s->regs[GEM_INT_Q1_STATUS + q] |= - GEM_INT_TXCMPL & ~(s->regs[GEM_INT_Q1_MASK + q]); - } + gem_set_isr(s, q, GEM_INT_TXCMPL); /* Handle interrupt consequences */ gem_update_int_status(s); @@ -1287,7 +1291,10 @@ static void gem_transmit(CadenceGEMState *s) if (tx_desc_get_used(desc)) { s->regs[GEM_TXSTATUS] |= GEM_TXSTATUS_USED; - s->regs[GEM_ISR] |= GEM_INT_TXUSED & ~(s->regs[GEM_IMR]); + /* IRQ TXUSED is defined only for queue 0 */ + if (q == 0) { + gem_set_isr(s, 0, GEM_INT_TXUSED); + } gem_update_int_status(s); } } From 24d62fd5028ea66448f441de8ae483beaf4afe93 Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:48 +0530 Subject: [PATCH 16/33] net: cadence_gem: Move tx/rx packet buffers to CadenceGEMState Moving these buffers to CadenceGEMState, as their size will be increased further when JUMBO frames support is added. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E.
Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 38 ++++++++++++++++-------------------- include/hw/net/cadence_gem.h | 4 ++++ 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 2e183b5976..247a52f8ce 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -928,17 +928,14 @@ static void gem_get_rx_desc(CadenceGEMState *s, int q) */ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) { - CadenceGEMState *s; + CadenceGEMState *s = qemu_get_nic_opaque(nc); unsigned rxbufsize, bytes_to_copy; unsigned rxbuf_offset; - uint8_t rxbuf[2048]; uint8_t *rxbuf_ptr; bool first_desc = true; int maf; int q = 0; - s = qemu_get_nic_opaque(nc); - /* Is this destination MAC address "for us" ? */ maf = gem_mac_address_filter(s, buf); if (maf == GEM_RX_REJECT) { @@ -994,19 +991,19 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) } else { unsigned crc_val; - if (size > sizeof(rxbuf) - sizeof(crc_val)) { - size = sizeof(rxbuf) - sizeof(crc_val); + if (size > MAX_FRAME_SIZE - sizeof(crc_val)) { + size = MAX_FRAME_SIZE - sizeof(crc_val); } bytes_to_copy = size; /* The application wants the FCS field, which QEMU does not provide. * We must try and calculate one. 
*/ - memcpy(rxbuf, buf, size); - memset(rxbuf + size, 0, sizeof(rxbuf) - size); - rxbuf_ptr = rxbuf; - crc_val = cpu_to_le32(crc32(0, rxbuf, MAX(size, 60))); - memcpy(rxbuf + size, &crc_val, sizeof(crc_val)); + memcpy(s->rx_packet, buf, size); + memset(s->rx_packet + size, 0, MAX_FRAME_SIZE - size); + rxbuf_ptr = s->rx_packet; + crc_val = cpu_to_le32(crc32(0, s->rx_packet, MAX(size, 60))); + memcpy(s->rx_packet + size, &crc_val, sizeof(crc_val)); bytes_to_copy += 4; size += 4; @@ -1152,7 +1149,6 @@ static void gem_transmit(CadenceGEMState *s) { uint32_t desc[DESC_MAX_NUM_WORDS]; hwaddr packet_desc_addr; - uint8_t tx_packet[2048]; uint8_t *p; unsigned total_bytes; int q = 0; @@ -1168,7 +1164,7 @@ static void gem_transmit(CadenceGEMState *s) * Packets scattered across multiple descriptors are gathered to this * one contiguous buffer first. */ - p = tx_packet; + p = s->tx_packet; total_bytes = 0; for (q = s->num_priority_queues - 1; q >= 0; q--) { @@ -1198,12 +1194,12 @@ static void gem_transmit(CadenceGEMState *s) break; } - if (tx_desc_get_length(desc) > sizeof(tx_packet) - - (p - tx_packet)) { + if (tx_desc_get_length(desc) > MAX_FRAME_SIZE - + (p - s->tx_packet)) { DB_PRINT("TX descriptor @ 0x%" HWADDR_PRIx \ " too large: size 0x%x space 0x%zx\n", packet_desc_addr, tx_desc_get_length(desc), - sizeof(tx_packet) - (p - tx_packet)); + MAX_FRAME_SIZE - (p - s->tx_packet)); break; } @@ -1248,24 +1244,24 @@ static void gem_transmit(CadenceGEMState *s) /* Is checksum offload enabled? 
*/ if (s->regs[GEM_DMACFG] & GEM_DMACFG_TXCSUM_OFFL) { - net_checksum_calculate(tx_packet, total_bytes); + net_checksum_calculate(s->tx_packet, total_bytes); } /* Update MAC statistics */ - gem_transmit_updatestats(s, tx_packet, total_bytes); + gem_transmit_updatestats(s, s->tx_packet, total_bytes); /* Send the packet somewhere */ if (s->phy_loop || (s->regs[GEM_NWCTRL] & GEM_NWCTRL_LOCALLOOP)) { - gem_receive(qemu_get_queue(s->nic), tx_packet, + gem_receive(qemu_get_queue(s->nic), s->tx_packet, total_bytes); } else { - qemu_send_packet(qemu_get_queue(s->nic), tx_packet, + qemu_send_packet(qemu_get_queue(s->nic), s->tx_packet, total_bytes); } /* Prepare for next packet */ - p = tx_packet; + p = s->tx_packet; total_bytes = 0; } diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h index 5c83036ade..eddac70f20 100644 --- a/include/hw/net/cadence_gem.h +++ b/include/hw/net/cadence_gem.h @@ -40,6 +40,8 @@ #define MAX_TYPE1_SCREENERS 16 #define MAX_TYPE2_SCREENERS 16 +#define MAX_FRAME_SIZE 2048 + typedef struct CadenceGEMState { /*< private >*/ SysBusDevice parent_obj; @@ -80,6 +82,8 @@ typedef struct CadenceGEMState { uint8_t can_rx_state; /* Debug only */ + uint8_t tx_packet[MAX_FRAME_SIZE]; + uint8_t rx_packet[MAX_FRAME_SIZE]; uint32_t rx_desc[MAX_PRIORITY_QUEUES][DESC_MAX_NUM_WORDS]; bool sar_active[4]; From 88dba7ed842d18da0a9354660adf1e6a5a87b37b Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:49 +0530 Subject: [PATCH 17/33] net: cadence_gem: Fix up code style Fix the code style for register definitions. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. 
Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 202 ++++++++++++++++++++++--------------------- 1 file changed, 102 insertions(+), 100 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 247a52f8ce..22d0d167c1 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -43,110 +43,112 @@ } \ } while (0) -#define GEM_NWCTRL (0x00000000/4) /* Network Control reg */ -#define GEM_NWCFG (0x00000004/4) /* Network Config reg */ -#define GEM_NWSTATUS (0x00000008/4) /* Network Status reg */ -#define GEM_USERIO (0x0000000C/4) /* User IO reg */ -#define GEM_DMACFG (0x00000010/4) /* DMA Control reg */ -#define GEM_TXSTATUS (0x00000014/4) /* TX Status reg */ -#define GEM_RXQBASE (0x00000018/4) /* RX Q Base address reg */ -#define GEM_TXQBASE (0x0000001C/4) /* TX Q Base address reg */ -#define GEM_RXSTATUS (0x00000020/4) /* RX Status reg */ -#define GEM_ISR (0x00000024/4) /* Interrupt Status reg */ -#define GEM_IER (0x00000028/4) /* Interrupt Enable reg */ -#define GEM_IDR (0x0000002C/4) /* Interrupt Disable reg */ -#define GEM_IMR (0x00000030/4) /* Interrupt Mask reg */ -#define GEM_PHYMNTNC (0x00000034/4) /* Phy Maintenance reg */ -#define GEM_RXPAUSE (0x00000038/4) /* RX Pause Time reg */ -#define GEM_TXPAUSE (0x0000003C/4) /* TX Pause Time reg */ -#define GEM_TXPARTIALSF (0x00000040/4) /* TX Partial Store and Forward */ -#define GEM_RXPARTIALSF (0x00000044/4) /* RX Partial Store and Forward */ -#define GEM_HASHLO (0x00000080/4) /* Hash Low address reg */ -#define GEM_HASHHI (0x00000084/4) /* Hash High address reg */ -#define GEM_SPADDR1LO (0x00000088/4) /* Specific addr 1 low reg */ -#define GEM_SPADDR1HI (0x0000008C/4) /* Specific addr 1 high reg */ -#define GEM_SPADDR2LO (0x00000090/4) /* Specific addr 2 low reg */ -#define GEM_SPADDR2HI (0x00000094/4) /* Specific addr 2 high reg */ -#define GEM_SPADDR3LO (0x00000098/4) /* Specific addr 3 low reg */ -#define GEM_SPADDR3HI (0x0000009C/4) /* Specific addr 3 high reg */ -#define 
GEM_SPADDR4LO (0x000000A0/4) /* Specific addr 4 low reg */ -#define GEM_SPADDR4HI (0x000000A4/4) /* Specific addr 4 high reg */ -#define GEM_TIDMATCH1 (0x000000A8/4) /* Type ID1 Match reg */ -#define GEM_TIDMATCH2 (0x000000AC/4) /* Type ID2 Match reg */ -#define GEM_TIDMATCH3 (0x000000B0/4) /* Type ID3 Match reg */ -#define GEM_TIDMATCH4 (0x000000B4/4) /* Type ID4 Match reg */ -#define GEM_WOLAN (0x000000B8/4) /* Wake on LAN reg */ -#define GEM_IPGSTRETCH (0x000000BC/4) /* IPG Stretch reg */ -#define GEM_SVLAN (0x000000C0/4) /* Stacked VLAN reg */ -#define GEM_MODID (0x000000FC/4) /* Module ID reg */ -#define GEM_OCTTXLO (0x00000100/4) /* Octects transmitted Low reg */ -#define GEM_OCTTXHI (0x00000104/4) /* Octects transmitted High reg */ -#define GEM_TXCNT (0x00000108/4) /* Error-free Frames transmitted */ -#define GEM_TXBCNT (0x0000010C/4) /* Error-free Broadcast Frames */ -#define GEM_TXMCNT (0x00000110/4) /* Error-free Multicast Frame */ -#define GEM_TXPAUSECNT (0x00000114/4) /* Pause Frames Transmitted */ -#define GEM_TX64CNT (0x00000118/4) /* Error-free 64 TX */ -#define GEM_TX65CNT (0x0000011C/4) /* Error-free 65-127 TX */ -#define GEM_TX128CNT (0x00000120/4) /* Error-free 128-255 TX */ -#define GEM_TX256CNT (0x00000124/4) /* Error-free 256-511 */ -#define GEM_TX512CNT (0x00000128/4) /* Error-free 512-1023 TX */ -#define GEM_TX1024CNT (0x0000012C/4) /* Error-free 1024-1518 TX */ -#define GEM_TX1519CNT (0x00000130/4) /* Error-free larger than 1519 TX */ -#define GEM_TXURUNCNT (0x00000134/4) /* TX under run error counter */ -#define GEM_SINGLECOLLCNT (0x00000138/4) /* Single Collision Frames */ -#define GEM_MULTCOLLCNT (0x0000013C/4) /* Multiple Collision Frames */ -#define GEM_EXCESSCOLLCNT (0x00000140/4) /* Excessive Collision Frames */ -#define GEM_LATECOLLCNT (0x00000144/4) /* Late Collision Frames */ -#define GEM_DEFERTXCNT (0x00000148/4) /* Deferred Transmission Frames */ -#define GEM_CSENSECNT (0x0000014C/4) /* Carrier Sense Error Counter */ -#define 
GEM_OCTRXLO (0x00000150/4) /* Octects Received register Low */ -#define GEM_OCTRXHI (0x00000154/4) /* Octects Received register High */ -#define GEM_RXCNT (0x00000158/4) /* Error-free Frames Received */ -#define GEM_RXBROADCNT (0x0000015C/4) /* Error-free Broadcast Frames RX */ -#define GEM_RXMULTICNT (0x00000160/4) /* Error-free Multicast Frames RX */ -#define GEM_RXPAUSECNT (0x00000164/4) /* Pause Frames Received Counter */ -#define GEM_RX64CNT (0x00000168/4) /* Error-free 64 byte Frames RX */ -#define GEM_RX65CNT (0x0000016C/4) /* Error-free 65-127B Frames RX */ -#define GEM_RX128CNT (0x00000170/4) /* Error-free 128-255B Frames RX */ -#define GEM_RX256CNT (0x00000174/4) /* Error-free 256-512B Frames RX */ -#define GEM_RX512CNT (0x00000178/4) /* Error-free 512-1023B Frames RX */ -#define GEM_RX1024CNT (0x0000017C/4) /* Error-free 1024-1518B Frames RX */ -#define GEM_RX1519CNT (0x00000180/4) /* Error-free 1519-max Frames RX */ -#define GEM_RXUNDERCNT (0x00000184/4) /* Undersize Frames Received */ -#define GEM_RXOVERCNT (0x00000188/4) /* Oversize Frames Received */ -#define GEM_RXJABCNT (0x0000018C/4) /* Jabbers Received Counter */ -#define GEM_RXFCSCNT (0x00000190/4) /* Frame Check seq. 
Error Counter */ -#define GEM_RXLENERRCNT (0x00000194/4) /* Length Field Error Counter */ -#define GEM_RXSYMERRCNT (0x00000198/4) /* Symbol Error Counter */ -#define GEM_RXALIGNERRCNT (0x0000019C/4) /* Alignment Error Counter */ -#define GEM_RXRSCERRCNT (0x000001A0/4) /* Receive Resource Error Counter */ -#define GEM_RXORUNCNT (0x000001A4/4) /* Receive Overrun Counter */ -#define GEM_RXIPCSERRCNT (0x000001A8/4) /* IP header Checksum Error Counter */ -#define GEM_RXTCPCCNT (0x000001AC/4) /* TCP Checksum Error Counter */ -#define GEM_RXUDPCCNT (0x000001B0/4) /* UDP Checksum Error Counter */ +#define GEM_NWCTRL (0x00000000 / 4) /* Network Control reg */ +#define GEM_NWCFG (0x00000004 / 4) /* Network Config reg */ +#define GEM_NWSTATUS (0x00000008 / 4) /* Network Status reg */ +#define GEM_USERIO (0x0000000C / 4) /* User IO reg */ +#define GEM_DMACFG (0x00000010 / 4) /* DMA Control reg */ +#define GEM_TXSTATUS (0x00000014 / 4) /* TX Status reg */ +#define GEM_RXQBASE (0x00000018 / 4) /* RX Q Base address reg */ +#define GEM_TXQBASE (0x0000001C / 4) /* TX Q Base address reg */ +#define GEM_RXSTATUS (0x00000020 / 4) /* RX Status reg */ +#define GEM_ISR (0x00000024 / 4) /* Interrupt Status reg */ +#define GEM_IER (0x00000028 / 4) /* Interrupt Enable reg */ +#define GEM_IDR (0x0000002C / 4) /* Interrupt Disable reg */ +#define GEM_IMR (0x00000030 / 4) /* Interrupt Mask reg */ +#define GEM_PHYMNTNC (0x00000034 / 4) /* Phy Maintenance reg */ +#define GEM_RXPAUSE (0x00000038 / 4) /* RX Pause Time reg */ +#define GEM_TXPAUSE (0x0000003C / 4) /* TX Pause Time reg */ +#define GEM_TXPARTIALSF (0x00000040 / 4) /* TX Partial Store and Forward */ +#define GEM_RXPARTIALSF (0x00000044 / 4) /* RX Partial Store and Forward */ +#define GEM_HASHLO (0x00000080 / 4) /* Hash Low address reg */ +#define GEM_HASHHI (0x00000084 / 4) /* Hash High address reg */ +#define GEM_SPADDR1LO (0x00000088 / 4) /* Specific addr 1 low reg */ +#define GEM_SPADDR1HI (0x0000008C / 4) /* Specific addr 1 high 
reg */ +#define GEM_SPADDR2LO (0x00000090 / 4) /* Specific addr 2 low reg */ +#define GEM_SPADDR2HI (0x00000094 / 4) /* Specific addr 2 high reg */ +#define GEM_SPADDR3LO (0x00000098 / 4) /* Specific addr 3 low reg */ +#define GEM_SPADDR3HI (0x0000009C / 4) /* Specific addr 3 high reg */ +#define GEM_SPADDR4LO (0x000000A0 / 4) /* Specific addr 4 low reg */ +#define GEM_SPADDR4HI (0x000000A4 / 4) /* Specific addr 4 high reg */ +#define GEM_TIDMATCH1 (0x000000A8 / 4) /* Type ID1 Match reg */ +#define GEM_TIDMATCH2 (0x000000AC / 4) /* Type ID2 Match reg */ +#define GEM_TIDMATCH3 (0x000000B0 / 4) /* Type ID3 Match reg */ +#define GEM_TIDMATCH4 (0x000000B4 / 4) /* Type ID4 Match reg */ +#define GEM_WOLAN (0x000000B8 / 4) /* Wake on LAN reg */ +#define GEM_IPGSTRETCH (0x000000BC / 4) /* IPG Stretch reg */ +#define GEM_SVLAN (0x000000C0 / 4) /* Stacked VLAN reg */ +#define GEM_MODID (0x000000FC / 4) /* Module ID reg */ +#define GEM_OCTTXLO (0x00000100 / 4) /* Octects transmitted Low reg */ +#define GEM_OCTTXHI (0x00000104 / 4) /* Octects transmitted High reg */ +#define GEM_TXCNT (0x00000108 / 4) /* Error-free Frames transmitted */ +#define GEM_TXBCNT (0x0000010C / 4) /* Error-free Broadcast Frames */ +#define GEM_TXMCNT (0x00000110 / 4) /* Error-free Multicast Frame */ +#define GEM_TXPAUSECNT (0x00000114 / 4) /* Pause Frames Transmitted */ +#define GEM_TX64CNT (0x00000118 / 4) /* Error-free 64 TX */ +#define GEM_TX65CNT (0x0000011C / 4) /* Error-free 65-127 TX */ +#define GEM_TX128CNT (0x00000120 / 4) /* Error-free 128-255 TX */ +#define GEM_TX256CNT (0x00000124 / 4) /* Error-free 256-511 */ +#define GEM_TX512CNT (0x00000128 / 4) /* Error-free 512-1023 TX */ +#define GEM_TX1024CNT (0x0000012C / 4) /* Error-free 1024-1518 TX */ +#define GEM_TX1519CNT (0x00000130 / 4) /* Error-free larger than 1519 TX */ +#define GEM_TXURUNCNT (0x00000134 / 4) /* TX under run error counter */ +#define GEM_SINGLECOLLCNT (0x00000138 / 4) /* Single Collision Frames */ +#define GEM_MULTCOLLCNT 
(0x0000013C / 4) /* Multiple Collision Frames */ +#define GEM_EXCESSCOLLCNT (0x00000140 / 4) /* Excessive Collision Frames */ +#define GEM_LATECOLLCNT (0x00000144 / 4) /* Late Collision Frames */ +#define GEM_DEFERTXCNT (0x00000148 / 4) /* Deferred Transmission Frames */ +#define GEM_CSENSECNT (0x0000014C / 4) /* Carrier Sense Error Counter */ +#define GEM_OCTRXLO (0x00000150 / 4) /* Octects Received register Low */ +#define GEM_OCTRXHI (0x00000154 / 4) /* Octects Received register High */ +#define GEM_RXCNT (0x00000158 / 4) /* Error-free Frames Received */ +#define GEM_RXBROADCNT (0x0000015C / 4) /* Error-free Broadcast Frames RX */ +#define GEM_RXMULTICNT (0x00000160 / 4) /* Error-free Multicast Frames RX */ +#define GEM_RXPAUSECNT (0x00000164 / 4) /* Pause Frames Received Counter */ +#define GEM_RX64CNT (0x00000168 / 4) /* Error-free 64 byte Frames RX */ +#define GEM_RX65CNT (0x0000016C / 4) /* Error-free 65-127B Frames RX */ +#define GEM_RX128CNT (0x00000170 / 4) /* Error-free 128-255B Frames RX */ +#define GEM_RX256CNT (0x00000174 / 4) /* Error-free 256-512B Frames RX */ +#define GEM_RX512CNT (0x00000178 / 4) /* Error-free 512-1023B Frames RX */ +#define GEM_RX1024CNT (0x0000017C / 4) /* Error-free 1024-1518B Frames RX */ +#define GEM_RX1519CNT (0x00000180 / 4) /* Error-free 1519-max Frames RX */ +#define GEM_RXUNDERCNT (0x00000184 / 4) /* Undersize Frames Received */ +#define GEM_RXOVERCNT (0x00000188 / 4) /* Oversize Frames Received */ +#define GEM_RXJABCNT (0x0000018C / 4) /* Jabbers Received Counter */ +#define GEM_RXFCSCNT (0x00000190 / 4) /* Frame Check seq. 
Error Counter */ +#define GEM_RXLENERRCNT (0x00000194 / 4) /* Length Field Error Counter */ +#define GEM_RXSYMERRCNT (0x00000198 / 4) /* Symbol Error Counter */ +#define GEM_RXALIGNERRCNT (0x0000019C / 4) /* Alignment Error Counter */ +#define GEM_RXRSCERRCNT (0x000001A0 / 4) /* Receive Resource Error Counter */ +#define GEM_RXORUNCNT (0x000001A4 / 4) /* Receive Overrun Counter */ +#define GEM_RXIPCSERRCNT (0x000001A8 / 4) /* IP header Checksum Err Counter */ +#define GEM_RXTCPCCNT (0x000001AC / 4) /* TCP Checksum Error Counter */ +#define GEM_RXUDPCCNT (0x000001B0 / 4) /* UDP Checksum Error Counter */ -#define GEM_1588S (0x000001D0/4) /* 1588 Timer Seconds */ -#define GEM_1588NS (0x000001D4/4) /* 1588 Timer Nanoseconds */ -#define GEM_1588ADJ (0x000001D8/4) /* 1588 Timer Adjust */ -#define GEM_1588INC (0x000001DC/4) /* 1588 Timer Increment */ -#define GEM_PTPETXS (0x000001E0/4) /* PTP Event Frame Transmitted (s) */ -#define GEM_PTPETXNS (0x000001E4/4) /* PTP Event Frame Transmitted (ns) */ -#define GEM_PTPERXS (0x000001E8/4) /* PTP Event Frame Received (s) */ -#define GEM_PTPERXNS (0x000001EC/4) /* PTP Event Frame Received (ns) */ -#define GEM_PTPPTXS (0x000001E0/4) /* PTP Peer Frame Transmitted (s) */ -#define GEM_PTPPTXNS (0x000001E4/4) /* PTP Peer Frame Transmitted (ns) */ -#define GEM_PTPPRXS (0x000001E8/4) /* PTP Peer Frame Received (s) */ -#define GEM_PTPPRXNS (0x000001EC/4) /* PTP Peer Frame Received (ns) */ +#define GEM_1588S (0x000001D0 / 4) /* 1588 Timer Seconds */ +#define GEM_1588NS (0x000001D4 / 4) /* 1588 Timer Nanoseconds */ +#define GEM_1588ADJ (0x000001D8 / 4) /* 1588 Timer Adjust */ +#define GEM_1588INC (0x000001DC / 4) /* 1588 Timer Increment */ +#define GEM_PTPETXS (0x000001E0 / 4) /* PTP Event Frame Transmitted (s) */ +#define GEM_PTPETXNS (0x000001E4 / 4) /* + * PTP Event Frame Transmitted (ns) + */ +#define GEM_PTPERXS (0x000001E8 / 4) /* PTP Event Frame Received (s) */ +#define GEM_PTPERXNS (0x000001EC / 4) /* PTP Event Frame Received (ns) 
*/ +#define GEM_PTPPTXS (0x000001E0 / 4) /* PTP Peer Frame Transmitted (s) */ +#define GEM_PTPPTXNS (0x000001E4 / 4) /* PTP Peer Frame Transmitted (ns) */ +#define GEM_PTPPRXS (0x000001E8 / 4) /* PTP Peer Frame Received (s) */ +#define GEM_PTPPRXNS (0x000001EC / 4) /* PTP Peer Frame Received (ns) */ /* Design Configuration Registers */ -#define GEM_DESCONF (0x00000280/4) -#define GEM_DESCONF2 (0x00000284/4) -#define GEM_DESCONF3 (0x00000288/4) -#define GEM_DESCONF4 (0x0000028C/4) -#define GEM_DESCONF5 (0x00000290/4) -#define GEM_DESCONF6 (0x00000294/4) +#define GEM_DESCONF (0x00000280 / 4) +#define GEM_DESCONF2 (0x00000284 / 4) +#define GEM_DESCONF3 (0x00000288 / 4) +#define GEM_DESCONF4 (0x0000028C / 4) +#define GEM_DESCONF5 (0x00000290 / 4) +#define GEM_DESCONF6 (0x00000294 / 4) #define GEM_DESCONF6_64B_MASK (1U << 23) -#define GEM_DESCONF7 (0x00000298/4) +#define GEM_DESCONF7 (0x00000298 / 4) #define GEM_INT_Q1_STATUS (0x00000400 / 4) #define GEM_INT_Q1_MASK (0x00000640 / 4) From 7ca151c381c1da146dd274c428ed7825906cc29a Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:50 +0530 Subject: [PATCH 18/33] net: cadence_gem: Add support for jumbo frames Add a property "jumbo-max-len", which sets default value of jumbo frames up to 16,383 bytes. Add Frame length checks for standard and jumbo frames. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. 
Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 51 ++++++++++++++++++++++++++++++++---- include/hw/net/cadence_gem.h | 4 ++- 2 files changed, 49 insertions(+), 6 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 22d0d167c1..8e927ada73 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -61,6 +61,7 @@ #define GEM_TXPAUSE (0x0000003C / 4) /* TX Pause Time reg */ #define GEM_TXPARTIALSF (0x00000040 / 4) /* TX Partial Store and Forward */ #define GEM_RXPARTIALSF (0x00000044 / 4) /* RX Partial Store and Forward */ +#define GEM_JUMBO_MAX_LEN (0x00000048 / 4) /* Max Jumbo Frame Size */ #define GEM_HASHLO (0x00000080 / 4) /* Hash Low address reg */ #define GEM_HASHHI (0x00000084 / 4) /* Hash High address reg */ #define GEM_SPADDR1LO (0x00000088 / 4) /* Specific addr 1 low reg */ @@ -212,10 +213,12 @@ #define GEM_NWCFG_LERR_DISC 0x00010000 /* Discard RX frames with len err */ #define GEM_NWCFG_BUFF_OFST_M 0x0000C000 /* Receive buffer offset mask */ #define GEM_NWCFG_BUFF_OFST_S 14 /* Receive buffer offset shift */ +#define GEM_NWCFG_RCV_1538 0x00000100 /* Receive 1538 bytes frame */ #define GEM_NWCFG_UCAST_HASH 0x00000080 /* accept unicast if hash match */ #define GEM_NWCFG_MCAST_HASH 0x00000040 /* accept multicast if hash match */ #define GEM_NWCFG_BCAST_REJ 0x00000020 /* Reject broadcast packets */ #define GEM_NWCFG_PROMISC 0x00000010 /* Accept all packets */ +#define GEM_NWCFG_JUMBO_FRAME 0x00000008 /* Jumbo Frames enable */ #define GEM_DMACFG_ADDR_64B (1U << 30) #define GEM_DMACFG_TX_BD_EXT (1U << 29) @@ -233,6 +236,7 @@ /* GEM_ISR GEM_IER GEM_IDR GEM_IMR */ #define GEM_INT_TXCMPL 0x00000080 /* Transmit Complete */ +#define GEM_INT_AMBA_ERR 0x00000040 #define GEM_INT_TXUSED 0x00000008 #define GEM_INT_RXUSED 0x00000004 #define GEM_INT_RXCMPL 0x00000002 @@ -453,6 +457,24 @@ static inline void rx_desc_set_sar(uint32_t *desc, int sar_idx) /* The broadcast MAC address: 0xFFFFFFFFFFFF */ static const uint8_t broadcast_addr[] 
= { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +static uint32_t gem_get_max_buf_len(CadenceGEMState *s, bool tx) +{ + uint32_t size; + if (s->regs[GEM_NWCFG] & GEM_NWCFG_JUMBO_FRAME) { + size = s->regs[GEM_JUMBO_MAX_LEN]; + if (size > s->jumbo_max_len) { + size = s->jumbo_max_len; + qemu_log_mask(LOG_GUEST_ERROR, "GEM_JUMBO_MAX_LEN reg cannot be" + " greater than 0x%" PRIx32 "\n", s->jumbo_max_len); + } + } else if (tx) { + size = 1518; + } else { + size = s->regs[GEM_NWCFG] & GEM_NWCFG_RCV_1538 ? 1538 : 1518; + } + return size; +} + static void gem_set_isr(CadenceGEMState *s, int q, uint32_t flag) { if (q == 0) { @@ -1016,6 +1038,12 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) /* Find which queue we are targeting */ q = get_queue_from_screen(s, rxbuf_ptr, rxbufsize); + if (size > gem_get_max_buf_len(s, false)) { + qemu_log_mask(LOG_GUEST_ERROR, "rx frame too long\n"); + gem_set_isr(s, q, GEM_INT_AMBA_ERR); + return -1; + } + while (bytes_to_copy) { hwaddr desc_addr; @@ -1196,12 +1224,13 @@ static void gem_transmit(CadenceGEMState *s) break; } - if (tx_desc_get_length(desc) > MAX_FRAME_SIZE - + if (tx_desc_get_length(desc) > gem_get_max_buf_len(s, true) - (p - s->tx_packet)) { - DB_PRINT("TX descriptor @ 0x%" HWADDR_PRIx \ - " too large: size 0x%x space 0x%zx\n", + qemu_log_mask(LOG_GUEST_ERROR, "TX descriptor @ 0x%" \ + HWADDR_PRIx " too large: size 0x%x space 0x%zx\n", packet_desc_addr, tx_desc_get_length(desc), - MAX_FRAME_SIZE - (p - s->tx_packet)); + gem_get_max_buf_len(s, true) - (p - s->tx_packet)); + gem_set_isr(s, q, GEM_INT_AMBA_ERR); break; } @@ -1343,9 +1372,10 @@ static void gem_reset(DeviceState *d) s->regs[GEM_RXPARTIALSF] = 0x000003ff; s->regs[GEM_MODID] = s->revision; s->regs[GEM_DESCONF] = 0x02500111; - s->regs[GEM_DESCONF2] = 0x2ab13fff; + s->regs[GEM_DESCONF2] = 0x2ab10000 | s->jumbo_max_len; s->regs[GEM_DESCONF5] = 0x002f2045; s->regs[GEM_DESCONF6] = GEM_DESCONF6_64B_MASK; + s->regs[GEM_JUMBO_MAX_LEN] = 
s->jumbo_max_len; if (s->num_priority_queues > 1) { queues_mask = MAKE_64BIT_MASK(1, s->num_priority_queues - 1); @@ -1516,6 +1546,9 @@ static void gem_write(void *opaque, hwaddr offset, uint64_t val, s->regs[GEM_IMR] &= ~val; gem_update_int_status(s); break; + case GEM_JUMBO_MAX_LEN: + s->regs[GEM_JUMBO_MAX_LEN] = val & MAX_JUMBO_FRAME_SIZE_MASK; + break; case GEM_INT_Q1_ENABLE ... GEM_INT_Q7_ENABLE: s->regs[GEM_INT_Q1_MASK + offset - GEM_INT_Q1_ENABLE] &= ~val; gem_update_int_status(s); @@ -1610,6 +1643,12 @@ static void gem_realize(DeviceState *dev, Error **errp) s->nic = qemu_new_nic(&net_gem_info, &s->conf, object_get_typename(OBJECT(dev)), dev->id, s); + + if (s->jumbo_max_len > MAX_FRAME_SIZE) { + error_setg(errp, "jumbo-max-len is greater than %d", + MAX_FRAME_SIZE); + return; + } } static void gem_init(Object *obj) @@ -1658,6 +1697,8 @@ static Property gem_properties[] = { num_type1_screeners, 4), DEFINE_PROP_UINT8("num-type2-screeners", CadenceGEMState, num_type2_screeners, 4), + DEFINE_PROP_UINT16("jumbo-max-len", CadenceGEMState, + jumbo_max_len, 10240), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/net/cadence_gem.h b/include/hw/net/cadence_gem.h index eddac70f20..54e646ff79 100644 --- a/include/hw/net/cadence_gem.h +++ b/include/hw/net/cadence_gem.h @@ -40,7 +40,8 @@ #define MAX_TYPE1_SCREENERS 16 #define MAX_TYPE2_SCREENERS 16 -#define MAX_FRAME_SIZE 2048 +#define MAX_JUMBO_FRAME_SIZE_MASK 0x3FFF +#define MAX_FRAME_SIZE MAX_JUMBO_FRAME_SIZE_MASK typedef struct CadenceGEMState { /*< private >*/ @@ -59,6 +60,7 @@ typedef struct CadenceGEMState { uint8_t num_type1_screeners; uint8_t num_type2_screeners; uint32_t revision; + uint16_t jumbo_max_len; /* GEM registers backing store */ uint32_t regs[CADENCE_GEM_MAXREG]; From d48cb519b35010a90f18df915d187e566bf10c3e Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:51 +0530 Subject: [PATCH 19/33] net: cadnece_gem: Update irq_read_clear field of designcfg_debug1 reg Advertise 
support of clear-on-read for ISR registers. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 8e927ada73..2211550d2b 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -1371,7 +1371,7 @@ static void gem_reset(DeviceState *d) s->regs[GEM_TXPARTIALSF] = 0x000003ff; s->regs[GEM_RXPARTIALSF] = 0x000003ff; s->regs[GEM_MODID] = s->revision; - s->regs[GEM_DESCONF] = 0x02500111; + s->regs[GEM_DESCONF] = 0x02D00111; s->regs[GEM_DESCONF2] = 0x2ab10000 | s->jumbo_max_len; s->regs[GEM_DESCONF5] = 0x002f2045; s->regs[GEM_DESCONF6] = GEM_DESCONF6_64B_MASK; From 15baf5e23743871820504be6afb1bae24d1211c2 Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:52 +0530 Subject: [PATCH 20/33] net: cadence_gem: Update the reset value for interrupt mask register Mask all interrupts on reset. Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 2211550d2b..df6d8186ca 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -1375,6 +1375,7 @@ static void gem_reset(DeviceState *d) s->regs[GEM_DESCONF2] = 0x2ab10000 | s->jumbo_max_len; s->regs[GEM_DESCONF5] = 0x002f2045; s->regs[GEM_DESCONF6] = GEM_DESCONF6_64B_MASK; + s->regs[GEM_INT_Q1_MASK] = 0x00000CE6; s->regs[GEM_JUMBO_MAX_LEN] = s->jumbo_max_len; if (s->num_priority_queues > 1) { From fdd35195c5ed57162b080a20df588773768b589c Mon Sep 17 00:00:00 2001 From: Sai Pavan Boddu Date: Tue, 12 May 2020 20:24:53 +0530 Subject: [PATCH 21/33] net: cadence_gem: TX_LAST bit should be set by guest TX_LAST bit should not be set by hardware, it's set by the guest to indicate the last bd of the frame. Signed-off-by: Sai Pavan Boddu Signed-off-by: Edgar E. Iglesias Reviewed-by: Edgar E.
Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index df6d8186ca..78fb9acf96 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -350,11 +350,6 @@ static inline unsigned tx_desc_get_last(uint32_t *desc) return (desc[1] & DESC_1_TX_LAST) ? 1 : 0; } -static inline void tx_desc_set_last(uint32_t *desc) -{ - desc[1] |= DESC_1_TX_LAST; -} - static inline unsigned tx_desc_get_length(uint32_t *desc) { return desc[1] & DESC_1_LENGTH; @@ -1298,7 +1293,6 @@ static void gem_transmit(CadenceGEMState *s) /* read next descriptor */ if (tx_desc_get_wrap(desc)) { - tx_desc_set_last(desc); if (s->regs[GEM_DMACFG] & GEM_DMACFG_ADDR_64B) { packet_desc_addr = s->regs[GEM_TBQPH]; From fbc14a098d945dfa9c141423771db0122defa38c Mon Sep 17 00:00:00 2001 From: Tong Ho Date: Tue, 12 May 2020 20:24:54 +0530 Subject: [PATCH 22/33] net: cadence_gem: Fix RX address filtering Two defects are fixed: 1/ Detection of multicast frames 2/ Treating drop of mis-addressed frames as non-error Signed-off-by: Tong Ho Signed-off-by: Edgar E. Iglesias Signed-off-by: Sai Pavan Boddu Reviewed-by: Edgar E. Iglesias Signed-off-by: Jason Wang --- hw/net/cadence_gem.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index 78fb9acf96..a93b5c07ce 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -34,6 +34,7 @@ #include "qemu/module.h" #include "sysemu/dma.h" #include "net/checksum.h" +#include "net/eth.h" #define CADENCE_GEM_ERR_DEBUG 0 #define DB_PRINT(...) do {\ @@ -702,7 +703,7 @@ static unsigned calc_mac_hash(const uint8_t *mac) static int gem_mac_address_filter(CadenceGEMState *s, const uint8_t *packet) { uint8_t *gem_spaddr; - int i; + int i, is_mc; /* Promiscuous mode? 
*/ if (s->regs[GEM_NWCFG] & GEM_NWCFG_PROMISC) { @@ -718,22 +719,17 @@ static int gem_mac_address_filter(CadenceGEMState *s, const uint8_t *packet) } /* Accept packets -w- hash match? */ - if ((packet[0] == 0x01 && (s->regs[GEM_NWCFG] & GEM_NWCFG_MCAST_HASH)) || - (packet[0] != 0x01 && (s->regs[GEM_NWCFG] & GEM_NWCFG_UCAST_HASH))) { + is_mc = is_multicast_ether_addr(packet); + if ((is_mc && (s->regs[GEM_NWCFG] & GEM_NWCFG_MCAST_HASH)) || + (!is_mc && (s->regs[GEM_NWCFG] & GEM_NWCFG_UCAST_HASH))) { + uint64_t buckets; unsigned hash_index; hash_index = calc_mac_hash(packet); - if (hash_index < 32) { - if (s->regs[GEM_HASHLO] & (1<regs[GEM_HASHHI] & (1<regs[GEM_HASHHI] << 32) | s->regs[GEM_HASHLO]; + if ((buckets >> hash_index) & 1) { + return is_mc ? GEM_RX_MULTICAST_HASH_ACCEPT + : GEM_RX_UNICAST_HASH_ACCEPT; } } @@ -958,7 +954,7 @@ static ssize_t gem_receive(NetClientState *nc, const uint8_t *buf, size_t size) /* Is this destination MAC address "for us" ? */ maf = gem_mac_address_filter(s, buf); if (maf == GEM_RX_REJECT) { - return -1; + return size; /* no, drop siliently b/c it's not an error */ } /* Discard packets with receive length error enabled ? */ From 5fe19fb81839ea42b592b409f725349cf3c73551 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Mon, 11 May 2020 12:04:53 +0800 Subject: [PATCH 23/33] net: use peer when purging queue in qemu_flush_or_purge_queue_packets() The sender of packet will be checked in the qemu_net_queue_purge() but we use NetClientState not its peer when trying to purge the incoming queue in qemu_flush_or_purge_packets(). This will trigger the assert in virtio_net_reset since we can't pass the sender check: hw/net/virtio-net.c:533: void virtio_net_reset(VirtIODevice *): Assertion `!virtio_net_get_subqueue(nc)->async_tx.elem' failed. 
#9 0x55a33fa31b78 in virtio_net_reset hw/net/virtio-net.c:533:13 #10 0x55a33fc88412 in virtio_reset hw/virtio/virtio.c:1919:9 #11 0x55a341d82764 in virtio_bus_reset hw/virtio/virtio-bus.c:95:9 #12 0x55a341dba2de in virtio_pci_reset hw/virtio/virtio-pci.c:1824:5 #13 0x55a341db3e02 in virtio_pci_common_write hw/virtio/virtio-pci.c:1252:13 #14 0x55a33f62117b in memory_region_write_accessor memory.c:496:5 #15 0x55a33f6205e4 in access_with_adjusted_size memory.c:557:18 #16 0x55a33f61e177 in memory_region_dispatch_write memory.c:1488:16 Reproducer: https://www.mail-archive.com/qemu-devel@nongnu.org/msg701914.html Fix by using the peer. Reported-by: "Alexander Bulekov" Acked-by: Alexander Bulekov Fixes: ca77d85e1dbf9 ("net: complete all queued packets on VM stop") Cc: qemu-stable@nongnu.org Signed-off-by: Jason Wang --- net/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/net.c b/net/net.c index cbeeeadff8..4c62b10acd 100644 --- a/net/net.c +++ b/net/net.c @@ -610,7 +610,7 @@ void qemu_flush_or_purge_queued_packets(NetClientState *nc, bool purge) qemu_notify_event(); } else if (purge) { /* Unable to empty the queue, purge remaining packets */ - qemu_net_queue_purge(nc->incoming_queue, nc); + qemu_net_queue_purge(nc->incoming_queue, nc->peer); } } From 5893c7383e6e16e3e6b24b0551501721f3309d9e Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Fri, 22 May 2020 15:53:51 +0800 Subject: [PATCH 24/33] net/colo-compare.c: Create event_bh with the right AioContext qemu_bh_new will set the bh to be executed in the main loop. This causes crashes as colo_compare_handle_event assumes that it has exclusive access to the queues, which are also concurrently accessed in the iothread. Create the bh with the AioContext of the iothread to fulfill these assumptions and fix the crashes. This is safe, because the bh already takes the appropriate locks.
Signed-off-by: Lukas Straub Reviewed-by: Zhang Chen Reviewed-by: Derek Su Tested-by: Derek Su Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index c07e7c1c09..e557da70e5 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -890,6 +890,7 @@ static void colo_compare_handle_event(void *opaque) static void colo_compare_iothread(CompareState *s) { + AioContext *ctx = iothread_get_aio_context(s->iothread); object_ref(OBJECT(s->iothread)); s->worker_context = iothread_get_g_main_context(s->iothread); @@ -906,7 +907,7 @@ static void colo_compare_iothread(CompareState *s) } colo_compare_timer_init(s); - s->event_bh = qemu_bh_new(colo_compare_handle_event, s); + s->event_bh = aio_bh_new(ctx, colo_compare_handle_event, s); } static char *compare_get_pri_indev(Object *obj, Error **errp) From 2158fa1be7c1e76e2c25c32d75bb38e90cce0267 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Fri, 22 May 2020 15:53:52 +0800 Subject: [PATCH 25/33] chardev/char.c: Use qemu_co_sleep_ns if in coroutine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to convert compare_chr_send to a coroutine in the next commit, use qemu_co_sleep_ns if in coroutine. 
Signed-off-by: Lukas Straub Reviewed-by: Marc-André Lureau Reviewed-by: Zhang Chen Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- chardev/char.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chardev/char.c b/chardev/char.c index ea06c5ff4d..e3051295ac 100644 --- a/chardev/char.c +++ b/chardev/char.c @@ -38,6 +38,7 @@ #include "qemu/module.h" #include "qemu/option.h" #include "qemu/id.h" +#include "qemu/coroutine.h" #include "chardev/char-mux.h" @@ -119,7 +120,11 @@ static int qemu_chr_write_buffer(Chardev *s, retry: res = cc->chr_write(s, buf + *offset, len - *offset); if (res < 0 && errno == EAGAIN && write_all) { - g_usleep(100); + if (qemu_in_coroutine()) { + qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); + } else { + g_usleep(100); + } goto retry; } From 9c55fe94081dd15148428d32e60fd75ec2eb37ec Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Fri, 22 May 2020 15:53:53 +0800 Subject: [PATCH 26/33] net/colo-compare.c: Fix deadlock in compare_chr_send The chr_out chardev is connected to a filter-redirector running in the main loop. qemu_chr_fe_write_all might block here in compare_chr_send if the (socket-)buffer is full. If another filter-redirector in the main loop wants to send data to chr_pri_in it might also block if the buffer is full. This leads to a deadlock because both event loops get blocked. Fix this by converting compare_chr_send to a coroutine and putting the packets in a send queue.
Signed-off-by: Lukas Straub Reviewed-by: Zhang Chen Tested-by: Zhang Chen Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 205 ++++++++++++++++++++++++++++++++++----------- net/colo.c | 7 ++ net/colo.h | 1 + 3 files changed, 162 insertions(+), 51 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index e557da70e5..62ecd38bb7 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -32,6 +32,9 @@ #include "migration/migration.h" #include "util.h" +#include "block/aio-wait.h" +#include "qemu/coroutine.h" + #define TYPE_COLO_COMPARE "colo-compare" #define COLO_COMPARE(obj) \ OBJECT_CHECK(CompareState, (obj), TYPE_COLO_COMPARE) @@ -77,6 +80,23 @@ static int event_unhandled_count; * |packet | |packet + |packet | |packet + * +--------+ +--------+ +--------+ +--------+ */ + +typedef struct SendCo { + Coroutine *co; + struct CompareState *s; + CharBackend *chr; + GQueue send_list; + bool notify_remote_frame; + bool done; + int ret; +} SendCo; + +typedef struct SendEntry { + uint32_t size; + uint32_t vnet_hdr_len; + uint8_t *buf; +} SendEntry; + typedef struct CompareState { Object parent; @@ -91,6 +111,8 @@ typedef struct CompareState { SocketReadState pri_rs; SocketReadState sec_rs; SocketReadState notify_rs; + SendCo out_sendco; + SendCo notify_sendco; bool vnet_hdr; uint32_t compare_timeout; uint32_t expired_scan_cycle; @@ -124,10 +146,11 @@ enum { static int compare_chr_send(CompareState *s, - const uint8_t *buf, + uint8_t *buf, uint32_t size, uint32_t vnet_hdr_len, - bool notify_remote_frame); + bool notify_remote_frame, + bool zero_copy); static bool packet_matches_str(const char *str, const uint8_t *buf, @@ -145,7 +168,7 @@ static void notify_remote_frame(CompareState *s) char msg[] = "DO_CHECKPOINT"; int ret = 0; - ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true); + ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, false); if (ret < 0) { error_report("Notify Xen COLO-frame failed"); } @@ 
-272,12 +295,13 @@ static void colo_release_primary_pkt(CompareState *s, Packet *pkt) pkt->data, pkt->size, pkt->vnet_hdr_len, - false); + false, + true); if (ret < 0) { error_report("colo send primary packet failed"); } trace_colo_compare_main("packet same and release packet"); - packet_destroy(pkt, NULL); + packet_destroy_partial(pkt, NULL); } /* @@ -699,65 +723,115 @@ static void colo_compare_connection(void *opaque, void *user_data) } } +static void coroutine_fn _compare_chr_send(void *opaque) +{ + SendCo *sendco = opaque; + CompareState *s = sendco->s; + int ret = 0; + + while (!g_queue_is_empty(&sendco->send_list)) { + SendEntry *entry = g_queue_pop_tail(&sendco->send_list); + uint32_t len = htonl(entry->size); + + ret = qemu_chr_fe_write_all(sendco->chr, (uint8_t *)&len, sizeof(len)); + + if (ret != sizeof(len)) { + g_free(entry->buf); + g_slice_free(SendEntry, entry); + goto err; + } + + if (!sendco->notify_remote_frame && s->vnet_hdr) { + /* + * We send vnet header len make other module(like filter-redirector) + * know how to parse net packet correctly. + */ + len = htonl(entry->vnet_hdr_len); + + ret = qemu_chr_fe_write_all(sendco->chr, + (uint8_t *)&len, + sizeof(len)); + + if (ret != sizeof(len)) { + g_free(entry->buf); + g_slice_free(SendEntry, entry); + goto err; + } + } + + ret = qemu_chr_fe_write_all(sendco->chr, + (uint8_t *)entry->buf, + entry->size); + + if (ret != entry->size) { + g_free(entry->buf); + g_slice_free(SendEntry, entry); + goto err; + } + + g_free(entry->buf); + g_slice_free(SendEntry, entry); + } + + sendco->ret = 0; + goto out; + +err: + while (!g_queue_is_empty(&sendco->send_list)) { + SendEntry *entry = g_queue_pop_tail(&sendco->send_list); + g_free(entry->buf); + g_slice_free(SendEntry, entry); + } + sendco->ret = ret < 0 ? 
ret : -EIO; +out: + sendco->co = NULL; + sendco->done = true; + aio_wait_kick(); +} + static int compare_chr_send(CompareState *s, - const uint8_t *buf, + uint8_t *buf, uint32_t size, uint32_t vnet_hdr_len, - bool notify_remote_frame) + bool notify_remote_frame, + bool zero_copy) { - int ret = 0; - uint32_t len = htonl(size); + SendCo *sendco; + SendEntry *entry; + + if (notify_remote_frame) { + sendco = &s->notify_sendco; + } else { + sendco = &s->out_sendco; + } if (!size) { return 0; } - if (notify_remote_frame) { - ret = qemu_chr_fe_write_all(&s->chr_notify_dev, - (uint8_t *)&len, - sizeof(len)); + entry = g_slice_new(SendEntry); + entry->size = size; + entry->vnet_hdr_len = vnet_hdr_len; + if (zero_copy) { + entry->buf = buf; } else { - ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); + entry->buf = g_malloc(size); + memcpy(entry->buf, buf, size); } + g_queue_push_head(&sendco->send_list, entry); - if (ret != sizeof(len)) { - goto err; - } - - if (s->vnet_hdr) { - /* - * We send vnet header len make other module(like filter-redirector) - * know how to parse net packet correctly. - */ - len = htonl(vnet_hdr_len); - - if (!notify_remote_frame) { - ret = qemu_chr_fe_write_all(&s->chr_out, - (uint8_t *)&len, - sizeof(len)); - } - - if (ret != sizeof(len)) { - goto err; + if (sendco->done) { + sendco->co = qemu_coroutine_create(_compare_chr_send, sendco); + sendco->done = false; + qemu_coroutine_enter(sendco->co); + if (sendco->done) { + /* report early errors */ + return sendco->ret; } } - if (notify_remote_frame) { - ret = qemu_chr_fe_write_all(&s->chr_notify_dev, - (uint8_t *)buf, - size); - } else { - ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); - } - - if (ret != size) { - goto err; - } - + /* assume success */ return 0; - -err: - return ret < 0 ? 
ret : -EIO; } static int compare_chr_can_read(void *opaque) @@ -1063,6 +1137,7 @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs) pri_rs->buf, pri_rs->packet_len, pri_rs->vnet_hdr_len, + false, false); } else { /* compare packet in the specified connection */ @@ -1093,7 +1168,7 @@ static void compare_notify_rs_finalize(SocketReadState *notify_rs) if (packet_matches_str("COLO_USERSPACE_PROXY_INIT", notify_rs->buf, notify_rs->packet_len)) { - ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true); + ret = compare_chr_send(s, (uint8_t *)msg, strlen(msg), 0, true, false); if (ret < 0) { error_report("Notify Xen COLO-frame INIT failed"); } @@ -1199,6 +1274,20 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) QTAILQ_INSERT_TAIL(&net_compares, s, next); + s->out_sendco.s = s; + s->out_sendco.chr = &s->chr_out; + s->out_sendco.notify_remote_frame = false; + s->out_sendco.done = true; + g_queue_init(&s->out_sendco.send_list); + + if (s->notify_dev) { + s->notify_sendco.s = s; + s->notify_sendco.chr = &s->chr_notify_dev; + s->notify_sendco.notify_remote_frame = true; + s->notify_sendco.done = true; + g_queue_init(&s->notify_sendco.send_list); + } + g_queue_init(&s->conn_list); qemu_mutex_init(&event_mtx); @@ -1225,8 +1314,9 @@ static void colo_flush_packets(void *opaque, void *user_data) pkt->data, pkt->size, pkt->vnet_hdr_len, - false); - packet_destroy(pkt, NULL); + false, + true); + packet_destroy_partial(pkt, NULL); } while (!g_queue_is_empty(&conn->secondary_list)) { pkt = g_queue_pop_head(&conn->secondary_list); @@ -1297,10 +1387,23 @@ static void colo_compare_finalize(Object *obj) } } + AioContext *ctx = iothread_get_aio_context(s->iothread); + aio_context_acquire(ctx); + AIO_WAIT_WHILE(ctx, !s->out_sendco.done); + if (s->notify_dev) { + AIO_WAIT_WHILE(ctx, !s->notify_sendco.done); + } + aio_context_release(ctx); + /* Release all unhandled packets after compare thead exited */ g_queue_foreach(&s->conn_list, colo_flush_packets, 
s); + AIO_WAIT_WHILE(NULL, !s->out_sendco.done); g_queue_clear(&s->conn_list); + g_queue_clear(&s->out_sendco.send_list); + if (s->notify_dev) { + g_queue_clear(&s->notify_sendco.send_list); + } if (s->connection_track_table) { g_hash_table_destroy(s->connection_track_table); diff --git a/net/colo.c b/net/colo.c index 8196b35837..a6c66d829a 100644 --- a/net/colo.c +++ b/net/colo.c @@ -185,6 +185,13 @@ void packet_destroy(void *opaque, void *user_data) g_slice_free(Packet, pkt); } +void packet_destroy_partial(void *opaque, void *user_data) +{ + Packet *pkt = opaque; + + g_slice_free(Packet, pkt); +} + /* * Clear hashtable, stop this hash growing really huge */ diff --git a/net/colo.h b/net/colo.h index 679314b1ca..573ab91785 100644 --- a/net/colo.h +++ b/net/colo.h @@ -102,5 +102,6 @@ bool connection_has_tracked(GHashTable *connection_track_table, void connection_hashtable_reset(GHashTable *connection_track_table); Packet *packet_new(const void *data, int size, int vnet_hdr_len); void packet_destroy(void *opaque, void *user_data); +void packet_destroy_partial(void *opaque, void *user_data); #endif /* NET_COLO_H */ From 76658541f3950e580ae3be8019f1b4c019a8a638 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Fri, 22 May 2020 15:53:54 +0800 Subject: [PATCH 27/33] net/colo-compare.c: Only hexdump packets if tracing is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Else the log will be flooded if there is a lot of network traffic. 
Signed-off-by: Lukas Straub Reviewed-by: Zhang Chen Reviewed-by: Philippe Mathieu-Daudé Tested-by: Philippe Mathieu-Daudé Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index 62ecd38bb7..a609f499b9 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -483,10 +483,12 @@ sec: g_queue_push_head(&conn->primary_list, ppkt); g_queue_push_head(&conn->secondary_list, spkt); - qemu_hexdump((char *)ppkt->data, stderr, - "colo-compare ppkt", ppkt->size); - qemu_hexdump((char *)spkt->data, stderr, - "colo-compare spkt", spkt->size); + if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { + qemu_hexdump((char *)ppkt->data, stderr, + "colo-compare ppkt", ppkt->size); + qemu_hexdump((char *)spkt->data, stderr, + "colo-compare spkt", spkt->size); + } colo_compare_inconsistency_notify(s); } From 45942b79b9f89b42a5f5ccfa861c36a86a95d89a Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Fri, 22 May 2020 15:53:55 +0800 Subject: [PATCH 28/33] net/colo-compare.c: Check that colo-compare is active If the colo-compare object is removed before failover and a checkpoint happens, qemu crashes because it tries to lock the destroyed event_mtx in colo_notify_compares_event. Fix this by checking if everything is initialized by introducing a new variable colo_compare_active which is protected by a new mutex colo_compare_mutex. The new mutex also protects against concurrent access of the net_compares list and makes sure that colo_notify_compares_event isn't active while we destroy event_mtx and event_complete_cond. With this it also is again possible to use colo without colo-compare (periodic mode) and to use multiple colo-compare for multiple network interfaces. 
Signed-off-by: Lukas Straub Tested-by: Lukas Straub Reviewed-by: Zhang Chen Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index a609f499b9..c30dbfb6e6 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -54,6 +54,8 @@ static NotifierList colo_compare_notifiers = #define REGULAR_PACKET_CHECK_MS 3000 #define DEFAULT_TIME_OUT_MS 3000 +static QemuMutex colo_compare_mutex; +static bool colo_compare_active; static QemuMutex event_mtx; static QemuCond event_complete_cond; static int event_unhandled_count; @@ -906,6 +908,12 @@ static void check_old_packet_regular(void *opaque) void colo_notify_compares_event(void *opaque, int event, Error **errp) { CompareState *s; + qemu_mutex_lock(&colo_compare_mutex); + + if (!colo_compare_active) { + qemu_mutex_unlock(&colo_compare_mutex); + return; + } qemu_mutex_lock(&event_mtx); QTAILQ_FOREACH(s, &net_compares, next) { @@ -919,6 +927,7 @@ void colo_notify_compares_event(void *opaque, int event, Error **errp) } qemu_mutex_unlock(&event_mtx); + qemu_mutex_unlock(&colo_compare_mutex); } static void colo_compare_timer_init(CompareState *s) @@ -1274,7 +1283,14 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) s->vnet_hdr); } + qemu_mutex_lock(&colo_compare_mutex); + if (!colo_compare_active) { + qemu_mutex_init(&event_mtx); + qemu_cond_init(&event_complete_cond); + colo_compare_active = true; + } QTAILQ_INSERT_TAIL(&net_compares, s, next); + qemu_mutex_unlock(&colo_compare_mutex); s->out_sendco.s = s; s->out_sendco.chr = &s->chr_out; @@ -1292,9 +1308,6 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) g_queue_init(&s->conn_list); - qemu_mutex_init(&event_mtx); - qemu_cond_init(&event_complete_cond); - s->connection_track_table = g_hash_table_new_full(connection_key_hash, connection_key_equal, g_free, @@ -1382,12 +1395,19 
@@ static void colo_compare_finalize(Object *obj) qemu_bh_delete(s->event_bh); + qemu_mutex_lock(&colo_compare_mutex); QTAILQ_FOREACH(tmp, &net_compares, next) { if (tmp == s) { QTAILQ_REMOVE(&net_compares, s, next); break; } } + if (QTAILQ_EMPTY(&net_compares)) { + colo_compare_active = false; + qemu_mutex_destroy(&event_mtx); + qemu_cond_destroy(&event_complete_cond); + } + qemu_mutex_unlock(&colo_compare_mutex); AioContext *ctx = iothread_get_aio_context(s->iothread); aio_context_acquire(ctx); @@ -1415,15 +1435,18 @@ static void colo_compare_finalize(Object *obj) object_unref(OBJECT(s->iothread)); } - qemu_mutex_destroy(&event_mtx); - qemu_cond_destroy(&event_complete_cond); - g_free(s->pri_indev); g_free(s->sec_indev); g_free(s->outdev); g_free(s->notify_dev); } +static void __attribute__((__constructor__)) colo_compare_init_globals(void) +{ + colo_compare_active = false; + qemu_mutex_init(&colo_compare_mutex); +} + static const TypeInfo colo_compare_info = { .name = TYPE_COLO_COMPARE, .parent = TYPE_OBJECT, From 5bd57eba041fb62c373f89c311936000c115fdd6 Mon Sep 17 00:00:00 2001 From: Lukas Straub Date: Fri, 22 May 2020 15:53:56 +0800 Subject: [PATCH 29/33] net/colo-compare.c: Correct ordering in complete and finalize In colo_compare_complete, insert CompareState into net_compares only after everything has been initialized. In colo_compare_finalize, remove CompareState from net_compares before anything is deinitialized. 
Signed-off-by: Lukas Straub Reviewed-by: Zhang Chen Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index c30dbfb6e6..ed1f3d0af0 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -1283,15 +1283,6 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) s->vnet_hdr); } - qemu_mutex_lock(&colo_compare_mutex); - if (!colo_compare_active) { - qemu_mutex_init(&event_mtx); - qemu_cond_init(&event_complete_cond); - colo_compare_active = true; - } - QTAILQ_INSERT_TAIL(&net_compares, s, next); - qemu_mutex_unlock(&colo_compare_mutex); - s->out_sendco.s = s; s->out_sendco.chr = &s->chr_out; s->out_sendco.notify_remote_frame = false; @@ -1314,6 +1305,16 @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) connection_destroy); colo_compare_iothread(s); + + qemu_mutex_lock(&colo_compare_mutex); + if (!colo_compare_active) { + qemu_mutex_init(&event_mtx); + qemu_cond_init(&event_complete_cond); + colo_compare_active = true; + } + QTAILQ_INSERT_TAIL(&net_compares, s, next); + qemu_mutex_unlock(&colo_compare_mutex); + return; } @@ -1382,19 +1383,6 @@ static void colo_compare_finalize(Object *obj) CompareState *s = COLO_COMPARE(obj); CompareState *tmp = NULL; - qemu_chr_fe_deinit(&s->chr_pri_in, false); - qemu_chr_fe_deinit(&s->chr_sec_in, false); - qemu_chr_fe_deinit(&s->chr_out, false); - if (s->notify_dev) { - qemu_chr_fe_deinit(&s->chr_notify_dev, false); - } - - if (s->iothread) { - colo_compare_timer_del(s); - } - - qemu_bh_delete(s->event_bh); - qemu_mutex_lock(&colo_compare_mutex); QTAILQ_FOREACH(tmp, &net_compares, next) { if (tmp == s) { @@ -1409,6 +1397,19 @@ static void colo_compare_finalize(Object *obj) } qemu_mutex_unlock(&colo_compare_mutex); + qemu_chr_fe_deinit(&s->chr_pri_in, false); + qemu_chr_fe_deinit(&s->chr_sec_in, false); + 
qemu_chr_fe_deinit(&s->chr_out, false); + if (s->notify_dev) { + qemu_chr_fe_deinit(&s->chr_notify_dev, false); + } + + if (s->iothread) { + colo_compare_timer_del(s); + } + + qemu_bh_delete(s->event_bh); + AioContext *ctx = iothread_get_aio_context(s->iothread); aio_context_acquire(ctx); AIO_WAIT_WHILE(ctx, !s->out_sendco.done); From bdadbb0f74305d1509805936bf9b1ac14eab30e4 Mon Sep 17 00:00:00 2001 From: Derek Su Date: Fri, 22 May 2020 15:53:57 +0800 Subject: [PATCH 30/33] colo-compare: Fix memory leak in packet_enqueue() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch is to fix the "pkt" memory leak in packet_enqueue(). The allocated "pkt" needs to be freed if the colo compare primary or secondary queue is too big. Replace the error_report of full queue with a trace event. Signed-off-by: Derek Su Reviewed-by: Zhang Chen Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Zhang Chen Signed-off-by: Jason Wang --- net/colo-compare.c | 23 +++++++++++++++-------- net/trace-events | 1 + 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index ed1f3d0af0..f15779dedc 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -146,6 +146,10 @@ enum { SECONDARY_IN, }; +static const char *colo_mode[] = { + [PRIMARY_IN] = "primary", + [SECONDARY_IN] = "secondary", +}; static int compare_chr_send(CompareState *s, uint8_t *buf, @@ -242,6 +246,7 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) ConnectionKey key; Packet *pkt = NULL; Connection *conn; + int ret; if (mode == PRIMARY_IN) { pkt = packet_new(s->pri_rs.buf, @@ -270,16 +275,18 @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) } if (mode == PRIMARY_IN) { - if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) { - error_report("colo compare primary queue size too big," - "drop packet"); - } + ret = colo_insert_packet(&conn->primary_list, pkt, &conn->pack); } 
else { - if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) { - error_report("colo compare secondary queue size too big," - "drop packet"); - } + ret = colo_insert_packet(&conn->secondary_list, pkt, &conn->sack); } + + if (!ret) { + trace_colo_compare_drop_packet(colo_mode[mode], + "queue size too big, drop packet"); + packet_destroy(pkt, NULL); + pkt = NULL; + } + *con = conn; return 0; diff --git a/net/trace-events b/net/trace-events index 02c13fd0ba..fa49c71533 100644 --- a/net/trace-events +++ b/net/trace-events @@ -12,6 +12,7 @@ colo_proxy_main(const char *chr) ": %s" # colo-compare.c colo_compare_main(const char *chr) ": %s" +colo_compare_drop_packet(const char *queue, const char *chr) ": %s: %s" colo_compare_udp_miscompare(const char *sta, int size) ": %s = %d" colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" From fda43b1204aecd1db158b3255c591d227fbdd629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 25 May 2020 14:23:30 +0200 Subject: [PATCH 31/33] hw/net/e1000e: Do not abort() on invalid PSRCTL register value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit libFuzzer found using 'qemu-system-i386 -M q35': qemu: hardware error: e1000e: PSRCTL.BSIZE0 cannot be zero CPU #0: EAX=00000000 EBX=00000000 ECX=00000000 EDX=00000663 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 
00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000 DR6=ffff0ff0 DR7=00000400 EFER=0000000000000000 FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80 FPR0=0000000000000000 0000 FPR1=0000000000000000 0000 FPR2=0000000000000000 0000 FPR3=0000000000000000 0000 FPR4=0000000000000000 0000 FPR5=0000000000000000 0000 FPR6=0000000000000000 0000 FPR7=0000000000000000 0000 XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000 XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000 XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000 XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000 ==1988== ERROR: libFuzzer: deadly signal #6 0x7fae4d3ea894 in __GI_abort (/lib64/libc.so.6+0x22894) #7 0x563f4cc59a1d in hw_error (qemu-fuzz-i386+0xe8ca1d) #8 0x563f4d7c93f2 in e1000e_set_psrctl (qemu-fuzz-i386+0x19fc3f2) #9 0x563f4d7b798f in e1000e_core_write (qemu-fuzz-i386+0x19ea98f) #10 0x563f4d7afc46 in e1000e_mmio_write (qemu-fuzz-i386+0x19e2c46) #11 0x563f4cc9a0a7 in memory_region_write_accessor (qemu-fuzz-i386+0xecd0a7) #12 0x563f4cc99c13 in access_with_adjusted_size (qemu-fuzz-i386+0xeccc13) #13 0x563f4cc987b4 in memory_region_dispatch_write (qemu-fuzz-i386+0xecb7b4) It simply sent the following 2 I/O commands to the e1000e PCI BAR #2 I/O region: writew 0x0100 0x0c00 # RCTL = E1000_RCTL_DTYP_MASK writeb 0x2170 0x00 # PSRCTL = 0 2813 static void 2814 e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val) 2815 { 2816 if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) { 2817 2818 if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) { 2819 hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero"); 2820 } Instead of calling hw_error(), which aborts the process (it is meant for CPU fatal error conditions, not for device logging), log the invalid request with qemu_log_mask(LOG_GUEST_ERROR) and return, ignoring the request.
Cc: qemu-stable@nongnu.org Signed-off-by: Philippe Mathieu-Daudé Signed-off-by: Jason Wang --- hw/net/e1000e_core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index d5676871fa..bcd186cac5 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -34,9 +34,9 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "net/net.h" #include "net/tap.h" -#include "hw/hw.h" #include "hw/pci/msi.h" #include "hw/pci/msix.h" #include "sysemu/runstate.h" @@ -2816,11 +2816,15 @@ e1000e_set_psrctl(E1000ECore *core, int index, uint32_t val) if (core->mac[RCTL] & E1000_RCTL_DTYP_MASK) { if ((val & E1000_PSRCTL_BSIZE0_MASK) == 0) { - hw_error("e1000e: PSRCTL.BSIZE0 cannot be zero"); + qemu_log_mask(LOG_GUEST_ERROR, + "e1000e: PSRCTL.BSIZE0 cannot be zero"); + return; } if ((val & E1000_PSRCTL_BSIZE1_MASK) == 0) { - hw_error("e1000e: PSRCTL.BSIZE1 cannot be zero"); + qemu_log_mask(LOG_GUEST_ERROR, + "e1000e: PSRCTL.BSIZE1 cannot be zero"); + return; } } From 9d903f30cb37eb2f7aec53ab58cd869f48ec16d5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 18 May 2020 20:01:02 +0200 Subject: [PATCH 32/33] net: Drop the legacy "name" parameter from the -net option It's been deprecated since QEMU v3.1, so it's time to finally remove it. The "id" parameter can simply be used instead. Reviewed-by: Eric Blake Signed-off-by: Thomas Huth Signed-off-by: Jason Wang --- docs/system/deprecated.rst | 15 +++++++++------ net/net.c | 10 +--------- qapi/net.json | 3 --- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/docs/system/deprecated.rst b/docs/system/deprecated.rst index 544ece0a45..3a255591c3 100644 --- a/docs/system/deprecated.rst +++ b/docs/system/deprecated.rst @@ -47,12 +47,6 @@ The 'file' driver for drives is no longer appropriate for character or host devices and will only accept regular files (S_IFREG). 
The correct driver for these file types is 'host_cdrom' or 'host_device' as appropriate. -``-net ...,name=``\ *name* (since 3.1) -'''''''''''''''''''''''''''''''''''''' - -The ``name`` parameter of the ``-net`` option is a synonym -for the ``id`` parameter, which should now be used instead. - ``-smp`` (invalid topologies) (since 3.1) ''''''''''''''''''''''''''''''''''''''''' @@ -441,6 +435,15 @@ What follows is a record of recently removed, formerly deprecated features that serves as a record for users who have encountered trouble after a recent upgrade. +System emulator command line arguments +-------------------------------------- + +``-net ...,name=``\ *name* (removed in 5.1) +''''''''''''''''''''''''''''''''''''''''''' + +The ``name`` parameter of the ``-net`` option was a synonym +for the ``id`` parameter, which should now be used instead. + QEMU Machine Protocol (QMP) commands ------------------------------------ diff --git a/net/net.c b/net/net.c index 4c62b10acd..e55d3572a4 100644 --- a/net/net.c +++ b/net/net.c @@ -969,12 +969,10 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) { Netdev legacy = {0}; const Netdev *netdev; - const char *name; NetClientState *peer = NULL; if (is_netdev) { netdev = object; - name = netdev->id; if (netdev->type == NET_CLIENT_DRIVER_NIC || !net_client_init_fun[netdev->type]) { @@ -987,12 +985,6 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) const NetLegacyOptions *opts = net->opts; legacy.id = net->id; netdev = &legacy; - /* missing optional values have been initialized to "all bits zero" */ - name = net->has_id ? 
net->id : net->name; - - if (net->has_name) { - warn_report("The 'name' parameter is deprecated, use 'id' instead"); - } /* Map the old options to the new flat type */ switch (opts->type) { @@ -1052,7 +1044,7 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) } } - if (net_client_init_fun[netdev->type](netdev, name, peer, errp) < 0) { + if (net_client_init_fun[netdev->type](netdev, netdev->id, peer, errp) < 0) { /* FIXME drop when all init functions store an Error */ if (errp && !*errp) { error_setg(errp, QERR_DEVICE_INIT_FAILED, diff --git a/qapi/net.json b/qapi/net.json index cebb1b52e3..fc7c95f6d8 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -474,8 +474,6 @@ # # @id: identifier for monitor commands # -# @name: identifier for monitor commands, ignored if @id is present -# # @opts: device type specific properties (legacy) # # Since: 1.2 @@ -483,7 +481,6 @@ { 'struct': 'NetLegacy', 'data': { '*id': 'str', - '*name': 'str', 'opts': 'NetLegacyOptions' } } ## From 71830d8430e65dd20aec4765d87e60336148e1a6 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 18 May 2020 20:01:03 +0200 Subject: [PATCH 33/33] net: Drop the NetLegacy structure, always use Netdev instead Now that the "name" parameter is gone, there is hardly any difference between NetLegacy and Netdev anymore, so we can drop NetLegacy and always use Netdev to simplify the code quite a bit. The only two differences that were really left between Netdev and NetLegacy: 1) NetLegacy does not allow a "hubport" type. We can continue to block this with a simple check in net_client_init1() for this type. 2) The "id" parameter was optional in NetLegacy (and an internal id was chosen via assign_name() during initialization), but it is mandatory for Netdev. To avoid that the visitor code bails out here, we have to add an internal id to the QemuOpts already earlier now. 
Signed-off-by: Thomas Huth Reviewed-by: Eric Blake Signed-off-by: Jason Wang --- net/net.c | 77 +++++++++------------------------------------------ qapi/net.json | 46 ------------------------------ 2 files changed, 13 insertions(+), 110 deletions(-) diff --git a/net/net.c b/net/net.c index e55d3572a4..d1130296e1 100644 --- a/net/net.c +++ b/net/net.c @@ -965,15 +965,11 @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])( }; -static int net_client_init1(const void *object, bool is_netdev, Error **errp) +static int net_client_init1(const Netdev *netdev, bool is_netdev, Error **errp) { - Netdev legacy = {0}; - const Netdev *netdev; NetClientState *peer = NULL; if (is_netdev) { - netdev = object; - if (netdev->type == NET_CLIENT_DRIVER_NIC || !net_client_init_fun[netdev->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", @@ -981,56 +977,11 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) return -1; } } else { - const NetLegacy *net = object; - const NetLegacyOptions *opts = net->opts; - legacy.id = net->id; - netdev = &legacy; - - /* Map the old options to the new flat type */ - switch (opts->type) { - case NET_LEGACY_OPTIONS_TYPE_NONE: + if (netdev->type == NET_CLIENT_DRIVER_NONE) { return 0; /* nothing to do */ - case NET_LEGACY_OPTIONS_TYPE_NIC: - legacy.type = NET_CLIENT_DRIVER_NIC; - legacy.u.nic = opts->u.nic; - break; - case NET_LEGACY_OPTIONS_TYPE_USER: - legacy.type = NET_CLIENT_DRIVER_USER; - legacy.u.user = opts->u.user; - break; - case NET_LEGACY_OPTIONS_TYPE_TAP: - legacy.type = NET_CLIENT_DRIVER_TAP; - legacy.u.tap = opts->u.tap; - break; - case NET_LEGACY_OPTIONS_TYPE_L2TPV3: - legacy.type = NET_CLIENT_DRIVER_L2TPV3; - legacy.u.l2tpv3 = opts->u.l2tpv3; - break; - case NET_LEGACY_OPTIONS_TYPE_SOCKET: - legacy.type = NET_CLIENT_DRIVER_SOCKET; - legacy.u.socket = opts->u.socket; - break; - case NET_LEGACY_OPTIONS_TYPE_VDE: - legacy.type = NET_CLIENT_DRIVER_VDE; - legacy.u.vde = opts->u.vde; - 
break; - case NET_LEGACY_OPTIONS_TYPE_BRIDGE: - legacy.type = NET_CLIENT_DRIVER_BRIDGE; - legacy.u.bridge = opts->u.bridge; - break; - case NET_LEGACY_OPTIONS_TYPE_NETMAP: - legacy.type = NET_CLIENT_DRIVER_NETMAP; - legacy.u.netmap = opts->u.netmap; - break; - case NET_LEGACY_OPTIONS_TYPE_VHOST_USER: - legacy.type = NET_CLIENT_DRIVER_VHOST_USER; - legacy.u.vhost_user = opts->u.vhost_user; - break; - default: - abort(); } - - if (!net_client_init_fun[netdev->type]) { + if (netdev->type == NET_CLIENT_DRIVER_HUBPORT || + !net_client_init_fun[netdev->type]) { error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type", "a net backend type (maybe it is not compiled " "into this binary)"); @@ -1039,7 +990,7 @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) /* Do not add to a hub if it's a nic with a netdev= parameter. */ if (netdev->type != NET_CLIENT_DRIVER_NIC || - !opts->u.nic.has_netdev) { + !netdev->u.nic.has_netdev) { peer = net_hub_add_port(0, NULL, NULL); } } @@ -1100,7 +1051,7 @@ static void show_netdevs(void) static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp) { gchar **substrings = NULL; - void *object = NULL; + Netdev *object = NULL; Error *err = NULL; int ret = -1; Visitor *v = opts_visitor_new(opts); @@ -1143,21 +1094,19 @@ static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp) } } - if (is_netdev) { - visit_type_Netdev(v, NULL, (Netdev **)&object, &err); - } else { - visit_type_NetLegacy(v, NULL, (NetLegacy **)&object, &err); + /* Create an ID for -net if the user did not specify one */ + if (!is_netdev && !qemu_opts_id(opts)) { + static int idx; + qemu_opts_set_id(opts, g_strdup_printf("__org.qemu.net%i", idx++)); } + visit_type_Netdev(v, NULL, &object, &err); + if (!err) { ret = net_client_init1(object, is_netdev, &err); } - if (is_netdev) { - qapi_free_Netdev(object); - } else { - qapi_free_NetLegacy(object); - } + qapi_free_Netdev(object); out: error_propagate(errp, err); diff --git 
a/qapi/net.json b/qapi/net.json index fc7c95f6d8..9244c9af56 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -467,52 +467,6 @@ 'netmap': 'NetdevNetmapOptions', 'vhost-user': 'NetdevVhostUserOptions' } } -## -# @NetLegacy: -# -# Captures the configuration of a network device; legacy. -# -# @id: identifier for monitor commands -# -# @opts: device type specific properties (legacy) -# -# Since: 1.2 -## -{ 'struct': 'NetLegacy', - 'data': { - '*id': 'str', - 'opts': 'NetLegacyOptions' } } - -## -# @NetLegacyOptionsType: -# -# Since: 1.2 -## -{ 'enum': 'NetLegacyOptionsType', - 'data': ['none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', - 'bridge', 'netmap', 'vhost-user'] } - -## -# @NetLegacyOptions: -# -# Like Netdev, but for use only by the legacy command line options -# -# Since: 1.2 -## -{ 'union': 'NetLegacyOptions', - 'base': { 'type': 'NetLegacyOptionsType' }, - 'discriminator': 'type', - 'data': { - 'nic': 'NetLegacyNicOptions', - 'user': 'NetdevUserOptions', - 'tap': 'NetdevTapOptions', - 'l2tpv3': 'NetdevL2TPv3Options', - 'socket': 'NetdevSocketOptions', - 'vde': 'NetdevVdeOptions', - 'bridge': 'NetdevBridgeOptions', - 'netmap': 'NetdevNetmapOptions', - 'vhost-user': 'NetdevVhostUserOptions' } } - ## # @NetFilterDirection: #