From e94ea5d8c6be32d3e3671793c59bd1ddab7b7d94 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Sun, 13 Sep 2020 16:29:55 +0200 Subject: [PATCH] virtio_netmap: Use half the vring's size as netmap slots For every slot in a netmap ring, two descriptors in the virtqueue's vring are used. Therefore, we can only use half the vring's size as slots for netmap rings. Otherwise, the vring and the netmap ring are out of sync. This currently corrupts proper operation and produces bad data (see below). This patch also sets an explicit nm_config to ensure we keep the values for num_rx_desc/num_tx_desc from attach time and marks the VQ_FULL case in virtio_netmap_init_buffers() as an issue that should never be triggered. Background: Experiment / Observation Running a qemu-guest with a virtio_net NIC (rx_queue_size=256) attached to a bridge on the host. Sending known/predictable packets in a loop from the host via scapy. On the guest side, running tcpdump with native netmap support and inspecting the packet data: tcpdump -i netmap:enp8s0 -Xn# The first 128 packets look as expected, but thereafter come 128 packets of "null data": 128 15:59:46.592378 IP 10.0.0.128.128 > 127.0.0.1.256: UDP, length 141 0x0000: 4500 00a9 0001 0000 4011 f0c2 0a00 0080 E.......@....... 
0x0010: 7f00 0001 0080 0100 0095 7f07 3030 3132 ............0012 0x0020: 3820 3031 3330 2046 4646 4646 4646 4646 8.0130.FFFFFFFFF 0x0030: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0040: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0050: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0060: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0070: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0080: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0090: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x00a0: 4646 4646 4646 4646 46 FFFFFFFFF 129 15:59:46.712936 00:00:00:00:00:00 > 00:00:00:00:00:00 Null Information, send seq 0, rcv seq 0, Flags [Command], length 121 0x0000: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0010: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0020: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0030: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0040: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0050: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0060: 0000 0000 0000 0000 0000 0000 0000 0000 ................ 0x0070: 0000 0000 00 ..... At packet 256 tcpdump receives non-null data, but the payloads represent what should've been received for packet 129 and later. Because the length is taken from the vring, some of the slots also expose stale/corrupt data: 281 16:03:51.846490 IP 10.0.0.153.153 > 127.0.0.1.306: UDP, length 74 0x0000: 4500 0066 0001 0000 4011 f0ec 0a00 0099 E..f....@....... 
0x0010: 7f00 0001 0099 0132 0052 d633 3030 3135 .......2.R.30015 0x0020: 3320 3030 3633 2046 4646 4646 4646 4646 3.0063.FFFFFFFFF 0x0030: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0040: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0050: 4646 4646 4646 4646 4646 4646 4646 4646 FFFFFFFFFFFFFFFF 0x0060: 4646 4646 4646 5757 5757 5757 5757 5757 FFFFFFWWWWWWWWWW 0x0070: 5757 5757 5757 5757 5757 5757 5757 5757 WWWWWWWWWWWWWWWW 0x0080: 5757 5757 5757 5757 5757 5757 5757 5757 WWWWWWWWWWWWWWWW 0x0090: 5757 5757 5757 5757 5757 5757 0000 0000 WWWWWWWWWWWW.... 0x00a0: 0000 0000 0000 0000 ........ --- LINUX/virtio_netmap.h | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/LINUX/virtio_netmap.h b/LINUX/virtio_netmap.h index bf5e37ff6..71532d851 100644 --- a/LINUX/virtio_netmap.h +++ b/LINUX/virtio_netmap.h @@ -632,6 +632,17 @@ virtio_netmap_init_buffers(struct virtnet_info *vi) for (i = 0; i < na->num_rx_desc; i++) { void *addr; + /* + * As the comment above states, we need to add exactly + * num_rx_desc descriptors and so the queue is not + * allowed to become full. + */ + if (vq->num_free == 0) { + nm_prerr("No space left in vq! %d/%d", + i, na->num_rx_desc); + break; + } + slot = &ring->slot[i]; addr = NMB(na, slot); COMPAT_INIT_SG(sg); @@ -644,9 +655,6 @@ virtio_netmap_init_buffers(struct virtnet_info *vi) return 0; } RXNUM_INC(vi, r); - - if (VQ_FULL(vq, err)) - break; } nm_prinf("added %d inbufs on queue %d", i, r); virtqueue_kick(vq); @@ -677,6 +685,20 @@ virtio_netmap_intr(struct netmap_adapter *na, int onoff) } } +static int +virtio_netmap_config(struct netmap_adapter *na, struct nm_config_info *info) +{ + int ret = netmap_rings_config_get(na, info); + if (ret) + return ret; + + /* Take whatever we had at init time. 
*/ + info->num_tx_descs = na->num_tx_desc; + info->num_rx_descs = na->num_rx_desc; + info->rx_buf_maxsize = na->rx_buf_maxsize; + return 0; +} + static void virtio_netmap_attach(struct virtnet_info *vi) { @@ -686,13 +708,14 @@ virtio_netmap_attach(struct virtnet_info *vi) bzero(&na, sizeof(na)); na.ifp = vi->dev; - na.num_tx_desc = virtqueue_get_vring_size(GET_TX_VQ(vi, 0)); - na.num_rx_desc = virtqueue_get_vring_size(GET_RX_VQ(vi, 0)); + na.num_tx_desc = virtqueue_get_vring_size(GET_TX_VQ(vi, 0)) / 2; + na.num_rx_desc = virtqueue_get_vring_size(GET_RX_VQ(vi, 0)) / 2; na.num_tx_rings = na.num_rx_rings = 1; na.nm_register = virtio_netmap_reg; na.nm_txsync = virtio_netmap_txsync; na.nm_rxsync = virtio_netmap_rxsync; na.nm_intr = virtio_netmap_intr; + na.nm_config = virtio_netmap_config; ret = netmap_attach_ext(&na, sizeof(struct netmap_virtio_adapter), 1); if (ret) {