[v8,5/8] net/virtio: implement transmit path for packed queues

Message ID 20181016090134.29448-6-jfreimann@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series implement packed virtqueues |

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues

Commit Message

Jens Freimann Oct. 16, 2018, 9:01 a.m. UTC
  This implements the transmit path for devices with
support for packed virtqueues.

Signed-off-by: Jens Freiman <jfreimann@redhat.com>
---
 drivers/net/virtio/virtio_ethdev.c |  32 ++--
 drivers/net/virtio/virtio_ethdev.h |   2 +
 drivers/net/virtio/virtio_rxtx.c   | 276 +++++++++++++++++++++++++++++
 drivers/net/virtio/virtqueue.h     |  18 +-
 4 files changed, 316 insertions(+), 12 deletions(-)
  

Comments

Tiwei Bie Oct. 18, 2018, 4:43 a.m. UTC | #1
On Tue, Oct 16, 2018 at 11:01:31AM +0200, Jens Freimann wrote:
[...]
> +void
> +vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t desc_idx)
> +{
> +	struct vring_desc_packed *dp;
> +	struct vq_desc_extra *dxp = NULL, *dxp_tail = NULL;
> +	uint16_t desc_idx_last = desc_idx;
> +
> +	dp  = &vq->vq_ring.desc_packed[desc_idx];
> +	dxp = &vq->vq_descx[desc_idx];

In the out-of-order case, we can't assume the desc and desc_extra
have the same index.

> +	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
> +	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
> +		while (dp->flags & VRING_DESC_F_NEXT) {

https://github.com/oasis-tcs/virtio-spec/blob/89dd55f5e606/packed-ring.tex#L222-L226
The device only writes out a single used descriptor for the whole
list. It then skips forward according to the number of
descriptors in the list. The driver needs to keep track of the size
of the list corresponding to each buffer ID, to be able to skip
to where the next used descriptor is written by the device.

IOW, we can't use the VRING_DESC_F_NEXT here.


> +			desc_idx_last = dxp->next;
> +			dp = &vq->vq_ring.desc_packed[dxp->next];
> +			dxp = &vq->vq_descx[dxp->next];
> +		}
> +	}
  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 873b345b3..14cf3657e 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -384,6 +384,8 @@  virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
 	vq->hw = hw;
 	vq->vq_queue_index = vtpci_queue_idx;
 	vq->vq_nentries = vq_size;
+	if (vtpci_packed_queue(hw))
+		vq->vq_ring.avail_wrap_counter = 1;
 
 	/*
 	 * Reserve a memzone for vring elements
@@ -490,16 +492,22 @@  virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx)
 		memset(txr, 0, vq_size * sizeof(*txr));
 		for (i = 0; i < vq_size; i++) {
 			struct vring_desc *start_dp = txr[i].tx_indir;
-
-			vring_desc_init_split(start_dp, RTE_DIM(txr[i].tx_indir));
-
+			struct vring_desc_packed*start_dp_packed = txr[i].tx_indir_pq;
+	
 			/* first indirect descriptor is always the tx header */
-			start_dp->addr = txvq->virtio_net_hdr_mem
-				+ i * sizeof(*txr)
-				+ offsetof(struct virtio_tx_region, tx_hdr);
-
-			start_dp->len = hw->vtnet_hdr_size;
-			start_dp->flags = VRING_DESC_F_NEXT;
+			if (vtpci_packed_queue(hw)) {
+				start_dp_packed->addr = txvq->virtio_net_hdr_mem
+					+ i * sizeof(*txr)
+					+ offsetof(struct virtio_tx_region, tx_hdr);
+				start_dp_packed->len = hw->vtnet_hdr_size;
+			} else {
+				vring_desc_init_split(start_dp, RTE_DIM(txr[i].tx_indir));
+				start_dp->addr = txvq->virtio_net_hdr_mem
+					+ i * sizeof(*txr)
+					+ offsetof(struct virtio_tx_region, tx_hdr);
+				start_dp->len = hw->vtnet_hdr_size;
+				start_dp->flags = VRING_DESC_F_NEXT;
+			}
 		}
 	}
 
@@ -1338,7 +1346,11 @@  set_rxtx_funcs(struct rte_eth_dev *eth_dev)
 		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
 	}
 
-	if (hw->use_inorder_tx) {
+	if (vtpci_packed_queue(hw)) {
+		PMD_INIT_LOG(INFO, "virtio: using virtio 1.1 Tx path on port %u",
+			eth_dev->data->port_id);
+		eth_dev->tx_pkt_burst = virtio_xmit_pkts_packed;
+	} else if (hw->use_inorder_tx) {
 		PMD_INIT_LOG(INFO, "virtio: using inorder Tx path on port %u",
 			eth_dev->data->port_id);
 		eth_dev->tx_pkt_burst = virtio_xmit_pkts_inorder;
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index e0f80e5a4..05d355180 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -82,6 +82,8 @@  uint16_t virtio_recv_mergeable_pkts_inorder(void *rx_queue,
 
 uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
+uint16_t virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts);
 
 uint16_t virtio_xmit_pkts_inorder(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts);
diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c
index eb891433e..0c17102c3 100644
--- a/drivers/net/virtio/virtio_rxtx.c
+++ b/drivers/net/virtio/virtio_rxtx.c
@@ -88,6 +88,41 @@  vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
 	dp->next = VQ_RING_DESC_CHAIN_END;
 }
 
+void
+vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t desc_idx)
+{
+	struct vring_desc_packed *dp;
+	struct vq_desc_extra *dxp = NULL, *dxp_tail = NULL;
+	uint16_t desc_idx_last = desc_idx;
+
+	dp  = &vq->vq_ring.desc_packed[desc_idx];
+	dxp = &vq->vq_descx[desc_idx];
+	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs);
+	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
+		while (dp->flags & VRING_DESC_F_NEXT) {
+			desc_idx_last = dxp->next;
+			dp = &vq->vq_ring.desc_packed[dxp->next];
+			dxp = &vq->vq_descx[dxp->next];
+		}
+	}
+
+	/*
+	 * We must append the existing free chain, if any, to the end of
+	 * newly freed chain. If the virtqueue was completely used, then
+	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
+	 */
+	if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) {
+		vq->vq_desc_head_idx = desc_idx;
+	}
+	else {
+		dxp_tail = &vq->vq_descx[vq->vq_desc_tail_idx];
+		dxp_tail->next = desc_idx;
+	}
+
+	vq->vq_desc_tail_idx = desc_idx_last;
+	dxp->next = VQ_RING_DESC_CHAIN_END;
+}
+
 static uint16_t
 virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts,
 			   uint32_t *len, uint16_t num)
@@ -165,6 +200,33 @@  virtqueue_dequeue_rx_inorder(struct virtqueue *vq,
 #endif
 
 /* Cleanup from completed transmits. */
+static void
+virtio_xmit_cleanup_packed(struct virtqueue *vq)
+{
+	uint16_t used_idx, id;
+	uint16_t size = vq->vq_nentries;
+	struct vring_desc_packed *desc = vq->vq_ring.desc_packed;
+	struct vq_desc_extra *dxp;
+
+	used_idx = vq->vq_used_cons_idx;
+	while (desc_is_used(&desc[used_idx], &vq->vq_ring) &&
+	       vq->vq_free_cnt < size) {
+		used_idx = vq->vq_used_cons_idx;
+		id = desc[used_idx].index;
+		dxp = &vq->vq_descx[id];
+		if (++vq->vq_used_cons_idx >= size) {
+			vq->vq_used_cons_idx -= size;
+			vq->vq_ring.used_wrap_counter ^= 1;
+		}
+		vq_ring_free_chain_packed(vq, id);
+		if (dxp->cookie != NULL) {
+			rte_pktmbuf_free(dxp->cookie);
+			dxp->cookie = NULL;
+		}
+		used_idx = vq->vq_used_cons_idx;
+	}
+}
+
 static void
 virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num)
 {
@@ -456,6 +518,129 @@  virtqueue_enqueue_xmit_inorder(struct virtnet_tx *txvq,
 	vq->vq_desc_head_idx = idx & (vq->vq_nentries - 1);
 }
 
+static inline void
+virtqueue_enqueue_xmit_packed(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
+			uint16_t needed, int use_indirect, int can_push,
+			int in_order)
+{
+	struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr;
+	struct vq_desc_extra *dxp, *head_dxp;
+	struct virtqueue *vq = txvq->vq;
+	struct vring_desc_packed *start_dp, *head_dp;
+	uint16_t seg_num = cookie->nb_segs;
+	uint16_t idx, head_id, head_flags;
+	uint16_t head_size = vq->hw->vtnet_hdr_size;
+	struct virtio_net_hdr *hdr;
+
+	head_id = vq->vq_desc_head_idx;
+	idx = head_id;
+	start_dp = vq->vq_ring.desc_packed;
+	dxp = &vq->vq_descx[idx];
+	dxp->ndescs = needed;
+
+	head_dp = &vq->vq_ring.desc_packed[head_id];
+	head_dxp = &vq->vq_descx[head_id];
+	head_dxp->cookie = (void *) cookie;
+	head_flags = cookie->next ? VRING_DESC_F_NEXT: 0;
+	head_flags = VRING_DESC_F_AVAIL(vq->vq_ring.avail_wrap_counter) |
+		     VRING_DESC_F_USED(!vq->vq_ring.avail_wrap_counter);
+
+	if (can_push) {
+		/* prepend cannot fail, checked by caller */
+		hdr = (struct virtio_net_hdr *)
+			rte_pktmbuf_prepend(cookie, head_size);
+		/* rte_pktmbuf_prepend() counts the hdr size to the pkt length,
+		 * which is wrong. Below subtract restores correct pkt size.
+		 */
+		cookie->pkt_len -= head_size;
+
+		/* if offload disabled, it is not zeroed below, do it now */
+		if (!vq->hw->has_tx_offload) {
+			ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0);
+			ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0);
+			ASSIGN_UNLESS_EQUAL(hdr->flags, 0);
+			ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0);
+			ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0);
+			ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0);
+		}
+	} else if (use_indirect) {
+		/* setup tx ring slot to point to indirect
+		 * descriptor list stored in reserved region.
+		 *
+		 * the first slot in indirect ring is already preset
+		 * to point to the header in reserved region
+		 */
+		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
+			RTE_PTR_DIFF(&txr[idx].tx_indir_pq, txr);
+		start_dp[idx].len   = (seg_num + 1) * sizeof(struct vring_desc_packed);
+		start_dp[idx].flags = VRING_DESC_F_INDIRECT;
+		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
+
+		/* loop below will fill in rest of the indirect elements */
+		start_dp = txr[idx].tx_indir_pq;
+		idx = 1;
+	} else {
+		/* setup first tx ring slot to point to header
+		 * stored in reserved region.
+		 */
+		start_dp[idx].addr  = txvq->virtio_net_hdr_mem +
+			RTE_PTR_DIFF(&txr[idx].tx_hdr, txr);
+		start_dp[idx].len   = vq->hw->vtnet_hdr_size;
+		hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr;
+		idx = dxp->next;
+	}
+
+	virtqueue_xmit_offload(hdr, cookie, vq->hw->has_tx_offload);
+
+	do {
+		uint16_t flags;
+		if (idx >= vq->vq_nentries) {
+			idx = 0;
+			vq->vq_ring.avail_wrap_counter ^= 1;
+		}
+		start_dp[idx].addr  = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq);
+		start_dp[idx].len   = cookie->data_len;
+		if (likely(idx != head_id)) {
+			flags = cookie->next ? VRING_DESC_F_NEXT : 0;
+			flags |= VRING_DESC_F_AVAIL(vq->vq_ring.avail_wrap_counter) |
+				 VRING_DESC_F_USED(!vq->vq_ring.avail_wrap_counter);
+			start_dp[idx].flags = flags;
+		}
+		if (use_indirect) {
+			if (++idx >= (seg_num + 1))
+				break;
+		} else {
+			dxp = &vq->vq_descx[idx];
+			if (idx == (vq->vq_nentries -1) && dxp->next == 0)
+				vq->vq_ring.avail_wrap_counter ^= 1;
+			idx = dxp->next;
+		}
+	} while ((cookie = cookie->next) != NULL);
+
+	if (use_indirect)
+		idx = vq->vq_ring.desc_packed[head_id].index;
+
+	vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed);
+
+	if (!in_order) {
+		if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
+			vq->vq_desc_tail_idx = idx;
+	}
+	vq->vq_avail_idx += needed;
+	if (idx==VQ_RING_DESC_CHAIN_END)
+		idx = vq->vq_avail_idx;
+	if (vq->vq_avail_idx >= vq->vq_nentries) {
+		vq->vq_avail_idx -= vq->vq_nentries;
+		idx = vq->vq_avail_idx;
+		vq->vq_ring.avail_wrap_counter ^= 1;
+	}
+	vq->vq_desc_head_idx = idx;
+
+	rte_smp_wmb();
+	head_dp->flags = head_flags;
+}
+
+
 static inline void
 virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie,
 			uint16_t needed, int use_indirect, int can_push,
@@ -1346,6 +1531,97 @@  virtio_recv_mergeable_pkts(void *rx_queue,
 	return nb_rx;
 }
 
+uint16_t
+virtio_xmit_pkts_packed(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct virtnet_tx *txvq = tx_queue;
+	struct virtqueue *vq = txvq->vq;
+	struct virtio_hw *hw = vq->hw;
+	uint16_t hdr_size = hw->vtnet_hdr_size;
+	uint16_t nb_tx = 0;
+	int error;
+
+	if (unlikely(hw->started == 0 && tx_pkts != hw->inject_pkts))
+		return nb_tx;
+
+	if (unlikely(nb_pkts < 1))
+		return nb_pkts;
+
+	PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts);
+
+	virtio_rmb();
+	if (likely(nb_pkts > vq->vq_nentries - vq->vq_free_thresh))
+		virtio_xmit_cleanup_packed(vq);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		struct rte_mbuf *txm = tx_pkts[nb_tx];
+		int can_push = 0, use_indirect = 0, slots, need;
+
+		/* Do VLAN tag insertion */
+		if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) {
+			error = rte_vlan_insert(&txm);
+			if (unlikely(error)) {
+				rte_pktmbuf_free(txm);
+				continue;
+			}
+		}
+
+		/* optimize ring usage */
+		if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) ||
+		      vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) &&
+		    rte_mbuf_refcnt_read(txm) == 1 &&
+		    RTE_MBUF_DIRECT(txm) &&
+		    txm->nb_segs == 1 &&
+		    rte_pktmbuf_headroom(txm) >= hdr_size &&
+		    rte_is_aligned(rte_pktmbuf_mtod(txm, char *),
+				   __alignof__(struct virtio_net_hdr_mrg_rxbuf)))
+			can_push = 1;
+		else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) &&
+			 txm->nb_segs < VIRTIO_MAX_TX_INDIRECT)
+			use_indirect = 1;
+
+		/* How many main ring entries are needed to this Tx?
+		 * any_layout => number of segments
+		 * indirect   => 1
+		 * default    => number of segments + 1
+		 */
+		slots = use_indirect ? 1 : (txm->nb_segs + !can_push);
+		need = slots - vq->vq_free_cnt;
+
+		/* Positive value indicates it need free vring descriptors */
+		if (unlikely(need > 0)) {
+			virtio_rmb();
+			need = RTE_MIN(need, (int)nb_pkts);
+
+			virtio_xmit_cleanup_packed(vq);
+			need = slots - vq->vq_free_cnt;
+			if (unlikely(need > 0)) {
+				PMD_TX_LOG(ERR,
+					   "No free tx descriptors to transmit");
+				break;
+			}
+		}
+
+		/* Enqueue Packet buffers */
+		virtqueue_enqueue_xmit_packed(txvq, txm, slots, use_indirect,
+			can_push, 0);
+
+		txvq->stats.bytes += txm->pkt_len;
+		virtio_update_packet_stats(&txvq->stats, txm);
+	}
+
+	txvq->stats.packets += nb_tx;
+
+	if (likely(nb_tx)) {
+		if (unlikely(virtqueue_kick_prepare_packed(vq))) {
+			virtqueue_notify(vq);
+			PMD_TX_LOG(DEBUG, "Notified backend after xmit");
+		}
+	}
+
+	return nb_tx;
+}
+
 uint16_t
 virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index 2af1bc259..9debbc3f9 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -242,8 +242,12 @@  struct virtio_net_hdr_mrg_rxbuf {
 #define VIRTIO_MAX_TX_INDIRECT 8
 struct virtio_tx_region {
 	struct virtio_net_hdr_mrg_rxbuf tx_hdr;
-	struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
-			   __attribute__((__aligned__(16)));
+	union {
+		struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT]
+			__attribute__((__aligned__(16)));
+		struct vring_desc_packed tx_indir_pq[VIRTIO_MAX_TX_INDIRECT]
+			__attribute__((__aligned__(16)));
+	};
 };
 
 static inline void
@@ -319,6 +323,7 @@  virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx)
 #define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx))
 
 void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
+void vq_ring_free_chain_packed(struct virtqueue *vq, uint16_t desc_idx);
 void vq_ring_free_inorder(struct virtqueue *vq, uint16_t desc_idx,
 			  uint16_t num);
 
@@ -352,6 +357,15 @@  virtqueue_kick_prepare(struct virtqueue *vq)
 	return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY);
 }
 
+static inline int
+virtqueue_kick_prepare_packed(struct virtqueue *vq)
+{
+	uint16_t flags;
+
+	flags = vq->vq_ring.device_event->desc_event_flags & RING_EVENT_FLAGS_DESC;
+	return (flags != RING_EVENT_FLAGS_DISABLE);
+}
+
 static inline void
 virtqueue_notify(struct virtqueue *vq)
 {