[dpdk-dev,1/3] vmxnet3: support mult-segment receive

Message ID 1449191107-14222-2-git-send-email-stephen@networkplumber.org (mailing list archive)
State Changes Requested, archived
Delegated to: Bruce Richardson
Headers

Commit Message

Stephen Hemminger Dec. 4, 2015, 1:05 a.m. UTC
  From: Stephen Hemminger <shemming@brocade.com>

The vmxnet3 interface specification supports having multiple
receive rings. The first ring has buffers of BTYPE_HEAD which
are used for the start of the packet, the second ring has buffers
of type BTYPE_BODY which are used only if the received packet
exceeds the available space of the head buffer.  There can
zero or more body buffers for one packet.

What the driver does is post mbuf's to both rings. If a jumbo
frame is received; then a multi-segment mbuf is created and this
is returned. The previous version of the driver was filling both
rings, but the second ring was never used. It would error out
if it ever received a multi-segment mbuf.

The logic is very similar to existing methods used in other
OS's. This patch has been used internally by several companies
and users for several releases and tested with Jumbo frames.

Signed-off-by: Stephen Hemminger <shemming@brocade.com>
---
 drivers/net/vmxnet3/vmxnet3_ring.h |  2 +
 drivers/net/vmxnet3/vmxnet3_rxtx.c | 78 +++++++++++++++++++-------------------
 2 files changed, 42 insertions(+), 38 deletions(-)
  

Comments

Yong Wang Dec. 22, 2015, 7:51 p.m. UTC | #1
On 12/3/15, 5:05 PM, "Stephen Hemminger" <stephen@networkplumber.org> wrote:



>From: Stephen Hemminger <shemming@brocade.com>

>

>The vmxnet3 interface specification supports having multiple

>receive rings. The first ring has buffers of BTYPE_HEAD which

>are used for the start of the packet, the second ring has buffers

>of type BTYPE_BODY which are used only if the received packet

>exceeds the available space of the head buffer.  There can

>zero or more body buffers for one packet.

>

>What the driver does is post mbuf's to both rings. If a jumbo

>frame is received; then a multi-segment mbuf is created and this

>is returned. The previous version of the driver was filling both

>rings, but the second ring was never used. It would error out

>if it ever received a multi-segment mbuf.

>

>The logic is very similar to existing methods used in other

>OS's. This patch has been used internally by several companies

>and users for several releases and tested with Jumbo frames.



I am fine with this change but just want to point it out that
the logic here deviates from the other OSes somewhat as they
populate the two rings differently:

The first ring has type (HEAD, BODY, BODY, HEAD, BODY, BODY)
assuming 3 descriptors are needed to hold a pkt of MTU size.  
The 2nd ring is all BODY type.  Only if BODY type from 1st ring
runs out will the 2nd ring be used.

Here the first ring is exclusively of HEAD type and any pkts
that cannot hold in the HEAD descriptor will go to the 2nd
ring.  This will work but then the number of descriptors that
actually are needed from the first ring is smaller.  If we
populates the 1st ring based on MTU as other OSes do, then
we just need 1 ring to handle jumbo frame, which ends up with
smaller working set, etc.

>

>Signed-off-by: Stephen Hemminger <shemming@brocade.com>

>---

> drivers/net/vmxnet3/vmxnet3_ring.h |  2 +

> drivers/net/vmxnet3/vmxnet3_rxtx.c | 78 +++++++++++++++++++-------------------

> 2 files changed, 42 insertions(+), 38 deletions(-)

>

>diff --git a/drivers/net/vmxnet3/vmxnet3_ring.h b/drivers/net/vmxnet3/vmxnet3_ring.h

>index 612487e..55ceadf 100644

>--- a/drivers/net/vmxnet3/vmxnet3_ring.h

>+++ b/drivers/net/vmxnet3/vmxnet3_ring.h

>@@ -171,6 +171,8 @@ typedef struct vmxnet3_rx_queue {

> 	uint32_t                    qid1;

> 	uint32_t                    qid2;

> 	Vmxnet3_RxQueueDesc         *shared;

>+	struct rte_mbuf		    *start_seg;

>+	struct rte_mbuf		    *last_seg;

> 	struct vmxnet3_rxq_stats    stats;

> 	bool                        stopped;

> 	uint16_t                    queue_id;      /**< Device RX queue index. */

>diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c

>index 4de5d89..00980a5 100644

>--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c

>+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c

>@@ -575,35 +575,11 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)

> 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;

> 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;

> 

>-		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {

>-			rte_pktmbuf_free_seg(rbi->m);

>-			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");

>-			goto rcd_done;

>-		}

>-

> 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);

> 

> 		VMXNET3_ASSERT(rcd->len <= rxd->len);

> 		VMXNET3_ASSERT(rbi->m);

> 

>-		if (unlikely(rcd->len == 0)) {

>-			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",

>-				   ring_idx, idx);

>-			VMXNET3_ASSERT(rcd->sop && rcd->eop);

>-			rte_pktmbuf_free_seg(rbi->m);

>-			goto rcd_done;

>-		}

>-

>-		/* Assuming a packet is coming in a single packet buffer */

>-		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {

>-			PMD_RX_LOG(DEBUG,

>-				   "Alert : Misbehaving device, incorrect "

>-				   " buffer type used. iPacket dropped.");

>-			rte_pktmbuf_free_seg(rbi->m);

>-			goto rcd_done;

>-		}

>-		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

>-

> 		/* Get the packet buffer pointer from buf_info */

> 		rxm = rbi->m;

> 

>@@ -615,7 +591,7 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)

> 		rxq->cmd_ring[ring_idx].next2comp = idx;

> 

> 		/* For RCD with EOP set, check if there is frame error */

>-		if (unlikely(rcd->err)) {

>+		if (unlikely(rcd->eop && rcd->err)) {

> 			rxq->stats.drop_total++;

> 			rxq->stats.drop_err++;

> 

>@@ -641,9 +617,45 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)

> 		rxm->ol_flags = 0;

> 		rxm->vlan_tci = 0;

> 

>-		vmxnet3_rx_offload(rcd, rxm);

>+		/*

>+		 * If this is the first buffer of the received packet,

>+		 * set the pointer to the first mbuf of the packet

>+		 * Otherwise, update the total length and the number of segments

>+		 * of the current scattered packet, and update the pointer to

>+		 * the last mbuf of the current packet.

>+		 */

>+		if (rcd->sop) {

>+			VMXNET3_ASSERT(!rxq->start_seg);


nit: change this to VMXNET3(rxq->start_seg == NULL) to be consistent
with the check for start != NULL in the else block.

>+			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);

>+

>+			if (unlikely(rcd->len == 0)) {

>+				PMD_RX_LOG(DEBUG,

>+					   "Rx buf was skipped. rxring[%d][%d])",

>+					   ring_idx, idx);


Can you add back the assert VMXNET3_ASSERT(rcd->sop && rcd->top) here.

>+				rte_pktmbuf_free_seg(rxm);

>+				goto rcd_done;

>+			}

>+

>+			rxq->start_seg = rxm;

>+			vmxnet3_rx_offload(rcd, rxm);

>+		} else {

>+			struct rte_mbuf *start = rxq->start_seg;

>+

>+			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);

>+			VMXNET3_ASSERT(start != NULL);

>+

>+			start->pkt_len += rxm->data_len;

>+			start->nb_segs++;

>+

>+			rxq->last_seg->next = rxm;

>+		}

>+		rxq->last_seg = rxm;

>+

>+		if (rcd->eop) {

>+			rx_pkts[nb_rx++] = rxq->start_seg;

>+			rxq->start_seg = NULL;

>+		}

> 

>-		rx_pkts[nb_rx++] = rxm;

> rcd_done:

> 		rxq->cmd_ring[ring_idx].next2comp = idx;

> 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);

>@@ -812,20 +824,9 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,

> 	int size;

> 	uint8_t i;

> 	char mem_name[32];

>-	uint16_t buf_size;

> 

> 	PMD_INIT_FUNC_TRACE();

> 

>-	buf_size = rte_pktmbuf_data_room_size(mp) -

>-		RTE_PKTMBUF_HEADROOM;

>-

>-	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {

>-		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "

>-			     "VMXNET3 don't support scatter packets yet",

>-			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);

>-		return -EINVAL;

>-	}

>-

> 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);

> 	if (rxq == NULL) {

> 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");

>@@ -944,6 +945,7 @@ vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)

> 			}

> 		}

> 		rxq->stopped = FALSE;

>+		rxq->start_seg = NULL;

> 	}

> 

> 	for (i = 0; i < dev->data->nb_tx_queues; i++) {

>-- 

>2.1.4

>
  

Patch

diff --git a/drivers/net/vmxnet3/vmxnet3_ring.h b/drivers/net/vmxnet3/vmxnet3_ring.h
index 612487e..55ceadf 100644
--- a/drivers/net/vmxnet3/vmxnet3_ring.h
+++ b/drivers/net/vmxnet3/vmxnet3_ring.h
@@ -171,6 +171,8 @@  typedef struct vmxnet3_rx_queue {
 	uint32_t                    qid1;
 	uint32_t                    qid2;
 	Vmxnet3_RxQueueDesc         *shared;
+	struct rte_mbuf		    *start_seg;
+	struct rte_mbuf		    *last_seg;
 	struct vmxnet3_rxq_stats    stats;
 	bool                        stopped;
 	uint16_t                    queue_id;      /**< Device RX queue index. */
diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c
index 4de5d89..00980a5 100644
--- a/drivers/net/vmxnet3/vmxnet3_rxtx.c
+++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c
@@ -575,35 +575,11 @@  vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;
 		rbi = rxq->cmd_ring[ring_idx].buf_info + idx;
 
-		if (unlikely(rcd->sop != 1 || rcd->eop != 1)) {
-			rte_pktmbuf_free_seg(rbi->m);
-			PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)");
-			goto rcd_done;
-		}
-
 		PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx);
 
 		VMXNET3_ASSERT(rcd->len <= rxd->len);
 		VMXNET3_ASSERT(rbi->m);
 
-		if (unlikely(rcd->len == 0)) {
-			PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)",
-				   ring_idx, idx);
-			VMXNET3_ASSERT(rcd->sop && rcd->eop);
-			rte_pktmbuf_free_seg(rbi->m);
-			goto rcd_done;
-		}
-
-		/* Assuming a packet is coming in a single packet buffer */
-		if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {
-			PMD_RX_LOG(DEBUG,
-				   "Alert : Misbehaving device, incorrect "
-				   " buffer type used. iPacket dropped.");
-			rte_pktmbuf_free_seg(rbi->m);
-			goto rcd_done;
-		}
-		VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
-
 		/* Get the packet buffer pointer from buf_info */
 		rxm = rbi->m;
 
@@ -615,7 +591,7 @@  vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxq->cmd_ring[ring_idx].next2comp = idx;
 
 		/* For RCD with EOP set, check if there is frame error */
-		if (unlikely(rcd->err)) {
+		if (unlikely(rcd->eop && rcd->err)) {
 			rxq->stats.drop_total++;
 			rxq->stats.drop_err++;
 
@@ -641,9 +617,45 @@  vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 		rxm->ol_flags = 0;
 		rxm->vlan_tci = 0;
 
-		vmxnet3_rx_offload(rcd, rxm);
+		/*
+		 * If this is the first buffer of the received packet,
+		 * set the pointer to the first mbuf of the packet
+		 * Otherwise, update the total length and the number of segments
+		 * of the current scattered packet, and update the pointer to
+		 * the last mbuf of the current packet.
+		 */
+		if (rcd->sop) {
+			VMXNET3_ASSERT(!rxq->start_seg);
+			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);
+
+			if (unlikely(rcd->len == 0)) {
+				PMD_RX_LOG(DEBUG,
+					   "Rx buf was skipped. rxring[%d][%d])",
+					   ring_idx, idx);
+				rte_pktmbuf_free_seg(rxm);
+				goto rcd_done;
+			}
+
+			rxq->start_seg = rxm;
+			vmxnet3_rx_offload(rcd, rxm);
+		} else {
+			struct rte_mbuf *start = rxq->start_seg;
+
+			VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_BODY);
+			VMXNET3_ASSERT(start != NULL);
+
+			start->pkt_len += rxm->data_len;
+			start->nb_segs++;
+
+			rxq->last_seg->next = rxm;
+		}
+		rxq->last_seg = rxm;
+
+		if (rcd->eop) {
+			rx_pkts[nb_rx++] = rxq->start_seg;
+			rxq->start_seg = NULL;
+		}
 
-		rx_pkts[nb_rx++] = rxm;
 rcd_done:
 		rxq->cmd_ring[ring_idx].next2comp = idx;
 		VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);
@@ -812,20 +824,9 @@  vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	int size;
 	uint8_t i;
 	char mem_name[32];
-	uint16_t buf_size;
 
 	PMD_INIT_FUNC_TRACE();
 
-	buf_size = rte_pktmbuf_data_room_size(mp) -
-		RTE_PKTMBUF_HEADROOM;
-
-	if (dev->data->dev_conf.rxmode.max_rx_pkt_len > buf_size) {
-		PMD_INIT_LOG(ERR, "buf_size = %u, max_pkt_len = %u, "
-			     "VMXNET3 don't support scatter packets yet",
-			     buf_size, dev->data->dev_conf.rxmode.max_rx_pkt_len);
-		return -EINVAL;
-	}
-
 	rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE);
 	if (rxq == NULL) {
 		PMD_INIT_LOG(ERR, "Can not allocate rx queue structure");
@@ -944,6 +945,7 @@  vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev)
 			}
 		}
 		rxq->stopped = FALSE;
+		rxq->start_seg = NULL;
 	}
 
 	for (i = 0; i < dev->data->nb_tx_queues; i++) {