[v7,5/5] net/af_xdp: enable zero copy
diff mbox series

Message ID 20190327090027.72170-6-xiaolong.ye@intel.com
State Superseded, archived
Delegated to: Ferruh Yigit
Headers show
Series
  • Introduce AF_XDP PMD
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Ye Xiaolong March 27, 2019, 9 a.m. UTC
Try to check if external mempool (from rx_queue_setup) is fit for
af_xdp, if it is, it will be registered to af_xdp socket directly and
there will be no packet data copy on Rx and Tx.

Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 130 ++++++++++++++++++++--------
 1 file changed, 96 insertions(+), 34 deletions(-)

Comments

Ferruh Yigit March 28, 2019, 6:44 p.m. UTC | #1
On 3/27/2019 9:00 AM, Xiaolong Ye wrote:
> Try to check if external mempool (from rx_queue_setup) is fit for
> af_xdp, if it is, it will be registered to af_xdp socket directly and
> there will be no packet data copy on Rx and Tx.
> 
> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
> ---
>  drivers/net/af_xdp/rte_eth_af_xdp.c | 130 ++++++++++++++++++++--------
>  1 file changed, 96 insertions(+), 34 deletions(-)
> 
> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
> index a1fda9212..c6ade4c94 100644
> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
> @@ -67,6 +67,7 @@ struct xsk_umem_info {
>  	struct xsk_umem *umem;
>  	struct rte_mempool *mb_pool;
>  	void *buffer;
> +	uint8_t zc;
>  };
>  
>  struct rx_stats {
> @@ -85,6 +86,7 @@ struct pkt_rx_queue {
>  
>  	struct pkt_tx_queue *pair;
>  	uint16_t queue_idx;
> +	uint8_t zc;
>  };
>  
>  struct tx_stats {

<...>

> @@ -630,6 +685,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>  
>  	internals->umem = rxq->umem;
>  
> +	if (mb_pool == internals->umem->mb_pool)
> +		rxq->zc = internals->umem->zc;
> +
> +	if (rxq->zc)
> +		AF_XDP_LOG(INFO,
> +			"zero copy enabled on rx queue %d\n", rx_queue_id);
> +

The "zero copy" implemented in this patch, also the variable 'zc', is from
'af_xdp' umem to mbuf data via versa copy, right?
There is also another "zero copy" support in af_xdp, device to buffers...
Do you think can these be confused with each other, should we have another log
message and variable name for this one?
Indeed I can't think of a good name, but something like, "pmd/driver zero copy"
& 'pmd_zc' ??
Ye Xiaolong March 29, 2019, 1:53 a.m. UTC | #2
On 03/28, Ferruh Yigit wrote:
>On 3/27/2019 9:00 AM, Xiaolong Ye wrote:
>> Try to check if external mempool (from rx_queue_setup) is fit for
>> af_xdp, if it is, it will be registered to af_xdp socket directly and
>> there will be no packet data copy on Rx and Tx.
>> 
>> Signed-off-by: Xiaolong Ye <xiaolong.ye@intel.com>
>> ---
>>  drivers/net/af_xdp/rte_eth_af_xdp.c | 130 ++++++++++++++++++++--------
>>  1 file changed, 96 insertions(+), 34 deletions(-)
>> 
>> diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> index a1fda9212..c6ade4c94 100644
>> --- a/drivers/net/af_xdp/rte_eth_af_xdp.c
>> +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
>> @@ -67,6 +67,7 @@ struct xsk_umem_info {
>>  	struct xsk_umem *umem;
>>  	struct rte_mempool *mb_pool;
>>  	void *buffer;
>> +	uint8_t zc;
>>  };
>>  
>>  struct rx_stats {
>> @@ -85,6 +86,7 @@ struct pkt_rx_queue {
>>  
>>  	struct pkt_tx_queue *pair;
>>  	uint16_t queue_idx;
>> +	uint8_t zc;
>>  };
>>  
>>  struct tx_stats {
>
><...>
>
>> @@ -630,6 +685,13 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>>  
>>  	internals->umem = rxq->umem;
>>  
>> +	if (mb_pool == internals->umem->mb_pool)
>> +		rxq->zc = internals->umem->zc;
>> +
>> +	if (rxq->zc)
>> +		AF_XDP_LOG(INFO,
>> +			"zero copy enabled on rx queue %d\n", rx_queue_id);
>> +
>
>The "zero copy" implemented in this patch, also the variable 'zc', is from
>'af_xdp' umem to mbuf data via versa copy, right?
>There is also another "zero copy" support in af_xdp, device to buffers...
>Do you think can these be confused with each other, should we have another log
>message and variable name for this one?
>Indeed I can't think of a good name, but something like, "pmd/driver zero copy"
>& 'pmd_zc' ??

That's a good suggestion, will adopt it.

Thanks,
Xiaolong

Patch
diff mbox series

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index a1fda9212..c6ade4c94 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -67,6 +67,7 @@  struct xsk_umem_info {
 	struct xsk_umem *umem;
 	struct rte_mempool *mb_pool;
 	void *buffer;
+	uint8_t zc;
 };
 
 struct rx_stats {
@@ -85,6 +86,7 @@  struct pkt_rx_queue {
 
 	struct pkt_tx_queue *pair;
 	uint16_t queue_idx;
+	uint8_t zc;
 };
 
 struct tx_stats {
@@ -202,7 +204,9 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
 		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE);
 
-	if (unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool, mbufs, rcvd) != 0))
+	if (!rxq->zc &&
+		unlikely(rte_pktmbuf_alloc_bulk(rxq->mb_pool,
+				mbufs, rcvd) != 0))
 		return 0;
 
 	for (i = 0; i < rcvd; i++) {
@@ -216,13 +220,23 @@  eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		len = desc->len;
 		pkt = xsk_umem__get_data(rxq->umem->buffer, addr);
 
-		rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *), pkt, len);
-		rte_pktmbuf_pkt_len(mbufs[i]) = len;
-		rte_pktmbuf_data_len(mbufs[i]) = len;
-		rx_bytes += len;
-		bufs[count++] = mbufs[i];
-
-		rte_pktmbuf_free(addr_to_mbuf(umem, addr));
+		if (rxq->zc) {
+			struct rte_mbuf *mbuf;
+			mbuf = addr_to_mbuf(rxq->umem, addr);
+			rte_pktmbuf_pkt_len(mbuf) = len;
+			rte_pktmbuf_data_len(mbuf) = len;
+			rx_bytes += len;
+			bufs[count++] = mbuf;
+		} else {
+			rte_memcpy(rte_pktmbuf_mtod(mbufs[i], void *),
+					pkt, len);
+			rte_pktmbuf_pkt_len(mbufs[i]) = len;
+			rte_pktmbuf_data_len(mbufs[i]) = len;
+			rx_bytes += len;
+			bufs[count++] = mbufs[i];
+
+			rte_pktmbuf_free(addr_to_mbuf(umem, addr));
+		}
 	}
 
 	xsk_ring_cons__release(rx, rcvd);
@@ -295,22 +309,29 @@  eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 					- ETH_AF_XDP_DATA_HEADROOM;
 		desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx + i);
 		mbuf = bufs[i];
-		if (mbuf->pkt_len <= buf_len) {
-			mbuf_to_tx = rte_pktmbuf_alloc(umem->mb_pool);
-			if (unlikely(mbuf_to_tx == NULL)) {
-				rte_pktmbuf_free(mbuf);
-				continue;
-			}
-			desc->addr = mbuf_to_addr(umem, mbuf_to_tx);
+		if (txq->pair->zc && mbuf->pool == umem->mb_pool) {
+			desc->addr = mbuf_to_addr(umem, mbuf);
 			desc->len = mbuf->pkt_len;
-			pkt = xsk_umem__get_data(umem->buffer,
-						 desc->addr);
-			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-			       desc->len);
 			valid++;
 			tx_bytes += mbuf->pkt_len;
+		} else {
+			if (mbuf->pkt_len <= buf_len) {
+				mbuf_to_tx = rte_pktmbuf_alloc(umem->mb_pool);
+				if (unlikely(mbuf_to_tx == NULL)) {
+					rte_pktmbuf_free(mbuf);
+					continue;
+				}
+				desc->addr = mbuf_to_addr(umem, mbuf_to_tx);
+				desc->len = mbuf->pkt_len;
+				pkt = xsk_umem__get_data(umem->buffer,
+							 desc->addr);
+				memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
+				       desc->len);
+				valid++;
+				tx_bytes += mbuf->pkt_len;
+			}
+			rte_pktmbuf_free(mbuf);
 		}
-		rte_pktmbuf_free(mbuf);
 	}
 
 	xsk_ring_prod__submit(&txq->tx, nb_pkts);
@@ -488,7 +509,7 @@  static inline uint64_t get_len(struct rte_mempool *mp)
 	return (uint64_t)(memhdr->len);
 }
 
-static struct xsk_umem_info *xdp_umem_configure(void)
+static struct xsk_umem_info *xdp_umem_configure(struct rte_mempool *mb_pool)
 {
 	struct xsk_umem_info *umem;
 	struct xsk_umem_config usr_config = {
@@ -505,16 +526,23 @@  static struct xsk_umem_info *xdp_umem_configure(void)
 		return NULL;
 	}
 
-	umem->mb_pool = rte_pktmbuf_pool_create_with_flags("af_xdp_mempool",
-			ETH_AF_XDP_NUM_BUFFERS,
-			250, 0,
-			ETH_AF_XDP_FRAME_SIZE -
-			ETH_AF_XDP_MBUF_OVERHEAD,
-			MEMPOOL_F_NO_SPREAD | MEMPOOL_CHUNK_F_PAGE_ALIGN,
-			SOCKET_ID_ANY);
-	if (umem->mb_pool == NULL || umem->mb_pool->nb_mem_chunks != 1) {
-		AF_XDP_LOG(ERR, "Failed to create mempool\n");
-		goto err;
+	if (!mb_pool) {
+		umem->mb_pool = rte_pktmbuf_pool_create_with_flags("af_xdp_mempool",
+				ETH_AF_XDP_NUM_BUFFERS,
+				250, 0,
+				ETH_AF_XDP_FRAME_SIZE -
+				ETH_AF_XDP_MBUF_OVERHEAD,
+				MEMPOOL_F_NO_SPREAD | MEMPOOL_CHUNK_F_PAGE_ALIGN,
+				SOCKET_ID_ANY);
+
+		if (umem->mb_pool == NULL ||
+				umem->mb_pool->nb_mem_chunks != 1) {
+			AF_XDP_LOG(ERR, "Failed to create mempool\n");
+			goto err;
+		}
+	} else {
+		umem->mb_pool = mb_pool;
+		umem->zc = 1;
 	}
 	base_addr = (void *)get_base_addr(umem->mb_pool);
 
@@ -536,16 +564,43 @@  static struct xsk_umem_info *xdp_umem_configure(void)
 	return NULL;
 }
 
+static uint8_t
+check_mempool_zc(struct rte_mempool *mp)
+{
+	RTE_ASSERT(mp);
+
+	/* must continues */
+	if (mp->nb_mem_chunks > 1)
+		return 0;
+
+	/* check header size */
+	if (mp->header_size != RTE_CACHE_LINE_SIZE)
+		return 0;
+
+	/* check base address */
+	if ((uint64_t)get_base_addr(mp) % getpagesize() != 0)
+		return 0;
+
+	/* check chunk size */
+	if ((mp->elt_size + mp->header_size + mp->trailer_size) %
+	    ETH_AF_XDP_FRAME_SIZE != 0)
+		return 0;
+
+	return 1;
+}
+
 static int
 xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
-	      int ring_size)
+	      int ring_size, struct rte_mempool *mb_pool)
 {
 	struct xsk_socket_config cfg;
 	struct pkt_tx_queue *txq = rxq->pair;
+	struct rte_mempool *mp;
 	int ret = 0;
 	int reserve_size;
 
-	rxq->umem = xdp_umem_configure();
+	mp = check_mempool_zc(mb_pool) ? mb_pool : NULL;
+	rxq->umem = xdp_umem_configure(mp);
 	if (rxq->umem == NULL)
 		return -ENOMEM;
 
@@ -622,7 +677,7 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	rxq->mb_pool = mb_pool;
 
-	if (xsk_configure(internals, rxq, nb_rx_desc)) {
+	if (xsk_configure(internals, rxq, nb_rx_desc, mb_pool)) {
 		AF_XDP_LOG(ERR, "Failed to configure xdp socket\n");
 		ret = -EINVAL;
 		goto err;
@@ -630,6 +685,13 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 
 	internals->umem = rxq->umem;
 
+	if (mb_pool == internals->umem->mb_pool)
+		rxq->zc = internals->umem->zc;
+
+	if (rxq->zc)
+		AF_XDP_LOG(INFO,
+			"zero copy enabled on rx queue %d\n", rx_queue_id);
+
 	dev->data->rx_queues[rx_queue_id] = rxq;
 	return 0;