[v8,2/2] net/af_xdp: Refactor af_xdp_tx_zc

Message ID 20250206204645.1564535-3-ariel.otilibili@6wind.com (mailing list archive)
State Superseded
Delegated to: Stephen Hemminger
Series Fix use after free, and refactor af_xdp_tx_zc

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-sample-apps-testing success Testing PASS

Commit Message

Ariel Otilibili Feb. 6, 2025, 8:46 p.m. UTC
Both branches of the loop share the same logic: reserving a slot in the TX
ring and filling the address and length into the descriptor.

This is moved into reserve_and_fill().
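
For illustration, both call sites then reduce to calls of the form below
(a sketch only; the full change is in the diff below):

	/* zero-copy branch: mbuf already comes from the umem mempool */
	desc = reserve_and_fill(txq, mbuf, umem, NULL);

	/* copy branch: stage the payload in a umem-backed mbuf first */
	desc = reserve_and_fill(txq, local_mbuf, umem, &pkt);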

Bugzilla ID: 1440
Suggested-by: Maryam Tahhan <mtahhan@redhat.com>
Signed-off-by: Ariel Otilibili <ariel.otilibili@6wind.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 75 ++++++++++++++++-------------
 1 file changed, 41 insertions(+), 34 deletions(-)
  

Comments

Stephen Hemminger Feb. 6, 2025, 9:42 p.m. UTC | #1
On Thu,  6 Feb 2025 21:46:45 +0100
Ariel Otilibili <ariel.otilibili@6wind.com> wrote:

>  
> +static inline struct xdp_desc *
> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
> +		 struct xsk_umem_info *umem, void **pkt_ptr)
> +{
> +	struct xdp_desc *desc = NULL;
> +	uint64_t addr, offset;
> +	uint32_t idx_tx;
> +
> +	if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx))
> +		goto out;
> +
> +	desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> +	desc->len = mbuf->pkt_len;
> +
> +	addr = (uint64_t)mbuf - (uint64_t)umem->buffer
> +		- umem->mb_pool->header_size;

addr (and the cast of mbuf) should probably be uintptr_t since the
intent is to do calculations with pointers.
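
Concretely, the address computation would then look something like this
(a sketch of the suggestion, not part of the posted patch):

	uint64_t addr, offset;

	addr = (uintptr_t)mbuf - (uintptr_t)umem->buffer
		- umem->mb_pool->header_size;
	offset = rte_pktmbuf_mtod(mbuf, uintptr_t) - (uintptr_t)mbuf
		+ umem->mb_pool->header_size;
	offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
	desc->addr = addr | offset;	/* desc->addr is __u64, so the final store is unchanged */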
  
Maryam Tahhan Feb. 7, 2025, 7:09 a.m. UTC | #2
>
> <snip>
>

> @@ -559,51 +587,30 @@ af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  		mbuf = bufs[i];
>  
>  		if (mbuf->pool == umem->mb_pool) {
> -			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> +		  if (!(desc = reserve_and_fill(txq, mbuf, umem, NULL))) {
>  				kick_tx(txq, cq);
> -				if (!xsk_ring_prod__reserve(&txq->tx, 1,
> -							    &idx_tx))
> +				desc = reserve_and_fill(txq, mbuf, umem, NULL);
> +				if (!desc)
>  					goto out;
>  			}
> -			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -			desc->len = mbuf->pkt_len;
> -			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
> -					umem->mb_pool->header_size;
> -			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
> -					(uint64_t)mbuf +
> -					umem->mb_pool->header_size;
> -			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -			desc->addr = addr | offset;
> +
>  			tx_bytes += desc->len;
>  			count++;
>  		} else {
> -			struct rte_mbuf *local_mbuf =
> -					rte_pktmbuf_alloc(umem->mb_pool);
> -			void *pkt;
> -
> -			if (local_mbuf == NULL)
> +			if (!(local_mbuf = rte_pktmbuf_alloc(umem->mb_pool)))
>  				goto out;
>  
> -			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
> +			desc = reserve_and_fill(txq, local_mbuf, umem, &pkt);
> +			if (!desc) {
>  				rte_pktmbuf_free(local_mbuf);
>  				goto out;
>  			}
>  
> -			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
> -			desc->len = mbuf->pkt_len;
> -
> -			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
> -					umem->mb_pool->header_size;
> -			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
> -					(uint64_t)local_mbuf +
> -					umem->mb_pool->header_size;
> -			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
> -			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
> -			desc->addr = addr | offset;
> +			desc->len = local_mbuf->pkt_len;
> 

Sorry if my remarks were confusing; this was just missing from the previous
patch, and it needs to be:

desc->len = mbuf->pkt_len;

We need to keep this the same as the original code. This is the scenario
where we copy the data from an mbuf that isn't from the pool of buffers
allocated for the umem, so desc->len needs to be set to the length of that
(non-umem) mbuf.
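
Something like the following in the copy branch (a sketch of the intended
fix, reusing the names from the patch):

	desc = reserve_and_fill(txq, local_mbuf, umem, &pkt);
	if (!desc) {
		rte_pktmbuf_free(local_mbuf);
		goto out;
	}

	desc->len = mbuf->pkt_len;	/* length of the original, non-umem mbuf */
	rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *), desc->len);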

The other changes look good. Nearly there,

Thanks again

<snip>
  
Maryam Tahhan Feb. 7, 2025, 9:18 a.m. UTC | #3
On 06/02/2025 21:42, Stephen Hemminger wrote:
> On Thu,  6 Feb 2025 21:46:45 +0100
> Ariel Otilibili <ariel.otilibili@6wind.com> wrote:
>
>>   
>> +static inline struct xdp_desc *
>> +reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
>> +		 struct xsk_umem_info *umem, void **pkt_ptr)
>> +{
>> +	struct xdp_desc *desc = NULL;
>> +	uint64_t addr, offset;
>> +	uint32_t idx_tx;
>> +
>> +	if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx))
>> +		goto out;
>> +
>> +	desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
>> +	desc->len = mbuf->pkt_len;
>> +
>> +	addr = (uint64_t)mbuf - (uint64_t)umem->buffer
>> +		- umem->mb_pool->header_size;
> addr (and the cast of mbuf) should probably be uintptr_t since the
> intent is to do calculations with pointers.
>
I think it's ok, as we would end up casting it anyway for the `struct xdp_desc`:

/* Rx/Tx descriptor */
struct xdp_desc {
     __u64 addr;
     __u32 len;
     __u32 options;
};
  
Ariel Otilibili Feb. 7, 2025, 10:48 a.m. UTC | #4
Hi Maryam, hi Stephen;

On Fri, Feb 7, 2025 at 10:14 AM Maryam Tahhan <mtahhan@redhat.com> wrote:

> Sorry if my remarks were confusing, it was just missing from the previous
> patch and it needs to be:
> desc->len = mbuf->pkt_len;
>
> We need to keep this the same as the original code. This is a scenario
> where we need to copy the data from an mbuf that isn't in from the pool of
> buffers allocated for the umem. So the desc->len needs to be set to that of
> the (non umem) mbuf.
>
> The other changes look good. Nearly there,
>
> Thanks again
>

Thanks for the feedback. There it is:
http://inbox.dpdk.org/dev/20250207104552.1663519-1-ariel.otilibili@6wind.com/

>
> <snip>
>
  

Patch

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 092bcb73aa0a..ce80f32041fb 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -536,21 +536,49 @@  kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
 		}
 }
 
+static inline struct xdp_desc *
+reserve_and_fill(struct pkt_tx_queue *txq, struct rte_mbuf *mbuf,
+		 struct xsk_umem_info *umem, void **pkt_ptr)
+{
+	struct xdp_desc *desc = NULL;
+	uint64_t addr, offset;
+	uint32_t idx_tx;
+
+	if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx))
+		goto out;
+
+	desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
+	desc->len = mbuf->pkt_len;
+
+	addr = (uint64_t)mbuf - (uint64_t)umem->buffer
+		- umem->mb_pool->header_size;
+	offset = rte_pktmbuf_mtod(mbuf, uint64_t) - (uint64_t)mbuf
+		+ umem->mb_pool->header_size;
+
+	if (pkt_ptr)
+		*pkt_ptr = xsk_umem__get_data(umem->buffer, addr + offset);
+
+	offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+	desc->addr = addr | offset;
+
+out:
+	return desc;
+}
+
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 static uint16_t
 af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
 	struct pkt_tx_queue *txq = queue;
 	struct xsk_umem_info *umem = txq->umem;
-	struct rte_mbuf *mbuf;
+	struct rte_mbuf *mbuf, *local_mbuf = NULL;
 	unsigned long tx_bytes = 0;
 	int i;
-	uint32_t idx_tx;
 	uint16_t count = 0;
 	struct xdp_desc *desc;
-	uint64_t addr, offset;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
 	uint32_t free_thresh = cq->size >> 1;
+	void *pkt;
 
 	if (xsk_cons_nb_avail(cq, free_thresh) >= free_thresh)
 		pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);
@@ -559,51 +587,30 @@  af_xdp_tx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		mbuf = bufs[i];
 
 		if (mbuf->pool == umem->mb_pool) {
-			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+		  if (!(desc = reserve_and_fill(txq, mbuf, umem, NULL))) {
 				kick_tx(txq, cq);
-				if (!xsk_ring_prod__reserve(&txq->tx, 1,
-							    &idx_tx))
+				desc = reserve_and_fill(txq, mbuf, umem, NULL);
+				if (!desc)
 					goto out;
 			}
-			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
-			desc->len = mbuf->pkt_len;
-			addr = (uint64_t)mbuf - (uint64_t)umem->buffer -
-					umem->mb_pool->header_size;
-			offset = rte_pktmbuf_mtod(mbuf, uint64_t) -
-					(uint64_t)mbuf +
-					umem->mb_pool->header_size;
-			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-			desc->addr = addr | offset;
+
 			tx_bytes += desc->len;
 			count++;
 		} else {
-			struct rte_mbuf *local_mbuf =
-					rte_pktmbuf_alloc(umem->mb_pool);
-			void *pkt;
-
-			if (local_mbuf == NULL)
+			if (!(local_mbuf = rte_pktmbuf_alloc(umem->mb_pool)))
 				goto out;
 
-			if (!xsk_ring_prod__reserve(&txq->tx, 1, &idx_tx)) {
+			desc = reserve_and_fill(txq, local_mbuf, umem, &pkt);
+			if (!desc) {
 				rte_pktmbuf_free(local_mbuf);
 				goto out;
 			}
 
-			desc = xsk_ring_prod__tx_desc(&txq->tx, idx_tx);
-			desc->len = mbuf->pkt_len;
-
-			addr = (uint64_t)local_mbuf - (uint64_t)umem->buffer -
-					umem->mb_pool->header_size;
-			offset = rte_pktmbuf_mtod(local_mbuf, uint64_t) -
-					(uint64_t)local_mbuf +
-					umem->mb_pool->header_size;
-			pkt = xsk_umem__get_data(umem->buffer, addr + offset);
-			offset = offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-			desc->addr = addr | offset;
+			desc->len = local_mbuf->pkt_len;
 			rte_memcpy(pkt, rte_pktmbuf_mtod(mbuf, void *),
-					desc->len);
-			tx_bytes += desc->len;
+				   desc->len);
 			rte_pktmbuf_free(mbuf);
+			tx_bytes += desc->len;
 			count++;
 		}
 	}