event/eth_tx: prefetch mbuf headers

Message ID 20250328054339.489914-1-mattias.ronnblom@ericsson.com (mailing list archive)
State Changes Requested
Delegated to: Jerin Jacob
Headers
Series event/eth_tx: prefetch mbuf headers |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS

Commit Message

Mattias Rönnblom March 28, 2025, 5:43 a.m. UTC
Prefetch mbuf headers, resulting in ~10% throughput improvement when
the Ethernet RX and TX Adapters are hosted on the same core (likely
~2x in case a dedicated TX core is used).

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Tested-by: Peter Nilsson <peter.j.nilsson@ericsson.com>
---
 lib/eventdev/rte_event_eth_tx_adapter.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
  

Comments

Mattias Rönnblom March 28, 2025, 6:07 a.m. UTC | #1
On 2025-03-28 06:43, Mattias Rönnblom wrote:
> Prefetch mbuf headers, resulting in ~10% throughput improvement when
> the Ethernet RX and TX Adapters are hosted on the same core (likely
> ~2x in case a dedicated TX core is used).
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> Tested-by: Peter Nilsson <peter.j.nilsson@ericsson.com>

It should be added that what has been tested is the
non-RTE_EVENT_TYPE_VECTOR case.

> ---
>   lib/eventdev/rte_event_eth_tx_adapter.c | 20 ++++++++++++++++++++
>   1 file changed, 20 insertions(+)
> 
> diff --git a/lib/eventdev/rte_event_eth_tx_adapter.c b/lib/eventdev/rte_event_eth_tx_adapter.c
> index 67fff8b7d6..d740ae00f9 100644
> --- a/lib/eventdev/rte_event_eth_tx_adapter.c
> +++ b/lib/eventdev/rte_event_eth_tx_adapter.c
> @@ -598,6 +598,12 @@ txa_process_event_vector(struct txa_service_data *txa,
>   	return nb_tx;
>   }
>   
> +static inline void
> +txa_prefetch_mbuf(struct rte_mbuf *mbuf)
> +{
> +	rte_mbuf_prefetch_part1(mbuf);
> +}
> +
>   static void
>   txa_service_tx(struct txa_service_data *txa, struct rte_event *ev,
>   	uint32_t n)
> @@ -608,6 +614,20 @@ txa_service_tx(struct txa_service_data *txa, struct rte_event *ev,
>   
>   	stats = &txa->stats;
>   
> +	for (i = 0; i < n; i++) {
> +		struct rte_event *event = &ev[i];
> +
> +		if (unlikely(event->event_type & RTE_EVENT_TYPE_VECTOR)) {
> +			struct rte_event_vector *vec = event->vec;
> +			struct rte_mbuf **mbufs = vec->mbufs;
> +			uint32_t k;
> +
> +			for (k = 0; k < vec->nb_elem; k++)
> +				txa_prefetch_mbuf(mbufs[k]);
> +		} else
> +			txa_prefetch_mbuf(event->mbuf);
> +	}
> +
>   	nb_tx = 0;
>   	for (i = 0; i < n; i++) {
>   		uint16_t port;
  
Mattias Rönnblom May 20, 2025, 12:56 p.m. UTC | #2
Unaddressed
On 2025-03-28 07:07, Mattias Rönnblom wrote:
> On 2025-03-28 06:43, Mattias Rönnblom wrote:
>> Prefetch mbuf headers, resulting in ~10% throughput improvement when
>> the Ethernet RX and TX Adapters are hosted on the same core (likely
>> ~2x in case a dedicated TX core is used).
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> Tested-by: Peter Nilsson <peter.j.nilsson@ericsson.com>
> 

<snip>

Naga, could you comment on this patch?
  
Naga Harish K, S V May 27, 2025, 10:55 a.m. UTC | #3
> -----Original Message-----
> From: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> Sent: Friday, March 28, 2025 11:14 AM
> To: dev@dpdk.org
> Cc: Mattias Rönnblom <hofors@lysator.liu.se>; Naga Harish K, S V
> <s.v.naga.harish.k@intel.com>; Jerin Jacob <jerinj@marvell.com>; Mattias
> Rönnblom <mattias.ronnblom@ericsson.com>; Peter Nilsson
> <peter.j.nilsson@ericsson.com>
> Subject: [PATCH] event/eth_tx: prefetch mbuf headers
> 
> Prefetch mbuf headers, resulting in ~10% throughput improvement when the
> Ethernet RX and TX Adapters are hosted on the same core (likely ~2x in case a
> dedicated TX core is used).
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> Tested-by: Peter Nilsson <peter.j.nilsson@ericsson.com>
> ---
>  lib/eventdev/rte_event_eth_tx_adapter.c | 20 ++++++++++++++++++++
>  1 file changed, 20 insertions(+)
> 
> diff --git a/lib/eventdev/rte_event_eth_tx_adapter.c
> b/lib/eventdev/rte_event_eth_tx_adapter.c
> index 67fff8b7d6..d740ae00f9 100644
> --- a/lib/eventdev/rte_event_eth_tx_adapter.c
> +++ b/lib/eventdev/rte_event_eth_tx_adapter.c
> @@ -598,6 +598,12 @@ txa_process_event_vector(struct txa_service_data
> *txa,
>  	return nb_tx;
>  }
> 
> +static inline void
> +txa_prefetch_mbuf(struct rte_mbuf *mbuf) {
> +	rte_mbuf_prefetch_part1(mbuf);
> +}
> +
>  static void
>  txa_service_tx(struct txa_service_data *txa, struct rte_event *ev,
>  	uint32_t n)
> @@ -608,6 +614,20 @@ txa_service_tx(struct txa_service_data *txa, struct
> rte_event *ev,
> 
>  	stats = &txa->stats;
> 
> +	for (i = 0; i < n; i++) {
> +		struct rte_event *event = &ev[i];
> +
> +		if (unlikely(event->event_type & RTE_EVENT_TYPE_VECTOR))


This gives a branch prediction advantage to non-vector events. Is that the intention?

> {
> +			struct rte_event_vector *vec = event->vec;
> +			struct rte_mbuf **mbufs = vec->mbufs;
> +			uint32_t k;
> +
> +			for (k = 0; k < vec->nb_elem; k++)
> +				txa_prefetch_mbuf(mbufs[k]);
> +		} else
> +			txa_prefetch_mbuf(event->mbuf);
> +	}
> +
>  	nb_tx = 0;
>  	for (i = 0; i < n; i++) {
>  		uint16_t port;
> --
> 2.43.0
  

Patch

diff --git a/lib/eventdev/rte_event_eth_tx_adapter.c b/lib/eventdev/rte_event_eth_tx_adapter.c
index 67fff8b7d6..d740ae00f9 100644
--- a/lib/eventdev/rte_event_eth_tx_adapter.c
+++ b/lib/eventdev/rte_event_eth_tx_adapter.c
@@ -598,6 +598,12 @@  txa_process_event_vector(struct txa_service_data *txa,
 	return nb_tx;
 }
 
+static inline void
+txa_prefetch_mbuf(struct rte_mbuf *mbuf)
+{
+	rte_mbuf_prefetch_part1(mbuf);
+}
+
 static void
 txa_service_tx(struct txa_service_data *txa, struct rte_event *ev,
 	uint32_t n)
@@ -608,6 +614,20 @@  txa_service_tx(struct txa_service_data *txa, struct rte_event *ev,
 
 	stats = &txa->stats;
 
+	for (i = 0; i < n; i++) {
+		struct rte_event *event = &ev[i];
+
+		if (unlikely(event->event_type & RTE_EVENT_TYPE_VECTOR)) {
+			struct rte_event_vector *vec = event->vec;
+			struct rte_mbuf **mbufs = vec->mbufs;
+			uint32_t k;
+
+			for (k = 0; k < vec->nb_elem; k++)
+				txa_prefetch_mbuf(mbufs[k]);
+		} else
+			txa_prefetch_mbuf(event->mbuf);
+	}
+
 	nb_tx = 0;
 	for (i = 0; i < n; i++) {
 		uint16_t port;