[2/4] eventdev: have ethernet Rx adapter appropriately report idle

Message ID 20221010145406.118880-3-mattias.ronnblom@ericsson.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Series Have event adapters report idle status

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Mattias Rönnblom Oct. 10, 2022, 2:54 p.m. UTC
  Update the Event Ethernet Rx Adapter's service function to report as
idle (i.e., return -EAGAIN) in case no Ethernet frames were received
from the ethdev and no events were enqueued to the event device.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eventdev/rte_event_eth_rx_adapter.c | 56 ++++++++++++++++++-------
 1 file changed, 41 insertions(+), 15 deletions(-)
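
The return-value convention relied on here is that of an rte_service
callback: return 0 when the iteration performed useful work, and -EAGAIN
when it did not, so that the service core framework can account the
iteration as idle rather than busy. Below is a minimal sketch of the
pattern; my_ctx and poll_work() are hypothetical placeholders for
application state and a helper returning the number of items processed.

#include <errno.h>
#include <stdint.h>

struct my_ctx;					/* hypothetical application state */
unsigned int poll_work(struct my_ctx *ctx);	/* hypothetical helper */

/* Service callback following the same convention as rxa_service_func():
 * report the iteration as idle by returning -EAGAIN when nothing was done.
 */
static int32_t
my_service_func(void *args)
{
	struct my_ctx *ctx = args;
	unsigned int nb_work;

	nb_work = poll_work(ctx);

	/* 0 = useful work was done; -EAGAIN = idle iteration */
	return nb_work > 0 ? 0 : -EAGAIN;
}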
  

Comments

Jayatheerthan, Jay Oct. 11, 2022, 7:10 a.m. UTC | #1
@Harish, Could you review the patch?

-Jay

  
Naga Harish K, S V Oct. 13, 2022, 1:32 a.m. UTC | #2
> -----Original Message-----
> From: Jayatheerthan, Jay <jay.jayatheerthan@intel.com>
> Sent: Tuesday, October 11, 2022 12:40 PM
> To: mattias.ronnblom <mattias.ronnblom@ericsson.com>; Carrillo, Erik G
> <erik.g.carrillo@intel.com>; Gujjar, Abhinandan S
> <abhinandan.gujjar@intel.com>; Jerin Jacob <jerinj@marvell.com>; Naga
> Harish K, S V <s.v.naga.harish.k@intel.com>
> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
> hofors@lysator.liu.se; mattias.ronnblom <mattias.ronnblom@ericsson.com>
> Subject: RE: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
> report idle
> 
> @Harish, Could you review the patch ?
> 
> -Jay
> 
> > -----Original Message-----
> > From: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> > Sent: Monday, October 10, 2022 8:24 PM
> > To: Jayatheerthan, Jay <jay.jayatheerthan@intel.com>; Carrillo, Erik G
> > <erik.g.carrillo@intel.com>; Gujjar, Abhinandan S
> > <abhinandan.gujjar@intel.com>; Jerin Jacob <jerinj@marvell.com>
> > Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
> > hofors@lysator.liu.se; mattias.ronnblom
> > <mattias.ronnblom@ericsson.com>
> > Subject: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
> > report idle
> >
> > Update the Event Ethernet Rx Adapter's service function to report as
> > idle (i.e., return -EAGAIN) in case no Ethernet frames were received
> > from the ethdev and no events were enqueued to the event device.
> >
> > Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> > ---
> >  lib/eventdev/rte_event_eth_rx_adapter.c | 56
> > ++++++++++++++++++-------
> >  1 file changed, 41 insertions(+), 15 deletions(-)
> >
> > diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c
> > b/lib/eventdev/rte_event_eth_rx_adapter.c
> > index 5c3021a184..cf7bbd4d69 100644
> > --- a/lib/eventdev/rte_event_eth_rx_adapter.c
> > +++ b/lib/eventdev/rte_event_eth_rx_adapter.c
> > @@ -1184,7 +1184,7 @@ rxa_intr_thread(void *arg)
> >  /* Dequeue <port, q> from interrupt ring and enqueue received
> >   * mbufs to eventdev
> >   */
> > -static inline void
> > +static inline bool
> >  rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter)  {
> >  	uint32_t n;
> > @@ -1194,20 +1194,27 @@ rxa_intr_ring_dequeue(struct
> event_eth_rx_adapter *rx_adapter)
> >  	struct rte_event_eth_rx_adapter_stats *stats;
> >  	rte_spinlock_t *ring_lock;
> >  	uint8_t max_done = 0;
> > +	bool work = false;
> >
> >  	if (rx_adapter->num_rx_intr == 0)
> > -		return;
> > +		return work;
> >
> >  	if (rte_ring_count(rx_adapter->intr_ring) == 0
> >  		&& !rx_adapter->qd_valid)
> > -		return;
> > +		return work;
> >
> >  	buf = &rx_adapter->event_enqueue_buffer;
> >  	stats = &rx_adapter->stats;
> >  	ring_lock = &rx_adapter->intr_ring_lock;
> >
> > -	if (buf->count >= BATCH_SIZE)
> > -		rxa_flush_event_buffer(rx_adapter, buf, stats);
> > +	if (buf->count >= BATCH_SIZE) {
> > +		uint16_t n;
> > +
> > +		n = rxa_flush_event_buffer(rx_adapter, buf, stats);
> > +
> > +		if (likely(n > 0))
> > +			work = true;
> > +	}
> >
> >  	while (rxa_pkt_buf_available(buf)) {
> >  		struct eth_device_info *dev_info;
> > @@ -1289,7 +1296,12 @@ rxa_intr_ring_dequeue(struct
> event_eth_rx_adapter *rx_adapter)
> >  	}
> >
> >  done:
> > -	rx_adapter->stats.rx_intr_packets += nb_rx;
> > +	if (nb_rx > 0) {

How are the performance numbers before and after this patch?
Trying to understand the performance impact, as a new condition has been added to the service function datapath.

> > +		rx_adapter->stats.rx_intr_packets += nb_rx;
> > +		work = true;
> > +	}
> > +
> > +	return work;
> >  }
> >
> >  /*
> > @@ -1305,7 +1317,7 @@ rxa_intr_ring_dequeue(struct
> event_eth_rx_adapter *rx_adapter)
> >   * the hypervisor's switching layer where adjustments can be made to deal
> with
> >   * it.
> >   */
> > -static inline void
> > +static inline bool
> >  rxa_poll(struct event_eth_rx_adapter *rx_adapter)  {
> >  	uint32_t num_queue;
> > @@ -1314,6 +1326,7 @@ rxa_poll(struct event_eth_rx_adapter
> *rx_adapter)
> >  	struct rte_event_eth_rx_adapter_stats *stats = NULL;
> >  	uint32_t wrr_pos;
> >  	uint32_t max_nb_rx;
> > +	bool work = false;
> >
> >  	wrr_pos = rx_adapter->wrr_pos;
> >  	max_nb_rx = rx_adapter->max_nb_rx;
> > @@ -1329,14 +1342,20 @@ rxa_poll(struct event_eth_rx_adapter
> *rx_adapter)
> >  		/* Don't do a batch dequeue from the rx queue if there isn't
> >  		 * enough space in the enqueue buffer.
> >  		 */
> > -		if (buf->count >= BATCH_SIZE)
> > -			rxa_flush_event_buffer(rx_adapter, buf, stats);
> > +		if (buf->count >= BATCH_SIZE) {
> > +			uint16_t n;
> > +
> > +			n = rxa_flush_event_buffer(rx_adapter, buf, stats);
> > +
> > +			if (likely(n > 0))
> > +				work = true;

Same as above

> > +		}
> >  		if (!rxa_pkt_buf_available(buf)) {
> >  			if (rx_adapter->use_queue_event_buf)
> >  				goto poll_next_entry;
> >  			else {
> >  				rx_adapter->wrr_pos = wrr_pos;
> > -				return;
> > +				break;
> >  			}
> >  		}
> >
> > @@ -1352,6 +1371,11 @@ rxa_poll(struct event_eth_rx_adapter
> *rx_adapter)
> >  		if (++wrr_pos == rx_adapter->wrr_len)
> >  			wrr_pos = 0;
> >  	}
> > +
> > +	if (nb_rx > 0)
> > +		work = true;
> > +
> > +	return work;

Same as above

> >  }
> >
> >  static void
> > @@ -1384,12 +1408,14 @@ static int
> >  rxa_service_func(void *args)
> >  {
> >  	struct event_eth_rx_adapter *rx_adapter = args;
> > +	bool intr_work;
> > +	bool poll_work;
> >
> >  	if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
> > -		return 0;
> > +		return -EAGAIN;
> >  	if (!rx_adapter->rxa_started) {
> >  		rte_spinlock_unlock(&rx_adapter->rx_lock);
> > -		return 0;
> > +		return -EAGAIN;
> >  	}
> >
> >  	if (rx_adapter->ena_vector) {
> > @@ -1410,12 +1436,12 @@ rxa_service_func(void *args)
> >  		}
> >  	}
> >
> > -	rxa_intr_ring_dequeue(rx_adapter);
> > -	rxa_poll(rx_adapter);
> > +	intr_work = rxa_intr_ring_dequeue(rx_adapter);
> > +	poll_work = rxa_poll(rx_adapter);
> >
> >  	rte_spinlock_unlock(&rx_adapter->rx_lock);
> >
> > -	return 0;
> > +	return intr_work || poll_work ? 0 : -EAGAIN;
> >  }
> >
> >  static void *
> > --
> > 2.34.1
  
Mattias Rönnblom Oct. 13, 2022, 9:53 a.m. UTC | #3
On 2022-10-13 03:32, Naga Harish K, S V wrote:
> 
> 
>> -----Original Message-----
>> From: Jayatheerthan, Jay <jay.jayatheerthan@intel.com>
>> Sent: Tuesday, October 11, 2022 12:40 PM
>> To: mattias.ronnblom <mattias.ronnblom@ericsson.com>; Carrillo, Erik G
>> <erik.g.carrillo@intel.com>; Gujjar, Abhinandan S
>> <abhinandan.gujjar@intel.com>; Jerin Jacob <jerinj@marvell.com>; Naga
>> Harish K, S V <s.v.naga.harish.k@intel.com>
>> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
>> hofors@lysator.liu.se; mattias.ronnblom <mattias.ronnblom@ericsson.com>
>> Subject: RE: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
>> report idle
>>
>> @Harish, Could you review the patch ?
>>
>> -Jay
>>
>>> -----Original Message-----
>>> From: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>> Sent: Monday, October 10, 2022 8:24 PM
>>> To: Jayatheerthan, Jay <jay.jayatheerthan@intel.com>; Carrillo, Erik G
>>> <erik.g.carrillo@intel.com>; Gujjar, Abhinandan S
>>> <abhinandan.gujjar@intel.com>; Jerin Jacob <jerinj@marvell.com>
>>> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
>>> hofors@lysator.liu.se; mattias.ronnblom
>>> <mattias.ronnblom@ericsson.com>
>>> Subject: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
>>> report idle
>>>
>>> Update the Event Ethernet Rx Adapter's service function to report as
>>> idle (i.e., return -EAGAIN) in case no Ethernet frames were received
>>> from the ethdev and no events were enqueued to the event device.
>>>
>>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>>> ---
>>>   lib/eventdev/rte_event_eth_rx_adapter.c | 56
>>> ++++++++++++++++++-------
>>>   1 file changed, 41 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c
>>> b/lib/eventdev/rte_event_eth_rx_adapter.c
>>> index 5c3021a184..cf7bbd4d69 100644
>>> --- a/lib/eventdev/rte_event_eth_rx_adapter.c
>>> +++ b/lib/eventdev/rte_event_eth_rx_adapter.c
>>> @@ -1184,7 +1184,7 @@ rxa_intr_thread(void *arg)
>>>   /* Dequeue <port, q> from interrupt ring and enqueue received
>>>    * mbufs to eventdev
>>>    */
>>> -static inline void
>>> +static inline bool
>>>   rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter)  {
>>>   	uint32_t n;
>>> @@ -1194,20 +1194,27 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>>   	struct rte_event_eth_rx_adapter_stats *stats;
>>>   	rte_spinlock_t *ring_lock;
>>>   	uint8_t max_done = 0;
>>> +	bool work = false;
>>>
>>>   	if (rx_adapter->num_rx_intr == 0)
>>> -		return;
>>> +		return work;
>>>
>>>   	if (rte_ring_count(rx_adapter->intr_ring) == 0
>>>   		&& !rx_adapter->qd_valid)
>>> -		return;
>>> +		return work;
>>>
>>>   	buf = &rx_adapter->event_enqueue_buffer;
>>>   	stats = &rx_adapter->stats;
>>>   	ring_lock = &rx_adapter->intr_ring_lock;
>>>
>>> -	if (buf->count >= BATCH_SIZE)
>>> -		rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> +	if (buf->count >= BATCH_SIZE) {
>>> +		uint16_t n;
>>> +
>>> +		n = rxa_flush_event_buffer(rx_adapter, buf, stats);
>>> +
>>> +		if (likely(n > 0))
>>> +			work = true;
>>> +	}
>>>
>>>   	while (rxa_pkt_buf_available(buf)) {
>>>   		struct eth_device_info *dev_info;
>>> @@ -1289,7 +1296,12 @@ rxa_intr_ring_dequeue(struct
>> event_eth_rx_adapter *rx_adapter)
>>>   	}
>>>
>>>   done:
>>> -	rx_adapter->stats.rx_intr_packets += nb_rx;
>>> +	if (nb_rx > 0) {
> 
> How are the performance numbers before and after this patch?
> Trying to understand the performance impact, as a new condition has been added to the service function datapath.
> 
I haven't tested the RX and TX adapters separately, but if you run them 
on the same core, I get the following result:

Without patches, with stats disabled: 16.0 Mpps
Without patches, with stats enabled: 16.1 Mpps
With patches, with stats disabled: 16.1 Mpps
With patches, with stats enabled: 16.2 Mpps

So on this particular hardware, compiler, and test application, these 
patches actually make the RX+TX adapter perform better, despite adding 
a tiny bit of additional logic. This is contrary to what you might 
expect, and I'm sure YMMV.

Enabling service core statistics (which boils down to two rdtsc reads 
and some cheap arithmetic in rte_service.c) actually makes the RX+TX 
adapter core perform better, both before and after this patchset. Also 
contrary to what you might expect.

The results are consistent across multiple runs.

GCC 11.2.0 and AMD Zen 3 @ 3.7 GHz. Event device is DSW and I/O is the 
ring Ethdev.
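
For reference, the service statistics in question are toggled per service
via the rte_service API. A minimal sketch, assuming the adapter's service
id has already been retrieved (e.g. with
rte_event_eth_rx_adapter_service_id_get()); show_service_stats() is a
hypothetical helper:

#include <inttypes.h>
#include <stdio.h>
#include <stdint.h>

#include <rte_service.h>

static void
show_service_stats(uint32_t service_id)
{
	uint64_t calls = 0, cycles = 0;

	/* enable the per-iteration cycle accounting mentioned above */
	rte_service_set_stats_enable(service_id, 1);

	/* ... run traffic for a while ... */

	rte_service_attr_get(service_id, RTE_SERVICE_ATTR_CALL_COUNT, &calls);
	rte_service_attr_get(service_id, RTE_SERVICE_ATTR_CYCLES, &cycles);

	printf("calls: %" PRIu64 ", cycles: %" PRIu64 "\n", calls, cycles);
}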

  
Jerin Jacob Oct. 14, 2022, 5:36 p.m. UTC | #4
On Thu, Oct 13, 2022 at 3:23 PM Mattias Rönnblom
<mattias.ronnblom@ericsson.com> wrote:
>
> On 2022-10-13 03:32, Naga Harish K, S V wrote:
> >
> >
> >> -----Original Message-----
> >> From: Jayatheerthan, Jay <jay.jayatheerthan@intel.com>
> >> Sent: Tuesday, October 11, 2022 12:40 PM
> >> To: mattias.ronnblom <mattias.ronnblom@ericsson.com>; Carrillo, Erik G
> >> <erik.g.carrillo@intel.com>; Gujjar, Abhinandan S
> >> <abhinandan.gujjar@intel.com>; Jerin Jacob <jerinj@marvell.com>; Naga
> >> Harish K, S V <s.v.naga.harish.k@intel.com>
> >> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
> >> hofors@lysator.liu.se; mattias.ronnblom <mattias.ronnblom@ericsson.com>
> >> Subject: RE: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
> >> report idle
> >>
> >> @Harish, Could you review the patch ?
> >>
> >> -Jay
> >>
> >>>
> >>>   done:
> >>> -   rx_adapter->stats.rx_intr_packets += nb_rx;
> >>> +   if (nb_rx > 0) {
> >
> > How are the performance numbers before and after this patch?
> > Trying to understand the performance impact, as a new condition has been added to the service function datapath.
> >
> I haven't tested the RX and TX adapters separately, but if you run them
> on the same core, I get the following result:
>
> Without patches, with stats disabled: 16.0 Mpps
> Without patches, with stats enabled: 16.1 Mpps
> With patches, with stats disabled: 16.1 Mpps
> With patches, with stats enabled: 16.2 Mpps
>
> So on this particular hardware, compiler, and test application, these
> patches actually make the RX+TX adapter perform better, despite adding
> a tiny bit of additional logic. This is contrary to what you might
> expect, and I'm sure YMMV.
>
> Enabling service core statistics (which boils down to two rdtsc reads
> and some cheap arithmetic in rte_service.c) actually makes the RX+TX
> adapter core perform better, both before and after this patchset. Also
> contrary to what you might expect.
>
> The results are consistent across multiple runs.
>
> GCC 11.2.0 and AMD Zen 3 @ 3.7 GHz. Event device is DSW and I/O is the
> ring Ethdev.

@Naga Harish K, S V @Jayatheerthan, Jay @Gujjar, Abhinandan S @Erik Gabriel Carrillo
Planning to take this series for rc2. If there are no other comments, I will merge the series then.
  
Naga Harish K, S V Oct. 17, 2022, 12:36 p.m. UTC | #5
> -----Original Message-----
> From: Jerin Jacob <jerinjacobk@gmail.com>
> Sent: Friday, October 14, 2022 11:07 PM
> To: mattias.ronnblom <mattias.ronnblom@ericsson.com>
> Cc: Naga Harish K, S V <s.v.naga.harish.k@intel.com>; Jayatheerthan, Jay
> <jay.jayatheerthan@intel.com>; Carrillo, Erik G <Erik.G.Carrillo@intel.com>;
> Gujjar, Abhinandan S <abhinandan.gujjar@intel.com>; Jerin Jacob
> <jerinj@marvell.com>; dev@dpdk.org; Van Haaren, Harry
> <harry.van.haaren@intel.com>; hofors@lysator.liu.se
> Subject: Re: [PATCH 2/4] eventdev: have ethernet Rx adapter appropriately
> report idle
> 
> On Thu, Oct 13, 2022 at 3:23 PM Mattias Rönnblom
> <mattias.ronnblom@ericsson.com> wrote:
> >
> > On 2022-10-13 03:32, Naga Harish K, S V wrote:
> > >
> > >
> > >> -----Original Message-----
> > >> From: Jayatheerthan, Jay <jay.jayatheerthan@intel.com>
> > >> Sent: Tuesday, October 11, 2022 12:40 PM
> > >> To: mattias.ronnblom <mattias.ronnblom@ericsson.com>; Carrillo,
> > >> Erik G <erik.g.carrillo@intel.com>; Gujjar, Abhinandan S
> > >> <abhinandan.gujjar@intel.com>; Jerin Jacob <jerinj@marvell.com>;
> > >> Naga Harish K, S V <s.v.naga.harish.k@intel.com>
> > >> Cc: dev@dpdk.org; Van Haaren, Harry <harry.van.haaren@intel.com>;
> > >> hofors@lysator.liu.se; mattias.ronnblom
> > >> <mattias.ronnblom@ericsson.com>
> > >> Subject: RE: [PATCH 2/4] eventdev: have ethernet Rx adapter
> > >> appropriately report idle

nitpick:
the headline can be changed to use the "eventdev/eth_rx:" prefix for the Rx adapter patches, to make it consistent with the convention currently followed.

A similar change applies to the patches for the other adapters.

After these changes, my "Reviewed-by:" tag can be added to patches 2 and 3 (Rx adapter and Tx adapter).

-Harish

  
Jayatheerthan, Jay Oct. 18, 2022, 9:19 a.m. UTC | #6
Looks good to me. Thanks for submitting this!

Acked-by: Jay Jayatheerthan <jay.jayatheerthan@intel.com>

-Jay



  

Patch

diff --git a/lib/eventdev/rte_event_eth_rx_adapter.c b/lib/eventdev/rte_event_eth_rx_adapter.c
index 5c3021a184..cf7bbd4d69 100644
--- a/lib/eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/eventdev/rte_event_eth_rx_adapter.c
@@ -1184,7 +1184,7 @@  rxa_intr_thread(void *arg)
 /* Dequeue <port, q> from interrupt ring and enqueue received
  * mbufs to eventdev
  */
-static inline void
+static inline bool
 rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter)
 {
 	uint32_t n;
@@ -1194,20 +1194,27 @@  rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter)
 	struct rte_event_eth_rx_adapter_stats *stats;
 	rte_spinlock_t *ring_lock;
 	uint8_t max_done = 0;
+	bool work = false;
 
 	if (rx_adapter->num_rx_intr == 0)
-		return;
+		return work;
 
 	if (rte_ring_count(rx_adapter->intr_ring) == 0
 		&& !rx_adapter->qd_valid)
-		return;
+		return work;
 
 	buf = &rx_adapter->event_enqueue_buffer;
 	stats = &rx_adapter->stats;
 	ring_lock = &rx_adapter->intr_ring_lock;
 
-	if (buf->count >= BATCH_SIZE)
-		rxa_flush_event_buffer(rx_adapter, buf, stats);
+	if (buf->count >= BATCH_SIZE) {
+		uint16_t n;
+
+		n = rxa_flush_event_buffer(rx_adapter, buf, stats);
+
+		if (likely(n > 0))
+			work = true;
+	}
 
 	while (rxa_pkt_buf_available(buf)) {
 		struct eth_device_info *dev_info;
@@ -1289,7 +1296,12 @@  rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter)
 	}
 
 done:
-	rx_adapter->stats.rx_intr_packets += nb_rx;
+	if (nb_rx > 0) {
+		rx_adapter->stats.rx_intr_packets += nb_rx;
+		work = true;
+	}
+
+	return work;
 }
 
 /*
@@ -1305,7 +1317,7 @@  rxa_intr_ring_dequeue(struct event_eth_rx_adapter *rx_adapter)
  * the hypervisor's switching layer where adjustments can be made to deal with
  * it.
  */
-static inline void
+static inline bool
 rxa_poll(struct event_eth_rx_adapter *rx_adapter)
 {
 	uint32_t num_queue;
@@ -1314,6 +1326,7 @@  rxa_poll(struct event_eth_rx_adapter *rx_adapter)
 	struct rte_event_eth_rx_adapter_stats *stats = NULL;
 	uint32_t wrr_pos;
 	uint32_t max_nb_rx;
+	bool work = false;
 
 	wrr_pos = rx_adapter->wrr_pos;
 	max_nb_rx = rx_adapter->max_nb_rx;
@@ -1329,14 +1342,20 @@  rxa_poll(struct event_eth_rx_adapter *rx_adapter)
 		/* Don't do a batch dequeue from the rx queue if there isn't
 		 * enough space in the enqueue buffer.
 		 */
-		if (buf->count >= BATCH_SIZE)
-			rxa_flush_event_buffer(rx_adapter, buf, stats);
+		if (buf->count >= BATCH_SIZE) {
+			uint16_t n;
+
+			n = rxa_flush_event_buffer(rx_adapter, buf, stats);
+
+			if (likely(n > 0))
+				work = true;
+		}
 		if (!rxa_pkt_buf_available(buf)) {
 			if (rx_adapter->use_queue_event_buf)
 				goto poll_next_entry;
 			else {
 				rx_adapter->wrr_pos = wrr_pos;
-				return;
+				break;
 			}
 		}
 
@@ -1352,6 +1371,11 @@  rxa_poll(struct event_eth_rx_adapter *rx_adapter)
 		if (++wrr_pos == rx_adapter->wrr_len)
 			wrr_pos = 0;
 	}
+
+	if (nb_rx > 0)
+		work = true;
+
+	return work;
 }
 
 static void
@@ -1384,12 +1408,14 @@  static int
 rxa_service_func(void *args)
 {
 	struct event_eth_rx_adapter *rx_adapter = args;
+	bool intr_work;
+	bool poll_work;
 
 	if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
-		return 0;
+		return -EAGAIN;
 	if (!rx_adapter->rxa_started) {
 		rte_spinlock_unlock(&rx_adapter->rx_lock);
-		return 0;
+		return -EAGAIN;
 	}
 
 	if (rx_adapter->ena_vector) {
@@ -1410,12 +1436,12 @@  rxa_service_func(void *args)
 		}
 	}
 
-	rxa_intr_ring_dequeue(rx_adapter);
-	rxa_poll(rx_adapter);
+	intr_work = rxa_intr_ring_dequeue(rx_adapter);
+	poll_work = rxa_poll(rx_adapter);
 
 	rte_spinlock_unlock(&rx_adapter->rx_lock);
 
-	return 0;
+	return intr_work || poll_work ? 0 : -EAGAIN;
 }
 
 static void *
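
For completeness, below is a minimal sketch of how an application would
typically run the adapter's service function on a service lcore, so that
the -EAGAIN idle reports above feed into the service cores' accounting.
run_rx_adapter_service() is a hypothetical helper; adapter_id and
service_lcore are assumed to be set up elsewhere, and error handling is
omitted.

#include <stdint.h>

#include <rte_event_eth_rx_adapter.h>
#include <rte_service.h>

static void
run_rx_adapter_service(uint8_t adapter_id, uint32_t service_lcore)
{
	uint32_t service_id;

	/* look up the service id backing the Rx adapter */
	rte_event_eth_rx_adapter_service_id_get(adapter_id, &service_id);

	/* map the service to a service lcore and start it */
	rte_service_lcore_add(service_lcore);
	rte_service_map_lcore_set(service_id, service_lcore, 1);
	rte_service_runstate_set(service_id, 1);
	rte_service_lcore_start(service_lcore);
}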