[dpdk-dev,v4,14/17] mempool: add support to non-EAL thread
Commit Message
In a non-EAL thread, bypass the per-lcore cache and use the ring pool directly.
This allows rte_mempool to be used from either an EAL thread or any user pthread.
Since a non-EAL thread relies directly on rte_ring, which is not preemptible,
running multiple pthreads per CPU that compete for the same rte_mempool is not recommended:
it performs badly and carries a critical risk when the scheduling policy is real-time (RT).
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
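To illustrate the intended usage, below is a minimal sketch (not part of the patch) of a plain user pthread doing get/put on a mempool created after EAL initialization. Everything except the rte_* API and the pthread calls (user_thread, user_pool, the pool sizing, main) is hypothetical. In such a thread the per-lcore cache is bypassed, so every get/put operates on the underlying ring.

    #include <pthread.h>
    #include <rte_eal.h>
    #include <rte_mempool.h>

    /* Plain pthread, not an EAL lcore: mempool operations here take the
     * ring path introduced by this patch instead of the per-lcore cache. */
    static void *user_thread(void *arg)
    {
        struct rte_mempool *mp = arg;
        void *obj;

        if (rte_mempool_get(mp, &obj) == 0)
            rte_mempool_put(mp, obj);
        return NULL;
    }

    int main(int argc, char **argv)
    {
        pthread_t tid;
        struct rte_mempool *mp;

        if (rte_eal_init(argc, argv) < 0)
            return -1;

        /* 4095 objects of 64 bytes, 32-object per-lcore cache (only used
         * by EAL threads), no private data, no constructors. */
        mp = rte_mempool_create("user_pool", 4095, 64, 32, 0,
                                NULL, NULL, NULL, NULL,
                                SOCKET_ID_ANY, 0);
        if (mp == NULL)
            return -1;

        /* A single non-EAL consumer; as the commit message notes, several
         * non-EAL threads competing for one mempool are discouraged. */
        pthread_create(&tid, NULL, user_thread, mp);
        pthread_join(tid, NULL);
        return 0;
    }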
Comments
Hi,
On 02/02/2015 03:02 AM, Cunming Liang wrote:
> For non-EAL thread, bypass per lcore cache, directly use ring pool.
> It allows using rte_mempool in either EAL thread or any user pthread.
> As in non-EAL thread, it directly rely on rte_ring and it's none preemptive.
> It doesn't suggest to run multi-pthread/cpu which compete the rte_mempool.
> It will get bad performance and has critical risk if scheduling policy is RT.
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
> lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
> 1 file changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 3314651..4845f27 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -198,10 +198,12 @@ struct rte_mempool {
> * Number to add to the object-oriented statistics.
> */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> - unsigned __lcore_id = rte_lcore_id(); \
> - mp->stats[__lcore_id].name##_objs += n; \
> - mp->stats[__lcore_id].name##_bulk += 1; \
> +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> + unsigned __lcore_id = rte_lcore_id(); \
> + if (__lcore_id < RTE_MAX_LCORE) { \
> + mp->stats[__lcore_id].name##_objs += n; \
> + mp->stats[__lcore_id].name##_bulk += 1; \
> + } \
Does it mean that we have no statistics for non-EAL threads?
(same question for rings and timers in the next patches)
> } while(0)
> #else
> #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> @@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> __MEMPOOL_STAT_ADD(mp, put, n);
>
> #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> - /* cache is not enabled or single producer */
> - if (unlikely(cache_size == 0 || is_mp == 0))
> + /* cache is not enabled or single producer or none EAL thread */
> + if (unlikely(cache_size == 0 || is_mp == 0 ||
> + lcore_id >= RTE_MAX_LCORE))
> goto ring_enqueue;
>
> /* Go straight to ring if put would overflow mem allocated for cache */
> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> uint32_t cache_size = mp->cache_size;
>
> /* cache is not enabled or single consumer */
> - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> + if (unlikely(cache_size == 0 || is_mc == 0 ||
> + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> goto ring_dequeue;
>
> cache = &mp->local_cache[lcore_id];
>
What is the performance impact of adding this test?
Regards,
Olivier
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, February 09, 2015 4:01 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 14/17] mempool: add support to non-EAL
> thread
>
> Hi,
>
> On 02/02/2015 03:02 AM, Cunming Liang wrote:
> > For non-EAL thread, bypass per lcore cache, directly use ring pool.
> > It allows using rte_mempool in either EAL thread or any user pthread.
> > As in non-EAL thread, it directly rely on rte_ring and it's none preemptive.
> > It doesn't suggest to run multi-pthread/cpu which compete the rte_mempool.
> > It will get bad performance and has critical risk if scheduling policy is RT.
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> > lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
> > 1 file changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/librte_mempool/rte_mempool.h
> b/lib/librte_mempool/rte_mempool.h
> > index 3314651..4845f27 100644
> > --- a/lib/librte_mempool/rte_mempool.h
> > +++ b/lib/librte_mempool/rte_mempool.h
> > @@ -198,10 +198,12 @@ struct rte_mempool {
> > * Number to add to the object-oriented statistics.
> > */
> > #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> > -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> > - unsigned __lcore_id = rte_lcore_id(); \
> > - mp->stats[__lcore_id].name##_objs += n; \
> > - mp->stats[__lcore_id].name##_bulk += 1; \
> > +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> > + unsigned __lcore_id = rte_lcore_id(); \
> > + if (__lcore_id < RTE_MAX_LCORE) { \
> > + mp->stats[__lcore_id].name##_objs += n; \
> > + mp->stats[__lcore_id].name##_bulk += 1; \
> > + } \
>
> Does it mean that we have no statistics for non-EAL threads?
> (same question for rings and timers in the next patches)
[LCM] Yes, that is the case in this patch set; it focuses on EAL threads and on making sure nothing breaks when running in a non-EAL thread.
Full non-EAL functionality will come in a separate patch set that enhances non-EAL threads as a second step.
>
>
> > } while(0)
> > #else
> > #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> > @@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void
> * const *obj_table,
> > __MEMPOOL_STAT_ADD(mp, put, n);
> >
> > #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> > - /* cache is not enabled or single producer */
> > - if (unlikely(cache_size == 0 || is_mp == 0))
> > + /* cache is not enabled or single producer or none EAL thread */
> > + if (unlikely(cache_size == 0 || is_mp == 0 ||
> > + lcore_id >= RTE_MAX_LCORE))
> > goto ring_enqueue;
> >
> > /* Go straight to ring if put would overflow mem allocated for cache */
> > @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void
> **obj_table,
> > uint32_t cache_size = mp->cache_size;
> >
> > /* cache is not enabled or single consumer */
> > - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> > + if (unlikely(cache_size == 0 || is_mc == 0 ||
> > + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> > goto ring_dequeue;
> >
> > cache = &mp->local_cache[lcore_id];
> >
>
> What is the performance impact of adding this test?
[LCM] According to perf in the unit test, it is almost the same. But I haven't measured the case where an EAL thread and a non-EAL thread share the same mempool.
>
>
> Regards,
> Olivier
Hi,
On 02/09/2015 03:41 PM, Liang, Cunming wrote:
>>> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>> -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
>>> - unsigned __lcore_id = rte_lcore_id(); \
>>> - mp->stats[__lcore_id].name##_objs += n; \
>>> - mp->stats[__lcore_id].name##_bulk += 1; \
>>> +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
>>> + unsigned __lcore_id = rte_lcore_id(); \
>>> + if (__lcore_id < RTE_MAX_LCORE) { \
>>> + mp->stats[__lcore_id].name##_objs += n; \
>>> + mp->stats[__lcore_id].name##_bulk += 1; \
>>> + } \
>>
>> Does it mean that we have no statistics for non-EAL threads?
>> (same question for rings and timers in the next patches)
> [LCM] Yes, it is in this patch set, mainly focus on EAL thread and make sure no running issue on non-EAL thread.
> For full non-EAL function, will have other patch set to enhance non-EAL thread as the 2nd step.
OK
>>> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void
>> **obj_table,
>>> uint32_t cache_size = mp->cache_size;
>>>
>>> /* cache is not enabled or single consumer */
>>> - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
>>> + if (unlikely(cache_size == 0 || is_mc == 0 ||
>>> + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
>>> goto ring_dequeue;
>>>
>>> cache = &mp->local_cache[lcore_id];
>>>
>>
>> What is the performance impact of adding this test?
> [LCM] By perf in unit test, it's almost the same. But haven't measure EAL thread and non-EAL thread share the same mempool.
When you say "unit test", are you talking about mempool tests from
"make test"? Do you have some numbers to share?
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, February 10, 2015 1:52 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 14/17] mempool: add support to non-EAL
> thread
>
> Hi,
>
> On 02/09/2015 03:41 PM, Liang, Cunming wrote:
> >>> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> >>> -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> >>> - unsigned __lcore_id = rte_lcore_id(); \
> >>> - mp->stats[__lcore_id].name##_objs += n; \
> >>> - mp->stats[__lcore_id].name##_bulk += 1; \
> >>> +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> >>> + unsigned __lcore_id = rte_lcore_id(); \
> >>> + if (__lcore_id < RTE_MAX_LCORE) { \
> >>> + mp->stats[__lcore_id].name##_objs += n; \
> >>> + mp->stats[__lcore_id].name##_bulk += 1; \
> >>> + } \
> >>
> >> Does it mean that we have no statistics for non-EAL threads?
> >> (same question for rings and timers in the next patches)
> > [LCM] Yes, it is in this patch set, mainly focus on EAL thread and make sure no
> running issue on non-EAL thread.
> > For full non-EAL function, will have other patch set to enhance non-EAL thread
> as the 2nd step.
>
> OK
>
> >>> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp,
> void
> >> **obj_table,
> >>> uint32_t cache_size = mp->cache_size;
> >>>
> >>> /* cache is not enabled or single consumer */
> >>> - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> >>> + if (unlikely(cache_size == 0 || is_mc == 0 ||
> >>> + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> >>> goto ring_dequeue;
> >>>
> >>> cache = &mp->local_cache[lcore_id];
> >>>
> >>
> >> What is the performance impact of adding this test?
> > [LCM] By perf in unit test, it's almost the same. But haven't measure EAL thread
> and non-EAL thread share the same mempool.
>
>
> When you say "unit test", are you talking about mempool tests from
> "make test"? Do you have some numbers to share?
[LCM] I mean the DPDK app/test, running mempool_perf_test. I will add numbers in v5.
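For reference, a measurement of that kind could look like the sketch below. It is not the mempool_perf_test code; ITERATIONS, BULK and the function name are made up. Timing a bulk get/put loop with rte_rdtsc() lets the same function be called once from an EAL lcore (cached path) and once from a plain pthread (ring path) on the same mempool, so the two costs can be compared.

    #include <rte_cycles.h>
    #include <rte_mempool.h>

    #define ITERATIONS 1000000
    #define BULK 32

    /* Cycles spent on ITERATIONS bulk get/put pairs against 'mp'. */
    static uint64_t
    time_get_put(struct rte_mempool *mp)
    {
        void *objs[BULK];
        uint64_t start = rte_rdtsc();
        unsigned i;

        for (i = 0; i < ITERATIONS; i++) {
            if (rte_mempool_get_bulk(mp, objs, BULK) == 0)
                rte_mempool_put_bulk(mp, objs, BULK);
        }
        return rte_rdtsc() - start;
    }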
@@ -198,10 +198,12 @@ struct rte_mempool {
* Number to add to the object-oriented statistics.
*/
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
- unsigned __lcore_id = rte_lcore_id(); \
- mp->stats[__lcore_id].name##_objs += n; \
- mp->stats[__lcore_id].name##_bulk += 1; \
+#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
+ unsigned __lcore_id = rte_lcore_id(); \
+ if (__lcore_id < RTE_MAX_LCORE) { \
+ mp->stats[__lcore_id].name##_objs += n; \
+ mp->stats[__lcore_id].name##_bulk += 1; \
+ } \
} while(0)
#else
#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
@@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
__MEMPOOL_STAT_ADD(mp, put, n);
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
- /* cache is not enabled or single producer */
- if (unlikely(cache_size == 0 || is_mp == 0))
+ /* cache is not enabled or single producer or none EAL thread */
+ if (unlikely(cache_size == 0 || is_mp == 0 ||
+ lcore_id >= RTE_MAX_LCORE))
goto ring_enqueue;
/* Go straight to ring if put would overflow mem allocated for cache */
@@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
uint32_t cache_size = mp->cache_size;
/* cache is not enabled or single consumer */
- if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
+ if (unlikely(cache_size == 0 || is_mc == 0 ||
+ n >= cache_size || lcore_id >= RTE_MAX_LCORE))
goto ring_dequeue;
cache = &mp->local_cache[lcore_id];
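Application code can mirror the new condition to tell in advance which path a get/put will take. Below is a minimal sketch, assuming (as the hunks above do) that rte_lcore_id() returns a value not below RTE_MAX_LCORE in a non-EAL thread; it deliberately ignores the single-producer/consumer flags and the burst-size check.

    #include <rte_lcore.h>
    #include <rte_mempool.h>

    /* Returns 1 when get/put from the calling thread will bypass the
     * per-lcore cache and go straight to the ring. */
    static inline int
    mempool_cache_bypassed(const struct rte_mempool *mp)
    {
        return mp->cache_size == 0 || rte_lcore_id() >= RTE_MAX_LCORE;
    }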