[dpdk-dev,v4,14/17] mempool: add support to non-EAL thread

Message ID 1422842559-13617-15-git-send-email-cunming.liang@intel.com (mailing list archive)
State Superseded, archived

Commit Message

Cunming Liang Feb. 2, 2015, 2:02 a.m. UTC
  For a non-EAL thread, bypass the per-lcore cache and use the ring pool directly.
This allows rte_mempool to be used from either an EAL thread or any user pthread.
Since a non-EAL thread relies directly on rte_ring, which is not preemptible,
running multiple pthreads per CPU that compete for the same rte_mempool is not
recommended: performance will be poor, and there is a critical risk if the
scheduling policy is real-time (RT).

Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
 lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)
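
To make the intended usage concrete, here is a minimal sketch (not part of the
patch; the pool parameters and the non_eal_worker helper are illustrative) of a
plain pthread sharing a mempool created after rte_eal_init(). Inside the
pthread, rte_lcore_id() returns LCORE_ID_ANY, a value >= RTE_MAX_LCORE, so the
get/put calls skip the per-lcore cache and go straight to the ring:

#include <pthread.h>
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_mempool.h>

#define NB_OBJS 32

static void *
non_eal_worker(void *arg)
{
	struct rte_mempool *mp = arg;
	void *objs[NB_OBJS];

	/* No lcore id here: both calls bypass the cache and use the ring. */
	if (rte_mempool_get_bulk(mp, objs, NB_OBJS) == 0)
		rte_mempool_put_bulk(mp, objs, NB_OBJS);
	return NULL;
}

int
main(int argc, char **argv)
{
	struct rte_mempool *mp;
	pthread_t tid;

	if (rte_eal_init(argc, argv) < 0)
		return -1;

	/* cache_size 256: EAL threads use the per-lcore cache, the pthread does not. */
	mp = rte_mempool_create("sketch_pool", 4096, 64, 256, 0,
				NULL, NULL, NULL, NULL,
				SOCKET_ID_ANY, 0);
	if (mp == NULL)
		return -1;

	pthread_create(&tid, NULL, non_eal_worker, mp);
	pthread_join(tid, NULL);
	return 0;
}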
  

Comments

Olivier Matz Feb. 8, 2015, 8:01 p.m. UTC | #1
Hi,

On 02/02/2015 03:02 AM, Cunming Liang wrote:
> For a non-EAL thread, bypass the per-lcore cache and use the ring pool directly.
> This allows rte_mempool to be used from either an EAL thread or any user pthread.
> Since a non-EAL thread relies directly on rte_ring, which is not preemptible,
> running multiple pthreads per CPU that compete for the same rte_mempool is not
> recommended: performance will be poor, and there is a critical risk if the
> scheduling policy is real-time (RT).
> 
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
>  lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
>  1 file changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 3314651..4845f27 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -198,10 +198,12 @@ struct rte_mempool {
>   *   Number to add to the object-oriented statistics.
>   */
>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> -#define __MEMPOOL_STAT_ADD(mp, name, n) do {			\
> -		unsigned __lcore_id = rte_lcore_id();		\
> -		mp->stats[__lcore_id].name##_objs += n;		\
> -		mp->stats[__lcore_id].name##_bulk += 1;		\
> +#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
> +		unsigned __lcore_id = rte_lcore_id();           \
> +		if (__lcore_id < RTE_MAX_LCORE) {               \
> +			mp->stats[__lcore_id].name##_objs += n;	\
> +			mp->stats[__lcore_id].name##_bulk += 1;	\
> +		}                                               \

Does it mean that we have no statistics for non-EAL threads?
(same question for rings and timers in the next patches)


>  	} while(0)
>  #else
>  #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> @@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
>  	__MEMPOOL_STAT_ADD(mp, put, n);
>  
>  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> -	/* cache is not enabled or single producer */
> -	if (unlikely(cache_size == 0 || is_mp == 0))
> +	/* cache is not enabled or single producer or non-EAL thread */
> +	if (unlikely(cache_size == 0 || is_mp == 0 ||
> +		     lcore_id >= RTE_MAX_LCORE))
>  		goto ring_enqueue;
>  
>  	/* Go straight to ring if put would overflow mem allocated for cache */
> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>  	uint32_t cache_size = mp->cache_size;
>  
>  	/* cache is not enabled or single consumer */
> -	if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> +	if (unlikely(cache_size == 0 || is_mc == 0 ||
> +		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
>  		goto ring_dequeue;
>  
>  	cache = &mp->local_cache[lcore_id];
> 

What is the performance impact of adding this test?


Regards,
Olivier
  
Cunming Liang Feb. 9, 2015, 2:41 p.m. UTC | #2
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, February 09, 2015 4:01 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 14/17] mempool: add support to non-EAL thread
> 
> Hi,
> 
> On 02/02/2015 03:02 AM, Cunming Liang wrote:
> > For a non-EAL thread, bypass the per-lcore cache and use the ring pool directly.
> > This allows rte_mempool to be used from either an EAL thread or any user pthread.
> > Since a non-EAL thread relies directly on rte_ring, which is not preemptible,
> > running multiple pthreads per CPU that compete for the same rte_mempool is not
> > recommended: performance will be poor, and there is a critical risk if the
> > scheduling policy is real-time (RT).
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> >  lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
> >  1 file changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> > index 3314651..4845f27 100644
> > --- a/lib/librte_mempool/rte_mempool.h
> > +++ b/lib/librte_mempool/rte_mempool.h
> > @@ -198,10 +198,12 @@ struct rte_mempool {
> >   *   Number to add to the object-oriented statistics.
> >   */
> >  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> > -#define __MEMPOOL_STAT_ADD(mp, name, n) do {			\
> > -		unsigned __lcore_id = rte_lcore_id();		\
> > -		mp->stats[__lcore_id].name##_objs += n;		\
> > -		mp->stats[__lcore_id].name##_bulk += 1;		\
> > +#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
> > +		unsigned __lcore_id = rte_lcore_id();           \
> > +		if (__lcore_id < RTE_MAX_LCORE) {               \
> > +			mp->stats[__lcore_id].name##_objs += n;	\
> > +			mp->stats[__lcore_id].name##_bulk += 1;	\
> > +		}                                               \
> 
> Does it mean that we have no statistics for non-EAL threads?
> (same question for rings and timers in the next patches)
[LCM] Yes, in this patch set. It mainly focuses on EAL threads and on making sure non-EAL threads run without issues.
Full non-EAL functionality will come in another patch set that enhances non-EAL threads as a second step.
> 
> 
> >  	} while(0)
> >  #else
> >  #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> > @@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> >  	__MEMPOOL_STAT_ADD(mp, put, n);
> >
> >  #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> > -	/* cache is not enabled or single producer */
> > -	if (unlikely(cache_size == 0 || is_mp == 0))
> > +	/* cache is not enabled or single producer or non-EAL thread */
> > +	if (unlikely(cache_size == 0 || is_mp == 0 ||
> > +		     lcore_id >= RTE_MAX_LCORE))
> >  		goto ring_enqueue;
> >
> >  	/* Go straight to ring if put would overflow mem allocated for cache */
> > @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> >  	uint32_t cache_size = mp->cache_size;
> >
> >  	/* cache is not enabled or single consumer */
> > -	if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> > +	if (unlikely(cache_size == 0 || is_mc == 0 ||
> > +		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> >  		goto ring_dequeue;
> >
> >  	cache = &mp->local_cache[lcore_id];
> >
> 
> What is the performance impact of adding this test?
[LCM] Measured with perf in the unit test, it's almost the same. But I haven't measured an EAL thread and a non-EAL thread sharing the same mempool.
> 
> 
> Regards,
> Olivier
  
Olivier Matz Feb. 9, 2015, 5:52 p.m. UTC | #3
Hi,

On 02/09/2015 03:41 PM, Liang, Cunming wrote:
>>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>> -#define __MEMPOOL_STAT_ADD(mp, name, n) do {			\
>>> -		unsigned __lcore_id = rte_lcore_id();		\
>>> -		mp->stats[__lcore_id].name##_objs += n;		\
>>> -		mp->stats[__lcore_id].name##_bulk += 1;		\
>>> +#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
>>> +		unsigned __lcore_id = rte_lcore_id();           \
>>> +		if (__lcore_id < RTE_MAX_LCORE) {               \
>>> +			mp->stats[__lcore_id].name##_objs += n;	\
>>> +			mp->stats[__lcore_id].name##_bulk += 1;	\
>>> +		}                                               \
>>
>> Does it mean that we have no statistics for non-EAL threads?
>> (same question for rings and timers in the next patches)
> [LCM] Yes, in this patch set. It mainly focuses on EAL threads and on making sure non-EAL threads run without issues.
> Full non-EAL functionality will come in another patch set that enhances non-EAL threads as a second step.

OK

>>> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
>>>  	uint32_t cache_size = mp->cache_size;
>>>
>>>  	/* cache is not enabled or single consumer */
>>> -	if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
>>> +	if (unlikely(cache_size == 0 || is_mc == 0 ||
>>> +		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
>>>  		goto ring_dequeue;
>>>
>>>  	cache = &mp->local_cache[lcore_id];
>>>
>>
>> What is the performance impact of adding this test?
> [LCM] Measured with perf in the unit test, it's almost the same. But I haven't measured an EAL thread and a non-EAL thread sharing the same mempool.


When you say "unit test", are you talking about mempool tests from
"make test"? Do you have some numbers to share?
  
Cunming Liang Feb. 10, 2015, 2:57 a.m. UTC | #4
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, February 10, 2015 1:52 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 14/17] mempool: add support to non-EAL thread
> 
> Hi,
> 
> On 02/09/2015 03:41 PM, Liang, Cunming wrote:
> >>>  #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> >>> -#define __MEMPOOL_STAT_ADD(mp, name, n) do {			\
> >>> -		unsigned __lcore_id = rte_lcore_id();		\
> >>> -		mp->stats[__lcore_id].name##_objs += n;		\
> >>> -		mp->stats[__lcore_id].name##_bulk += 1;		\
> >>> +#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
> >>> +		unsigned __lcore_id = rte_lcore_id();           \
> >>> +		if (__lcore_id < RTE_MAX_LCORE) {               \
> >>> +			mp->stats[__lcore_id].name##_objs += n;	\
> >>> +			mp->stats[__lcore_id].name##_bulk += 1;	\
> >>> +		}                                               \
> >>
> >> Does it mean that we have no statistics for non-EAL threads?
> >> (same question for rings and timers in the next patches)
> > [LCM] Yes, in this patch set. It mainly focuses on EAL threads and on making
> > sure non-EAL threads run without issues.
> > Full non-EAL functionality will come in another patch set that enhances
> > non-EAL threads as a second step.
> 
> OK
> 
> >>> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> >>>  	uint32_t cache_size = mp->cache_size;
> >>>
> >>>  	/* cache is not enabled or single consumer */
> >>> -	if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> >>> +	if (unlikely(cache_size == 0 || is_mc == 0 ||
> >>> +		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> >>>  		goto ring_dequeue;
> >>>
> >>>  	cache = &mp->local_cache[lcore_id];
> >>>
> >>
> >> What is the performance impact of adding this test?
> > [LCM] Measured with perf in the unit test, it's almost the same. But I haven't
> > measured an EAL thread and a non-EAL thread sharing the same mempool.
> 
> 
> When you say "unit test", are you talking about mempool tests from
> "make test"? Do you have some numbers to share?
[LCM] I mean the DPDK app/test, running mempool_perf_test. Will add numbers in v5.
  

Patch

diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 3314651..4845f27 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -198,10 +198,12 @@  struct rte_mempool {
  *   Number to add to the object-oriented statistics.
  */
 #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-#define __MEMPOOL_STAT_ADD(mp, name, n) do {			\
-		unsigned __lcore_id = rte_lcore_id();		\
-		mp->stats[__lcore_id].name##_objs += n;		\
-		mp->stats[__lcore_id].name##_bulk += 1;		\
+#define __MEMPOOL_STAT_ADD(mp, name, n) do {                    \
+		unsigned __lcore_id = rte_lcore_id();           \
+		if (__lcore_id < RTE_MAX_LCORE) {               \
+			mp->stats[__lcore_id].name##_objs += n;	\
+			mp->stats[__lcore_id].name##_bulk += 1;	\
+		}                                               \
 	} while(0)
 #else
 #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
@@ -767,8 +769,9 @@  __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
 	__MEMPOOL_STAT_ADD(mp, put, n);
 
 #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
-	/* cache is not enabled or single producer */
-	if (unlikely(cache_size == 0 || is_mp == 0))
+	/* cache is not enabled or single producer or non-EAL thread */
+	if (unlikely(cache_size == 0 || is_mp == 0 ||
+		     lcore_id >= RTE_MAX_LCORE))
 		goto ring_enqueue;
 
 	/* Go straight to ring if put would overflow mem allocated for cache */
@@ -952,7 +955,8 @@  __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
 	uint32_t cache_size = mp->cache_size;
 
 	/* cache is not enabled or single consumer */
-	if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
+	if (unlikely(cache_size == 0 || is_mc == 0 ||
+		     n >= cache_size || lcore_id >= RTE_MAX_LCORE))
 		goto ring_dequeue;
 
 	cache = &mp->local_cache[lcore_id];
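
For reference, here is a sketch of what the guarded stats macro expands to for
a put of n objects when RTE_LIBRTE_MEMPOOL_DEBUG is enabled (expansion written
out by hand for illustration, with "put" substituted for "name"). In a non-EAL
thread, rte_lcore_id() returns LCORE_ID_ANY, the condition is false, and no
counter is touched, which is why such threads currently have no statistics:

	unsigned __lcore_id = rte_lcore_id();
	if (__lcore_id < RTE_MAX_LCORE) {
		/* per-lcore counters, indexed by the calling EAL thread's id */
		mp->stats[__lcore_id].put_objs += n;
		mp->stats[__lcore_id].put_bulk += 1;
	}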