[dpdk-dev,v4,14/17] mempool: add support to non-EAL thread
Commit Message
In a non-EAL thread, bypass the per-lcore cache and use the ring pool directly.
This allows rte_mempool to be used from either an EAL thread or any user pthread.
Since a non-EAL thread relies directly on rte_ring, which is not preemptible,
running multiple pthreads per CPU that compete for the same rte_mempool is not recommended:
it performs badly and carries a critical risk when the scheduling policy is real-time (RT).
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
---
lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
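To illustrate the intended usage, below is a minimal sketch (not part of the patch) of a plain user pthread doing get/put on a mempool created after EAL initialization. Everything except the rte_* API and the pthread calls (user_thread, user_pool, the pool sizing, main) is hypothetical. In such a thread the per-lcore cache is bypassed, so every get/put operates on the underlying ring.

    #include <pthread.h>
    #include <rte_eal.h>
    #include <rte_mempool.h>

    /* Plain pthread, not an EAL lcore: mempool operations here take the
     * ring path introduced by this patch instead of the per-lcore cache. */
    static void *user_thread(void *arg)
    {
        struct rte_mempool *mp = arg;
        void *obj;

        if (rte_mempool_get(mp, &obj) == 0)
            rte_mempool_put(mp, obj);
        return NULL;
    }

    int main(int argc, char **argv)
    {
        pthread_t tid;
        struct rte_mempool *mp;

        if (rte_eal_init(argc, argv) < 0)
            return -1;

        /* 4095 objects of 64 bytes, 32-object per-lcore cache (only used
         * by EAL threads), no private data, no constructors. */
        mp = rte_mempool_create("user_pool", 4095, 64, 32, 0,
                                NULL, NULL, NULL, NULL,
                                SOCKET_ID_ANY, 0);
        if (mp == NULL)
            return -1;

        /* A single non-EAL consumer; as the commit message notes, several
         * non-EAL threads competing for one mempool are discouraged. */
        pthread_create(&tid, NULL, user_thread, mp);
        pthread_join(tid, NULL);
        return 0;
    }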
Comments
Hi,
On 02/02/2015 03:02 AM, Cunming Liang wrote:
> For non-EAL thread, bypass per lcore cache, directly use ring pool.
> It allows using rte_mempool in either EAL thread or any user pthread.
> As in non-EAL thread, it directly rely on rte_ring and it's none preemptive.
> It doesn't suggest to run multi-pthread/cpu which compete the rte_mempool.
> It will get bad performance and has critical risk if scheduling policy is RT.
>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> ---
> lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
> 1 file changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 3314651..4845f27 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -198,10 +198,12 @@ struct rte_mempool {
> * Number to add to the object-oriented statistics.
> */
> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> - unsigned __lcore_id = rte_lcore_id(); \
> - mp->stats[__lcore_id].name##_objs += n; \
> - mp->stats[__lcore_id].name##_bulk += 1; \
> +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> + unsigned __lcore_id = rte_lcore_id(); \
> + if (__lcore_id < RTE_MAX_LCORE) { \
> + mp->stats[__lcore_id].name##_objs += n; \
> + mp->stats[__lcore_id].name##_bulk += 1; \
> + } \
Does it mean that we have no statistics for non-EAL threads?
(same question for rings and timers in the next patches)
> } while(0)
> #else
> #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> @@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
> __MEMPOOL_STAT_ADD(mp, put, n);
>
> #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> - /* cache is not enabled or single producer */
> - if (unlikely(cache_size == 0 || is_mp == 0))
> + /* cache is not enabled or single producer or none EAL thread */
> + if (unlikely(cache_size == 0 || is_mp == 0 ||
> + lcore_id >= RTE_MAX_LCORE))
> goto ring_enqueue;
>
> /* Go straight to ring if put would overflow mem allocated for cache */
> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
> uint32_t cache_size = mp->cache_size;
>
> /* cache is not enabled or single consumer */
> - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> + if (unlikely(cache_size == 0 || is_mc == 0 ||
> + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> goto ring_dequeue;
>
> cache = &mp->local_cache[lcore_id];
>
What is the performance impact of adding this test?
Regards,
Olivier
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, February 09, 2015 4:01 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 14/17] mempool: add support to non-EAL
> thread
>
> Hi,
>
> On 02/02/2015 03:02 AM, Cunming Liang wrote:
> > For non-EAL thread, bypass per lcore cache, directly use ring pool.
> > It allows using rte_mempool in either EAL thread or any user pthread.
> > As in non-EAL thread, it directly rely on rte_ring and it's none preemptive.
> > It doesn't suggest to run multi-pthread/cpu which compete the rte_mempool.
> > It will get bad performance and has critical risk if scheduling policy is RT.
> >
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > ---
> > lib/librte_mempool/rte_mempool.h | 18 +++++++++++-------
> > 1 file changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/lib/librte_mempool/rte_mempool.h
> b/lib/librte_mempool/rte_mempool.h
> > index 3314651..4845f27 100644
> > --- a/lib/librte_mempool/rte_mempool.h
> > +++ b/lib/librte_mempool/rte_mempool.h
> > @@ -198,10 +198,12 @@ struct rte_mempool {
> > * Number to add to the object-oriented statistics.
> > */
> > #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> > -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> > - unsigned __lcore_id = rte_lcore_id(); \
> > - mp->stats[__lcore_id].name##_objs += n; \
> > - mp->stats[__lcore_id].name##_bulk += 1; \
> > +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> > + unsigned __lcore_id = rte_lcore_id(); \
> > + if (__lcore_id < RTE_MAX_LCORE) { \
> > + mp->stats[__lcore_id].name##_objs += n; \
> > + mp->stats[__lcore_id].name##_bulk += 1; \
> > + } \
>
> Does it mean that we have no statistics for non-EAL threads?
> (same question for rings and timers in the next patches)
[LCM] Yes, that is the case in this patch set; it focuses on EAL threads and on making sure nothing breaks when running in a non-EAL thread.
Full non-EAL functionality will come in a separate patch set that enhances non-EAL threads as a second step.
>
>
> > } while(0)
> > #else
> > #define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
> > @@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void
> * const *obj_table,
> > __MEMPOOL_STAT_ADD(mp, put, n);
> >
> > #if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
> > - /* cache is not enabled or single producer */
> > - if (unlikely(cache_size == 0 || is_mp == 0))
> > + /* cache is not enabled or single producer or none EAL thread */
> > + if (unlikely(cache_size == 0 || is_mp == 0 ||
> > + lcore_id >= RTE_MAX_LCORE))
> > goto ring_enqueue;
> >
> > /* Go straight to ring if put would overflow mem allocated for cache */
> > @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void
> **obj_table,
> > uint32_t cache_size = mp->cache_size;
> >
> > /* cache is not enabled or single consumer */
> > - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> > + if (unlikely(cache_size == 0 || is_mc == 0 ||
> > + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> > goto ring_dequeue;
> >
> > cache = &mp->local_cache[lcore_id];
> >
>
> What is the performance impact of adding this test?
[LCM] According to perf in the unit test, it is almost the same. But I haven't measured the case where an EAL thread and a non-EAL thread share the same mempool.
>
>
> Regards,
> Olivier
Hi,
On 02/09/2015 03:41 PM, Liang, Cunming wrote:
>>> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
>>> -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
>>> - unsigned __lcore_id = rte_lcore_id(); \
>>> - mp->stats[__lcore_id].name##_objs += n; \
>>> - mp->stats[__lcore_id].name##_bulk += 1; \
>>> +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
>>> + unsigned __lcore_id = rte_lcore_id(); \
>>> + if (__lcore_id < RTE_MAX_LCORE) { \
>>> + mp->stats[__lcore_id].name##_objs += n; \
>>> + mp->stats[__lcore_id].name##_bulk += 1; \
>>> + } \
>>
>> Does it mean that we have no statistics for non-EAL threads?
>> (same question for rings and timers in the next patches)
> [LCM] Yes, it is in this patch set, mainly focus on EAL thread and make sure no running issue on non-EAL thread.
> For full non-EAL function, will have other patch set to enhance non-EAL thread as the 2nd step.
OK
>>> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void
>> **obj_table,
>>> uint32_t cache_size = mp->cache_size;
>>>
>>> /* cache is not enabled or single consumer */
>>> - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
>>> + if (unlikely(cache_size == 0 || is_mc == 0 ||
>>> + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
>>> goto ring_dequeue;
>>>
>>> cache = &mp->local_cache[lcore_id];
>>>
>>
>> What is the performance impact of adding this test?
> [LCM] By perf in unit test, it's almost the same. But haven't measure EAL thread and non-EAL thread share the same mempool.
When you say "unit test", are you talking about mempool tests from
"make test"? Do you have some numbers to share?
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, February 10, 2015 1:52 AM
> To: Liang, Cunming; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4 14/17] mempool: add support to non-EAL
> thread
>
> Hi,
>
> On 02/09/2015 03:41 PM, Liang, Cunming wrote:
> >>> #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
> >>> -#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> >>> - unsigned __lcore_id = rte_lcore_id(); \
> >>> - mp->stats[__lcore_id].name##_objs += n; \
> >>> - mp->stats[__lcore_id].name##_bulk += 1; \
> >>> +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
> >>> + unsigned __lcore_id = rte_lcore_id(); \
> >>> + if (__lcore_id < RTE_MAX_LCORE) { \
> >>> + mp->stats[__lcore_id].name##_objs += n; \
> >>> + mp->stats[__lcore_id].name##_bulk += 1; \
> >>> + } \
> >>
> >> Does it mean that we have no statistics for non-EAL threads?
> >> (same question for rings and timers in the next patches)
> > [LCM] Yes, it is in this patch set, mainly focus on EAL thread and make sure no
> running issue on non-EAL thread.
> > For full non-EAL function, will have other patch set to enhance non-EAL thread
> as the 2nd step.
>
> OK
>
> >>> @@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp,
> void
> >> **obj_table,
> >>> uint32_t cache_size = mp->cache_size;
> >>>
> >>> /* cache is not enabled or single consumer */
> >>> - if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
> >>> + if (unlikely(cache_size == 0 || is_mc == 0 ||
> >>> + n >= cache_size || lcore_id >= RTE_MAX_LCORE))
> >>> goto ring_dequeue;
> >>>
> >>> cache = &mp->local_cache[lcore_id];
> >>>
> >>
> >> What is the performance impact of adding this test?
> > [LCM] By perf in unit test, it's almost the same. But haven't measure EAL thread
> and non-EAL thread share the same mempool.
>
>
> When you say "unit test", are you talking about mempool tests from
> "make test"? Do you have some numbers to share?
[LCM] I mean the DPDK app/test, running mempool_perf_test. I will add numbers in v5.
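For reference, a measurement of that kind could look like the sketch below. It is not the mempool_perf_test code; ITERATIONS, BULK and the function name are made up. Timing a bulk get/put loop with rte_rdtsc() lets the same function be called once from an EAL lcore (cached path) and once from a plain pthread (ring path) on the same mempool, so the two costs can be compared.

    #include <rte_cycles.h>
    #include <rte_mempool.h>

    #define ITERATIONS 1000000
    #define BULK 32

    /* Cycles spent on ITERATIONS bulk get/put pairs against 'mp'. */
    static uint64_t
    time_get_put(struct rte_mempool *mp)
    {
        void *objs[BULK];
        uint64_t start = rte_rdtsc();
        unsigned i;

        for (i = 0; i < ITERATIONS; i++) {
            if (rte_mempool_get_bulk(mp, objs, BULK) == 0)
                rte_mempool_put_bulk(mp, objs, BULK);
        }
        return rte_rdtsc() - start;
    }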
@@ -198,10 +198,12 @@ struct rte_mempool {
* Number to add to the object-oriented statistics.
*/
#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
-#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
- unsigned __lcore_id = rte_lcore_id(); \
- mp->stats[__lcore_id].name##_objs += n; \
- mp->stats[__lcore_id].name##_bulk += 1; \
+#define __MEMPOOL_STAT_ADD(mp, name, n) do { \
+ unsigned __lcore_id = rte_lcore_id(); \
+ if (__lcore_id < RTE_MAX_LCORE) { \
+ mp->stats[__lcore_id].name##_objs += n; \
+ mp->stats[__lcore_id].name##_bulk += 1; \
+ } \
} while(0)
#else
#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0)
@@ -767,8 +769,9 @@ __mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table,
__MEMPOOL_STAT_ADD(mp, put, n);
#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0
- /* cache is not enabled or single producer */
- if (unlikely(cache_size == 0 || is_mp == 0))
+ /* cache is not enabled or single producer or none EAL thread */
+ if (unlikely(cache_size == 0 || is_mp == 0 ||
+ lcore_id >= RTE_MAX_LCORE))
goto ring_enqueue;
/* Go straight to ring if put would overflow mem allocated for cache */
@@ -952,7 +955,8 @@ __mempool_get_bulk(struct rte_mempool *mp, void **obj_table,
uint32_t cache_size = mp->cache_size;
/* cache is not enabled or single consumer */
- if (unlikely(cache_size == 0 || is_mc == 0 || n >= cache_size))
+ if (unlikely(cache_size == 0 || is_mc == 0 ||
+ n >= cache_size || lcore_id >= RTE_MAX_LCORE))
goto ring_dequeue;
cache = &mp->local_cache[lcore_id];
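Application code can mirror the new condition to tell in advance which path a get/put will take. Below is a minimal sketch, assuming (as the hunks above do) that rte_lcore_id() returns a value not below RTE_MAX_LCORE in a non-EAL thread; it deliberately ignores the single-producer/consumer flags and the burst-size check.

    #include <rte_lcore.h>
    #include <rte_mempool.h>

    /* Returns 1 when get/put from the calling thread will bypass the
     * per-lcore cache and go straight to the ring. */
    static inline int
    mempool_cache_bypassed(const struct rte_mempool *mp)
    {
        return mp->cache_size == 0 || rte_lcore_id() >= RTE_MAX_LCORE;
    }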