diff mbox

[dpdk-dev] rte_mbuf: scattered pktmbufs freeing optimization

Message ID 1424992506-20484-1-git-send-email-vadim.suraev@gmail.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

vadim.suraev@gmail.com Feb. 26, 2015, 11:15 p.m. UTC
From: "vadim.suraev@gmail.com" <vadim.suraev@gmail.com>

new function - rte_pktmbuf_free_bulk makes freeing long
scattered (chained) pktmbufs belonging to the same pool 
more optimal using rte_mempool_put_bulk rather than calling
rte_mempool_put for each segment. 
Unlike rte_pktmbuf_free, which calls rte_pktmbuf_free_seg,
this function calls __rte_pktmbuf_prefree_seg. If non-NULL
returned, the pointer is placed in an array. When array is
filled or when the last segment is processed, rte_mempool_put_bulk 
is called. In case of multiple producers, performs 3 times better.


Signed-off-by: vadim.suraev@gmail.com <vadim.suraev@gmail.com>
---
 lib/librte_mbuf/rte_mbuf.h |   55 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

Comments

Stephen Hemminger Feb. 27, 2015, 12:49 a.m. UTC | #1
On Fri, 27 Feb 2015 01:15:06 +0200
"vadim.suraev@gmail.com" <vadim.suraev@gmail.com> wrote:

> +static inline void __attribute__((always_inline))
> +rte_pktmbuf_free_bulk(struct rte_mbuf *head)

Quit with the inlining. Inlining all the time isn't faster
it just increase the code bloat and causes i-cache misses.

> +{
> +    void *mbufs[MAX_MBUF_FREE_SIZE];
> +    unsigned mbufs_count = 0;
> +    struct rte_mbuf *next;
> +
> +    RTE_MBUF_MEMPOOL_CHECK1(head);
> +
> +    while(head) {
> +        next = head->next;
> +        head->next = NULL;
> +        if(__rte_pktmbuf_prefree_seg(head)) {

Missing space after 'if'

> +            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);
> +            RTE_MBUF_MEMPOOL_CHECK2(head);
> +            mbufs[mbufs_count++] = head;
> +        }
> +        head = next;
> +        if(mbufs_count == MAX_MBUF_FREE_SIZE) {
> +            rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);
why not have mbufs[] be type struct rte_mbuf * and avoid casting.
Casting is one of the sources of bugs in C code.


> +            mbufs_count = 0;
> +        }
> +    }
> +    if(mbufs_count > 0) {
Don't need {} on one line if clause

> +        rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);
> +    }
> +}
Ananyev, Konstantin Feb. 27, 2015, 11:17 a.m. UTC | #2
Hi Vadim,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of vadim.suraev@gmail.com
> Sent: Thursday, February 26, 2015 11:15 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] rte_mbuf: scattered pktmbufs freeing optimization
> 
> From: "vadim.suraev@gmail.com" <vadim.suraev@gmail.com>
> 
> new function - rte_pktmbuf_free_bulk makes freeing long
> scattered (chained) pktmbufs belonging to the same pool
> more optimal using rte_mempool_put_bulk rather than calling
> rte_mempool_put for each segment.
> Unlike rte_pktmbuf_free, which calls rte_pktmbuf_free_seg,
> this function calls __rte_pktmbuf_prefree_seg. If non-NULL
> returned, the pointer is placed in an array. When array is
> filled or when the last segment is processed, rte_mempool_put_bulk
> is called. In case of multiple producers, performs 3 times better.
> 
> 
> Signed-off-by: vadim.suraev@gmail.com <vadim.suraev@gmail.com>
> ---
>  lib/librte_mbuf/rte_mbuf.h |   55 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 55 insertions(+)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 17ba791..1d6f848 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -824,6 +824,61 @@ static inline void rte_pktmbuf_free(struct rte_mbuf *m)
>  	}
>  }
> 
> +/* This macro defines the size of max bulk of mbufs to free for rte_pktmbuf_free_bulk */
> +#define MAX_MBUF_FREE_SIZE 32
> +
> +/* If RTE_LIBRTE_MBUF_DEBUG is enabled, checks if all mbufs must belong to the same mempool */
> +#ifdef RTE_LIBRTE_MBUF_DEBUG
> +
> +#define RTE_MBUF_MEMPOOL_CHECK1(m) struct rte_mempool *first_buffers_mempool = (m) ? (m)->pool : NULL
> +
> +#define RTE_MBUF_MEMPOOL_CHECK2(m) RTE_MBUF_ASSERT(first_buffers_mempool == (m)->pool)
> +
> +#else
> +
> +#define RTE_MBUF_MEMPOOL_CHECK1(m)
> +
> +#define RTE_MBUF_MEMPOOL_CHECK2(m)
> +
> +#endif
> +
> +/**
> + * Free chained (scattered) mbuf into its original mempool.
> + *
> + * All the mbufs in the chain must belong to the same mempool.

Seems really useful.
One thought - why to introduce the limitation that all mbufs have to be from the same mempool?
I think you can reorder it a bit, so it can handle situation when chained mbufs belong to different mempools.
Something like:
...
mbufs[mbufs_count] = head;
if (unlikely (head->pool != mbufs[0]->pool || mbufs_count == RTE_DIM(mbufs) - 1)) {
    rte_mempool_put_bulk(mbufs[0]->pool, mbufs, mbufs_count);
    mbufs[0] = mbufs[mbufs_count];
    mbufs_count = 0;
} 
mbufs_count++;
...
 
Another nit: probably better name it rte_pktmbuf_free_chain() or something?
For me _bulk implies that we have an array of mbufs that we need to free.
Actually probably would be another useful function to have:
rte_pktmbuf_free_seg_bulk(struct rte_mbuf *m[], uint32_t num);

Konstantin

> + *
> + * @param head
> + *   The head of mbufs to be freed chain
> + */
> +
> +static inline void __attribute__((always_inline))
> +rte_pktmbuf_free_bulk(struct rte_mbuf *head)
> +{
> +    void *mbufs[MAX_MBUF_FREE_SIZE];
> +    unsigned mbufs_count = 0;
> +    struct rte_mbuf *next;
> +
> +    RTE_MBUF_MEMPOOL_CHECK1(head);
> +
> +    while(head) {
> +        next = head->next;
> +        head->next = NULL;
> +        if(__rte_pktmbuf_prefree_seg(head)) {
> +            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);
> +            RTE_MBUF_MEMPOOL_CHECK2(head);
> +            mbufs[mbufs_count++] = head;
> +        }
> +        head = next;
> +        if(mbufs_count == MAX_MBUF_FREE_SIZE) {
> +            rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);
> +            mbufs_count = 0;
> +        }
> +    }
> +    if(mbufs_count > 0) {
> +        rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);
> +    }
> +}
> +
>  /**
>   * Creates a "clone" of the given packet mbuf.
>   *
> --
> 1.7.9.5
vadim.suraev@gmail.com Feb. 27, 2015, 12:18 p.m. UTC | #3
Hi, Konstantin,

>Seems really useful.
>One thought - why to introduce the limitation that all mbufs have to be
from the same mempool?
>I think you can reorder it a bit, so it can handle situation when chained
mbufs belong to different mempools.

I had a doubt; my concern was how practical that (multiple-mempools)
case really is.

Do you think there should be two versions: lightweight (with the
restriction) and generic?


>Actually probably would be another useful function to have:
>rte_pktmbuf_free_seg_bulk(struct rte_mbuf *m[], uint32_t num);

Yes, this could be a sub-routine of rte_pktmbuf_free_chain()

Regards,

 Vadim.

On Feb 27, 2015 3:18 PM, "Ananyev, Konstantin" <konstantin.ananyev@intel.com>
wrote:

> Hi Vadim,
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of
> vadim.suraev@gmail.com
> > Sent: Thursday, February 26, 2015 11:15 PM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH] rte_mbuf: scattered pktmbufs freeing
> optimization
> >
> > From: "vadim.suraev@gmail.com" <vadim.suraev@gmail.com>
> >
> > new function - rte_pktmbuf_free_bulk makes freeing long
> > scattered (chained) pktmbufs belonging to the same pool
> > more optimal using rte_mempool_put_bulk rather than calling
> > rte_mempool_put for each segment.
> > Unlike rte_pktmbuf_free, which calls rte_pktmbuf_free_seg,
> > this function calls __rte_pktmbuf_prefree_seg. If non-NULL
> > returned, the pointer is placed in an array. When array is
> > filled or when the last segment is processed, rte_mempool_put_bulk
> > is called. In case of multiple producers, performs 3 times better.
> >
> >
> > Signed-off-by: vadim.suraev@gmail.com <vadim.suraev@gmail.com>
> > ---
> >  lib/librte_mbuf/rte_mbuf.h |   55
> ++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 55 insertions(+)
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 17ba791..1d6f848 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -824,6 +824,61 @@ static inline void rte_pktmbuf_free(struct rte_mbuf
> *m)
> >       }
> >  }
> >
> > +/* This macro defines the size of max bulk of mbufs to free for
> rte_pktmbuf_free_bulk */
> > +#define MAX_MBUF_FREE_SIZE 32
> > +
> > +/* If RTE_LIBRTE_MBUF_DEBUG is enabled, checks if all mbufs must belong
> to the same mempool */
> > +#ifdef RTE_LIBRTE_MBUF_DEBUG
> > +
> > +#define RTE_MBUF_MEMPOOL_CHECK1(m) struct rte_mempool
> *first_buffers_mempool = (m) ? (m)->pool : NULL
> > +
> > +#define RTE_MBUF_MEMPOOL_CHECK2(m)
> RTE_MBUF_ASSERT(first_buffers_mempool == (m)->pool)
> > +
> > +#else
> > +
> > +#define RTE_MBUF_MEMPOOL_CHECK1(m)
> > +
> > +#define RTE_MBUF_MEMPOOL_CHECK2(m)
> > +
> > +#endif
> > +
> > +/**
> > + * Free chained (scattered) mbuf into its original mempool.
> > + *
> > + * All the mbufs in the chain must belong to the same mempool.
>
> Seems really useful.
> One thought - why to introduce the limitation that all mbufs have to be
> from the same mempool?
> I think you can reorder it a bit, so it can handle situation when chained
> mbufs belong to different mempools.
> Something like:
> ...
> mbufs[mbufs_count] = head;
> if (unlikely (head->mp != mbufs[0]->mp || mbufs_count == RTE_DIM(mbufs) -
> 1)) {
>     rte_mempool_put_bulk(mbufs[0]->pool, mbufs, mbufs_count);
>     mbufs[0] = mbufs[mbufs_count];
>     mbufs_count = 0;
> }
> mbufs_count++;
> ...
>
> Another nit: probably better name it rte_pktmbuf_free_chain() or something?
> For me _bulk implies that we have an array of mbufs that we need to free.
> Actually probably would be another useful function to have:
> rte_pktmbuf_free_seg_bulk(struct rte_mbuf *m[], uint32_t num);
>
> Konstantin
>
> > + *
> > + * @param head
> > + *   The head of mbufs to be freed chain
> > + */
> > +
> > +static inline void __attribute__((always_inline))
> > +rte_pktmbuf_free_bulk(struct rte_mbuf *head)
> > +{
> > +    void *mbufs[MAX_MBUF_FREE_SIZE];
> > +    unsigned mbufs_count = 0;
> > +    struct rte_mbuf *next;
> > +
> > +    RTE_MBUF_MEMPOOL_CHECK1(head);
> > +
> > +    while(head) {
> > +        next = head->next;
> > +        head->next = NULL;
> > +        if(__rte_pktmbuf_prefree_seg(head)) {
> > +            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);
> > +            RTE_MBUF_MEMPOOL_CHECK2(head);
> > +            mbufs[mbufs_count++] = head;
> > +        }
> > +        head = next;
> > +        if(mbufs_count == MAX_MBUF_FREE_SIZE) {
> > +            rte_mempool_put_bulk(((struct rte_mbuf
> *)mbufs[0])->pool,mbufs,mbufs_count);
> > +            mbufs_count = 0;
> > +        }
> > +    }
> > +    if(mbufs_count > 0) {
> > +        rte_mempool_put_bulk(((struct rte_mbuf
> *)mbufs[0])->pool,mbufs,mbufs_count);
> > +    }
> > +}
> > +
> >  /**
> >   * Creates a "clone" of the given packet mbuf.
> >   *
> > --
> > 1.7.9.5
>
>
Ananyev, Konstantin Feb. 27, 2015, 1:10 p.m. UTC | #4
> From: Vadim Suraev [mailto:vadim.suraev@gmail.com]

> Sent: Friday, February 27, 2015 12:19 PM

> To: Ananyev, Konstantin

> Cc: dev@dpdk.org

> Subject: RE: [dpdk-dev] [PATCH] rte_mbuf: scattered pktmbufs freeing optimization

> 

> Hi, Konstantin,

> >Seems really useful.

> >One thought - why to introduce the limitation that all mbufs have to be from the same mempool?

> >I think you can reorder it a bit, so it can handle situation when chained mbufs belong to different mempools.

> I had a doubt, my concern was how practical is that (multiple mempools) case?


Well, inside DPDK we have at least 2 samples: ip_fragmentation and ipv4_multicast that chain together mbufs from different pools.  
How often that occurs in 'real world' apps - I am not sure.

> Do you think there should be two versions: lightweight (with the restriction) and generic?


I'd suggest to measure what is the performance difference between these 2 versions.
If the difference is noticeable, then probably it is better to have 2 versions.
If it would be neglectable, then I suppose just generic is good enough. 

Konstantin

> 

> >Actually probably would be another useful function to have:

> >rte_pktmbuf_free_seg_bulk(struct rte_mbuf *m[], uint32_t num);

> Yes, this could be a sub-routine of rte_pktmbuf_free_chain()

> Regards,

>  Vadim.

> 

> On Feb 27, 2015 3:18 PM, "Ananyev, Konstantin" <konstantin.ananyev@intel.com> wrote:

> Hi Vadim,

> 

> > -----Original Message-----

> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of vadim.suraev@gmail.com

> > Sent: Thursday, February 26, 2015 11:15 PM

> > To: dev@dpdk.org

> > Subject: [dpdk-dev] [PATCH] rte_mbuf: scattered pktmbufs freeing optimization

> >

> > From: "vadim.suraev@gmail.com" <vadim.suraev@gmail.com>

> >

> > new function - rte_pktmbuf_free_bulk makes freeing long

> > scattered (chained) pktmbufs belonging to the same pool

> > more optimal using rte_mempool_put_bulk rather than calling

> > rte_mempool_put for each segment.

> > Unlike rte_pktmbuf_free, which calls rte_pktmbuf_free_seg,

> > this function calls __rte_pktmbuf_prefree_seg. If non-NULL

> > returned, the pointer is placed in an array. When array is

> > filled or when the last segment is processed, rte_mempool_put_bulk

> > is called. In case of multiple producers, performs 3 times better.

> >

> >

> > Signed-off-by: vadim.suraev@gmail.com <vadim.suraev@gmail.com>

> > ---

> >  lib/librte_mbuf/rte_mbuf.h |   55 ++++++++++++++++++++++++++++++++++++++++++++

> >  1 file changed, 55 insertions(+)

> >

> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h

> > index 17ba791..1d6f848 100644

> > --- a/lib/librte_mbuf/rte_mbuf.h

> > +++ b/lib/librte_mbuf/rte_mbuf.h

> > @@ -824,6 +824,61 @@ static inline void rte_pktmbuf_free(struct rte_mbuf *m)

> >       }

> >  }

> >

> > +/* This macro defines the size of max bulk of mbufs to free for rte_pktmbuf_free_bulk */

> > +#define MAX_MBUF_FREE_SIZE 32

> > +

> > +/* If RTE_LIBRTE_MBUF_DEBUG is enabled, checks if all mbufs must belong to the same mempool */

> > +#ifdef RTE_LIBRTE_MBUF_DEBUG

> > +

> > +#define RTE_MBUF_MEMPOOL_CHECK1(m) struct rte_mempool *first_buffers_mempool = (m) ? (m)->pool : NULL

> > +

> > +#define RTE_MBUF_MEMPOOL_CHECK2(m) RTE_MBUF_ASSERT(first_buffers_mempool == (m)->pool)

> > +

> > +#else

> > +

> > +#define RTE_MBUF_MEMPOOL_CHECK1(m)

> > +

> > +#define RTE_MBUF_MEMPOOL_CHECK2(m)

> > +

> > +#endif

> > +

> > +/**

> > + * Free chained (scattered) mbuf into its original mempool.

> > + *

> > + * All the mbufs in the chain must belong to the same mempool.

> 

> Seems really useful.

> One thought - why to introduce the limitation that all mbufs have to be from the same mempool?

> I think you can reorder it a bit, so it can handle situation when chained mbufs belong to different mempools.

> Something like:

> ...

> mbufs[mbufs_count] = head;

> if (unlikely (head->mp != mbufs[0]->mp || mbufs_count == RTE_DIM(mbufs) - 1)) {

>     rte_mempool_put_bulk(mbufs[0]->pool, mbufs, mbufs_count);

>     mbufs[0] = mbufs[mbufs_count];

>     mbufs_count = 0;

> }

> mbufs_count++;

> ...

> 

> Another nit: probably better name it rte_pktmbuf_free_chain() or something?

> For me _bulk implies that we have an array of mbufs that we need to free.

> Actually probably would be another useful function to have:

> rte_pktmbuf_free_seg_bulk(struct rte_mbuf *m[], uint32_t num);

> 

> Konstantin

> 

> > + *

> > + * @param head

> > + *   The head of mbufs to be freed chain

> > + */

> > +

> > +static inline void __attribute__((always_inline))

> > +rte_pktmbuf_free_bulk(struct rte_mbuf *head)

> > +{

> > +    void *mbufs[MAX_MBUF_FREE_SIZE];

> > +    unsigned mbufs_count = 0;

> > +    struct rte_mbuf *next;

> > +

> > +    RTE_MBUF_MEMPOOL_CHECK1(head);

> > +

> > +    while(head) {

> > +        next = head->next;

> > +        head->next = NULL;

> > +        if(__rte_pktmbuf_prefree_seg(head)) {

> > +            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);

> > +            RTE_MBUF_MEMPOOL_CHECK2(head);

> > +            mbufs[mbufs_count++] = head;

> > +        }

> > +        head = next;

> > +        if(mbufs_count == MAX_MBUF_FREE_SIZE) {

> > +            rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);

> > +            mbufs_count = 0;

> > +        }

> > +    }

> > +    if(mbufs_count > 0) {

> > +        rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);

> > +    }

> > +}

> > +

> >  /**

> >   * Creates a "clone" of the given packet mbuf.

> >   *

> > --

> > 1.7.9.5
Olivier Matz Feb. 27, 2015, 1:20 p.m. UTC | #5
Hi Vadim, Hi Konstantin,

On 02/27/2015 01:18 PM, Vadim Suraev wrote:
> Hi, Konstantin,
> 
>> Seems really useful.

Indeed, this function looks useful, and I also have a work in progress
on this topic, but currently it is not well tested.

As we are on the mbuf subject, for 2.1, I would like to discuss some
points of mbuf API:

- support mbuf and segment bulk allocation/free
- clarify the difference between raw_alloc/raw_free and
  mempool_get/mempool_put: For instance, I think that the reference
  counter initialization should be managed by rte_pktmbuf_reset() like
  the rest of the fields, therefore raw_alloc/raw_free could be replaced
  by mempool functions (this would imply some modifications in drivers).
- clarify which functions are internal and which are not. Some could be
  made public and hence should be documented: from what I see, the
  vhost example currently uses internal APIs and I suspect it may
  not work when MBUF_DEBUG=y.
- factorize rte_rxmbuf_alloc() which is duplicated in almost any driver
- discuss if driver should be calling pktmbuf_reset().
  It would avoid some bugs where fields are not properly initialized.
- check if rte_pktmbuf_free() is called on mbufs with m->next or
  refcount not initialized
- support clones of clones
- support attach/detach of mbufs embedding a private part
- move m->next in the first cache line as it's initialized in rx
- rename "__rte_pktmbuf_prefree_seg" in something clearer like "decref".

To avoid doing the work twice, please note that I already have an
implementation for:
- rte_mbuf_raw_free_bulk(): each mbuf return in its own pool
- rte_mbuf_free_seg_bulk(): similar to call rte_pktmbuf_free_seg()
  on each mbuf
but they are not tested at all now.

I will submit a RFC series in the coming weeks that addresses these
issues. It could be used as a basis for a discussion.

Sorry Vadim for polluting the topic, but I think the subjects are
quite linked together. Please find some comments on your patch below.

>> One thought - why to introduce the limitation that all mbufs have to be
> from the same mempool?
>> I think you can reorder it a bit, so it can handle situation when chained
> mbufs belong to different mempools.
> 
> I had a doubt, my concern was how practical is that (multiple mempools)
> case?

I think having several mempool is common on multi-socket machines.

> Do you think there should be two versions: lightweight (with the
> restriction) and generic?

I think having the version that supports different mempools is more
important. Checking if the mempool is the same may not cost too much
compared to the rest of the work. But if we find a good use case
where having the lightweight version is preferable, we could have
2 versions (ex: if performance is really different).

>>> +/* This macro defines the size of max bulk of mbufs to free for
>> rte_pktmbuf_free_bulk */
>>> +#define MAX_MBUF_FREE_SIZE 32

I wonder if it's required to have this macro.

In my implementation, I use a dynamic-sized table:

static inline void
rte_mbuf_free_seg_bulk(struct rte_mbuf **m_table, unsigned n)
{
	void *obj_table[n];
[...]


>>> + *
>>> + * @param head
>>> + *   The head of mbufs to be freed chain
>>> + */
>>> +
>>> +static inline void __attribute__((always_inline))
>>> +rte_pktmbuf_free_bulk(struct rte_mbuf *head)

About the inlining, I have no objection now, although Stephen may be
right. I think we can consider un-inline some functions, based on
performance measurements.

>>> +{
>>> +    void *mbufs[MAX_MBUF_FREE_SIZE];
>>> +    unsigned mbufs_count = 0;
>>> +    struct rte_mbuf *next;
>>> +
>>> +    RTE_MBUF_MEMPOOL_CHECK1(head);
>>> +
>>> +    while(head) {
>>> +        next = head->next;
>>> +        head->next = NULL;
>>> +        if(__rte_pktmbuf_prefree_seg(head)) {
>>> +            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);
>>> +            RTE_MBUF_MEMPOOL_CHECK2(head);
>>> +            mbufs[mbufs_count++] = head;
>>> +        }
>>> +        head = next;
>>> +        if(mbufs_count == MAX_MBUF_FREE_SIZE) {
>>> +            rte_mempool_put_bulk(((struct rte_mbuf
>> *)mbufs[0])->pool,mbufs,mbufs_count);
>>> +            mbufs_count = 0;
>>> +        }
>>> +    }
>>> +    if(mbufs_count > 0) {
>>> +        rte_mempool_put_bulk(((struct rte_mbuf
>> *)mbufs[0])->pool,mbufs,mbufs_count);
>>> +    }
>>> +}
>>> +

Also, I think a unit test should be added.

Thanks,
Olivier
vadim.suraev@gmail.com Feb. 27, 2015, 5:09 p.m. UTC | #6
Hi, Olivier, Hi Konstantin,

>Indeed, this function looks useful, and I also have a work in progress
>on this topic, but currently it is not well tested.
I'm sorry, I didn't know. I'll not interfere with my patch))

>About the inlining, I have no objection now, although Stephen may be
>right. I think we can consider un-inline some functions, based on
>performance measurements.
I've also noticed in many cases it makes no difference. Seems to be some
trade-off.

>- clarify the difference between raw_alloc/raw_free and
>  mempool_get/mempool_put: For instance, I think that the reference
>  counter initialization should be managed by rte_pktmbuf_reset() like
>  the rest of the fields, therefore raw_alloc/raw_free could be replaced
It looks useful to me since not all of the fields are used in every
particular application so
if the allocation is decoupled from reset, one can save some cycles.

Regards,
 Vadim.

On Fri, Feb 27, 2015 at 3:20 PM, Olivier MATZ <olivier.matz@6wind.com>
wrote:

> Hi Vadim, Hi Konstantin,
>
> On 02/27/2015 01:18 PM, Vadim Suraev wrote:
> > Hi, Konstantin,
> >
> >> Seems really useful.
>
> Indeed, this function looks useful, and I also have a work in progress
> on this topic, but currently it is not well tested.
>
> As we are on the mbuf subject, for 2.1, I would like to discuss some
> points of mbuf API:
>
> - support mbuf and segment bulk allocation/free
> - clarify the difference between raw_alloc/raw_free and
>   mempool_get/mempool_put: For instance, I think that the reference
>   counter initialization should be managed by rte_pktmbuf_reset() like
>   the rest of the fields, therefore raw_alloc/raw_free could be replaced
>   by mempool functions (this would imply some modifications in drivers).
> - clarify which functions are internal and which are not. Some could be
>   made public and hence should be documented: from what I see, the
>   vhost example currently uses internal APIs and I suspect it may
>   not work when MBUF_DEBUG=y.
> - factorize rte_rxmbuf_alloc() which is duplicated in almost any driver
> - discuss if driver should be calling pktmbuf_reset().
>   It would avoid some bugs where fields are not properly initialized.
> - check if rte_pktmbuf_free() is called on mbufs with m->next or
>   refcount not initialized
> - support clones of clones
> - support attach/detach of mbufs embedding a private part
> - move m->next in the first cache line as it's initialized in rx
> - rename "__rte_pktmbuf_prefree_seg" in something clearer like "decref".
>
> To avoid doing the work twice, please note that I already have an
> implementation for:
> - rte_mbuf_raw_free_bulk(): each mbuf return in its own pool
> - rte_mbuf_free_seg_bulk(): similar to call rte_pktmbuf_free_seg()
>   on each mbuf
> but they are not tested at all now.
>
> I will submit a RFC series in the coming weeks that addresses these
> issues. It could be used as a basis for a discussion.
>
> Sorry Vadim for polluting the topic, but I think the subjects are
> quite linked together. Please find some comments on your patch below.
>
> >> One thought - why to introduce the limitation that all mbufs have to be
> > from the same mempool?
> >> I think you can reorder it a bit, so it can handle situation when
> chained
> > mbufs belong to different mempools.
> >
> > I had a doubt, my concern was how practical is that (multiple mempools)
> > case?
>
> I think having several mempool is common on multi-socket machines.
>
> > Do you think there should be two versions: lightweight (with the
> > restriction) and generic?
>
> I think having the version that supports different mempools is more
> important. Checking if the mempool is the same may not cost too much
> compared to the rest of the work. But if we find a good use case
> where having the lightweight version is preferable, we could have
> 2 versions (ex: if performance is really different).
>
> >>> +/* This macro defines the size of max bulk of mbufs to free for
> >> rte_pktmbuf_free_bulk */
> >>> +#define MAX_MBUF_FREE_SIZE 32
>
> I wonder if it's required to have this macro.
>
> In my implementation, I use a dynamic-sized table:
>
> static inline void
> rte_mbuf_free_seg_bulk(struct rte_mbuf **m_table, unsigned n)
> {
>         void *obj_table[n];
> [...]
>
>
> >>> + *
> >>> + * @param head
> >>> + *   The head of mbufs to be freed chain
> >>> + */
> >>> +
> >>> +static inline void __attribute__((always_inline))
> >>> +rte_pktmbuf_free_bulk(struct rte_mbuf *head)
>
> About the inlining, I have no objection now, although Stephen may be
> right. I think we can consider un-inline some functions, based on
> performance measurements.
>
> >>> +{
> >>> +    void *mbufs[MAX_MBUF_FREE_SIZE];
> >>> +    unsigned mbufs_count = 0;
> >>> +    struct rte_mbuf *next;
> >>> +
> >>> +    RTE_MBUF_MEMPOOL_CHECK1(head);
> >>> +
> >>> +    while(head) {
> >>> +        next = head->next;
> >>> +        head->next = NULL;
> >>> +        if(__rte_pktmbuf_prefree_seg(head)) {
> >>> +            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);
> >>> +            RTE_MBUF_MEMPOOL_CHECK2(head);
> >>> +            mbufs[mbufs_count++] = head;
> >>> +        }
> >>> +        head = next;
> >>> +        if(mbufs_count == MAX_MBUF_FREE_SIZE) {
> >>> +            rte_mempool_put_bulk(((struct rte_mbuf
> >> *)mbufs[0])->pool,mbufs,mbufs_count);
> >>> +            mbufs_count = 0;
> >>> +        }
> >>> +    }
> >>> +    if(mbufs_count > 0) {
> >>> +        rte_mempool_put_bulk(((struct rte_mbuf
> >> *)mbufs[0])->pool,mbufs,mbufs_count);
> >>> +    }
> >>> +}
> >>> +
>
> Also, I think a unit test should be added.
>
> Thanks,
> Olivier
>
Olivier Matz March 4, 2015, 8:54 a.m. UTC | #7
Hi Vadim,

On 02/27/2015 06:09 PM, Vadim Suraev wrote:
>  >Indeed, this function looks useful, and I also have a work in progress
>  >on this topic, but currently it is not well tested.
> I'm sorry, I didn't know. I'll not interfere with my patch))

That not what I wanted to say :)

You are very welcome with your patch, I just wanted to notify
that I am also working in the same area and that's why I listed
the things I'm currently working on.


>  >About the inlining, I have no objection now, although Stephen may be
>  >right. I think we can consider un-inline some functions, based on
>  >performance measurements.
> I've also noticed in many cases it makes no difference. Seems to be some
> trade-off.
>
>  >- clarify the difference between raw_alloc/raw_free and
>  >  mempool_get/mempool_put: For instance, I think that the reference
>  >  counter initialization should be managed by rte_pktmbuf_reset() like
>  >  the rest of the fields, therefore raw_alloc/raw_free could be replaced
> It looks useful to me since not all of the fields are used in every
> particular application so
> if the allocation is decoupled from reset, one can save some cycles.

Yes, this is also a trade-off between maintainability and speed, and
speed is probably the first constraint for the dpdk. But maybe we can
find an alternative that is both fast and maintainable.

Thanks,
Olivier
vadim.suraev@gmail.com March 6, 2015, 11:24 p.m. UTC | #8
Hi, Olivier,
I realized that if local cache for the mempool is enabled and greater than
0,
if, say, the mempool size is X and the local cache length is Y (and the
cache is not empty, i.e. Y > 0)
an attempt to allocate a bulk, whose size is greater than local cache size
(max) and greater than X-Y (which is the number of entries in the ring)
will fail.
The reason is:
__mempool_get_bulk will check whether the bulk to be allocated is greater
than mp->cache_size and will fall to ring_dequeue.
And the ring does not contain enough entries in this case while the sum of
ring entries + cache length may be greater or equal to the bulk's size, so
theoretically the bulk could be allocated.
Is it an expected behaviour? Am I missing something?
By the way, rte_mempool_count returns a ring count + sum of all local
caches, IMHO it could mislead, even twice.
Regards,
 Vadim.

On Wed, Mar 4, 2015 at 10:54 AM, Olivier MATZ <olivier.matz@6wind.com>
wrote:

> Hi Vadim,
>
> On 02/27/2015 06:09 PM, Vadim Suraev wrote:
>
>>  >Indeed, this function looks useful, and I also have a work in progress
>>  >on this topic, but currently it is not well tested.
>> I'm sorry, I didn't know. I'll not interfere with my patch))
>>
>
> That not what I wanted to say :)
>
> You are very welcome with your patch, I just wanted to notify
> that I am also working in the same area and that's why I listed
> the things I'm currently working on.
>
>
>   >About the inlining, I have no objection now, although Stephen may be
>>  >right. I think we can consider un-inline some functions, based on
>>  >performance measurements.
>> I've also noticed in many cases it makes no difference. Seems to be some
>> trade-off.
>>
>>  >- clarify the difference between raw_alloc/raw_free and
>>  >  mempool_get/mempool_put: For instance, I think that the reference
>>  >  counter initialization should be managed by rte_pktmbuf_reset() like
>>  >  the rest of the fields, therefore raw_alloc/raw_free could be replaced
>> It looks useful to me since not all of the fields are used in every
>> particular application so
>> if the allocation is decoupled from reset, one can save some cycles.
>>
>
> Yes, this is also a trade-off between maintainability and speed, and
> speed is probably the first constraint for the dpdk. But maybe we can
> find an alternative that is both fast and maintainable.
>
> Thanks,
> Olivier
>
>
Olivier Matz March 9, 2015, 8:38 a.m. UTC | #9
Hi Vadim,

On 03/07/2015 12:24 AM, Vadim Suraev wrote:
> Hi, Olivier,
> I realized that if local cache for the mempool is enabled and greater
> than 0,
> if, say, the mempool size is X and local cache length is Y (and it is
> not empty,Y>0)
> an attempt to allocate a bulk, whose size is greater than local cache
> size (max) and greater than X-Y (which is the number of entries in the
> ring) will fail.
> The reason is:
> __mempool_get_bulk will check whether the bulk to be allocated is
> greater than mp->cache_size and will fall to ring_dequeue.
> And the ring does not contain enough entries in this case while the sum
> of ring entries + cache length may be greater or equal to the bulk's
> size, so theoretically the bulk could be allocated.
> Is it an expected behaviour? Am I missing something?

I think it's the expected behavior as the code of mempool_get()
tries to minimize the number of tests. In this situation, even if
len(mempool) + len(cache) is greater than the number of requested
objects, we are almost out of buffers, so returning ENOBUF is not
a problem.

If the user wants to ensure that he can allocates at least X buffers,
he can create the pool with:
  mempool_create(X + cache_size * MAX_LCORE)

> By the way, rte_mempool_count returns a ring count + sum of all local
> caches, IMHO it could mislead, even twice.

Right, today rte_mempool_count() cannot really be used for something
else than debug or stats. Adding rte_mempool_common_count() and
rte_mempool_cache_len() may be useful to give the user a better control
(and they will be faster because they won't browse the cache lengths of
all lcores).

But we have to keep in mind that for multi-consumer pools checking the
common_count before retrieving objects is useless because the other
lcores can retrieve objects at the same time.

Regards,
Olivier
diff mbox

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 17ba791..1d6f848 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -824,6 +824,61 @@  static inline void rte_pktmbuf_free(struct rte_mbuf *m)
 	}
 }
 
+/* This macro defines the size of max bulk of mbufs to free for rte_pktmbuf_free_bulk */
+#define MAX_MBUF_FREE_SIZE 32
+
+/* If RTE_LIBRTE_MBUF_DEBUG is enabled, checks if all mbufs must belong to the same mempool */
+#ifdef RTE_LIBRTE_MBUF_DEBUG
+
+#define RTE_MBUF_MEMPOOL_CHECK1(m) struct rte_mempool *first_buffers_mempool = (m) ? (m)->pool : NULL
+
+#define RTE_MBUF_MEMPOOL_CHECK2(m) RTE_MBUF_ASSERT(first_buffers_mempool == (m)->pool)
+
+#else
+
+#define RTE_MBUF_MEMPOOL_CHECK1(m)
+
+#define RTE_MBUF_MEMPOOL_CHECK2(m)
+
+#endif
+
+/**
+ * Free chained (scattered) mbuf into its original mempool.
+ *
+ * All the mbufs in the chain must belong to the same mempool.
+ *
+ * @param head
+ *   The head of mbufs to be freed chain
+ */
+
+static inline void __attribute__((always_inline))
+rte_pktmbuf_free_bulk(struct rte_mbuf *head)
+{
+    void *mbufs[MAX_MBUF_FREE_SIZE];
+    unsigned mbufs_count = 0;
+    struct rte_mbuf *next;
+
+    RTE_MBUF_MEMPOOL_CHECK1(head);
+
+    while(head) {
+        next = head->next;
+        head->next = NULL;
+        if(__rte_pktmbuf_prefree_seg(head)) {
+            RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(head) == 0);
+            RTE_MBUF_MEMPOOL_CHECK2(head);
+            mbufs[mbufs_count++] = head;
+        }
+        head = next;
+        if(mbufs_count == MAX_MBUF_FREE_SIZE) {
+            rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);
+            mbufs_count = 0;
+        }
+    }
+    if(mbufs_count > 0) {
+        rte_mempool_put_bulk(((struct rte_mbuf *)mbufs[0])->pool,mbufs,mbufs_count);
+    }
+}
+
 /**
  * Creates a "clone" of the given packet mbuf.
  *