[v3,1/4] mbuf: detach mbuf with pinned external buffer

Message ID 1578993305-15165-2-git-send-email-viacheslavo@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series mbuf: detach mbuf with pinned external buffer |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-testing fail Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-nxp-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK

Commit Message

Slava Ovsiienko Jan. 14, 2020, 9:15 a.m. UTC
  Update detach routine to check the mbuf pool type.

Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
---
 lib/librte_mbuf/rte_mbuf.h | 64 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 60 insertions(+), 4 deletions(-)
  

Comments

Olivier Matz Jan. 14, 2020, 3:27 p.m. UTC | #1
Hi Viacheslav,

Please see some comments below.

On Tue, Jan 14, 2020 at 09:15:02AM +0000, Viacheslav Ovsiienko wrote:
> Update detach routine to check the mbuf pool type.
> 
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> ---
>  lib/librte_mbuf/rte_mbuf.h | 64 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 60 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 219b110..8f486af 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -306,6 +306,46 @@ struct rte_pktmbuf_pool_private {
>  	uint32_t flags; /**< reserved for future use. */
>  };
>  
> +/**
> + * When set pktmbuf mempool will hold only mbufs with pinned external
> + * buffer. The external buffer will be attached on the mbuf at the
> + * memory pool creation and will never be detached by the mbuf free calls.
> + * mbuf should not contain any room for data after the mbuf structure.
> + */
> +#define RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF (1 << 0)

Out of curiosity, why using a pool flag instead of flagging the mbufs?
The reason I see is that adding a new m->flag would impact places where
we copy or reset the flags (it should be excluded). Is there another
reason?

> +/**
> + * Returns TRUE if given mbuf has an pinned external buffer, or FALSE
> + * otherwise. The pinned external buffer is allocated at pool creation
> + * time and should not be freed.
> + *
> + * External buffer is a user-provided anonymous buffer.
> + */
> +#ifdef ALLOW_EXPERIMENTAL_API
> +#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) rte_mbuf_has_pinned_extbuf(mb)
> +#else
> +#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) false
> +#endif

I suppose you added these lines because the compilation was broken after
introducing the new __rte_experimental API, which is called from detach().

I find a bit strange that we require to do this. I don't see what would
be broken without the ifdef: an application compiled for 19.11 cannot use
the pinned-ext-buf feature (because it did not exist), so the modification
looks safe to me.

> +
> +__rte_experimental
> +static inline uint32_t

I don't think uint32_t is really better than uint64_t. I agree with Stephen
that bool is the preferred choice, however if it breaks compilation in some
cases, I think int is better.

> +rte_mbuf_has_pinned_extbuf(const struct rte_mbuf *m)
> +{
> +	if (RTE_MBUF_HAS_EXTBUF(m)) {
> +		/*
> +		 * The mbuf has the external attached buffer,
> +		 * we should check the type of the memory pool where
> +		 * the mbuf was allocated from.
> +		 */
> +		struct rte_pktmbuf_pool_private *priv =
> +			(struct rte_pktmbuf_pool_private *)
> +				rte_mempool_get_priv(m->pool);
> +
> +		return priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF;

What about introducing a rte_pktmbuf_priv_flags() function, on the
same model than rte_pktmbuf_priv_size() or rte_pktmbuf_data_room_size()?


> +	}
> +	return 0;
> +}
> +
>  #ifdef RTE_LIBRTE_MBUF_DEBUG
>  
>  /**  check mbuf type in debug mode */
> @@ -571,7 +611,8 @@ static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
>  static __rte_always_inline void
>  rte_mbuf_raw_free(struct rte_mbuf *m)
>  {
> -	RTE_ASSERT(RTE_MBUF_DIRECT(m));
> +	RTE_ASSERT(!RTE_MBUF_CLONED(m) &&
> +		  (!RTE_MBUF_HAS_EXTBUF(m) || RTE_MBUF_HAS_PINNED_EXTBUF(m)));
>  	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
>  	RTE_ASSERT(m->next == NULL);
>  	RTE_ASSERT(m->nb_segs == 1);
> @@ -1141,11 +1182,26 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
>  	uint32_t mbuf_size, buf_len;
>  	uint16_t priv_size;
>  
> -	if (RTE_MBUF_HAS_EXTBUF(m))
> +	if (RTE_MBUF_HAS_EXTBUF(m)) {
> +		/*
> +		 * The mbuf has the external attached buffed,
> +		 * we should check the type of the memory pool where
> +		 * the mbuf was allocated from to detect the pinned
> +		 * external buffer.
> +		 */
> +		struct rte_pktmbuf_pool_private *priv =
> +			(struct rte_pktmbuf_pool_private *)
> +				rte_mempool_get_priv(mp);
> +

It could be rte_pktmbuf_priv_flags() as said above.

> +		if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) {
> +			RTE_ASSERT(m->shinfo == NULL);
> +			m->ol_flags = EXT_ATTACHED_MBUF;
> +			return;
> +		}

I think it is not possible to have m->shinfo == NULL (this comment is
also related to next patch, because shinfo init is done there). If you
try to clone a mbuf that comes from an ext-pinned pool, it will
crash. Here is the code from attach():

	static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m)
	{
	        RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
	            rte_mbuf_refcnt_read(mi) == 1);

	        if (RTE_MBUF_HAS_EXTBUF(m)) {
	                rte_mbuf_ext_refcnt_update(m->shinfo, 1); << HERE
	                mi->ol_flags = m->ol_flags;
	                mi->shinfo = m->shinfo;
		...

The 2 alternatives I see are:

- do not allow to clone these mbufs, but today there is no return value
  to attach() functions, so there is no way to know if it failed or not

- manage shinfo to support clones

I think just ignoring the rte_mbuf_ext_refcnt_update() if shinfo == NULL
is not an option, because if could result in recycling the extbuf while a
clone still references it.


>  		__rte_pktmbuf_free_extbuf(m);
> -	else
> +	} else {
>  		__rte_pktmbuf_free_direct(m);
> -
> +	}
>  	priv_size = rte_pktmbuf_priv_size(mp);
>  	mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size);
>  	buf_len = rte_pktmbuf_data_room_size(mp);
> -- 
> 1.8.3.1
>
  
Stephen Hemminger Jan. 14, 2020, 3:50 p.m. UTC | #2
On Tue, 14 Jan 2020 09:15:02 +0000
Viacheslav Ovsiienko <viacheslavo@mellanox.com> wrote:

> +/**
> + * Returns TRUE if given mbuf has an pinned external buffer, or FALSE
> + * otherwise. The pinned external buffer is allocated at pool creation
> + * time and should not be freed.
> + *
> + * External buffer is a user-provided anonymous buffer.
> + */
> +#ifdef ALLOW_EXPERIMENTAL_API
> +#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) rte_mbuf_has_pinned_extbuf(mb)
> +#else
> +#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) false
> +#endif

This is worse than just letting new code in.
If you have to use conditional compilation, then please base it off
an config value.

And make the resulting function an inline, this avoid introducing yet
another macro. MACROS ARE HARDER TO READ.

#ifdef RTE_CONFIG_MBUF_PINNED
static inline bool
rte_mbuf_has_pinned_extbuf(const struct rte_mbuf *m)
{
...
}
#else
static inline bool
rte_mbuf_has_pinned_extbuf(const struct rte_mbuf *m)
{
	return false;
}
#endif

> +__rte_experimental
> +static inline uint32_t
> +rte_mbuf_has_pinned_extbuf(const struct rte_mbuf *m)
> +{
> +	if (RTE_MBUF_HAS_EXTBUF(m)) {
> +		/*
> +		 * The mbuf has the external attached buffer,
> +		 * we should check the type of the memory pool where
> +		 * the mbuf was allocated from.
> +		 */
> +		struct rte_pktmbuf_pool_private *priv =
> +			(struct rte_pktmbuf_pool_private *)
> +				rte_mempool_get_priv(m->pool);

Since rte_mempool_get_priv() returns void *, the cast is unnecessary
in standard C. Maybe you still need it for people using rte_mbuf.h in C++
  
Slava Ovsiienko Jan. 15, 2020, 12:52 p.m. UTC | #3
> -----Original Message-----
> From: Olivier Matz <olivier.matz@6wind.com>
> Sent: Tuesday, January 14, 2020 17:27
> To: Slava Ovsiienko <viacheslavo@mellanox.com>
> Cc: dev@dpdk.org; Matan Azrad <matan@mellanox.com>; Raslan Darawsheh
> <rasland@mellanox.com>; Ori Kam <orika@mellanox.com>; Shahaf Shuler
> <shahafs@mellanox.com>; stephen@networkplumber.org
> Subject: Re: [PATCH v3 1/4] mbuf: detach mbuf with pinned external buffer
> 
> Hi Viacheslav,
> 
> Please see some comments below.
> 
> On Tue, Jan 14, 2020 at 09:15:02AM +0000, Viacheslav Ovsiienko wrote:
> > Update detach routine to check the mbuf pool type.
> >
> > Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> > Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
> > ---
> >  lib/librte_mbuf/rte_mbuf.h | 64
> > +++++++++++++++++++++++++++++++++++++++++++---
> >  1 file changed, 60 insertions(+), 4 deletions(-)
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 219b110..8f486af 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -306,6 +306,46 @@ struct rte_pktmbuf_pool_private {
> >  	uint32_t flags; /**< reserved for future use. */  };
> >
> > +/**
> > + * When set pktmbuf mempool will hold only mbufs with pinned external
> > + * buffer. The external buffer will be attached on the mbuf at the
> > + * memory pool creation and will never be detached by the mbuf free calls.
> > + * mbuf should not contain any room for data after the mbuf structure.
> > + */
> > +#define RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF (1 << 0)
> 
> Out of curiosity, why using a pool flag instead of flagging the mbufs?
> The reason I see is that adding a new m->flag would impact places where we
> copy or reset the flags (it should be excluded). Is there another reason?
> 
Can we introduce the new static flag for mbuf?
Yes, there is some problem - there are many places in DPDK where the flags
in new allocated mbufs are disregarded (ol_flags field is just set to zero).
So, any flag set on allocation (even static, dynamic one is not possible to handle at all)
would get lost. We could fix it in new application (this feature is addressed to the
new ones) and PMDs supporting this, the question is whether we are allowed to
define the new mbuf static (in meaning not dynamic) flag.

> > +/**
> > + * Returns TRUE if given mbuf has an pinned external buffer, or FALSE
> > + * otherwise. The pinned external buffer is allocated at pool
> > +creation
> > + * time and should not be freed.
> > + *
> > + * External buffer is a user-provided anonymous buffer.
> > + */
> > +#ifdef ALLOW_EXPERIMENTAL_API
> > +#define RTE_MBUF_HAS_PINNED_EXTBUF(mb)
> rte_mbuf_has_pinned_extbuf(mb)
> > +#else #define RTE_MBUF_HAS_PINNED_EXTBUF(mb) false #endif
> 
> I suppose you added these lines because the compilation was broken after
> introducing the new __rte_experimental API, which is called from detach().
> 
> I find a bit strange that we require to do this. I don't see what would be
> broken without the ifdef: an application compiled for 19.11 cannot use the
> pinned-ext-buf feature (because it did not exist), so the modification looks
> safe to me.
Without ifdef compilation fails if there is no experimental API usage configured.

> 
> > +
> > +__rte_experimental
> > +static inline uint32_t
> 
> I don't think uint32_t is really better than uint64_t. I agree with Stephen that
> bool is the preferred choice, however if it breaks compilation in some cases, I
> think int is better.
Yes, bool causes compilation issues (just including stdbool.h causes build failure),
and, for example bool  is reserved gcc keyword if AltiVec support is enabled.
uint32_t is the type of priv->flags. 

> 
> > +rte_mbuf_has_pinned_extbuf(const struct rte_mbuf *m) {
> > +	if (RTE_MBUF_HAS_EXTBUF(m)) {
> > +		/*
> > +		 * The mbuf has the external attached buffer,
> > +		 * we should check the type of the memory pool where
> > +		 * the mbuf was allocated from.
> > +		 */
> > +		struct rte_pktmbuf_pool_private *priv =
> > +			(struct rte_pktmbuf_pool_private *)
> > +				rte_mempool_get_priv(m->pool);
> > +
> > +		return priv->flags &
> RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF;
> 
> What about introducing a rte_pktmbuf_priv_flags() function, on the same
> model than rte_pktmbuf_priv_size() or rte_pktmbuf_data_room_size()?

Nice idea, thanks. I think this routine can be not experimental and we would
get rid of ifdef and other stuff.

> 
> 
> > +	}
> > +	return 0;
> > +}
> > +
> >  #ifdef RTE_LIBRTE_MBUF_DEBUG
> >
> >  /**  check mbuf type in debug mode */ @@ -571,7 +611,8 @@ static
> > inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
> > static __rte_always_inline void  rte_mbuf_raw_free(struct rte_mbuf *m)
> > {
> > -	RTE_ASSERT(RTE_MBUF_DIRECT(m));
> > +	RTE_ASSERT(!RTE_MBUF_CLONED(m) &&
> > +		  (!RTE_MBUF_HAS_EXTBUF(m) ||
> RTE_MBUF_HAS_PINNED_EXTBUF(m)));
> >  	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
> >  	RTE_ASSERT(m->next == NULL);
> >  	RTE_ASSERT(m->nb_segs == 1);
> > @@ -1141,11 +1182,26 @@ static inline void rte_pktmbuf_detach(struct
> rte_mbuf *m)
> >  	uint32_t mbuf_size, buf_len;
> >  	uint16_t priv_size;
> >
> > -	if (RTE_MBUF_HAS_EXTBUF(m))
> > +	if (RTE_MBUF_HAS_EXTBUF(m)) {
> > +		/*
> > +		 * The mbuf has the external attached buffed,
> > +		 * we should check the type of the memory pool where
> > +		 * the mbuf was allocated from to detect the pinned
> > +		 * external buffer.
> > +		 */
> > +		struct rte_pktmbuf_pool_private *priv =
> > +			(struct rte_pktmbuf_pool_private *)
> > +				rte_mempool_get_priv(mp);
> > +
> 
> It could be rte_pktmbuf_priv_flags() as said above.
> 
> > +		if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) {
> > +			RTE_ASSERT(m->shinfo == NULL);
> > +			m->ol_flags = EXT_ATTACHED_MBUF;
> > +			return;
> > +		}
> 
> I think it is not possible to have m->shinfo == NULL (this comment is also
> related to next patch, because shinfo init is done there). If you try to clone a
> mbuf that comes from an ext-pinned pool, it will crash. Here is the code from
> attach():
> 
> 	static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct
> rte_mbuf *m)
> 	{
> 	        RTE_ASSERT(RTE_MBUF_DIRECT(mi) &&
> 	            rte_mbuf_refcnt_read(mi) == 1);
> 
> 	        if (RTE_MBUF_HAS_EXTBUF(m)) {
> 	                rte_mbuf_ext_refcnt_update(m->shinfo, 1); << HERE
> 	                mi->ol_flags = m->ol_flags;
> 	                mi->shinfo = m->shinfo;
> 		...
> 
> The 2 alternatives I see are:
> 
> - do not allow to clone these mbufs, but today there is no return value
>   to attach() functions, so there is no way to know if it failed or not
> 
> - manage shinfo to support clones
> 
> I think just ignoring the rte_mbuf_ext_refcnt_update() if shinfo == NULL is not
> an option, because if could result in recycling the extbuf while a clone still
> references it.
> 
> 
The clone for the mbufs with pinned buffers is not supposed at all, this was
chosen as development precondition. We can't touch the buf_adr/iova_addr fields in mbuf,
because there is no way to restore these pointers (nomore fixed offset between mbuf and data  buffer),
so rte_mbuf_detach() would be not operational at all.

Also, we can't deduce the mbuf base address from data buffer address. 
We could add RTE_ASSERT to prevent attaching (to) mbufs with pinned data, what do
you think?

> >  		__rte_pktmbuf_free_extbuf(m);
> > -	else
> > +	} else {
> >  		__rte_pktmbuf_free_direct(m);
> > -
> > +	}
> >  	priv_size = rte_pktmbuf_priv_size(mp);
> >  	mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size);
> >  	buf_len = rte_pktmbuf_data_room_size(mp);
> > --
> > 1.8.3.1
> >
  

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 219b110..8f486af 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -306,6 +306,46 @@  struct rte_pktmbuf_pool_private {
 	uint32_t flags; /**< reserved for future use. */
 };
 
+/**
+ * When set pktmbuf mempool will hold only mbufs with pinned external
+ * buffer. The external buffer will be attached on the mbuf at the
+ * memory pool creation and will never be detached by the mbuf free calls.
+ * mbuf should not contain any room for data after the mbuf structure.
+ */
+#define RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF (1 << 0)
+
+/**
+ * Returns TRUE if given mbuf has an pinned external buffer, or FALSE
+ * otherwise. The pinned external buffer is allocated at pool creation
+ * time and should not be freed.
+ *
+ * External buffer is a user-provided anonymous buffer.
+ */
+#ifdef ALLOW_EXPERIMENTAL_API
+#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) rte_mbuf_has_pinned_extbuf(mb)
+#else
+#define RTE_MBUF_HAS_PINNED_EXTBUF(mb) false
+#endif
+
+__rte_experimental
+static inline uint32_t
+rte_mbuf_has_pinned_extbuf(const struct rte_mbuf *m)
+{
+	if (RTE_MBUF_HAS_EXTBUF(m)) {
+		/*
+		 * The mbuf has the external attached buffer,
+		 * we should check the type of the memory pool where
+		 * the mbuf was allocated from.
+		 */
+		struct rte_pktmbuf_pool_private *priv =
+			(struct rte_pktmbuf_pool_private *)
+				rte_mempool_get_priv(m->pool);
+
+		return priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF;
+	}
+	return 0;
+}
+
 #ifdef RTE_LIBRTE_MBUF_DEBUG
 
 /**  check mbuf type in debug mode */
@@ -571,7 +611,8 @@  static inline struct rte_mbuf *rte_mbuf_raw_alloc(struct rte_mempool *mp)
 static __rte_always_inline void
 rte_mbuf_raw_free(struct rte_mbuf *m)
 {
-	RTE_ASSERT(RTE_MBUF_DIRECT(m));
+	RTE_ASSERT(!RTE_MBUF_CLONED(m) &&
+		  (!RTE_MBUF_HAS_EXTBUF(m) || RTE_MBUF_HAS_PINNED_EXTBUF(m)));
 	RTE_ASSERT(rte_mbuf_refcnt_read(m) == 1);
 	RTE_ASSERT(m->next == NULL);
 	RTE_ASSERT(m->nb_segs == 1);
@@ -1141,11 +1182,26 @@  static inline void rte_pktmbuf_detach(struct rte_mbuf *m)
 	uint32_t mbuf_size, buf_len;
 	uint16_t priv_size;
 
-	if (RTE_MBUF_HAS_EXTBUF(m))
+	if (RTE_MBUF_HAS_EXTBUF(m)) {
+		/*
+		 * The mbuf has the external attached buffed,
+		 * we should check the type of the memory pool where
+		 * the mbuf was allocated from to detect the pinned
+		 * external buffer.
+		 */
+		struct rte_pktmbuf_pool_private *priv =
+			(struct rte_pktmbuf_pool_private *)
+				rte_mempool_get_priv(mp);
+
+		if (priv->flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) {
+			RTE_ASSERT(m->shinfo == NULL);
+			m->ol_flags = EXT_ATTACHED_MBUF;
+			return;
+		}
 		__rte_pktmbuf_free_extbuf(m);
-	else
+	} else {
 		__rte_pktmbuf_free_direct(m);
-
+	}
 	priv_size = rte_pktmbuf_priv_size(mp);
 	mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size);
 	buf_len = rte_pktmbuf_data_room_size(mp);