[v2] mempool: fix rte_mempool_avail_count may segment fault when used in multiprocess

Message ID 20221115123502.12560-1-changfengnan@bytedance.com (mailing list archive)
State Changes Requested, archived
Delegated to: David Marchand
Headers
Series [v2] mempool: fix rte_mempool_avail_count may segment fault when used in multiprocess |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS

Commit Message

Fengnan Chang Nov. 15, 2022, 12:35 p.m. UTC
  rte_mempool_create puts the tailq entry into the rte_mempool_tailq list
before populate, while pool_data is only set during populate. So in a
multi-process setup, if process A creates a mempool, process B can get that
mempool through rte_mempool_lookup before pool_data is set; if B then calls
rte_mempool_avail_count, it will cause a segmentation fault.

Fix this by putting the tailq entry into rte_mempool_tailq after populate.

Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
---
 lib/mempool/rte_mempool.c | 43 ++++++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 19 deletions(-)
  

Comments

Olivier Matz Nov. 22, 2022, 3:24 p.m. UTC | #1
Hi,

On Tue, Nov 15, 2022 at 08:35:02PM +0800, Fengnan Chang wrote:
> rte_mempool_create put tailq entry into rte_mempool_tailq list before
> populate, and pool_data set when populate. So in multi process, if
> process A create mempool, and process B can get mempool through
> rte_mempool_lookup before pool_data set, if B call rte_mempool_avail_count,
> it will cause segment fault.
> 
> Fix this by put tailq entry into rte_mempool_tailq after populate.
> 
> Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> ---
>  lib/mempool/rte_mempool.c | 43 ++++++++++++++++++++++-----------------
>  1 file changed, 24 insertions(+), 19 deletions(-)
> 
> diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
> index 4c78071a34..b3a6572fc8 100644
> --- a/lib/mempool/rte_mempool.c
> +++ b/lib/mempool/rte_mempool.c
> @@ -155,6 +155,27 @@ get_min_page_size(int socket_id)
>  	return wa.min == SIZE_MAX ? (size_t) rte_mem_page_size() : wa.min;
>  }
>  
> +static int
> +add_mempool_to_list(struct rte_mempool *mp)
> +{
> +	struct rte_mempool_list *mempool_list;
> +	struct rte_tailq_entry *te = NULL;
> +
> +	/* try to allocate tailq entry */
> +	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
> +	if (te == NULL) {
> +		RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
> +		return -ENOMEM;
> +	}
> +
> +	te->data = mp;
> +	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
> +	rte_mcfg_tailq_write_lock();
> +	TAILQ_INSERT_TAIL(mempool_list, te, next);
> +	rte_mcfg_tailq_write_unlock();
> +
> +	return 0;
> +}
>  
>  static void
>  mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
> @@ -304,6 +325,9 @@ mempool_ops_alloc_once(struct rte_mempool *mp)
>  		if (ret != 0)
>  			return ret;
>  		mp->flags |= RTE_MEMPOOL_F_POOL_CREATED;
> +		ret = add_mempool_to_list(mp);
> +		if (ret != 0)
> +			return ret;

One issue here is that if the rte_zmalloc("MEMPOOL_TAILQ_ENTRY") fails,
the function will fail, but rte_mempool_ops_alloc() may already be
successful.

I agree it's theoretical, because an allocation failure would cause more
issues in the end. But, to be rigorous, I think we should do something
like this instead (not tested, just for the idea):

	/*
	 * Allocate the driver pool data and insert the mempool into the
	 * rte_mempool_tailq list, at most once per mempool.
	 *
	 * The tailq entry is allocated before calling rte_mempool_ops_alloc(),
	 * so a failed entry allocation cannot leave the driver ops allocated
	 * while this function reports an error.
	 *
	 * Returns 0 on success or when RTE_MEMPOOL_F_POOL_CREATED is already
	 * set, -ENOMEM when the tailq entry cannot be allocated, or the
	 * non-zero value returned by rte_mempool_ops_alloc().
	 */
	static int
	mempool_ops_alloc_once(struct rte_mempool *mp)
	{
		struct rte_mempool_list *mempool_list;
		struct rte_tailq_entry *te = NULL;
		int ret;

		/* only create the driver ops and add to the tailq if not already done */
		if ((mp->flags & RTE_MEMPOOL_F_POOL_CREATED))
			return 0;

		te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
		if (te == NULL) {
			RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
			/*
			 * Use an explicit error code: rte_zmalloc() is not
			 * documented to set rte_errno, so -rte_errno could be
			 * 0 (reported as success) or a stale value.
			 */
			ret = -ENOMEM;
			goto fail;
		}
		te->data = mp;
		mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);

		ret = rte_mempool_ops_alloc(mp);
		if (ret != 0)
			goto fail;

		/* mark the pool as created before it becomes visible in the tailq */
		mp->flags |= RTE_MEMPOOL_F_POOL_CREATED;
		rte_mcfg_tailq_write_lock();
		TAILQ_INSERT_TAIL(mempool_list, te, next);
		rte_mcfg_tailq_write_unlock();

		return 0;

	fail:
		/* te may be NULL here; rte_free(NULL) is a no-op */
		rte_free(te);
		return ret;
	}


Thinking a bit more about the problem itself: the segfault that you
describe could also happen in a primary, without multi-process:
- create an empty mempool
- call rte_mempool_avail_count() before it is populated

This simply means that an empty mempool is not ready for use, until
rte_mempool_set_ops_byname() or rte_mempool_populate*() is called. This
is something that we should document above the declaration of
rte_mempool_create_empty(). We could also say there that the mempool
will become visible to the secondary processes as soon as the driver ops
are set.

However I still believe that a better synchronization point is required
in the application. After all, the presence in the TAILQ does not give
any hint on the status of the object. Can we imagine a case where a
mempool is created empty in a primary, and populated in a secondary? If
such a use case exists, we may not want to take this patch.

>  	}
>  	return 0;
>  }
> @@ -798,9 +822,7 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
>  	int socket_id, unsigned flags)
>  {
>  	char mz_name[RTE_MEMZONE_NAMESIZE];
> -	struct rte_mempool_list *mempool_list;
>  	struct rte_mempool *mp = NULL;
> -	struct rte_tailq_entry *te = NULL;
>  	const struct rte_memzone *mz = NULL;
>  	size_t mempool_size;
>  	unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
> @@ -820,8 +842,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
>  			  RTE_CACHE_LINE_MASK) != 0);
>  #endif
>  
> -	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
> -
>  	/* asked for zero items */
>  	if (n == 0) {
>  		rte_errno = EINVAL;
> @@ -866,14 +886,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
>  	private_data_size = (private_data_size +
>  			     RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);
>  
> -
> -	/* try to allocate tailq entry */
> -	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
> -	if (te == NULL) {
> -		RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
> -		goto exit_unlock;
> -	}
> -
>  	mempool_size = RTE_MEMPOOL_HEADER_SIZE(mp, cache_size);
>  	mempool_size += private_data_size;
>  	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> @@ -923,20 +935,13 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
>  					   cache_size);
>  	}
>  
> -	te->data = mp;
> -
> -	rte_mcfg_tailq_write_lock();
> -	TAILQ_INSERT_TAIL(mempool_list, te, next);
> -	rte_mcfg_tailq_write_unlock();
>  	rte_mcfg_mempool_write_unlock();
> -
>  	rte_mempool_trace_create_empty(name, n, elt_size, cache_size,
>  		private_data_size, flags, mp);
>  	return mp;
>  
>  exit_unlock:
>  	rte_mcfg_mempool_write_unlock();
> -	rte_free(te);
>  	rte_mempool_free(mp);
>  	return NULL;
>  }
> -- 
> 2.37.0 (Apple Git-136)
>
  
Fengnan Chang Nov. 29, 2022, 9:57 a.m. UTC | #2
Olivier Matz <olivier.matz@6wind.com> 于2022年11月22日周二 23:25写道:
>
> Hi,
>
> On Tue, Nov 15, 2022 at 08:35:02PM +0800, Fengnan Chang wrote:
> > rte_mempool_create put tailq entry into rte_mempool_tailq list before
> > populate, and pool_data set when populate. So in multi process, if
> > process A create mempool, and process B can get mempool through
> > rte_mempool_lookup before pool_data set, if B call rte_mempool_avail_count,
> > it will cause segment fault.
> >
> > Fix this by put tailq entry into rte_mempool_tailq after populate.
> >
> > Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>
> > ---
> >  lib/mempool/rte_mempool.c | 43 ++++++++++++++++++++++-----------------
> >  1 file changed, 24 insertions(+), 19 deletions(-)
> >
> > diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
> > index 4c78071a34..b3a6572fc8 100644
> > --- a/lib/mempool/rte_mempool.c
> > +++ b/lib/mempool/rte_mempool.c
> > @@ -155,6 +155,27 @@ get_min_page_size(int socket_id)
> >       return wa.min == SIZE_MAX ? (size_t) rte_mem_page_size() : wa.min;
> >  }
> >
> > +static int
> > +add_mempool_to_list(struct rte_mempool *mp)
> > +{
> > +     struct rte_mempool_list *mempool_list;
> > +     struct rte_tailq_entry *te = NULL;
> > +
> > +     /* try to allocate tailq entry */
> > +     te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
> > +     if (te == NULL) {
> > +             RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
> > +             return -ENOMEM;
> > +     }
> > +
> > +     te->data = mp;
> > +     mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
> > +     rte_mcfg_tailq_write_lock();
> > +     TAILQ_INSERT_TAIL(mempool_list, te, next);
> > +     rte_mcfg_tailq_write_unlock();
> > +
> > +     return 0;
> > +}
> >
> >  static void
> >  mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
> > @@ -304,6 +325,9 @@ mempool_ops_alloc_once(struct rte_mempool *mp)
> >               if (ret != 0)
> >                       return ret;
> >               mp->flags |= RTE_MEMPOOL_F_POOL_CREATED;
> > +             ret = add_mempool_to_list(mp);
> > +             if (ret != 0)
> > +                     return ret;
>
> One issue here is that if the rte_zmalloc("MEMPOOL_TAILQ_ENTRY") fails,
> the function will fail, but rte_mempool_ops_alloc() may already be
> successful.
>
> I agree it's theorical, because an allocation failure would cause more
> issues at the end. But, to be rigorous, I think we should do something
> like this instead (not tested, just for the idea):
>
>         static int
>         mempool_ops_alloc_once(struct rte_mempool *mp)
>         {
>                 struct rte_mempool_list *mempool_list;
>                 struct rte_tailq_entry *te = NULL;
>                 int ret;
>
>                 /* only create the driver ops and add in tailq in if not already done */
>                 if ((mp->flags & RTE_MEMPOOL_F_POOL_CREATED))
>                         return 0;
>
>                 te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
>                 if (te == NULL) {
>                         RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
>                         ret = -rte_errno;
>                         goto fail;
>                 }
>                 te->data = mp;
>                 mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
>
>                 ret = rte_mempool_ops_alloc(mp);
>                 if (ret != 0)
>                         goto fail;
>
>                 mp->flags |= RTE_MEMPOOL_F_POOL_CREATED;
>                 rte_mcfg_tailq_write_lock();
>                 TAILQ_INSERT_TAIL(mempool_list, te, next);
>                 rte_mcfg_tailq_write_unlock();
>
>                 return 0;
>
>         fail:
>                 rte_free(te);
>                 return ret;
>         }
>
>
> Thinking a bit more about the problem itself: the segfault that you
> describe could also happen in a primary, without multi-process:
> - create an empty mempool
> - call rte_mempool_avail_count() before it is populated
>
> This simply means that an empty mempool is not ready for use, until
> rte_mempool_set_ops_byname() or rte_mempool_populate*() is called. This
> is something that we should document above the declaration of
> rte_mempool_create_empty(). We could also say there that the mempool
> will become visible to the secondary processes as soon as the driver ops
> are set.
>
> However I still believe that a better synchronization point is required
> in the application. After all, the presence in the TAILQ does not give
> any hint on the status of the object. Can we imagine a case where a
> mempool is created empty in a primary, and populated in a secondary? If
> such use-case exist, we may not want to take this patch.

Maybe there is a case like you said. Do you think adding a check of the mempool
flags in rte_mempool_avail_count is acceptable?
If RTE_MEMPOOL_F_POOL_CREATED is not set, just return 0.

>
> >       }
> >       return 0;
> >  }
> > @@ -798,9 +822,7 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
> >       int socket_id, unsigned flags)
> >  {
> >       char mz_name[RTE_MEMZONE_NAMESIZE];
> > -     struct rte_mempool_list *mempool_list;
> >       struct rte_mempool *mp = NULL;
> > -     struct rte_tailq_entry *te = NULL;
> >       const struct rte_memzone *mz = NULL;
> >       size_t mempool_size;
> >       unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
> > @@ -820,8 +842,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
> >                         RTE_CACHE_LINE_MASK) != 0);
> >  #endif
> >
> > -     mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
> > -
> >       /* asked for zero items */
> >       if (n == 0) {
> >               rte_errno = EINVAL;
> > @@ -866,14 +886,6 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
> >       private_data_size = (private_data_size +
> >                            RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);
> >
> > -
> > -     /* try to allocate tailq entry */
> > -     te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
> > -     if (te == NULL) {
> > -             RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
> > -             goto exit_unlock;
> > -     }
> > -
> >       mempool_size = RTE_MEMPOOL_HEADER_SIZE(mp, cache_size);
> >       mempool_size += private_data_size;
> >       mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
> > @@ -923,20 +935,13 @@ rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
> >                                          cache_size);
> >       }
> >
> > -     te->data = mp;
> > -
> > -     rte_mcfg_tailq_write_lock();
> > -     TAILQ_INSERT_TAIL(mempool_list, te, next);
> > -     rte_mcfg_tailq_write_unlock();
> >       rte_mcfg_mempool_write_unlock();
> > -
> >       rte_mempool_trace_create_empty(name, n, elt_size, cache_size,
> >               private_data_size, flags, mp);
> >       return mp;
> >
> >  exit_unlock:
> >       rte_mcfg_mempool_write_unlock();
> > -     rte_free(te);
> >       rte_mempool_free(mp);
> >       return NULL;
> >  }
> > --
> > 2.37.0 (Apple Git-136)
> >
  
Stephen Hemminger July 17, 2023, 4:43 p.m. UTC | #3
On Tue, 29 Nov 2022 17:57:05 +0800
Fengnan Chang <changfengnan@bytedance.com> wrote:

> Olivier Matz <olivier.matz@6wind.com> 于2022年11月22日周二 23:25写道:
> >
> > Hi,
> >
> > On Tue, Nov 15, 2022 at 08:35:02PM +0800, Fengnan Chang wrote:  
> > > rte_mempool_create put tailq entry into rte_mempool_tailq list before
> > > populate, and pool_data set when populate. So in multi process, if
> > > process A create mempool, and process B can get mempool through
> > > rte_mempool_lookup before pool_data set, if B call rte_mempool_avail_count,
> > > it will cause segment fault.
> > >
> > > Fix this by put tailq entry into rte_mempool_tailq after populate.
> > >
> > > Signed-off-by: Fengnan Chang <changfengnan@bytedance.com>

Why not just handle this in rte_mempool_avail_count?  It would be much simpler there.


diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
index 4d337fca8dcd..14855e21801f 100644
--- a/lib/mempool/rte_mempool.c
+++ b/lib/mempool/rte_mempool.c
@@ -1006,6 +1006,10 @@ rte_mempool_avail_count(const struct rte_mempool *mp)
        unsigned count;
        unsigned lcore_id;
 
+       /* Handle race where pool created but ops not allocated yet */
+       if (!(mp->flags & RTE_MEMPOOL_F_POOL_CREATED))
+               return 0;
+
        count = rte_mempool_ops_get_count(mp);
 
        if (mp->cache_size == 0)
  

Patch

diff --git a/lib/mempool/rte_mempool.c b/lib/mempool/rte_mempool.c
index 4c78071a34..b3a6572fc8 100644
--- a/lib/mempool/rte_mempool.c
+++ b/lib/mempool/rte_mempool.c
@@ -155,6 +155,27 @@  get_min_page_size(int socket_id)
 	return wa.min == SIZE_MAX ? (size_t) rte_mem_page_size() : wa.min;
 }
 
+static int
+add_mempool_to_list(struct rte_mempool *mp)
+{
+	struct rte_mempool_list *mempool_list;
+	struct rte_tailq_entry *te = NULL;
+
+	/* try to allocate tailq entry */
+	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
+		return -ENOMEM;
+	}
+
+	te->data = mp;
+	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
+	rte_mcfg_tailq_write_lock();
+	TAILQ_INSERT_TAIL(mempool_list, te, next);
+	rte_mcfg_tailq_write_unlock();
+
+	return 0;
+}
 
 static void
 mempool_add_elem(struct rte_mempool *mp, __rte_unused void *opaque,
@@ -304,6 +325,9 @@  mempool_ops_alloc_once(struct rte_mempool *mp)
 		if (ret != 0)
 			return ret;
 		mp->flags |= RTE_MEMPOOL_F_POOL_CREATED;
+		ret = add_mempool_to_list(mp);
+		if (ret != 0)
+			return ret;
 	}
 	return 0;
 }
@@ -798,9 +822,7 @@  rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
 	int socket_id, unsigned flags)
 {
 	char mz_name[RTE_MEMZONE_NAMESIZE];
-	struct rte_mempool_list *mempool_list;
 	struct rte_mempool *mp = NULL;
-	struct rte_tailq_entry *te = NULL;
 	const struct rte_memzone *mz = NULL;
 	size_t mempool_size;
 	unsigned int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY;
@@ -820,8 +842,6 @@  rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
 			  RTE_CACHE_LINE_MASK) != 0);
 #endif
 
-	mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list);
-
 	/* asked for zero items */
 	if (n == 0) {
 		rte_errno = EINVAL;
@@ -866,14 +886,6 @@  rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
 	private_data_size = (private_data_size +
 			     RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK);
 
-
-	/* try to allocate tailq entry */
-	te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0);
-	if (te == NULL) {
-		RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n");
-		goto exit_unlock;
-	}
-
 	mempool_size = RTE_MEMPOOL_HEADER_SIZE(mp, cache_size);
 	mempool_size += private_data_size;
 	mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN);
@@ -923,20 +935,13 @@  rte_mempool_create_empty(const char *name, unsigned n, unsigned elt_size,
 					   cache_size);
 	}
 
-	te->data = mp;
-
-	rte_mcfg_tailq_write_lock();
-	TAILQ_INSERT_TAIL(mempool_list, te, next);
-	rte_mcfg_tailq_write_unlock();
 	rte_mcfg_mempool_write_unlock();
-
 	rte_mempool_trace_create_empty(name, n, elt_size, cache_size,
 		private_data_size, flags, mp);
 	return mp;
 
 exit_unlock:
 	rte_mcfg_mempool_write_unlock();
-	rte_free(te);
 	rte_mempool_free(mp);
 	return NULL;
 }