[v2] eal: fix race in ctrl thread creation

Message ID 20210325112731.16324-1-olivier.matz@6wind.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series [v2] eal: fix race in ctrl thread creation |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot success travis build: passed
ci/github-robot success github build: passed
ci/iol-abi-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Olivier Matz March 25, 2021, 11:27 a.m. UTC
  As reported by Luc, there is a race where the barrier is destroyed by
one thread, while the other thread did not yet leave
pthread_barrier_wait.

This patch fixes the race condition by adding an atomic counter to
ensure that the barrier is destroyed only it is not used by any thread.

Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
Cc: jianfeng.tan@intel.com
Cc: stable@dpdk.org

Reported-by: Luc Pelletier <lucp.at.work@gmail.com>
Signed-off-by: David Marchand <david.marchand@redhat.com>
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---

Hi Luc,

Thank you for reporting this problem and submitting the patch.
I think the issue can be fixed without any loop, like in this
patch. What do you think?

Regards,
Olivier


 lib/librte_eal/common/eal_common_thread.c | 38 +++++++++++++----------
 1 file changed, 21 insertions(+), 17 deletions(-)
  

Comments

Luc Pelletier March 25, 2021, 2:42 p.m. UTC | #1
Hi Olivier,

> Thank you for reporting this problem and submitting the patch.
> I think the issue can be fixed without any loop, like in this
> patch. What do you think?

I think getting rid of the loop is an excellent idea. Good thinking.
Your version is much cleaner.

> +       __atomic_store_n(&params->barrier_refcnt, 2, __ATOMIC_RELEASE);

I don't mean to nitpick but I don't think you need to use
__atomic_store_n to initialize the refcnt. Either way is fine of
course :)

Thanks.

Le jeu. 25 mars 2021 à 07:27, Olivier Matz <olivier.matz@6wind.com> a écrit :
>
> As reported by Luc, there is a race where the barrier is destroyed by
> one thread, while the other thread did not yet leave
> pthread_barrier_wait.
>
> This patch fixes the race condition by adding an atomic counter to
> ensure that the barrier is destroyed only it is not used by any thread.
>
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng.tan@intel.com
> Cc: stable@dpdk.org
>
> Reported-by: Luc Pelletier <lucp.at.work@gmail.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>
> Hi Luc,
>
> Thank you for reporting this problem and submitting the patch.
> I think the issue can be fixed without any loop, like in this
> patch. What do you think?
>
> Regards,
> Olivier
>
>
>  lib/librte_eal/common/eal_common_thread.c | 38 +++++++++++++----------
>  1 file changed, 21 insertions(+), 17 deletions(-)
>
> diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902a..891f825e87 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -170,11 +170,11 @@ struct rte_thread_ctrl_params {
>         void *(*start_routine)(void *);
>         void *arg;
>         pthread_barrier_t configured;
> +       unsigned int barrier_refcnt;
>  };
>
>  static void *ctrl_thread_init(void *arg)
>  {
> -       int ret;
>         struct internal_config *internal_conf =
>                 eal_get_internal_configuration();
>         rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> @@ -184,8 +184,9 @@ static void *ctrl_thread_init(void *arg)
>
>         __rte_thread_init(rte_lcore_id(), cpuset);
>
> -       ret = pthread_barrier_wait(&params->configured);
> -       if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> +       pthread_barrier_wait(&params->configured);
> +       if (__atomic_sub_fetch(&params->barrier_refcnt, 1,
> +                               __ATOMIC_ACQ_REL) == 0) {
>                 pthread_barrier_destroy(&params->configured);
>                 free(params);
>         }
> @@ -210,15 +211,17 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>
>         params->start_routine = start_routine;
>         params->arg = arg;
> -
> -       pthread_barrier_init(&params->configured, NULL, 2);
> -
> -       ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> +       __atomic_store_n(&params->barrier_refcnt, 2, __ATOMIC_RELEASE);
> +       ret = pthread_barrier_init(&params->configured, NULL, 2);
>         if (ret != 0) {
>                 free(params);
>                 return -ret;
>         }
>
> +       ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> +       if (ret != 0)
> +               goto fail;
> +
>         if (name != NULL) {
>                 ret = rte_thread_setname(*thread, name);
>                 if (ret < 0)
> @@ -227,25 +230,26 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>         }
>
>         ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> -       if (ret)
> -               goto fail;
> +       if (ret != 0)
> +               goto fail_cancel;
>
> -       ret = pthread_barrier_wait(&params->configured);
> -       if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> +       pthread_barrier_wait(&params->configured);
> +       if (__atomic_sub_fetch(&params->barrier_refcnt, 1,
> +                               __ATOMIC_ACQ_REL) == 0) {
>                 pthread_barrier_destroy(&params->configured);
>                 free(params);
>         }
>
>         return 0;
>
> -fail:
> -       if (PTHREAD_BARRIER_SERIAL_THREAD ==
> -           pthread_barrier_wait(&params->configured)) {
> -               pthread_barrier_destroy(&params->configured);
> -               free(params);
> -       }
> +fail_cancel:
>         pthread_cancel(*thread);
>         pthread_join(*thread, NULL);
> +
> +fail:
> +       pthread_barrier_destroy(&params->configured);
> +       free(params);
> +
>         return -ret;
>  }
>
> --
> 2.29.2
>
  
Honnappa Nagarahalli April 2, 2021, 4:34 a.m. UTC | #2
<snip>

> 
> As reported by Luc, there is a race where the barrier is destroyed by one
> thread, while the other thread did not yet leave pthread_barrier_wait.
Please correct me if I am wrong. We are using the pthread_barrier to

1) know when to free 'params'.
2) set the thread affinity before the thread starts running its function

I think this patch is fine. But, it can be simplified.

> 
> This patch fixes the race condition by adding an atomic counter to ensure
> that the barrier is destroyed only it is not used by any thread.
> 
> Fixes: 3a0d465d4c53 ("eal: fix use-after-free on control thread creation")
> Cc: jianfeng.tan@intel.com
> Cc: stable@dpdk.org
> 
> Reported-by: Luc Pelletier <lucp.at.work@gmail.com>
> Signed-off-by: David Marchand <david.marchand@redhat.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
> 
> Hi Luc,
> 
> Thank you for reporting this problem and submitting the patch.
> I think the issue can be fixed without any loop, like in this patch. What do
> you think?
> 
> Regards,
> Olivier
> 
> 
>  lib/librte_eal/common/eal_common_thread.c | 38 +++++++++++++----------
>  1 file changed, 21 insertions(+), 17 deletions(-)
> 
> diff --git a/lib/librte_eal/common/eal_common_thread.c
> b/lib/librte_eal/common/eal_common_thread.c
> index 73a055902a..891f825e87 100644
> --- a/lib/librte_eal/common/eal_common_thread.c
> +++ b/lib/librte_eal/common/eal_common_thread.c
> @@ -170,11 +170,11 @@ struct rte_thread_ctrl_params {
>  	void *(*start_routine)(void *);
>  	void *arg;
>  	pthread_barrier_t configured;
We can get rid of the barrier variable. The 'barrier_refcnt' is enough to know when to free the memory.
Setting the thread affinity can be moved to 'ctrl_thread_init' function.

> +	unsigned int barrier_refcnt;
>  };
> 
>  static void *ctrl_thread_init(void *arg)  {
> -	int ret;
>  	struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
>  	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset; @@ -184,8
> +184,9 @@ static void *ctrl_thread_init(void *arg)
> 
>  	__rte_thread_init(rte_lcore_id(), cpuset);
> 
> -	ret = pthread_barrier_wait(&params->configured);
> -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> +	pthread_barrier_wait(&params->configured);
> +	if (__atomic_sub_fetch(&params->barrier_refcnt, 1,
> +				__ATOMIC_ACQ_REL) == 0) {
>  		pthread_barrier_destroy(&params->configured);
>  		free(params);
>  	}
> @@ -210,15 +211,17 @@ rte_ctrl_thread_create(pthread_t *thread, const
> char *name,
> 
>  	params->start_routine = start_routine;
>  	params->arg = arg;
> -
> -	pthread_barrier_init(&params->configured, NULL, 2);
> -
> -	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> +	__atomic_store_n(&params->barrier_refcnt, 2,
> __ATOMIC_RELEASE);
We can use __ATOMIC_RELAXED or just a simple assignment as pthread_create ensures that all the earlier memory writes are visible to the 'ctrl_thread_init'.

> +	ret = pthread_barrier_init(&params->configured, NULL, 2);
>  	if (ret != 0) {
>  		free(params);
>  		return -ret;
>  	}
> 
> +	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> +	if (ret != 0)
> +		goto fail;
> +
>  	if (name != NULL) {
>  		ret = rte_thread_setname(*thread, name);
>  		if (ret < 0)
> @@ -227,25 +230,26 @@ rte_ctrl_thread_create(pthread_t *thread, const
> char *name,
>  	}
> 
>  	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
I think this can be moved to 'ctrl_thread_init'.

> -	if (ret)
> -		goto fail;
> +	if (ret != 0)
> +		goto fail_cancel;
> 
> -	ret = pthread_barrier_wait(&params->configured);
> -	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
> +	pthread_barrier_wait(&params->configured);
> +	if (__atomic_sub_fetch(&params->barrier_refcnt, 1,
> +				__ATOMIC_ACQ_REL) == 0) {
>  		pthread_barrier_destroy(&params->configured);
>  		free(params);
>  	}
> 
>  	return 0;
> 
> -fail:
> -	if (PTHREAD_BARRIER_SERIAL_THREAD ==
> -	    pthread_barrier_wait(&params->configured)) {
> -		pthread_barrier_destroy(&params->configured);
> -		free(params);
> -	}
> +fail_cancel:
>  	pthread_cancel(*thread);
>  	pthread_join(*thread, NULL);
> +
> +fail:
> +	pthread_barrier_destroy(&params->configured);
> +	free(params);
> +
>  	return -ret;
>  }
> 
> --
> 2.29.2
  

Patch

diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 73a055902a..891f825e87 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -170,11 +170,11 @@  struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
 	pthread_barrier_t configured;
+	unsigned int barrier_refcnt;
 };
 
 static void *ctrl_thread_init(void *arg)
 {
-	int ret;
 	struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
@@ -184,8 +184,9 @@  static void *ctrl_thread_init(void *arg)
 
 	__rte_thread_init(rte_lcore_id(), cpuset);
 
-	ret = pthread_barrier_wait(&params->configured);
-	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+	pthread_barrier_wait(&params->configured);
+	if (__atomic_sub_fetch(&params->barrier_refcnt, 1,
+				__ATOMIC_ACQ_REL) == 0) {
 		pthread_barrier_destroy(&params->configured);
 		free(params);
 	}
@@ -210,15 +211,17 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	params->start_routine = start_routine;
 	params->arg = arg;
-
-	pthread_barrier_init(&params->configured, NULL, 2);
-
-	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
+	__atomic_store_n(&params->barrier_refcnt, 2, __ATOMIC_RELEASE);
+	ret = pthread_barrier_init(&params->configured, NULL, 2);
 	if (ret != 0) {
 		free(params);
 		return -ret;
 	}
 
+	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
+	if (ret != 0)
+		goto fail;
+
 	if (name != NULL) {
 		ret = rte_thread_setname(*thread, name);
 		if (ret < 0)
@@ -227,25 +230,26 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 	}
 
 	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
-	if (ret)
-		goto fail;
+	if (ret != 0)
+		goto fail_cancel;
 
-	ret = pthread_barrier_wait(&params->configured);
-	if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+	pthread_barrier_wait(&params->configured);
+	if (__atomic_sub_fetch(&params->barrier_refcnt, 1,
+				__ATOMIC_ACQ_REL) == 0) {
 		pthread_barrier_destroy(&params->configured);
 		free(params);
 	}
 
 	return 0;
 
-fail:
-	if (PTHREAD_BARRIER_SERIAL_THREAD ==
-	    pthread_barrier_wait(&params->configured)) {
-		pthread_barrier_destroy(&params->configured);
-		free(params);
-	}
+fail_cancel:
 	pthread_cancel(*thread);
 	pthread_join(*thread, NULL);
+
+fail:
+	pthread_barrier_destroy(&params->configured);
+	free(params);
+
 	return -ret;
 }