diff mbox series

[RFC,v2] eal: simplify the implementation of rte_ctrl_thread_create

Message ID 20210802051652.3611-1-honnappa.nagarahalli@arm.com (mailing list archive)
State Superseded
Delegated to: David Marchand
Headers show
Series [RFC,v2] eal: simplify the implementation of rte_ctrl_thread_create | expand

Checks

Context Check Description
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Honnappa Nagarahalli Aug. 2, 2021, 5:16 a.m. UTC
The current described behaviour of rte_ctrl_thread_create is
rigid which makes the implementation of the function complex.
The behavior is abstracted to allow for simplified implementation.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
v2: Used compiler's C++11 atomic built-ins to access the shared variable.

 lib/eal/common/eal_common_thread.c | 65 +++++++++++++-----------------
 lib/eal/include/rte_lcore.h        |  8 ++--
 2 files changed, 32 insertions(+), 41 deletions(-)

Comments

Olivier Matz Aug. 23, 2021, 10:16 a.m. UTC | #1
Hi Honnappa,

On Mon, Aug 02, 2021 at 12:16:52AM -0500, Honnappa Nagarahalli wrote:
> The current described behaviour of rte_ctrl_thread_create is
> rigid which makes the implementation of the function complex.
> The behavior is abstracted to allow for simplified implementation.

I agree that the behavior description should not reference the pthread
functions, however I don't think the current description prevents from
rewriting the code as you do.

I think it would be better to explain in commit log why the proposed
code is simpler than the current one.

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
> v2: Used compiler's C++11 atomic built-ins to access the shared variable.
> 
>  lib/eal/common/eal_common_thread.c | 65 +++++++++++++-----------------
>  lib/eal/include/rte_lcore.h        |  8 ++--
>  2 files changed, 32 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
> index 1a52f42a2b..e3e0bf4bff 100644
> --- a/lib/eal/common/eal_common_thread.c
> +++ b/lib/eal/common/eal_common_thread.c
> @@ -169,35 +169,35 @@ __rte_thread_uninit(void)
>  struct rte_thread_ctrl_params {
>  	void *(*start_routine)(void *);
>  	void *arg;
> -	pthread_barrier_t configured;
> -	unsigned int refcnt;
> +	int ret;
> +	/* Synchronization variable between the control thread
> +	 * and the thread calling rte_ctrl_thread_create function.
> +	 * 0 - Initialized
> +	 * 1 - Control thread is running successfully
> +	 * 2 - Control thread encountered an error. 'ret' has the
> +	 *     error code.
> +	 */
> +	unsigned int sync;

what do you think about using an enum or defines?

>  };
>  
> -static void ctrl_params_free(struct rte_thread_ctrl_params *params)
> -{
> -	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
> -		(void)pthread_barrier_destroy(&params->configured);
> -		free(params);
> -	}
> -}
> -
>  static void *ctrl_thread_init(void *arg)
>  {
>  	struct internal_config *internal_conf =
>  		eal_get_internal_configuration();
>  	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
>  	struct rte_thread_ctrl_params *params = arg;
> -	void *(*start_routine)(void *);
> +	void *(*start_routine)(void *) = params->start_routine;
>  	void *routine_arg = params->arg;
>  
>  	__rte_thread_init(rte_lcore_id(), cpuset);
> -
> -	pthread_barrier_wait(&params->configured);
> -	start_routine = params->start_routine;
> -	ctrl_params_free(params);
> -
> -	if (start_routine == NULL)
> +	params->ret = pthread_setaffinity_np(pthread_self(),
> +				sizeof(*cpuset), cpuset);
> +	if (params->ret != 0) {
> +		__atomic_store_n(&params->sync, 2, __ATOMIC_RELEASE);
>  		return NULL;
> +	}

Sorry if the question is stupid (I'm still not familiar with C++11 atomic
built-ins), but do we have the assurance that params->ret is set in memory
before params->sync is set? See below at [1].

> +
> +	__atomic_store_n(&params->sync, 1, __ATOMIC_RELEASE);
>  
>  	return start_routine(routine_arg);
>  }
> @@ -207,9 +207,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  		const pthread_attr_t *attr,
>  		void *(*start_routine)(void *), void *arg)
>  {
> -	struct internal_config *internal_conf =
> -		eal_get_internal_configuration();
> -	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
>  	struct rte_thread_ctrl_params *params;
>  	int ret;
>  
> @@ -219,15 +216,12 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  
>  	params->start_routine = start_routine;
>  	params->arg = arg;
> -	params->refcnt = 2;
> -
> -	ret = pthread_barrier_init(&params->configured, NULL, 2);
> -	if (ret != 0)
> -		goto fail_no_barrier;
> +	params->ret = 0;
> +	params->sync = 0;
>  
>  	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
>  	if (ret != 0)
> -		goto fail_with_barrier;
> +		goto thread_create_failed;
>  
>  	if (name != NULL) {
>  		ret = rte_thread_setname(*thread, name);
> @@ -236,24 +230,21 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>  				"Cannot set name for ctrl thread\n");
>  	}
>  
> -	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> -	if (ret != 0)
> -		params->start_routine = NULL;
> +	/* Wait for the control thread to initialize successfully */
> +	while (!__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE))
> +		rte_pause();

One difference between this implementation and the previous one is this
busy loop. rte_pause() relaxes the cpu, but will not make the calling
thread to sleep and wait for the sync event. So here we can spin a quite
long time until the other thread is scheduled by the OS.

> +	ret = params->ret;

[1]

Here, it is expected that when params->ret is seen as set before
param->sync.

>  
> -	pthread_barrier_wait(&params->configured);
> -	ctrl_params_free(params);
> +	free(params);
>  
> -	if (ret != 0)
> -		/* start_routine has been set to NULL above; */
> -		/* ctrl thread will exit immediately */
> +	if (__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE) != 1)

it suggest == instead of !=, like this:

  if (__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE) == CTRL_THREAD_ERR)


> +		/* ctrl thread is exiting */
>  		pthread_join(*thread, NULL);
>  
>  	return -ret;
>  
> -fail_with_barrier:
> -	(void)pthread_barrier_destroy(&params->configured);
> +thread_create_failed:
>  
> -fail_no_barrier:
>  	free(params);
>  
>  	return -ret;
> diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
> index 1550b75da0..f1cc5e38dc 100644
> --- a/lib/eal/include/rte_lcore.h
> +++ b/lib/eal/include/rte_lcore.h
> @@ -420,10 +420,10 @@ rte_thread_unregister(void);
>  /**
>   * Create a control thread.
>   *
> - * Wrapper to pthread_create(), pthread_setname_np() and
> - * pthread_setaffinity_np(). The affinity of the new thread is based
> - * on the CPU affinity retrieved at the time rte_eal_init() was called,
> - * the dataplane and service lcores are then excluded.
> + * Creates a control thread with the given name and attributes. The
> + * affinity of the new thread is based on the CPU affinity retrieved
> + * at the time rte_eal_init() was called, the dataplane and service
> + * lcores are then excluded.

The description is indeed better. Maybe it is the opportunity to
highlight that if the name cannot be set, no error is returned, see
commit 368a91d6bdc8 ("eal: ignore failure of naming a control thread")

>   *
>   * @param thread
>   *   Filled with the thread id of the new created thread.
> -- 
> 2.17.1
>
Honnappa Nagarahalli Aug. 24, 2021, 8:03 p.m. UTC | #2
<snip>

> 
> Hi Honnappa,
> 
> On Mon, Aug 02, 2021 at 12:16:52AM -0500, Honnappa Nagarahalli wrote:
> > The current described behaviour of rte_ctrl_thread_create is rigid
> > which makes the implementation of the function complex.
> > The behavior is abstracted to allow for simplified implementation.
> 
> I agree that the behavior description should not reference the pthread
> functions, however I don't think the current description prevents from rewriting
> the code as you do.
Ok

> 
> I think it would be better to explain in commit log why the proposed code is
> simpler than the current one.
> 
> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > ---
> > v2: Used compiler's C++11 atomic built-ins to access the shared variable.
> >
> >  lib/eal/common/eal_common_thread.c | 65 +++++++++++++-----------------
> >  lib/eal/include/rte_lcore.h        |  8 ++--
> >  2 files changed, 32 insertions(+), 41 deletions(-)
> >
> > diff --git a/lib/eal/common/eal_common_thread.c
> > b/lib/eal/common/eal_common_thread.c
> > index 1a52f42a2b..e3e0bf4bff 100644
> > --- a/lib/eal/common/eal_common_thread.c
> > +++ b/lib/eal/common/eal_common_thread.c
> > @@ -169,35 +169,35 @@ __rte_thread_uninit(void)  struct
> > rte_thread_ctrl_params {
> >  	void *(*start_routine)(void *);
> >  	void *arg;
> > -	pthread_barrier_t configured;
> > -	unsigned int refcnt;
> > +	int ret;
> > +	/* Synchronization variable between the control thread
> > +	 * and the thread calling rte_ctrl_thread_create function.
> > +	 * 0 - Initialized
> > +	 * 1 - Control thread is running successfully
> > +	 * 2 - Control thread encountered an error. 'ret' has the
> > +	 *     error code.
> > +	 */
> > +	unsigned int sync;
> 
> what do you think about using an enum or defines?
Will use defines

> 
> >  };
> >
> > -static void ctrl_params_free(struct rte_thread_ctrl_params *params)
> > -{
> > -	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) ==
> 0) {
> > -		(void)pthread_barrier_destroy(&params->configured);
> > -		free(params);
> > -	}
> > -}
> > -
> >  static void *ctrl_thread_init(void *arg)  {
> >  	struct internal_config *internal_conf =
> >  		eal_get_internal_configuration();
> >  	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> >  	struct rte_thread_ctrl_params *params = arg;
> > -	void *(*start_routine)(void *);
> > +	void *(*start_routine)(void *) = params->start_routine;
> >  	void *routine_arg = params->arg;
> >
> >  	__rte_thread_init(rte_lcore_id(), cpuset);
> > -
> > -	pthread_barrier_wait(&params->configured);
> > -	start_routine = params->start_routine;
> > -	ctrl_params_free(params);
> > -
> > -	if (start_routine == NULL)
> > +	params->ret = pthread_setaffinity_np(pthread_self(),
> > +				sizeof(*cpuset), cpuset);
> > +	if (params->ret != 0) {
> > +		__atomic_store_n(&params->sync, 2, __ATOMIC_RELEASE);
> >  		return NULL;
> > +	}
> 
> Sorry if the question is stupid (I'm still not familiar with C++11 atomic built-ins),
> but do we have the assurance that params->ret is set in memory before
> params->sync is set? See below at [1].
Yes, the '__ATOMIC_RELEASE' store ensures that all prior memory operations are completed before 'sync' is updated.

> 
> > +
> > +	__atomic_store_n(&params->sync, 1, __ATOMIC_RELEASE);
> >
> >  	return start_routine(routine_arg);
> >  }
> > @@ -207,9 +207,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char
> *name,
> >  		const pthread_attr_t *attr,
> >  		void *(*start_routine)(void *), void *arg)  {
> > -	struct internal_config *internal_conf =
> > -		eal_get_internal_configuration();
> > -	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
> >  	struct rte_thread_ctrl_params *params;
> >  	int ret;
> >
> > @@ -219,15 +216,12 @@ rte_ctrl_thread_create(pthread_t *thread, const
> > char *name,
> >
> >  	params->start_routine = start_routine;
> >  	params->arg = arg;
> > -	params->refcnt = 2;
> > -
> > -	ret = pthread_barrier_init(&params->configured, NULL, 2);
> > -	if (ret != 0)
> > -		goto fail_no_barrier;
> > +	params->ret = 0;
> > +	params->sync = 0;
> >
> >  	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
> >  	if (ret != 0)
> > -		goto fail_with_barrier;
> > +		goto thread_create_failed;
> >
> >  	if (name != NULL) {
> >  		ret = rte_thread_setname(*thread, name); @@ -236,24
> +230,21 @@
> > rte_ctrl_thread_create(pthread_t *thread, const char *name,
> >  				"Cannot set name for ctrl thread\n");
> >  	}
> >
> > -	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> > -	if (ret != 0)
> > -		params->start_routine = NULL;
> > +	/* Wait for the control thread to initialize successfully */
> > +	while (!__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE))
> > +		rte_pause();
> 
> One difference between this implementation and the previous one is this busy
> loop. rte_pause() relaxes the cpu, but will not make the calling thread to sleep
> and wait for the sync event. So here we can spin a quite long time until the
> other thread is scheduled by the OS.
Yes, this is a difference. We could add a microsleep to allow for the OS to un-schedule the current thread.

> 
> > +	ret = params->ret;
> 
> [1]
> 
> Here, it is expected that when params->ret is seen as set before
> param->sync.
Yes, the __ATOMIC_ACQUIRE load ensures that the params->ret is loaded only after params->sync is loaded.

> 
> >
> > -	pthread_barrier_wait(&params->configured);
> > -	ctrl_params_free(params);
> > +	free(params);
> >
> > -	if (ret != 0)
> > -		/* start_routine has been set to NULL above; */
> > -		/* ctrl thread will exit immediately */
> > +	if (__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE) != 1)
> 
> it suggest == instead of !=, like this:
> 
>   if (__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE) ==
> CTRL_THREAD_ERR)
Ok

> 
> 
> > +		/* ctrl thread is exiting */
> >  		pthread_join(*thread, NULL);
> >
> >  	return -ret;
> >
> > -fail_with_barrier:
> > -	(void)pthread_barrier_destroy(&params->configured);
> > +thread_create_failed:
> >
> > -fail_no_barrier:
> >  	free(params);
> >
> >  	return -ret;
> > diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
> > index 1550b75da0..f1cc5e38dc 100644
> > --- a/lib/eal/include/rte_lcore.h
> > +++ b/lib/eal/include/rte_lcore.h
> > @@ -420,10 +420,10 @@ rte_thread_unregister(void);
> >  /**
> >   * Create a control thread.
> >   *
> > - * Wrapper to pthread_create(), pthread_setname_np() and
> > - * pthread_setaffinity_np(). The affinity of the new thread is based
> > - * on the CPU affinity retrieved at the time rte_eal_init() was
> > called,
> > - * the dataplane and service lcores are then excluded.
> > + * Creates a control thread with the given name and attributes. The
> > + * affinity of the new thread is based on the CPU affinity retrieved
> > + * at the time rte_eal_init() was called, the dataplane and service
> > + * lcores are then excluded.
> 
> The description is indeed better. Maybe it is the opportunity to highlight that if
> the name cannot be set, no error is returned, see commit 368a91d6bdc8 ("eal:
> ignore failure of naming a control thread")
Makes sense, will add.

> 
> >   *
> >   * @param thread
> >   *   Filled with the thread id of the new created thread.
> > --
> > 2.17.1
> >
Stephen Hemminger Aug. 24, 2021, 9:30 p.m. UTC | #3
On Tue, 24 Aug 2021 20:03:03 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> > One difference between this implementation and the previous one is this busy
> > loop. rte_pause() relaxes the cpu, but will not make the calling thread to sleep
> > and wait for the sync event. So here we can spin a quite long time until the
> > other thread is scheduled by the OS.  
> Yes, this is a difference. We could add a microsleep to allow for the OS to un-schedule the current thread.

Why not use sched_yield() here?
Honnappa Nagarahalli Aug. 25, 2021, 1:26 a.m. UTC | #4
<snip>

> 
> On Tue, 24 Aug 2021 20:03:03 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> > > One difference between this implementation and the previous one is
> > > this busy loop. rte_pause() relaxes the cpu, but will not make the
> > > calling thread to sleep and wait for the sync event. So here we can
> > > spin a quite long time until the other thread is scheduled by the OS.
> > Yes, this is a difference. We could add a microsleep to allow for the OS to un-
> schedule the current thread.
> 
> Why not use sched_yield() here?
Makes sense
Mattias Rönnblom Aug. 25, 2021, 3:12 p.m. UTC | #5
On 2021-08-02 07:16, Honnappa Nagarahalli wrote:
> The current described behaviour of rte_ctrl_thread_create is
> rigid which makes the implementation of the function complex.
> The behavior is abstracted to allow for simplified implementation.
> 

Have you considered using a POSIX condition variable instead of atomics 
for synchronization?

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
> v2: Used compiler's C++11 atomic built-ins to access the shared variable.
> 
>   lib/eal/common/eal_common_thread.c | 65 +++++++++++++-----------------
>   lib/eal/include/rte_lcore.h        |  8 ++--
>   2 files changed, 32 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
> index 1a52f42a2b..e3e0bf4bff 100644
> --- a/lib/eal/common/eal_common_thread.c
> +++ b/lib/eal/common/eal_common_thread.c
> @@ -169,35 +169,35 @@ __rte_thread_uninit(void)
>   struct rte_thread_ctrl_params {
>   	void *(*start_routine)(void *);
>   	void *arg;
> -	pthread_barrier_t configured;
> -	unsigned int refcnt;
> +	int ret;
> +	/* Synchronization variable between the control thread
> +	 * and the thread calling rte_ctrl_thread_create function.
> +	 * 0 - Initialized
> +	 * 1 - Control thread is running successfully
> +	 * 2 - Control thread encountered an error. 'ret' has the
> +	 *     error code.
> +	 */
> +	unsigned int sync;
>   };
>   
> -static void ctrl_params_free(struct rte_thread_ctrl_params *params)
> -{
> -	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
> -		(void)pthread_barrier_destroy(&params->configured);
> -		free(params);
> -	}
> -}
> -
>   static void *ctrl_thread_init(void *arg)
>   {
>   	struct internal_config *internal_conf =
>   		eal_get_internal_configuration();
>   	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
>   	struct rte_thread_ctrl_params *params = arg;
> -	void *(*start_routine)(void *);
> +	void *(*start_routine)(void *) = params->start_routine;
>   	void *routine_arg = params->arg;
>   
>   	__rte_thread_init(rte_lcore_id(), cpuset);
> -
> -	pthread_barrier_wait(&params->configured);
> -	start_routine = params->start_routine;
> -	ctrl_params_free(params);
> -
> -	if (start_routine == NULL)
> +	params->ret = pthread_setaffinity_np(pthread_self(),
> +				sizeof(*cpuset), cpuset);
> +	if (params->ret != 0) {
> +		__atomic_store_n(&params->sync, 2, __ATOMIC_RELEASE);
>   		return NULL;
> +	}
> +
> +	__atomic_store_n(&params->sync, 1, __ATOMIC_RELEASE);
>   
>   	return start_routine(routine_arg);
>   }
> @@ -207,9 +207,6 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>   		const pthread_attr_t *attr,
>   		void *(*start_routine)(void *), void *arg)
>   {
> -	struct internal_config *internal_conf =
> -		eal_get_internal_configuration();
> -	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
>   	struct rte_thread_ctrl_params *params;
>   	int ret;
>   
> @@ -219,15 +216,12 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>   
>   	params->start_routine = start_routine;
>   	params->arg = arg;
> -	params->refcnt = 2;
> -
> -	ret = pthread_barrier_init(&params->configured, NULL, 2);
> -	if (ret != 0)
> -		goto fail_no_barrier;
> +	params->ret = 0;
> +	params->sync = 0;
>   
>   	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
>   	if (ret != 0)
> -		goto fail_with_barrier;
> +		goto thread_create_failed;
>   
>   	if (name != NULL) {
>   		ret = rte_thread_setname(*thread, name);
> @@ -236,24 +230,21 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
>   				"Cannot set name for ctrl thread\n");
>   	}
>   
> -	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
> -	if (ret != 0)
> -		params->start_routine = NULL;
> +	/* Wait for the control thread to initialize successfully */
> +	while (!__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE))
> +		rte_pause();
> +	ret = params->ret;
>   
> -	pthread_barrier_wait(&params->configured);
> -	ctrl_params_free(params);
> +	free(params);
>   
> -	if (ret != 0)
> -		/* start_routine has been set to NULL above; */
> -		/* ctrl thread will exit immediately */
> +	if (__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE) != 1)
> +		/* ctrl thread is exiting */
>   		pthread_join(*thread, NULL);
>   
>   	return -ret;
>   
> -fail_with_barrier:
> -	(void)pthread_barrier_destroy(&params->configured);
> +thread_create_failed:
>   
> -fail_no_barrier:
>   	free(params);
>   
>   	return -ret;
> diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
> index 1550b75da0..f1cc5e38dc 100644
> --- a/lib/eal/include/rte_lcore.h
> +++ b/lib/eal/include/rte_lcore.h
> @@ -420,10 +420,10 @@ rte_thread_unregister(void);
>   /**
>    * Create a control thread.
>    *
> - * Wrapper to pthread_create(), pthread_setname_np() and
> - * pthread_setaffinity_np(). The affinity of the new thread is based
> - * on the CPU affinity retrieved at the time rte_eal_init() was called,
> - * the dataplane and service lcores are then excluded.
> + * Creates a control thread with the given name and attributes. The
> + * affinity of the new thread is based on the CPU affinity retrieved
> + * at the time rte_eal_init() was called, the dataplane and service
> + * lcores are then excluded.
>    *
>    * @param thread
>    *   Filled with the thread id of the new created thread.
>
Mattias Rönnblom Aug. 25, 2021, 3:19 p.m. UTC | #6
On 2021-08-24 23:30, Stephen Hemminger wrote:
> On Tue, 24 Aug 2021 20:03:03 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
>>> One difference between this implementation and the previous one is this busy
>>> loop. rte_pause() relaxes the cpu, but will not make the calling thread to sleep
>>> and wait for the sync event. So here we can spin a quite long time until the
>>> other thread is scheduled by the OS.
>> Yes, this is a difference. We could add a microsleep to allow for the OS to un-schedule the current thread.
> 
> Why not use sched_yield() here?
> 

The man page is not exactly encouraging the use sched_yield on CFS.
Honnappa Nagarahalli Aug. 25, 2021, 3:23 p.m. UTC | #7
<snip>

> 
> On 2021-08-02 07:16, Honnappa Nagarahalli wrote:
> > The current described behaviour of rte_ctrl_thread_create is rigid
> > which makes the implementation of the function complex.
> > The behavior is abstracted to allow for simplified implementation.
> >
> 
> Have you considered using a POSIX condition variable instead of atomics for
> synchronization?
No, I have not considered. The current implementation is complex because of the error handling of pthread_barrier_xxx APIs. I am thinking similar complexity will come in with the condition variable functions.

I am using atomic variable in this patch to synchronize which does not need much error handling which simplifies the implementation.

<snip>
Honnappa Nagarahalli Aug. 25, 2021, 3:31 p.m. UTC | #8
<snip>

> 
> On 2021-08-24 23:30, Stephen Hemminger wrote:
> > On Tue, 24 Aug 2021 20:03:03 +0000
> > Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> >
> >>> One difference between this implementation and the previous one is
> >>> this busy loop. rte_pause() relaxes the cpu, but will not make the
> >>> calling thread to sleep and wait for the sync event. So here we can
> >>> spin a quite long time until the other thread is scheduled by the OS.
> >> Yes, this is a difference. We could add a microsleep to allow for the OS to
> un-schedule the current thread.
> >
> > Why not use sched_yield() here?
> >
> 
> The man page is not exactly encouraging the use sched_yield on CFS.
Sorry, what is CFS?
There are already several uses of sched_yield in the code.
Stephen Hemminger Aug. 25, 2021, 9:57 p.m. UTC | #9
On Wed, 25 Aug 2021 15:23:08 +0000
Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:

> <snip>
> 
> > 
> > On 2021-08-02 07:16, Honnappa Nagarahalli wrote:  
> > > The current described behaviour of rte_ctrl_thread_create is rigid
> > > which makes the implementation of the function complex.
> > > The behavior is abstracted to allow for simplified implementation.
> > >  
> > 
> > Have you considered using a POSIX condition variable instead of atomics for
> > synchronization?  
> No, I have not considered. The current implementation is complex because of the error handling of pthread_barrier_xxx APIs. I am thinking similar complexity will come in with the condition variable functions.
> 
> I am using atomic variable in this patch to synchronize which does not need much error handling which simplifies the implementation.
> 
> <snip>

The problem with condition variables is that it then adds a mutex
as well.
Honnappa Nagarahalli Aug. 30, 2021, 10:30 p.m. UTC | #10
<snip>
> 
> On Tue, 24 Aug 2021 20:03:03 +0000
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> 
> > > One difference between this implementation and the previous one is
> > > this busy loop. rte_pause() relaxes the cpu, but will not make the
> > > calling thread to sleep and wait for the sync event. So here we can
> > > spin a quite long time until the other thread is scheduled by the OS.
> > Yes, this is a difference. We could add a microsleep to allow for the OS to un-
> schedule the current thread.
> 
> Why not use sched_yield() here?
This means, it is not portable to Windows. The function needs to be moved to OS specific files. Are there any guidelines on creating OS specific functions? Can I create in this case?
Mattias Rönnblom Sept. 1, 2021, 8:04 p.m. UTC | #11
On 2021-08-25 17:31, Honnappa Nagarahalli wrote:
> <snip>
> 
>>
>> On 2021-08-24 23:30, Stephen Hemminger wrote:
>>> On Tue, 24 Aug 2021 20:03:03 +0000
>>> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
>>>
>>>>> One difference between this implementation and the previous one is
>>>>> this busy loop. rte_pause() relaxes the cpu, but will not make the
>>>>> calling thread to sleep and wait for the sync event. So here we can
>>>>> spin a quite long time until the other thread is scheduled by the OS.
>>>> Yes, this is a difference. We could add a microsleep to allow for the OS to
>> un-schedule the current thread.
>>>
>>> Why not use sched_yield() here?
>>>
>>
>> The man page is not exactly encouraging the use sched_yield on CFS.
> Sorry, what is CFS?
> There are already several uses of sched_yield in the code.
> 

CFS is the Linux best-effort scheduler and default scheduling policy, 
and likely used by most DPDK applications.
Honnappa Nagarahalli Sept. 1, 2021, 10:21 p.m. UTC | #12
<snip>
> >
> >>
> >> On 2021-08-24 23:30, Stephen Hemminger wrote:
> >>> On Tue, 24 Aug 2021 20:03:03 +0000
> >>> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com> wrote:
> >>>
> >>>>> One difference between this implementation and the previous one is
> >>>>> this busy loop. rte_pause() relaxes the cpu, but will not make the
> >>>>> calling thread to sleep and wait for the sync event. So here we
> >>>>> can spin a quite long time until the other thread is scheduled by the OS.
> >>>> Yes, this is a difference. We could add a microsleep to allow for
> >>>> the OS to
> >> un-schedule the current thread.
> >>>
> >>> Why not use sched_yield() here?
> >>>
> >>
> >> The man page is not exactly encouraging the use sched_yield on CFS.
> > Sorry, what is CFS?
> > There are already several uses of sched_yield in the code.
> >
> 
> CFS is the Linux best-effort scheduler and default scheduling policy, and likely
> used by most DPDK applications.
Thanks, I read some of the issues. If sched_yield has to be used, the API needs to be implemented for Linux/Windows/FreeBSD. From my perspective, this API is called mostly during init time. I do not think it is worth duplicating the code and maintaining it. I would prefer to go with usleep.
diff mbox series

Patch

diff --git a/lib/eal/common/eal_common_thread.c b/lib/eal/common/eal_common_thread.c
index 1a52f42a2b..e3e0bf4bff 100644
--- a/lib/eal/common/eal_common_thread.c
+++ b/lib/eal/common/eal_common_thread.c
@@ -169,35 +169,35 @@  __rte_thread_uninit(void)
 struct rte_thread_ctrl_params {
 	void *(*start_routine)(void *);
 	void *arg;
-	pthread_barrier_t configured;
-	unsigned int refcnt;
+	int ret;
+	/* Synchronization variable between the control thread
+	 * and the thread calling rte_ctrl_thread_create function.
+	 * 0 - Initialized
+	 * 1 - Control thread is running successfully
+	 * 2 - Control thread encountered an error. 'ret' has the
+	 *     error code.
+	 */
+	unsigned int sync;
 };
 
-static void ctrl_params_free(struct rte_thread_ctrl_params *params)
-{
-	if (__atomic_sub_fetch(&params->refcnt, 1, __ATOMIC_ACQ_REL) == 0) {
-		(void)pthread_barrier_destroy(&params->configured);
-		free(params);
-	}
-}
-
 static void *ctrl_thread_init(void *arg)
 {
 	struct internal_config *internal_conf =
 		eal_get_internal_configuration();
 	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
 	struct rte_thread_ctrl_params *params = arg;
-	void *(*start_routine)(void *);
+	void *(*start_routine)(void *) = params->start_routine;
 	void *routine_arg = params->arg;
 
 	__rte_thread_init(rte_lcore_id(), cpuset);
-
-	pthread_barrier_wait(&params->configured);
-	start_routine = params->start_routine;
-	ctrl_params_free(params);
-
-	if (start_routine == NULL)
+	params->ret = pthread_setaffinity_np(pthread_self(),
+				sizeof(*cpuset), cpuset);
+	if (params->ret != 0) {
+		__atomic_store_n(&params->sync, 2, __ATOMIC_RELEASE);
 		return NULL;
+	}
+
+	__atomic_store_n(&params->sync, 1, __ATOMIC_RELEASE);
 
 	return start_routine(routine_arg);
 }
@@ -207,9 +207,6 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 		const pthread_attr_t *attr,
 		void *(*start_routine)(void *), void *arg)
 {
-	struct internal_config *internal_conf =
-		eal_get_internal_configuration();
-	rte_cpuset_t *cpuset = &internal_conf->ctrl_cpuset;
 	struct rte_thread_ctrl_params *params;
 	int ret;
 
@@ -219,15 +216,12 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 
 	params->start_routine = start_routine;
 	params->arg = arg;
-	params->refcnt = 2;
-
-	ret = pthread_barrier_init(&params->configured, NULL, 2);
-	if (ret != 0)
-		goto fail_no_barrier;
+	params->ret = 0;
+	params->sync = 0;
 
 	ret = pthread_create(thread, attr, ctrl_thread_init, (void *)params);
 	if (ret != 0)
-		goto fail_with_barrier;
+		goto thread_create_failed;
 
 	if (name != NULL) {
 		ret = rte_thread_setname(*thread, name);
@@ -236,24 +230,21 @@  rte_ctrl_thread_create(pthread_t *thread, const char *name,
 				"Cannot set name for ctrl thread\n");
 	}
 
-	ret = pthread_setaffinity_np(*thread, sizeof(*cpuset), cpuset);
-	if (ret != 0)
-		params->start_routine = NULL;
+	/* Wait for the control thread to initialize successfully */
+	while (!__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE))
+		rte_pause();
+	ret = params->ret;
 
-	pthread_barrier_wait(&params->configured);
-	ctrl_params_free(params);
+	free(params);
 
-	if (ret != 0)
-		/* start_routine has been set to NULL above; */
-		/* ctrl thread will exit immediately */
+	if (__atomic_load_n(&params->sync, __ATOMIC_ACQUIRE) != 1)
+		/* ctrl thread is exiting */
 		pthread_join(*thread, NULL);
 
 	return -ret;
 
-fail_with_barrier:
-	(void)pthread_barrier_destroy(&params->configured);
+thread_create_failed:
 
-fail_no_barrier:
 	free(params);
 
 	return -ret;
diff --git a/lib/eal/include/rte_lcore.h b/lib/eal/include/rte_lcore.h
index 1550b75da0..f1cc5e38dc 100644
--- a/lib/eal/include/rte_lcore.h
+++ b/lib/eal/include/rte_lcore.h
@@ -420,10 +420,10 @@  rte_thread_unregister(void);
 /**
  * Create a control thread.
  *
- * Wrapper to pthread_create(), pthread_setname_np() and
- * pthread_setaffinity_np(). The affinity of the new thread is based
- * on the CPU affinity retrieved at the time rte_eal_init() was called,
- * the dataplane and service lcores are then excluded.
+ * Creates a control thread with the given name and attributes. The
+ * affinity of the new thread is based on the CPU affinity retrieved
+ * at the time rte_eal_init() was called, the dataplane and service
+ * lcores are then excluded.
  *
  * @param thread
  *   Filled with the thread id of the new created thread.