eal: have unregistered non-EAL threads use dedicated PRNG

Message ID 20221205100334.3500-1-mattias.ronnblom@ericsson.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series eal: have unregistered non-EAL threads use dedicated PRNG |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/github-robot: build success github build: passed
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS

Commit Message

Mattias Rönnblom Dec. 5, 2022, 10:03 a.m. UTC
  Prior to this change, unregistered non-EAL threads shared a PRNG
instance with the main lcore. The main lcore may well be used for fast
path processing, potentially making rte_rand() calls in the
process. It should not need to synchronize with control threads.

With this change, all unregistered non-EAL threads share one dedicated
PRNG instance.

The API documentation is updated to use the proper terminology when
referring to threads equipped with an lcore id.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
 lib/eal/common/rte_random.c  | 17 +++++++++++------
 lib/eal/include/rte_random.h | 10 +++++++---
 2 files changed, 18 insertions(+), 9 deletions(-)
  

Comments

Morten Brørup Dec. 5, 2022, 10:58 a.m. UTC | #1
> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Monday, 5 December 2022 11.04
> 
> Prior to this change, unregistered non-EAL threads shared a PRNG
> instance with the main lcore. The main lcore may well be used for fast
> path processing, potentially making rte_rand() calls in the
> process. It should not need to synchronize with control threads.
> 
> With this change, all unregistered non-EAL threads share one dedicated
> PRNG instance.
> 
> The API documentation is updated to use the proper terminology when
> referring to threads equipped with an lcore id.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---
>  lib/eal/common/rte_random.c  | 17 +++++++++++------
>  lib/eal/include/rte_random.h | 10 +++++++---
>  2 files changed, 18 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
> index 166b0d8921..565f2401ce 100644
> --- a/lib/eal/common/rte_random.c
> +++ b/lib/eal/common/rte_random.c
> @@ -20,7 +20,11 @@ struct rte_rand_state {
>  	uint64_t z5;
>  } __rte_cache_aligned;
> 
> -static struct rte_rand_state rand_states[RTE_MAX_LCORE];
> +/* One instance each for every lcore id-equipped thread, and one
> + * additional instance to be shared by all others threads (i.e., all
> + * unregistered non-EAL threads).
> + */
> +static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> 
>  static uint32_t
>  __rte_rand_lcg32(uint32_t *seed)
> @@ -114,14 +118,15 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
>  static __rte_always_inline
>  struct rte_rand_state *__rte_rand_get_state(void)
>  {
> -	unsigned int lcore_id;
> +	unsigned int idx;
> 
> -	lcore_id = rte_lcore_id();
> +	idx = rte_lcore_id();
> 
> -	if (unlikely(lcore_id == LCORE_ID_ANY))
> -		lcore_id = rte_get_main_lcore();
> +	/* last instance reserved for unregistered non-EAL threads */
> +	if (unlikely(idx == LCORE_ID_ANY))
> +		idx = RTE_MAX_LCORE;
> 
> -	return &rand_states[lcore_id];
> +	return &rand_states[idx];
>  }
> 
>  uint64_t
> diff --git a/lib/eal/include/rte_random.h
> b/lib/eal/include/rte_random.h
> index d90e4d2192..2edf5d210b 100644
> --- a/lib/eal/include/rte_random.h
> +++ b/lib/eal/include/rte_random.h
> @@ -41,7 +41,8 @@ rte_srand(uint64_t seedval);
>   *
>   * The generator is not cryptographically secure.
>   *
> - * If called from lcore threads, this function is thread-safe.
> + * If called from EAL threads or registered non-EAL threads, this
> function
> + * is thread-safe.
>   *
>   * @return
>   *   A pseudo-random value between 0 and (1<<64)-1.
> @@ -55,7 +56,8 @@ rte_rand(void);
>   * This function returns an uniformly distributed (unbiased) random
>   * number less than a user-specified maximum value.
>   *
> - * If called from lcore threads, this function is thread-safe.
> + * If called from EAL threads or registered non-EAL threads, this
> function
> + * is thread-safe.
>   *
>   * @param upper_bound
>   *   The upper bound of the generated number.
> @@ -75,7 +77,9 @@ rte_rand_max(uint64_t upper_bound);
>   * number uniformly distributed over the interval [0.0, 1.0).
>   *
>   * The generator is not cryptographically secure.
> - * If called from lcore threads, this function is thread-safe.
> + *
> + * If called from EAL threads or registered non-EAL threads, this
> function
> + * is thread-safe.
>   *
>   * @return
>   *   A pseudo-random value between 0 and 1.0.
> --
> 2.34.1
> 

A nice improvement.

Acked-by: Morten Brørup <mb@smartsharesystems.com>


Here's some serious feature creep...

Instead of using "static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];", we could use thread local storage ("__thread struct rte_rand_state rand_state;") to keep the state per O/S thread (independent of lcore_id etc.), making it completely thread safe.

But then, how do we seed the state?

Currently, we use the RTE_INIT() constructor attribute to seed the array of rand_states; but there is no thread constructor attribute. So here comes the feature creep:

It would be very useful to have RTE_THREAD_INIT()/_FINI constructor/destructor macros, so libraries and applications could define functions to be called by thread_func_wrapper() before/after calling thread_func.

Using arrays like some_variable[RTE_MAX_LCORE (+ 1)] is common practice in DPDK, but only really required for variables that are not private to the thread, i.e. variables that other threads need access to.

Per-thread constructors/destructors is a generic feature suggestion, so please don't hold back this rte_random patch!

-Morten
  
Mattias Rönnblom Dec. 6, 2022, 3:14 p.m. UTC | #2
On 2022-12-05 11:58, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
>> Sent: Monday, 5 December 2022 11.04
>>
>> Prior to this change, unregistered non-EAL threads shared a PRNG
>> instance with the main lcore. The main lcore may well be used for fast
>> path processing, potentially making rte_rand() calls in the
>> process. It should not need to synchronize with control threads.
>>
>> With this change, all unregistered non-EAL threads share one dedicated
>> PRNG instance.
>>
>> The API documentation is updated to use the proper terminology when
>> referring to threads equipped with an lcore id.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> ---
>>   lib/eal/common/rte_random.c  | 17 +++++++++++------
>>   lib/eal/include/rte_random.h | 10 +++++++---
>>   2 files changed, 18 insertions(+), 9 deletions(-)
>>
>> diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
>> index 166b0d8921..565f2401ce 100644
>> --- a/lib/eal/common/rte_random.c
>> +++ b/lib/eal/common/rte_random.c
>> @@ -20,7 +20,11 @@ struct rte_rand_state {
>>   	uint64_t z5;
>>   } __rte_cache_aligned;
>>
>> -static struct rte_rand_state rand_states[RTE_MAX_LCORE];
>> +/* One instance each for every lcore id-equipped thread, and one
>> + * additional instance to be shared by all others threads (i.e., all
>> + * unregistered non-EAL threads).
>> + */
>> +static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
>>
>>   static uint32_t
>>   __rte_rand_lcg32(uint32_t *seed)
>> @@ -114,14 +118,15 @@ __rte_rand_lfsr258(struct rte_rand_state *state)
>>   static __rte_always_inline
>>   struct rte_rand_state *__rte_rand_get_state(void)
>>   {
>> -	unsigned int lcore_id;
>> +	unsigned int idx;
>>
>> -	lcore_id = rte_lcore_id();
>> +	idx = rte_lcore_id();
>>
>> -	if (unlikely(lcore_id == LCORE_ID_ANY))
>> -		lcore_id = rte_get_main_lcore();
>> +	/* last instance reserved for unregistered non-EAL threads */
>> +	if (unlikely(idx == LCORE_ID_ANY))
>> +		idx = RTE_MAX_LCORE;
>>
>> -	return &rand_states[lcore_id];
>> +	return &rand_states[idx];
>>   }
>>
>>   uint64_t
>> diff --git a/lib/eal/include/rte_random.h
>> b/lib/eal/include/rte_random.h
>> index d90e4d2192..2edf5d210b 100644
>> --- a/lib/eal/include/rte_random.h
>> +++ b/lib/eal/include/rte_random.h
>> @@ -41,7 +41,8 @@ rte_srand(uint64_t seedval);
>>    *
>>    * The generator is not cryptographically secure.
>>    *
>> - * If called from lcore threads, this function is thread-safe.
>> + * If called from EAL threads or registered non-EAL threads, this
>> function
>> + * is thread-safe.
>>    *
>>    * @return
>>    *   A pseudo-random value between 0 and (1<<64)-1.
>> @@ -55,7 +56,8 @@ rte_rand(void);
>>    * This function returns an uniformly distributed (unbiased) random
>>    * number less than a user-specified maximum value.
>>    *
>> - * If called from lcore threads, this function is thread-safe.
>> + * If called from EAL threads or registered non-EAL threads, this
>> function
>> + * is thread-safe.
>>    *
>>    * @param upper_bound
>>    *   The upper bound of the generated number.
>> @@ -75,7 +77,9 @@ rte_rand_max(uint64_t upper_bound);
>>    * number uniformly distributed over the interval [0.0, 1.0).
>>    *
>>    * The generator is not cryptographically secure.
>> - * If called from lcore threads, this function is thread-safe.
>> + *
>> + * If called from EAL threads or registered non-EAL threads, this
>> function
>> + * is thread-safe.
>>    *
>>    * @return
>>    *   A pseudo-random value between 0 and 1.0.
>> --
>> 2.34.1
>>
> 
> A nice improvement.
> 
> Acked-by: Morten Brørup <mb@smartsharesystems.com>
> 
> 

Thanks Morten.

> Here's some serious feature creep...
> 
> Instead of using "static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];", we could use thread local storage ("__thread struct rte_rand_state rand_state;") to keep the state per O/S thread (independent of lcore_id etc.), making it completely thread safe.
> 
> But then, how do we seed the state?
> 
> Currently, we use the RTE_INIT() constructor attribute to seed the array of rand_states; but there is no thread constructor attribute. So here comes the feature creep:
> 
> It would be very useful to have RTE_THREAD_INIT()/_FINI constructor/destructor macros, so libraries and applications could define functions to be called by thread_func_wrapper() before/after calling thread_func.
> 
> Using arrays like some_variable[RTE_MAX_LCORE (+ 1)] is common practice in DPDK, but only really required for variables that are not private to the thread, i.e. variables that other threads need access to.
> 
> Per-thread constructors/destructors is a generic feature suggestion, so please don't hold back this rte_random patch!
> 

The performance (CPU & memory) implications of using TLS for the whole
per-thread data structure (a PRNG in this case), as opposed to the DPDK
pattern of keeping just a per-thread index in TLS and the rest in an
instance of a static array, are very unclear to me.

A middle ground would be to keep only a pointer in TLS, and have a lazy 
allocation of an instance, when needed. I think you could solve the 
seeding issue by having a lock-protected LCG for the purpose of seeding 
(only).

For rte_random.c this is hair splitting, but considering this is a 
general pattern, I think the discussion is relevant.

> -Morten
>
  
David Marchand Feb. 10, 2023, 11:44 a.m. UTC | #3
On Mon, Dec 5, 2022 at 11:08 AM Mattias Rönnblom
<mattias.ronnblom@ericsson.com> wrote:
>
> Prior to this change, unregistered non-EAL threads shared a PRNG
> instance with the main lcore. The main lcore may well be used for fast
> path processing, potentially making rte_rand() calls in the
> process. It should not need to synchronize with control threads.
>
> With this change, all unregistered non-EAL threads share one dedicated
> PRNG instance.
>
> The API documentation is updated to use the proper terminology when
> referring to threads equipped with an lcore id.
>
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>

Applied, thanks.
  

Patch

diff --git a/lib/eal/common/rte_random.c b/lib/eal/common/rte_random.c
index 166b0d8921..565f2401ce 100644
--- a/lib/eal/common/rte_random.c
+++ b/lib/eal/common/rte_random.c
@@ -20,7 +20,11 @@  struct rte_rand_state {
 	uint64_t z5;
 } __rte_cache_aligned;
 
-static struct rte_rand_state rand_states[RTE_MAX_LCORE];
+/* One instance each for every lcore id-equipped thread, and one
+ * additional instance to be shared by all others threads (i.e., all
+ * unregistered non-EAL threads).
+ */
+static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
 
 static uint32_t
 __rte_rand_lcg32(uint32_t *seed)
@@ -114,14 +118,15 @@  __rte_rand_lfsr258(struct rte_rand_state *state)
 static __rte_always_inline
 struct rte_rand_state *__rte_rand_get_state(void)
 {
-	unsigned int lcore_id;
+	unsigned int idx;
 
-	lcore_id = rte_lcore_id();
+	idx = rte_lcore_id();
 
-	if (unlikely(lcore_id == LCORE_ID_ANY))
-		lcore_id = rte_get_main_lcore();
+	/* last instance reserved for unregistered non-EAL threads */
+	if (unlikely(idx == LCORE_ID_ANY))
+		idx = RTE_MAX_LCORE;
 
-	return &rand_states[lcore_id];
+	return &rand_states[idx];
 }
 
 uint64_t
diff --git a/lib/eal/include/rte_random.h b/lib/eal/include/rte_random.h
index d90e4d2192..2edf5d210b 100644
--- a/lib/eal/include/rte_random.h
+++ b/lib/eal/include/rte_random.h
@@ -41,7 +41,8 @@  rte_srand(uint64_t seedval);
  *
  * The generator is not cryptographically secure.
  *
- * If called from lcore threads, this function is thread-safe.
+ * If called from EAL threads or registered non-EAL threads, this function
+ * is thread-safe.
  *
  * @return
  *   A pseudo-random value between 0 and (1<<64)-1.
@@ -55,7 +56,8 @@  rte_rand(void);
  * This function returns an uniformly distributed (unbiased) random
  * number less than a user-specified maximum value.
  *
- * If called from lcore threads, this function is thread-safe.
+ * If called from EAL threads or registered non-EAL threads, this function
+ * is thread-safe.
  *
  * @param upper_bound
  *   The upper bound of the generated number.
@@ -75,7 +77,9 @@  rte_rand_max(uint64_t upper_bound);
  * number uniformly distributed over the interval [0.0, 1.0).
  *
  * The generator is not cryptographically secure.
- * If called from lcore threads, this function is thread-safe.
+ *
+ * If called from EAL threads or registered non-EAL threads, this function
+ * is thread-safe.
  *
  * @return
  *   A pseudo-random value between 0 and 1.0.