[RFC,v2,3/5] random: keep PRNG state in lcore variable
Checks
Commit Message
Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
same state in a more cache-friendly lcore variable.
Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
---
lib/eal/common/rte_random.c | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
Comments
> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> Sent: Monday, 19 February 2024 10.41
>
> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
> same state in a more cache-friendly lcore variable.
>
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> ---
[...]
> @@ -19,14 +20,12 @@ struct rte_rand_state {
> uint64_t z3;
> uint64_t z4;
> uint64_t z5;
> - RTE_CACHE_GUARD;
> -} __rte_cache_aligned;
> +};
>
> -/* One instance each for every lcore id-equipped thread, and one
> - * additional instance to be shared by all others threads (i.e., all
> - * unregistered non-EAL threads).
> - */
> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
> +
> +/* instance to be shared by all unregistered non-EAL threads */
> +static struct rte_rand_state unregistered_rand_state
> __rte_cache_aligned;
The unregistered_rand_state instance is still __rte_cache_aligned; consider also adding an RTE_CACHE_GUARD to it.
On 2024-02-19 12:22, Morten Brørup wrote:
>> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
>> Sent: Monday, 19 February 2024 10.41
>>
>> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
>> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping the
>> same state in a more cache-friendly lcore variable.
>>
>> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
>> ---
>
> [...]
>
>> @@ -19,14 +20,12 @@ struct rte_rand_state {
>> uint64_t z3;
>> uint64_t z4;
>> uint64_t z5;
>> - RTE_CACHE_GUARD;
>> -} __rte_cache_aligned;
>> +};
>>
>> -/* One instance each for every lcore id-equipped thread, and one
>> - * additional instance to be shared by all others threads (i.e., all
>> - * unregistered non-EAL threads).
>> - */
>> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
>> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
>> +
>> +/* instance to be shared by all unregistered non-EAL threads */
>> +static struct rte_rand_state unregistered_rand_state
>> __rte_cache_aligned;
>
> The unregistered_rand_state instance is still __rte_cache_aligned; consider also adding an RTE_CACHE_GUARD to it.
>
It shouldn't be cache-line aligned. I'll remove it. Thanks.
> From: Mattias Rönnblom [mailto:hofors@lysator.liu.se]
> Sent: Monday, 19 February 2024 15.04
>
> On 2024-02-19 12:22, Morten Brørup wrote:
> >> From: Mattias Rönnblom [mailto:mattias.ronnblom@ericsson.com]
> >> Sent: Monday, 19 February 2024 10.41
> >>
> >> Replace keeping PRNG state in a RTE_MAX_LCORE-sized static array of
> >> cache-aligned and RTE_CACHE_GUARDed struct instances with keeping
> the
> >> same state in a more cache-friendly lcore variable.
> >>
> >> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
> >> ---
> >
> > [...]
> >
> >> @@ -19,14 +20,12 @@ struct rte_rand_state {
> >> uint64_t z3;
> >> uint64_t z4;
> >> uint64_t z5;
> >> - RTE_CACHE_GUARD;
> >> -} __rte_cache_aligned;
> >> +};
> >>
> >> -/* One instance each for every lcore id-equipped thread, and one
> >> - * additional instance to be shared by all others threads (i.e.,
> all
> >> - * unregistered non-EAL threads).
> >> - */
> >> -static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
> >> +RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
> >> +
> >> +/* instance to be shared by all unregistered non-EAL threads */
> >> +static struct rte_rand_state unregistered_rand_state
> >> __rte_cache_aligned;
> >
> > The unregistered_rand_state instance is still __rte_cache_aligned;
> consider also adding an RTE_CACHE_GUARD to it.
> >
>
> It shouldn't be cache-line aligned. I'll remove it. Thanks.
Agreed; that fix is just as good. Then,
Acked-by: Morten Brørup <mb@smartsharesystems.com>
@@ -11,6 +11,7 @@
#include <rte_branch_prediction.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
+#include <rte_lcore_var.h>
#include <rte_random.h>
struct rte_rand_state {
@@ -19,14 +20,12 @@ struct rte_rand_state {
uint64_t z3;
uint64_t z4;
uint64_t z5;
- RTE_CACHE_GUARD;
-} __rte_cache_aligned;
+};
-/* One instance each for every lcore id-equipped thread, and one
- * additional instance to be shared by all others threads (i.e., all
- * unregistered non-EAL threads).
- */
-static struct rte_rand_state rand_states[RTE_MAX_LCORE + 1];
+RTE_LCORE_VAR_HANDLE(struct rte_rand_state, rand_state);
+
+/* instance to be shared by all unregistered non-EAL threads */
+static struct rte_rand_state unregistered_rand_state __rte_cache_aligned;
static uint32_t
__rte_rand_lcg32(uint32_t *seed)
@@ -85,8 +84,14 @@ rte_srand(uint64_t seed)
unsigned int lcore_id;
/* add lcore_id to seed to avoid having the same sequence */
- for (lcore_id = 0; lcore_id < RTE_DIM(rand_states); lcore_id++)
- __rte_srand_lfsr258(seed + lcore_id, &rand_states[lcore_id]);
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ struct rte_rand_state *lcore_state =
+ RTE_LCORE_VAR_LCORE_PTR(lcore_id, rand_state);
+
+ __rte_srand_lfsr258(seed + lcore_id, lcore_state);
+ }
+
+ __rte_srand_lfsr258(seed + lcore_id, &unregistered_rand_state);
}
static __rte_always_inline uint64_t
@@ -124,11 +129,10 @@ struct rte_rand_state *__rte_rand_get_state(void)
idx = rte_lcore_id();
- /* last instance reserved for unregistered non-EAL threads */
if (unlikely(idx == LCORE_ID_ANY))
- idx = RTE_MAX_LCORE;
+ return &unregistered_rand_state;
- return &rand_states[idx];
+ return RTE_LCORE_VAR_PTR(rand_state);
}
uint64_t
@@ -228,6 +232,8 @@ RTE_INIT(rte_rand_init)
{
uint64_t seed;
+ RTE_LCORE_VAR_ALLOC(rand_state);
+
seed = __rte_random_initial_seed();
rte_srand(seed);