[v12,6/7] eal: add unit tests for atomic bit access functions

Message ID 20240920104754.739033-7-mattias.ronnblom@ericsson.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series Improve EAL bit operations API |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Mattias Rönnblom Sept. 20, 2024, 10:47 a.m. UTC
Extend bitops tests to cover the rte_bit_atomic_*() family of
functions.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
Acked-by: Tyler Retzlaff <roretzla@linux.microsoft.com>
Acked-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>

--

RFC v4:
 * Add atomicity test for atomic bit flip.

RFC v3:
 * Rename variable 'main' to make ICC happy.
---
 app/test/test_bitops.c | 313 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 312 insertions(+), 1 deletion(-)
  

Comments

David Marchand Oct. 10, 2024, 10:45 a.m. UTC | #1
On Fri, Sep 20, 2024 at 12:57 PM Mattias Rönnblom
<mattias.ronnblom@ericsson.com> wrote:
> +       static int                                                      \
> +       run_parallel_test_and_modify ## size(void *arg)         \
> +       {                                                               \
> +               struct parallel_test_and_set_lcore ## size *lcore = arg; \
> +               uint64_t deadline = rte_get_timer_cycles() +            \
> +                       PARALLEL_TEST_RUNTIME * rte_get_timer_hz();     \
> +               do {                                                    \
> +                       bool old_value;                                 \
> +                       bool new_value = rte_rand() & 1;                \
> +                       bool use_assign = rte_rand() & 1;               \
> +                                                                       \
> +                       if (use_assign)                                 \
> +                               old_value = rte_bit_atomic_test_and_assign( \
> +                                       lcore->word, lcore->bit, new_value, \
> +                                       rte_memory_order_relaxed);      \
> +                       else                                            \
> +                               old_value = new_value ?                 \
> +                                       rte_bit_atomic_test_and_set(    \
> +                                               lcore->word, lcore->bit, \
> +                                               rte_memory_order_relaxed) : \
> +                                       rte_bit_atomic_test_and_clear(  \
> +                                               lcore->word, lcore->bit, \
> +                                               rte_memory_order_relaxed); \
> +                       if (old_value != new_value)                     \
> +                               lcore->flips++;                         \
> +               } while (rte_get_timer_cycles() < deadline);            \
> +                                                                       \
> +               return 0;                                               \
> +       }                                                               \
> +                                                                       \
> +       static int                                                      \
> +       test_bit_atomic_parallel_test_and_modify ## size(void)          \
> +       {                                                               \
> +               unsigned int worker_lcore_id;                           \
> +               uint ## size ## _t word = 0;                            \
> +               unsigned int bit = rte_rand_max(size);                  \
> +               struct parallel_test_and_set_lcore ## size lmain = {    \
> +                       .word = &word,                                  \
> +                       .bit = bit                                      \
> +               };                                                      \
> +               struct parallel_test_and_set_lcore ## size lworker = {  \
> +                       .word = &word,                                  \
> +                       .bit = bit                                      \
> +               };                                                      \
> +                                                                       \
> +               if (rte_lcore_count() < 2) {                            \
> +                       printf("Need multiple cores to run parallel test.\n"); \
> +                       return TEST_SKIPPED;                            \
> +               }                                                       \
> +                                                                       \
> +               worker_lcore_id = rte_get_next_lcore(-1, 1, 0);         \
> +                                                                       \
> +               int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
> +                                              &lworker, worker_lcore_id); \
> +               TEST_ASSERT(rc == 0, "Worker thread launch failed");    \
> +                                                                       \
> +               run_parallel_test_and_modify ## size(&lmain);           \
> +                                                                       \
> +               rte_eal_mp_wait_lcore();                                \
> +                                                                       \
> +               uint64_t total_flips = lmain.flips + lworker.flips;     \
> +               bool expected_value = total_flips % 2;                  \
> +                                                                       \
> +               TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
> +                           "After %"PRId64" flips, the bit value "     \
> +                           "should be %d", total_flips, expected_value); \
> +                                                                       \
> +               uint64_t expected_word = 0;                             \
> +               rte_bit_assign(&expected_word, bit, expected_value);    \
> +                                                                       \
> +               TEST_ASSERT(expected_word == word, "Untouched bits have " \
> +                           "changed value");                           \
> +                                                                       \
> +               return TEST_SUCCESS;                                    \
> +       }
> +
> +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
> +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)

It appears this test failed once in the CI for an unrelated series
(uAPI kernel header import):
https://lab.dpdk.org/results/dashboard/testruns/logs/1385993/

 + TestCase [ 0] : test_bit_access32 succeeded
 + TestCase [ 1] : test_bit_access64 succeeded
 + TestCase [ 2] : test_bit_access32 succeeded
 + TestCase [ 3] : test_bit_access64 succeeded
 + TestCase [ 4] : test_bit_v_access32 succeeded
 + TestCase [ 5] : test_bit_v_access64 succeeded
 + TestCase [ 6] : test_bit_atomic_access32 succeeded
 + TestCase [ 7] : test_bit_atomic_access64 succeeded
 + TestCase [ 8] : test_bit_atomic_v_access32 succeeded
 + TestCase [ 9] : test_bit_atomic_v_access64 succeeded
 + TestCase [10] : test_bit_atomic_parallel_assign32 succeeded
 + TestCase [11] : test_bit_atomic_parallel_assign64 succeeded
 + TestCase [12] : test_bit_atomic_parallel_test_and_modify32 failed
 + TestCase [13] : test_bit_atomic_parallel_test_and_modify64 succeeded
 + TestCase [14] : test_bit_atomic_parallel_flip32 succeeded
 + TestCase [15] : test_bit_atomic_parallel_flip64 succeeded
 + TestCase [16] : test_bit_relaxed_set succeeded
 + TestCase [17] : test_bit_relaxed_clear succeeded
 + TestCase [18] : test_bit_relaxed_test_set_clear succeeded

EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
failed: After 1070523 flips, the bit value should be 1

Please have a look.
  
Mattias Rönnblom Oct. 10, 2024, 11:55 a.m. UTC | #2
On 2024-10-10 12:45, David Marchand wrote:
> On Fri, Sep 20, 2024 at 12:57 PM Mattias Rönnblom
> <mattias.ronnblom@ericsson.com> wrote:
>> +       static int                                                      \
>> +       run_parallel_test_and_modify ## size(void *arg)         \
>> +       {                                                               \
>> +               struct parallel_test_and_set_lcore ## size *lcore = arg; \
>> +               uint64_t deadline = rte_get_timer_cycles() +            \
>> +                       PARALLEL_TEST_RUNTIME * rte_get_timer_hz();     \
>> +               do {                                                    \
>> +                       bool old_value;                                 \
>> +                       bool new_value = rte_rand() & 1;                \
>> +                       bool use_assign = rte_rand() & 1;               \
>> +                                                                       \
>> +                       if (use_assign)                                 \
>> +                               old_value = rte_bit_atomic_test_and_assign( \
>> +                                       lcore->word, lcore->bit, new_value, \
>> +                                       rte_memory_order_relaxed);      \
>> +                       else                                            \
>> +                               old_value = new_value ?                 \
>> +                                       rte_bit_atomic_test_and_set(    \
>> +                                               lcore->word, lcore->bit, \
>> +                                               rte_memory_order_relaxed) : \
>> +                                       rte_bit_atomic_test_and_clear(  \
>> +                                               lcore->word, lcore->bit, \
>> +                                               rte_memory_order_relaxed); \
>> +                       if (old_value != new_value)                     \
>> +                               lcore->flips++;                         \
>> +               } while (rte_get_timer_cycles() < deadline);            \
>> +                                                                       \
>> +               return 0;                                               \
>> +       }                                                               \
>> +                                                                       \
>> +       static int                                                      \
>> +       test_bit_atomic_parallel_test_and_modify ## size(void)          \
>> +       {                                                               \
>> +               unsigned int worker_lcore_id;                           \
>> +               uint ## size ## _t word = 0;                            \
>> +               unsigned int bit = rte_rand_max(size);                  \
>> +               struct parallel_test_and_set_lcore ## size lmain = {    \
>> +                       .word = &word,                                  \
>> +                       .bit = bit                                      \
>> +               };                                                      \
>> +               struct parallel_test_and_set_lcore ## size lworker = {  \
>> +                       .word = &word,                                  \
>> +                       .bit = bit                                      \
>> +               };                                                      \
>> +                                                                       \
>> +               if (rte_lcore_count() < 2) {                            \
>> +                       printf("Need multiple cores to run parallel test.\n"); \
>> +                       return TEST_SKIPPED;                            \
>> +               }                                                       \
>> +                                                                       \
>> +               worker_lcore_id = rte_get_next_lcore(-1, 1, 0);         \
>> +                                                                       \
>> +               int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
>> +                                              &lworker, worker_lcore_id); \
>> +               TEST_ASSERT(rc == 0, "Worker thread launch failed");    \
>> +                                                                       \
>> +               run_parallel_test_and_modify ## size(&lmain);           \
>> +                                                                       \
>> +               rte_eal_mp_wait_lcore();                                \
>> +                                                                       \
>> +               uint64_t total_flips = lmain.flips + lworker.flips;     \
>> +               bool expected_value = total_flips % 2;                  \
>> +                                                                       \
>> +               TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
>> +                           "After %"PRId64" flips, the bit value "     \
>> +                           "should be %d", total_flips, expected_value); \
>> +                                                                       \
>> +               uint64_t expected_word = 0;                             \
>> +               rte_bit_assign(&expected_word, bit, expected_value);    \
>> +                                                                       \
>> +               TEST_ASSERT(expected_word == word, "Untouched bits have " \
>> +                           "changed value");                           \
>> +                                                                       \
>> +               return TEST_SUCCESS;                                    \
>> +       }
>> +
>> +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
>> +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
> 
> It appears this test failed once in the CI for an unrelated series
> (uAPI kernel header import):
> https://lab.dpdk.org/results/dashboard/testruns/logs/1385993/
> 
>   + TestCase [ 0] : test_bit_access32 succeeded
>   + TestCase [ 1] : test_bit_access64 succeeded
>   + TestCase [ 2] : test_bit_access32 succeeded
>   + TestCase [ 3] : test_bit_access64 succeeded
>   + TestCase [ 4] : test_bit_v_access32 succeeded
>   + TestCase [ 5] : test_bit_v_access64 succeeded
>   + TestCase [ 6] : test_bit_atomic_access32 succeeded
>   + TestCase [ 7] : test_bit_atomic_access64 succeeded
>   + TestCase [ 8] : test_bit_atomic_v_access32 succeeded
>   + TestCase [ 9] : test_bit_atomic_v_access64 succeeded
>   + TestCase [10] : test_bit_atomic_parallel_assign32 succeeded
>   + TestCase [11] : test_bit_atomic_parallel_assign64 succeeded
>   + TestCase [12] : test_bit_atomic_parallel_test_and_modify32 failed
>   + TestCase [13] : test_bit_atomic_parallel_test_and_modify64 succeeded
>   + TestCase [14] : test_bit_atomic_parallel_flip32 succeeded
>   + TestCase [15] : test_bit_atomic_parallel_flip64 succeeded
>   + TestCase [16] : test_bit_relaxed_set succeeded
>   + TestCase [17] : test_bit_relaxed_clear succeeded
>   + TestCase [18] : test_bit_relaxed_test_set_clear succeeded
> 
> EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
> failed: After 1070523 flips, the bit value should be 1
> 
> Please have a look.
> 
> 

OK. Nothing obvious from what I can see in the code. Unrelated: why did 
you remove all empty lines in the "template" macros? Makes them much 
harder to read.

I've been unable to reproduce this on my Raptor Lake x86_64 with GCC 
12.3 (CI machine used GCC 12.2).

I'll try if I have better luck on some other systems.
  
David Marchand Oct. 10, 2024, 12:14 p.m. UTC | #3
On Thu, Oct 10, 2024 at 1:56 PM Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> OK. Nothing obvious from what I can see in the code. Unrelated: why did
> you remove all empty lines in the "template" macros? Makes them much
> harder to read.

Those macros are hard to read.

There was an extra indent that resulted in splitting many lines.
So I started with removing this extra indent.

Then, the trailing tabs (with some pseudo randomly mixed in spaces)
were replaced with a single space before the \ for line continuation.
This enhances code review for update: with a single systematic space,
there is no need to consider if you must update all the context when
adding a line longer than what was already present.

In the end, I was left with cases like:
        <some code> \
 \
        <some code> \

And I preferred to remove this extra line as it did not enhance the
clarity of those macros.
  
Mattias Rönnblom Oct. 10, 2024, 12:35 p.m. UTC | #4
On 2024-10-10 14:14, David Marchand wrote:
> On Thu, Oct 10, 2024 at 1:56 PM Mattias Rönnblom <hofors@lysator.liu.se> wrote:
>> OK. Nothing obvious from what I can see in the code. Unrelated: why did
>> you remove all empty lines in the "template" macros? Makes them much
>> harder to read.
> 
> Those macros are hard to read.
> 

Even the more reason to make them easier to read.

> There was an extra indent that resulted in splitting many lines.

They were just formatted with the same indentation as all other macros, 
no? So not "extra".

That said, the indentation leaves less room on for the actual code, so I 
have no issue with the removal of the standard multi-line macro 
indentation, although it wouldn't have hurt to have pointed this out 
earlier, so we wouldn't have to have this discussion after-the-fact.

> So I started with removing this extra indent.
> 
> Then, the trailing tabs (with some pseudo randomly mixed in spaces)
> were replaced with a single space before the \ for line continuation.
> This enhances code review for update: with a single systematic space,
> there is no need to consider if you must update all the context when
> adding a line longer than what was already present.

DPDK uses both the kernel-style multi-line macro formatting, and what 
you describe above.

Your argument above makes sense, but I also find the kernel style more 
visually appealing.

> 
> In the end, I was left with cases like:
>          <some code> \
>   \
>          <some code> \
> 
> And I preferred to remove this extra line as it did not enhance the
> clarity of those macros.
> 
> 

           <some code> \
                       \
           <some code> \

Would have been visually more appealing, but less consistent.

To me, removing these delimiting empty lines is no different from doing 
the same in a regular C function. Empty lines are delimiters, there to 
group related statements, and they serve a purpose. Now, the macros look 
like they are written by someone who doesn't care about readability.
  
Thomas Monjalon Oct. 10, 2024, 1:07 p.m. UTC | #5
10/10/2024 14:35, Mattias Rönnblom:
> On 2024-10-10 14:14, David Marchand wrote:
> > On Thu, Oct 10, 2024 at 1:56 PM Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> Your argument above makes sense, but I also find the kernel style more 
> visually appealing.
> 
> > 
> > In the end, I was left with cases like:
> >          <some code> \
> >   \
> >          <some code> \
> > 
> > And I preferred to remove this extra line as it did not enhance the
> > clarity of those macros.
> > 
> > 
> 
>            <some code> \
>                        \
>            <some code> \
> 
> Would have been visually more appealing, but less consistent.
> 
> To me, removing these delimiting empty lines is no different from doing 
> the same in a regular C function. Empty lines are delimiters, there to 
> group related statements, and they serve a purpose. Now, the macros look 
> like they are written by someone who doesn't care about readability.

Coding style discussions may become endless.
At the end, the maintainers have to decide.

In this case, the problem is having complex macros.
That's why we tend to forbid macros when possible.

I understand your frustration that your commit does not show how you wanted it to appear.
Please understand that David did his best to make it easy to maintain, consistent,
and - the most important - merged in -rc1, given the limited time we all have.

Nothing is perfect, but we all try our best.
Thanks
  
David Marchand Oct. 11, 2024, 8:35 a.m. UTC | #6
On Thu, Oct 10, 2024 at 1:56 PM Mattias Rönnblom <hofors@lysator.liu.se> wrote:
>
> On 2024-10-10 12:45, David Marchand wrote:
> > On Fri, Sep 20, 2024 at 12:57 PM Mattias Rönnblom
> > <mattias.ronnblom@ericsson.com> wrote:
> >> +       static int                                                      \
> >> +       run_parallel_test_and_modify ## size(void *arg)         \
> >> +       {                                                               \
> >> +               struct parallel_test_and_set_lcore ## size *lcore = arg; \
> >> +               uint64_t deadline = rte_get_timer_cycles() +            \
> >> +                       PARALLEL_TEST_RUNTIME * rte_get_timer_hz();     \
> >> +               do {                                                    \
> >> +                       bool old_value;                                 \
> >> +                       bool new_value = rte_rand() & 1;                \
> >> +                       bool use_assign = rte_rand() & 1;               \
> >> +                                                                       \
> >> +                       if (use_assign)                                 \
> >> +                               old_value = rte_bit_atomic_test_and_assign( \
> >> +                                       lcore->word, lcore->bit, new_value, \
> >> +                                       rte_memory_order_relaxed);      \
> >> +                       else                                            \
> >> +                               old_value = new_value ?                 \
> >> +                                       rte_bit_atomic_test_and_set(    \
> >> +                                               lcore->word, lcore->bit, \
> >> +                                               rte_memory_order_relaxed) : \
> >> +                                       rte_bit_atomic_test_and_clear(  \
> >> +                                               lcore->word, lcore->bit, \
> >> +                                               rte_memory_order_relaxed); \
> >> +                       if (old_value != new_value)                     \
> >> +                               lcore->flips++;                         \
> >> +               } while (rte_get_timer_cycles() < deadline);            \
> >> +                                                                       \
> >> +               return 0;                                               \
> >> +       }                                                               \
> >> +                                                                       \
> >> +       static int                                                      \
> >> +       test_bit_atomic_parallel_test_and_modify ## size(void)          \
> >> +       {                                                               \
> >> +               unsigned int worker_lcore_id;                           \
> >> +               uint ## size ## _t word = 0;                            \
> >> +               unsigned int bit = rte_rand_max(size);                  \
> >> +               struct parallel_test_and_set_lcore ## size lmain = {    \
> >> +                       .word = &word,                                  \
> >> +                       .bit = bit                                      \
> >> +               };                                                      \
> >> +               struct parallel_test_and_set_lcore ## size lworker = {  \
> >> +                       .word = &word,                                  \
> >> +                       .bit = bit                                      \
> >> +               };                                                      \
> >> +                                                                       \
> >> +               if (rte_lcore_count() < 2) {                            \
> >> +                       printf("Need multiple cores to run parallel test.\n"); \
> >> +                       return TEST_SKIPPED;                            \
> >> +               }                                                       \
> >> +                                                                       \
> >> +               worker_lcore_id = rte_get_next_lcore(-1, 1, 0);         \
> >> +                                                                       \
> >> +               int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
> >> +                                              &lworker, worker_lcore_id); \
> >> +               TEST_ASSERT(rc == 0, "Worker thread launch failed");    \
> >> +                                                                       \
> >> +               run_parallel_test_and_modify ## size(&lmain);           \
> >> +                                                                       \
> >> +               rte_eal_mp_wait_lcore();                                \
> >> +                                                                       \
> >> +               uint64_t total_flips = lmain.flips + lworker.flips;     \
> >> +               bool expected_value = total_flips % 2;                  \
> >> +                                                                       \
> >> +               TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
> >> +                           "After %"PRId64" flips, the bit value "     \
> >> +                           "should be %d", total_flips, expected_value); \
> >> +                                                                       \
> >> +               uint64_t expected_word = 0;                             \
> >> +               rte_bit_assign(&expected_word, bit, expected_value);    \
> >> +                                                                       \
> >> +               TEST_ASSERT(expected_word == word, "Untouched bits have " \
> >> +                           "changed value");                           \
> >> +                                                                       \
> >> +               return TEST_SUCCESS;                                    \
> >> +       }
> >> +
> >> +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
> >> +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
> >
> > It appears this test failed once in the CI for an unrelated series
> > (uAPI kernel header import):
> > https://lab.dpdk.org/results/dashboard/testruns/logs/1385993/
> >
> >   + TestCase [ 0] : test_bit_access32 succeeded
> >   + TestCase [ 1] : test_bit_access64 succeeded
> >   + TestCase [ 2] : test_bit_access32 succeeded
> >   + TestCase [ 3] : test_bit_access64 succeeded
> >   + TestCase [ 4] : test_bit_v_access32 succeeded
> >   + TestCase [ 5] : test_bit_v_access64 succeeded
> >   + TestCase [ 6] : test_bit_atomic_access32 succeeded
> >   + TestCase [ 7] : test_bit_atomic_access64 succeeded
> >   + TestCase [ 8] : test_bit_atomic_v_access32 succeeded
> >   + TestCase [ 9] : test_bit_atomic_v_access64 succeeded
> >   + TestCase [10] : test_bit_atomic_parallel_assign32 succeeded
> >   + TestCase [11] : test_bit_atomic_parallel_assign64 succeeded
> >   + TestCase [12] : test_bit_atomic_parallel_test_and_modify32 failed
> >   + TestCase [13] : test_bit_atomic_parallel_test_and_modify64 succeeded
> >   + TestCase [14] : test_bit_atomic_parallel_flip32 succeeded
> >   + TestCase [15] : test_bit_atomic_parallel_flip64 succeeded
> >   + TestCase [16] : test_bit_relaxed_set succeeded
> >   + TestCase [17] : test_bit_relaxed_clear succeeded
> >   + TestCase [18] : test_bit_relaxed_test_set_clear succeeded
> >
> > EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
> > failed: After 1070523 flips, the bit value should be 1
> >
>
> I've been unable to reproduce this on my Raptor Lake x86_64 with GCC
> 12.3 (CI machine used GCC 12.2).
>
> I'll try if I have better luck on some other systems.

Could it have to do with how UNH tests in containers?
(with physical cores that may not be isolated/dedicated to a process).

This morning, I see at least two similar errors, on the same part of the test:
https://patchwork.dpdk.org/project/dpdk/patch/20241010203209.63911-1-nandinipersad361@gmail.com/
EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
failed: After 1719375 flips, the bit value should be 1

https://patchwork.dpdk.org/project/dpdk/patch/20241010194148.1877659-18-rjarry@redhat.com/
EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
failed: After 1199935 flips, the bit value should be 1
  
Morten Brørup Oct. 11, 2024, 3:06 p.m. UTC | #7
> From: David Marchand [mailto:david.marchand@redhat.com]
> Sent: Thursday, 10 October 2024 12.45
> 
> On Fri, Sep 20, 2024 at 12:57 PM Mattias Rönnblom
> <mattias.ronnblom@ericsson.com> wrote:
> > +       static int
> \
> > +       run_parallel_test_and_modify ## size(void *arg)         \
> > +       {
> \
> > +               struct parallel_test_and_set_lcore ## size *lcore =
> arg; \
> > +               uint64_t deadline = rte_get_timer_cycles() +
> \
> > +                       PARALLEL_TEST_RUNTIME * rte_get_timer_hz();
> \
> > +               do {
> \
> > +                       bool old_value;
> \
> > +                       bool new_value = rte_rand() & 1;
> \
> > +                       bool use_assign = rte_rand() & 1;
> \
> > +
> \
> > +                       if (use_assign)
> \
> > +                               old_value =
> rte_bit_atomic_test_and_assign( \
> > +                                       lcore->word, lcore->bit,
> new_value, \
> > +                                       rte_memory_order_relaxed);
> \
> > +                       else
> \
> > +                               old_value = new_value ?
> \
> > +                                       rte_bit_atomic_test_and_set(
> \
> > +                                               lcore->word, lcore-
> >bit, \
> > +
> rte_memory_order_relaxed) : \
> > +
> rte_bit_atomic_test_and_clear(  \
> > +                                               lcore->word, lcore-
> >bit, \
> > +
> rte_memory_order_relaxed); \
> > +                       if (old_value != new_value)
> \
> > +                               lcore->flips++;
> \
> > +               } while (rte_get_timer_cycles() < deadline);
> \
> > +
> \
> > +               return 0;
> \
> > +       }
> \
> > +
> \
> > +       static int
> \
> > +       test_bit_atomic_parallel_test_and_modify ## size(void)
> \
> > +       {
> \
> > +               unsigned int worker_lcore_id;
> \
> > +               uint ## size ## _t word = 0;
> \
> > +               unsigned int bit = rte_rand_max(size);
> \
> > +               struct parallel_test_and_set_lcore ## size lmain = {
> \
> > +                       .word = &word,
> \
> > +                       .bit = bit
> \
> > +               };
> \
> > +               struct parallel_test_and_set_lcore ## size lworker =
> {  \
> > +                       .word = &word,
> \
> > +                       .bit = bit
> \
> > +               };
> \
> > +
> \
> > +               if (rte_lcore_count() < 2) {
> \
> > +                       printf("Need multiple cores to run parallel
> test.\n"); \
> > +                       return TEST_SKIPPED;
> \
> > +               }
> \
> > +
> \
> > +               worker_lcore_id = rte_get_next_lcore(-1, 1, 0);
> \
> > +
> \
> > +               int rc =
> rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
> > +                                              &lworker,
> worker_lcore_id); \
> > +               TEST_ASSERT(rc == 0, "Worker thread launch failed");
> \
> > +
> \
> > +               run_parallel_test_and_modify ## size(&lmain);
> \
> > +
> \
> > +               rte_eal_mp_wait_lcore();
> \
> > +
> \
> > +               uint64_t total_flips = lmain.flips + lworker.flips;
> \
> > +               bool expected_value = total_flips % 2;
> \
> > +
> \
> > +               TEST_ASSERT(expected_value == rte_bit_test(&word,
> bit), \
> > +                           "After %"PRId64" flips, the bit value "
> \
> > +                           "should be %d", total_flips,
> expected_value); \
> > +
> \
> > +               uint64_t expected_word = 0;
> \
> > +               rte_bit_assign(&expected_word, bit, expected_value);
> \
> > +
> \
> > +               TEST_ASSERT(expected_word == word, "Untouched bits
> have " \
> > +                           "changed value");
> \
> > +
> \
> > +               return TEST_SUCCESS;
> \
> > +       }
> > +
> > +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
> > +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
> 
> It appears this test failed once in the CI for an unrelated series
> (uAPI kernel header import):
> https://lab.dpdk.org/results/dashboard/testruns/logs/1385993/
> 
>  + TestCase [ 0] : test_bit_access32 succeeded
>  + TestCase [ 1] : test_bit_access64 succeeded
>  + TestCase [ 2] : test_bit_access32 succeeded
>  + TestCase [ 3] : test_bit_access64 succeeded
>  + TestCase [ 4] : test_bit_v_access32 succeeded
>  + TestCase [ 5] : test_bit_v_access64 succeeded
>  + TestCase [ 6] : test_bit_atomic_access32 succeeded
>  + TestCase [ 7] : test_bit_atomic_access64 succeeded
>  + TestCase [ 8] : test_bit_atomic_v_access32 succeeded
>  + TestCase [ 9] : test_bit_atomic_v_access64 succeeded
>  + TestCase [10] : test_bit_atomic_parallel_assign32 succeeded
>  + TestCase [11] : test_bit_atomic_parallel_assign64 succeeded
>  + TestCase [12] : test_bit_atomic_parallel_test_and_modify32 failed
>  + TestCase [13] : test_bit_atomic_parallel_test_and_modify64 succeeded
>  + TestCase [14] : test_bit_atomic_parallel_flip32 succeeded
>  + TestCase [15] : test_bit_atomic_parallel_flip64 succeeded
>  + TestCase [16] : test_bit_relaxed_set succeeded
>  + TestCase [17] : test_bit_relaxed_clear succeeded
>  + TestCase [18] : test_bit_relaxed_test_set_clear succeeded
> 
> EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
> failed: After 1070523 flips, the bit value should be 1
> 
> Please have a look.

The Coverity report [1] just gave me an idea:

worker_lcore_id = rte_get_next_lcore(-1, 1, 0);
+ TEST_ASSERT(worker_lcore_id < RTE_MAX_LCORE, "Worker thread allocation failed");

Or even better:
Improve rte_eal_remote_launch() by checking the validity of the worker_id parameter.

[1]: https://scan4.scan.coverity.com/#/project-view/60887/10075?selectedIssue=445384
  
David Marchand Oct. 11, 2024, 3:11 p.m. UTC | #8
On Fri, Oct 11, 2024 at 5:06 PM Morten Brørup <mb@smartsharesystems.com> wrote:
>
> > From: David Marchand [mailto:david.marchand@redhat.com]
> > Sent: Thursday, 10 October 2024 12.45
> >
> > On Fri, Sep 20, 2024 at 12:57 PM Mattias Rönnblom
> > <mattias.ronnblom@ericsson.com> wrote:
> > > +       static int
> > \
> > > +       run_parallel_test_and_modify ## size(void *arg)         \
> > > +       {
> > \
> > > +               struct parallel_test_and_set_lcore ## size *lcore =
> > arg; \
> > > +               uint64_t deadline = rte_get_timer_cycles() +
> > \
> > > +                       PARALLEL_TEST_RUNTIME * rte_get_timer_hz();
> > \
> > > +               do {
> > \
> > > +                       bool old_value;
> > \
> > > +                       bool new_value = rte_rand() & 1;
> > \
> > > +                       bool use_assign = rte_rand() & 1;
> > \
> > > +
> > \
> > > +                       if (use_assign)
> > \
> > > +                               old_value =
> > rte_bit_atomic_test_and_assign( \
> > > +                                       lcore->word, lcore->bit,
> > new_value, \
> > > +                                       rte_memory_order_relaxed);
> > \
> > > +                       else
> > \
> > > +                               old_value = new_value ?
> > \
> > > +                                       rte_bit_atomic_test_and_set(
> > \
> > > +                                               lcore->word, lcore-
> > >bit, \
> > > +
> > rte_memory_order_relaxed) : \
> > > +
> > rte_bit_atomic_test_and_clear(  \
> > > +                                               lcore->word, lcore-
> > >bit, \
> > > +
> > rte_memory_order_relaxed); \
> > > +                       if (old_value != new_value)
> > \
> > > +                               lcore->flips++;
> > \
> > > +               } while (rte_get_timer_cycles() < deadline);
> > \
> > > +
> > \
> > > +               return 0;
> > \
> > > +       }
> > \
> > > +
> > \
> > > +       static int
> > \
> > > +       test_bit_atomic_parallel_test_and_modify ## size(void)
> > \
> > > +       {
> > \
> > > +               unsigned int worker_lcore_id;
> > \
> > > +               uint ## size ## _t word = 0;
> > \
> > > +               unsigned int bit = rte_rand_max(size);
> > \
> > > +               struct parallel_test_and_set_lcore ## size lmain = {
> > \
> > > +                       .word = &word,
> > \
> > > +                       .bit = bit
> > \
> > > +               };
> > \
> > > +               struct parallel_test_and_set_lcore ## size lworker =
> > {  \
> > > +                       .word = &word,
> > \
> > > +                       .bit = bit
> > \
> > > +               };
> > \
> > > +
> > \
> > > +               if (rte_lcore_count() < 2) {
> > \
> > > +                       printf("Need multiple cores to run parallel
> > test.\n"); \
> > > +                       return TEST_SKIPPED;
> > \
> > > +               }
> > \
> > > +
> > \
> > > +               worker_lcore_id = rte_get_next_lcore(-1, 1, 0);
> > \
> > > +
> > \
> > > +               int rc =
> > rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
> > > +                                              &lworker,
> > worker_lcore_id); \
> > > +               TEST_ASSERT(rc == 0, "Worker thread launch failed");
> > \
> > > +
> > \
> > > +               run_parallel_test_and_modify ## size(&lmain);
> > \
> > > +
> > \
> > > +               rte_eal_mp_wait_lcore();
> > \
> > > +
> > \
> > > +               uint64_t total_flips = lmain.flips + lworker.flips;
> > \
> > > +               bool expected_value = total_flips % 2;
> > \
> > > +
> > \
> > > +               TEST_ASSERT(expected_value == rte_bit_test(&word,
> > bit), \
> > > +                           "After %"PRId64" flips, the bit value "
> > \
> > > +                           "should be %d", total_flips,
> > expected_value); \
> > > +
> > \
> > > +               uint64_t expected_word = 0;
> > \
> > > +               rte_bit_assign(&expected_word, bit, expected_value);
> > \
> > > +
> > \
> > > +               TEST_ASSERT(expected_word == word, "Untouched bits
> > have " \
> > > +                           "changed value");
> > \
> > > +
> > \
> > > +               return TEST_SUCCESS;
> > \
> > > +       }
> > > +
> > > +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
> > > +GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
> >
> > It appears this test failed once in the CI for an unrelated series
> > (uAPI kernel header import):
> > https://lab.dpdk.org/results/dashboard/testruns/logs/1385993/
> >
> >  + TestCase [ 0] : test_bit_access32 succeeded
> >  + TestCase [ 1] : test_bit_access64 succeeded
> >  + TestCase [ 2] : test_bit_access32 succeeded
> >  + TestCase [ 3] : test_bit_access64 succeeded
> >  + TestCase [ 4] : test_bit_v_access32 succeeded
> >  + TestCase [ 5] : test_bit_v_access64 succeeded
> >  + TestCase [ 6] : test_bit_atomic_access32 succeeded
> >  + TestCase [ 7] : test_bit_atomic_access64 succeeded
> >  + TestCase [ 8] : test_bit_atomic_v_access32 succeeded
> >  + TestCase [ 9] : test_bit_atomic_v_access64 succeeded
> >  + TestCase [10] : test_bit_atomic_parallel_assign32 succeeded
> >  + TestCase [11] : test_bit_atomic_parallel_assign64 succeeded
> >  + TestCase [12] : test_bit_atomic_parallel_test_and_modify32 failed
> >  + TestCase [13] : test_bit_atomic_parallel_test_and_modify64 succeeded
> >  + TestCase [14] : test_bit_atomic_parallel_flip32 succeeded
> >  + TestCase [15] : test_bit_atomic_parallel_flip64 succeeded
> >  + TestCase [16] : test_bit_relaxed_set succeeded
> >  + TestCase [17] : test_bit_relaxed_clear succeeded
> >  + TestCase [18] : test_bit_relaxed_test_set_clear succeeded
> >
> > EAL: Test assert test_bit_atomic_parallel_test_and_modify32 line 236
> > failed: After 1070523 flips, the bit value should be 1
> >
> > Please have a look.
>
> The Coverity report [1] just gave me an idea:
>
> worker_lcore_id = rte_get_next_lcore(-1, 1, 0);
> + TEST_ASSERT(worker_lcore_id < RTE_MAX_LCORE, "Worker thread allocation failed");
>
> Or even better:
> Improve rte_eal_remote_launch() by checking the validity of the worker_id parameter.
>
> [1]: https://scan4.scan.coverity.com/#/project-view/60887/10075?selectedIssue=445384
>

I have the same fix in my workdir, I was about to send, but if you
want to do it, go ahead.
  
Morten Brørup Oct. 11, 2024, 3:15 p.m. UTC | #9
> > The Coverity report [1] just gave me an idea:
> >
> > worker_lcore_id = rte_get_next_lcore(-1, 1, 0);
> > + TEST_ASSERT(worker_lcore_id < RTE_MAX_LCORE, "Worker thread
> allocation failed");
> >
> > Or even better:
> > Improve rte_eal_remote_launch() by checking the validity of the
> worker_id parameter.
> >
> > [1]: https://scan4.scan.coverity.com/#/project-
> view/60887/10075?selectedIssue=445384
> >
> 
> I have the same fix in my workdir, I was about to send, but if you
> want to do it, go ahead.

No, I only had the idea, didn't actually work on it.

Please send your fix.

-Morten
  
David Marchand Oct. 11, 2024, 3:18 p.m. UTC | #10
On Fri, Oct 11, 2024 at 5:16 PM Morten Brørup <mb@smartsharesystems.com> wrote:
>
> > > The Coverity report [1] just gave me an idea:
> > >
> > > worker_lcore_id = rte_get_next_lcore(-1, 1, 0);
> > > + TEST_ASSERT(worker_lcore_id < RTE_MAX_LCORE, "Worker thread
> > allocation failed");
> > >
> > > Or even better:
> > > Improve rte_eal_remote_launch() by checking the validity of the
> > worker_id parameter.
> > >
> > > [1]: https://scan4.scan.coverity.com/#/project-
> > view/60887/10075?selectedIssue=445384
> > >
> >
> > I have the same fix in my workdir, I was about to send, but if you
> > want to do it, go ahead.
>
> No, I only had the idea, didn't actually work on it.
>
> Please send your fix.

Yes, let's ask the maintainer to do the job.
Ok, I'll see later.
  

Patch

diff --git a/app/test/test_bitops.c b/app/test/test_bitops.c
index 322f58c066..b80216a0a1 100644
--- a/app/test/test_bitops.c
+++ b/app/test/test_bitops.c
@@ -3,10 +3,13 @@ 
  * Copyright(c) 2024 Ericsson AB
  */
 
+#include <inttypes.h>
 #include <stdbool.h>
 
-#include <rte_launch.h>
 #include <rte_bitops.h>
+#include <rte_cycles.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
 #include <rte_random.h>
 #include "test.h"
 
@@ -61,6 +64,304 @@  GEN_TEST_BIT_ACCESS(test_bit_access32, rte_bit_set, rte_bit_clear,
 GEN_TEST_BIT_ACCESS(test_bit_access64, rte_bit_set, rte_bit_clear,
 		    rte_bit_assign, rte_bit_flip, rte_bit_test, 64)
 
+#define bit_atomic_set(addr, nr)				\
+	rte_bit_atomic_set(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_clear(addr, nr)					\
+	rte_bit_atomic_clear(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_assign(addr, nr, value)				\
+	rte_bit_atomic_assign(addr, nr, value, rte_memory_order_relaxed)
+
+#define bit_atomic_flip(addr, nr)					\
+    rte_bit_atomic_flip(addr, nr, rte_memory_order_relaxed)
+
+#define bit_atomic_test(addr, nr)				\
+	rte_bit_atomic_test(addr, nr, rte_memory_order_relaxed)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access32, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 32)
+
+GEN_TEST_BIT_ACCESS(test_bit_atomic_access64, bit_atomic_set,
+		    bit_atomic_clear, bit_atomic_assign,
+		    bit_atomic_flip, bit_atomic_test, 64)
+
+#define PARALLEL_TEST_RUNTIME 0.25
+
+#define GEN_TEST_BIT_PARALLEL_ASSIGN(size)				\
+									\
+	struct parallel_access_lcore ## size				\
+	{								\
+		unsigned int bit;					\
+		uint ## size ##_t *word;				\
+		bool failed;						\
+	};								\
+									\
+	static int							\
+	run_parallel_assign ## size(void *arg)				\
+	{								\
+		struct parallel_access_lcore ## size *lcore = arg;	\
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		bool value = false;					\
+									\
+		do {							\
+			bool new_value = rte_rand() & 1;		\
+			bool use_test_and_modify = rte_rand() & 1;	\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (rte_bit_atomic_test(lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) != value) { \
+				lcore->failed = true;			\
+				break;					\
+			}						\
+									\
+			if (use_test_and_modify) {			\
+				bool old_value;				\
+				if (use_assign) 			\
+					old_value = rte_bit_atomic_test_and_assign( \
+						lcore->word, lcore->bit, new_value, \
+						rte_memory_order_relaxed); \
+				else {					\
+					old_value = new_value ?		\
+						rte_bit_atomic_test_and_set( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed) : \
+						rte_bit_atomic_test_and_clear( \
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+				if (old_value != value) {		\
+					lcore->failed = true;		\
+					break;				\
+				}					\
+			} else {					\
+				if (use_assign)				\
+					rte_bit_atomic_assign(lcore->word, lcore->bit, \
+							      new_value, \
+							      rte_memory_order_relaxed); \
+				else {					\
+					if (new_value)			\
+						rte_bit_atomic_set(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+					else				\
+						rte_bit_atomic_clear(	\
+							lcore->word, lcore->bit, \
+							rte_memory_order_relaxed); \
+				}					\
+			}						\
+									\
+			value = new_value;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_assign ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		struct parallel_access_lcore ## size lmain = {		\
+			.word = &word					\
+		};							\
+		struct parallel_access_lcore ## size lworker = {	\
+			.word = &word					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		lmain.bit = rte_rand_max(size);				\
+		do {							\
+			lworker.bit = rte_rand_max(size);		\
+		} while (lworker.bit == lmain.bit);			\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_assign ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_assign ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		TEST_ASSERT(!lmain.failed, "Main lcore atomic access failed"); \
+		TEST_ASSERT(!lworker.failed, "Worker lcore atomic access " \
+			    "failed");					\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_ASSIGN(32)
+GEN_TEST_BIT_PARALLEL_ASSIGN(64)
+
+#define GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(size)			\
+									\
+	struct parallel_test_and_set_lcore ## size			\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_test_and_modify ## size(void *arg)		\
+	{								\
+		struct parallel_test_and_set_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			bool old_value;					\
+			bool new_value = rte_rand() & 1;		\
+			bool use_assign = rte_rand() & 1;		\
+									\
+			if (use_assign)					\
+				old_value = rte_bit_atomic_test_and_assign( \
+					lcore->word, lcore->bit, new_value, \
+					rte_memory_order_relaxed);	\
+			else						\
+				old_value = new_value ?			\
+					rte_bit_atomic_test_and_set(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed) : \
+					rte_bit_atomic_test_and_clear(	\
+						lcore->word, lcore->bit, \
+						rte_memory_order_relaxed); \
+			if (old_value != new_value)			\
+				lcore->flips++;				\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_test_and_modify ## size(void)		\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_test_and_set_lcore ## size lmain = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_test_and_set_lcore ## size lworker = {	\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_test_and_modify ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_test_and_modify ## size(&lmain);		\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRId64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+									\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(32)
+GEN_TEST_BIT_PARALLEL_TEST_AND_MODIFY(64)
+
+#define GEN_TEST_BIT_PARALLEL_FLIP(size)				\
+									\
+	struct parallel_flip_lcore ## size				\
+	{								\
+		uint ## size ##_t *word;				\
+		unsigned int bit;					\
+		uint64_t flips;						\
+	};								\
+									\
+	static int							\
+	run_parallel_flip ## size(void *arg)				\
+	{								\
+		struct parallel_flip_lcore ## size *lcore = arg; \
+		uint64_t deadline = rte_get_timer_cycles() +		\
+			PARALLEL_TEST_RUNTIME * rte_get_timer_hz();	\
+		do {							\
+			rte_bit_atomic_flip(lcore->word, lcore->bit,	\
+					    rte_memory_order_relaxed);	\
+			lcore->flips++;					\
+		} while (rte_get_timer_cycles() < deadline);		\
+									\
+		return 0;						\
+	}								\
+									\
+	static int							\
+	test_bit_atomic_parallel_flip ## size(void)			\
+	{								\
+		unsigned int worker_lcore_id;				\
+		uint ## size ## _t word = 0;				\
+		unsigned int bit = rte_rand_max(size);			\
+		struct parallel_flip_lcore ## size lmain = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+		struct parallel_flip_lcore ## size lworker = {		\
+			.word = &word,					\
+			.bit = bit					\
+		};							\
+									\
+		if (rte_lcore_count() < 2) {				\
+			printf("Need multiple cores to run parallel test.\n"); \
+			return TEST_SKIPPED;				\
+		}							\
+									\
+		worker_lcore_id = rte_get_next_lcore(-1, 1, 0);		\
+									\
+		int rc = rte_eal_remote_launch(run_parallel_flip ## size, \
+					       &lworker, worker_lcore_id); \
+		TEST_ASSERT(rc == 0, "Worker thread launch failed");	\
+									\
+		run_parallel_flip ## size(&lmain);			\
+									\
+		rte_eal_mp_wait_lcore();				\
+									\
+		uint64_t total_flips = lmain.flips + lworker.flips;	\
+		bool expected_value = total_flips % 2;			\
+									\
+		TEST_ASSERT(expected_value == rte_bit_test(&word, bit), \
+			    "After %"PRId64" flips, the bit value "	\
+			    "should be %d", total_flips, expected_value); \
+									\
+		uint64_t expected_word = 0;				\
+		rte_bit_assign(&expected_word, bit, expected_value);	\
+									\
+		TEST_ASSERT(expected_word == word, "Untouched bits have " \
+			    "changed value");				\
+									\
+		return TEST_SUCCESS;					\
+	}
+
+GEN_TEST_BIT_PARALLEL_FLIP(32)
+GEN_TEST_BIT_PARALLEL_FLIP(64)
+
 static uint32_t val32;
 static uint64_t val64;
 
@@ -177,6 +478,16 @@  static struct unit_test_suite test_suite = {
 	.unit_test_cases = {
 		TEST_CASE(test_bit_access32),
 		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_access32),
+		TEST_CASE(test_bit_access64),
+		TEST_CASE(test_bit_atomic_access32),
+		TEST_CASE(test_bit_atomic_access64),
+		TEST_CASE(test_bit_atomic_parallel_assign32),
+		TEST_CASE(test_bit_atomic_parallel_assign64),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify32),
+		TEST_CASE(test_bit_atomic_parallel_test_and_modify64),
+		TEST_CASE(test_bit_atomic_parallel_flip32),
+		TEST_CASE(test_bit_atomic_parallel_flip64),
 		TEST_CASE(test_bit_relaxed_set),
 		TEST_CASE(test_bit_relaxed_clear),
 		TEST_CASE(test_bit_relaxed_test_set_clear),