[v2,2/3] test/distributor: replace sync builtins with atomic builtins
Checks
Commit Message
'__sync' built-in functions are deprecated, should use the '__atomic'
built-in instead. the sync built-in functions are full barriers, while
atomic built-in functions offer less restrictive one-way barriers,
which help performance.
Here is the example test result on TX2:
sudo ./arm64-armv8a-linuxapp-gcc/app/test -l 112-139 \
-n 4 --socket-mem=1024,1024 -- -i
RTE>>distributor_perf_autotest
*** distributor_perf_autotest without this patch ***
Comments
>
> diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c index
> 98919ec..ddab08d 100644
> --- a/app/test/test_distributor.c
> +++ b/app/test/test_distributor.c
> @@ -62,9 +62,14 @@ handle_work(void *arg)
> struct worker_params *wp = arg;
> struct rte_distributor *db = wp->dist;
> unsigned int count = 0, num = 0;
> - unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> int i;
>
> +#ifdef RTE_USE_C11_MEM_MODEL
> + unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> +__ATOMIC_RELAXED); #else
> + unsigned int id = __sync_fetch_and_add(&worker_idx, 1); #endif
> +
I suggest we remove the conditional compilation and just keep the __atomic_xxx calls as this is test code. More over the distributor library does not have a C11 version of the library (assuming that using __atomic_xxx does not impact performance on other platforms negatively). This applies to other instances in this patch.
> for (i = 0; i < 8; i++)
> buf[i] = NULL;
> num = rte_distributor_get_pkt(db, id, buf, buf, num); @@ -270,7
> +275,12 @@ handle_work_with_free_mbufs(void *arg)
> unsigned int count = 0;
> unsigned int i;
> unsigned int num = 0;
> +
> +#ifdef RTE_USE_C11_MEM_MODEL
> + unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> +__ATOMIC_RELAXED); #else
> unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> +#endif
>
> for (i = 0; i < 8; i++)
> buf[i] = NULL;
> @@ -343,7 +353,13 @@ handle_work_for_shutdown_test(void *arg)
> unsigned int total = 0;
> unsigned int i;
> unsigned int returned = 0;
> +
> +#ifdef RTE_USE_C11_MEM_MODEL
> + const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> + __ATOMIC_RELAXED);
> +#else
> const unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> +#endif
>
> num = rte_distributor_get_pkt(d, id, buf, buf, num);
>
> diff --git a/app/test/test_distributor_perf.c b/app/test/test_distributor_perf.c
> index edf1998..9367460 100644
> --- a/app/test/test_distributor_perf.c
> +++ b/app/test/test_distributor_perf.c
> @@ -111,9 +111,14 @@ handle_work(void *arg)
> unsigned int count = 0;
> unsigned int num = 0;
> int i;
> - unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> struct rte_mbuf *buf[8] __rte_cache_aligned;
>
> +#ifdef RTE_USE_C11_MEM_MODEL
> + unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> +__ATOMIC_RELAXED); #else
> + unsigned int id = __sync_fetch_and_add(&worker_idx, 1); #endif
> +
> for (i = 0; i < 8; i++)
> buf[i] = NULL;
>
> --
> 2.7.4
> -----Original Message-----
> From: Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>
> Sent: Tuesday, April 2, 2019 12:24 AM
> To: Phil Yang (Arm Technology China) <Phil.Yang@arm.com>; dev@dpdk.org;
> thomas@monjalon.net
> Cc: david.hunt@intel.com; reshma.pattan@intel.com; Gavin Hu (Arm
> Technology China) <Gavin.Hu@arm.com>; Phil Yang (Arm Technology China)
> <Phil.Yang@arm.com>; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v2 2/3] test/distributor: replace sync builtins with atomic
> builtins
>
> >
> > diff --git a/app/test/test_distributor.c b/app/test/test_distributor.c
> > index 98919ec..ddab08d 100644
> > --- a/app/test/test_distributor.c
> > +++ b/app/test/test_distributor.c
> > @@ -62,9 +62,14 @@ handle_work(void *arg)
> > struct worker_params *wp = arg;
> > struct rte_distributor *db = wp->dist;
> > unsigned int count = 0, num = 0;
> > - unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> > int i;
> >
> > +#ifdef RTE_USE_C11_MEM_MODEL
> > + unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> > +__ATOMIC_RELAXED); #else
> > + unsigned int id = __sync_fetch_and_add(&worker_idx, 1); #endif
> > +
> I suggest we remove the conditional compilation and just keep the
> __atomic_xxx calls as this is test code. More over the distributor library does
> not have a C11 version of the library (assuming that using __atomic_xxx does
> not impact performance on other platforms negatively). This applies to other
> instances in this patch.
Hi Honnappa,
Thanks for your comments.
Agree. I don't think __atomic_xxx will cause performance degradation on other platforms. Remove the conditional compilation will make the code more clear.
Thanks,
Phil Yang
>
> > for (i = 0; i < 8; i++)
> > buf[i] = NULL;
> > num = rte_distributor_get_pkt(db, id, buf, buf, num); @@ -270,7
> > +275,12 @@ handle_work_with_free_mbufs(void *arg)
> > unsigned int count = 0;
> > unsigned int i;
> > unsigned int num = 0;
> > +
> > +#ifdef RTE_USE_C11_MEM_MODEL
> > + unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> > +__ATOMIC_RELAXED); #else
> > unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> > +#endif
> >
> > for (i = 0; i < 8; i++)
> > buf[i] = NULL;
> > @@ -343,7 +353,13 @@ handle_work_for_shutdown_test(void *arg)
> > unsigned int total = 0;
> > unsigned int i;
> > unsigned int returned = 0;
> > +
> > +#ifdef RTE_USE_C11_MEM_MODEL
> > + const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> > + __ATOMIC_RELAXED);
> > +#else
> > const unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> > +#endif
> >
> > num = rte_distributor_get_pkt(d, id, buf, buf, num);
> >
> > diff --git a/app/test/test_distributor_perf.c
> > b/app/test/test_distributor_perf.c
> > index edf1998..9367460 100644
> > --- a/app/test/test_distributor_perf.c
> > +++ b/app/test/test_distributor_perf.c
> > @@ -111,9 +111,14 @@ handle_work(void *arg)
> > unsigned int count = 0;
> > unsigned int num = 0;
> > int i;
> > - unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> > struct rte_mbuf *buf[8] __rte_cache_aligned;
> >
> > +#ifdef RTE_USE_C11_MEM_MODEL
> > + unsigned int id = __atomic_fetch_add(&worker_idx, 1,
> > +__ATOMIC_RELAXED); #else
> > + unsigned int id = __sync_fetch_and_add(&worker_idx, 1); #endif
> > +
> > for (i = 0; i < 8; i++)
> > buf[i] = NULL;
> >
> > --
> > 2.7.4
==== Cache line switch test ===
Time for 33554432 iterations = 1519202730 ticks
Ticks per iteration = 45
==== Cache line switch test ===
Time for 33554432 iterations = 1251715496 ticks
Ticks per iteration = 37
Less ticks needed for the cache line switch test. It got 17% of
performance improvement.
Signed-off-by: Phil Yang <phil.yang@arm.com>
Reviewed-by: Gavin Hu <gavin.hu@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
---
app/test/test_distributor.c | 18 +++++++++++++++++-
app/test/test_distributor_perf.c | 7 ++++++-
2 files changed, 23 insertions(+), 2 deletions(-)
@@ -62,9 +62,14 @@ handle_work(void *arg)
struct worker_params *wp = arg;
struct rte_distributor *db = wp->dist;
unsigned int count = 0, num = 0;
- unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
int i;
+#ifdef RTE_USE_C11_MEM_MODEL
+ unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+#else
+ unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+#endif
+
for (i = 0; i < 8; i++)
buf[i] = NULL;
num = rte_distributor_get_pkt(db, id, buf, buf, num);
@@ -270,7 +275,12 @@ handle_work_with_free_mbufs(void *arg)
unsigned int count = 0;
unsigned int i;
unsigned int num = 0;
+
+#ifdef RTE_USE_C11_MEM_MODEL
+ unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+#else
unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+#endif
for (i = 0; i < 8; i++)
buf[i] = NULL;
@@ -343,7 +353,13 @@ handle_work_for_shutdown_test(void *arg)
unsigned int total = 0;
unsigned int i;
unsigned int returned = 0;
+
+#ifdef RTE_USE_C11_MEM_MODEL
+ const unsigned int id = __atomic_fetch_add(&worker_idx, 1,
+ __ATOMIC_RELAXED);
+#else
const unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+#endif
num = rte_distributor_get_pkt(d, id, buf, buf, num);
@@ -111,9 +111,14 @@ handle_work(void *arg)
unsigned int count = 0;
unsigned int num = 0;
int i;
- unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
struct rte_mbuf *buf[8] __rte_cache_aligned;
+#ifdef RTE_USE_C11_MEM_MODEL
+ unsigned int id = __atomic_fetch_add(&worker_idx, 1, __ATOMIC_RELAXED);
+#else
+ unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+#endif
+
for (i = 0; i < 8; i++)
buf[i] = NULL;