service: split tests to perf and autotest to avoid spurious CI failures
Checks
Commit Message
On some CI runs, some service-cores tests spuriously fail as the
service lcore thread is not actually scheduled by the OS in the
given amount of time.
Increasing timeouts has not resolved the issue in the CI, so the
solution in this patch is to move them to a separate perf test
suite.
Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
---
See DPDK ML discussion in this thread:
http://mails.dpdk.org/archives/dev/2023-February/263523.html
---
app/test/meson.build | 1 +
app/test/test_service_cores.c | 32 +++++++++++++++++++++++++++-----
2 files changed, 28 insertions(+), 5 deletions(-)
Comments
Hello,
On Fri, Feb 24, 2023 at 7:04 PM Harry van Haaren
<harry.van.haaren@intel.com> wrote:
>
> On some CI runs, some service-cores tests spuriously fail as the
> service lcore thread is not actually scheduled by the OS in the
> given amount of time.
>
> Increasing timeouts has not resolved the issue in the CI, so the
> solution in this patch is to move them to a separate perf test
> suite.
>
> Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
>
> ---
>
> See DPDK ML discussion in this thread:
> http://mails.dpdk.org/archives/dev/2023-February/263523.html
> ---
> app/test/meson.build | 1 +
> app/test/test_service_cores.c | 32 +++++++++++++++++++++++++++-----
> 2 files changed, 28 insertions(+), 5 deletions(-)
>
> diff --git a/app/test/meson.build b/app/test/meson.build
> index f34d19e3c3..2db5ccf4ff 100644
> --- a/app/test/meson.build
> +++ b/app/test/meson.build
> @@ -287,6 +287,7 @@ perf_test_names = [
> 'pie_perf',
> 'distributor_perf_autotest',
> 'pmd_perf_autotest',
> + 'service_perf_autotest',
> 'stack_perf_autotest',
> 'stack_lf_perf_autotest',
> 'rand_perf_autotest',
> diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c
> index 637fcd7cf9..06653dfdef 100644
> --- a/app/test/test_service_cores.c
> +++ b/app/test/test_service_cores.c
> @@ -1022,17 +1022,12 @@ static struct unit_test_suite service_tests = {
> TEST_CASE_ST(dummy_register, NULL, service_name),
> TEST_CASE_ST(dummy_register, NULL, service_get_by_name),
> TEST_CASE_ST(dummy_register, NULL, service_dump),
> - TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> - TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> TEST_CASE_ST(dummy_register, NULL, service_probe_capability),
> TEST_CASE_ST(dummy_register, NULL, service_start_stop),
> TEST_CASE_ST(dummy_register, NULL, service_lcore_add_del),
> - TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
> TEST_CASE_ST(dummy_register, NULL, service_lcore_en_dis_able),
> TEST_CASE_ST(dummy_register, NULL, service_mt_unsafe_poll),
> TEST_CASE_ST(dummy_register, NULL, service_mt_safe_poll),
> - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
> - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
> TEST_CASE_ST(dummy_register, NULL, service_may_be_active),
> TEST_CASE_ST(dummy_register, NULL, service_active_two_cores),
> TEST_CASES_END() /**< NULL terminate unit test array */
> @@ -1046,3 +1041,30 @@ test_service_common(void)
> }
>
> REGISTER_TEST_COMMAND(service_autotest, test_service_common);
> +
> +
> +/* The tests below have been split from the auto-test suite, as the
What is the auto-test suite?
Plus "as the when" reads strange.
In the end, I don't think it helps much to have this comment in the code.
The commitlog is supposed to tell the story, so I would simply remove
this comment.
> + * when they are run in a cloud CI environment they can give false-positive
> + * errors, due to the service-cores not being scheduled by the OS.
> + */
> +static struct unit_test_suite service_perf_tests = {
> + .suite_name = "service core test suite",
Maybe add "performance" in the name, so we have a uniquely named
testsuite object.
> + .setup = testsuite_setup,
> + .teardown = testsuite_teardown,
> + .unit_test_cases = {
> + TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> + TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> + TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
Looking at service_lcore_running_check(), don't you think
service_may_be_active() and service_active_two_cores() are also
subject to race?
> + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
> + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
> + TEST_CASES_END() /**< NULL terminate unit test array */
> + }
> +};
> +
> +static int
> +test_service_perf(void)
> +{
> + return unit_test_suite_runner(&service_perf_tests);
> +}
> +
> +REGISTER_TEST_COMMAND(service_perf_autotest, test_service_perf);
> --
> 2.34.1
>
On Mon, Feb 27, 2023 at 5:08 PM David Marchand
<david.marchand@redhat.com> wrote:
>
> Hello,
>
> On Fri, Feb 24, 2023 at 7:04 PM Harry van Haaren
> <harry.van.haaren@intel.com> wrote:
> >
> > On some CI runs, some service-cores tests spuriously fail as the
> > service lcore thread is not actually scheduled by the OS in the
> > given amount of time.
> >
> > Increasing timeouts has not resolved the issue in the CI, so the
> > solution in this patch is to move them to a separate perf test
> > suite.
> >
> > Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> >
> > ---
> >
> > See DPDK ML discussion in this thread:
> > http://mails.dpdk.org/archives/dev/2023-February/263523.html
> > ---
> > app/test/meson.build | 1 +
> > app/test/test_service_cores.c | 32 +++++++++++++++++++++++++++-----
> > 2 files changed, 28 insertions(+), 5 deletions(-)
> >
> > diff --git a/app/test/meson.build b/app/test/meson.build
> > index f34d19e3c3..2db5ccf4ff 100644
> > --- a/app/test/meson.build
> > +++ b/app/test/meson.build
> > @@ -287,6 +287,7 @@ perf_test_names = [
> > 'pie_perf',
> > 'distributor_perf_autotest',
> > 'pmd_perf_autotest',
> > + 'service_perf_autotest',
> > 'stack_perf_autotest',
> > 'stack_lf_perf_autotest',
> > 'rand_perf_autotest',
> > diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c
> > index 637fcd7cf9..06653dfdef 100644
> > --- a/app/test/test_service_cores.c
> > +++ b/app/test/test_service_cores.c
> > @@ -1022,17 +1022,12 @@ static struct unit_test_suite service_tests = {
> > TEST_CASE_ST(dummy_register, NULL, service_name),
> > TEST_CASE_ST(dummy_register, NULL, service_get_by_name),
> > TEST_CASE_ST(dummy_register, NULL, service_dump),
> > - TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> > - TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> > TEST_CASE_ST(dummy_register, NULL, service_probe_capability),
> > TEST_CASE_ST(dummy_register, NULL, service_start_stop),
> > TEST_CASE_ST(dummy_register, NULL, service_lcore_add_del),
> > - TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
> > TEST_CASE_ST(dummy_register, NULL, service_lcore_en_dis_able),
> > TEST_CASE_ST(dummy_register, NULL, service_mt_unsafe_poll),
> > TEST_CASE_ST(dummy_register, NULL, service_mt_safe_poll),
> > - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
> > - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
> > TEST_CASE_ST(dummy_register, NULL, service_may_be_active),
> > TEST_CASE_ST(dummy_register, NULL, service_active_two_cores),
> > TEST_CASES_END() /**< NULL terminate unit test array */
> > @@ -1046,3 +1041,30 @@ test_service_common(void)
> > }
> >
> > REGISTER_TEST_COMMAND(service_autotest, test_service_common);
> > +
> > +
> > +/* The tests below have been split from the auto-test suite, as the
>
> What is the auto-test suite?
> Plus "as the when" reads strange.
>
> In the end, I don't think it helps much to have this comment in the code.
> The commitlog is supposed to tell the story, so I would simply remove
> this comment.
>
>
> > + * when they are run in a cloud CI environment they can give false-positive
> > + * errors, due to the service-cores not being scheduled by the OS.
> > + */
> > +static struct unit_test_suite service_perf_tests = {
> > + .suite_name = "service core test suite",
>
> Maybe add "performance" in the name, so we have a uniquely named
> testsuite object.
>
>
> > + .setup = testsuite_setup,
> > + .teardown = testsuite_teardown,
> > + .unit_test_cases = {
> > + TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> > + TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> > + TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
>
> Looking at service_lcore_running_check(), don't you think
> service_may_be_active() and service_active_two_cores() are also
> subject to race?
>
>
> > + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
> > + TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
> > + TEST_CASES_END() /**< NULL terminate unit test array */
> > + }
> > +};
> > +
> > +static int
> > +test_service_perf(void)
> > +{
> > + return unit_test_suite_runner(&service_perf_tests);
> > +}
> > +
> > +REGISTER_TEST_COMMAND(service_perf_autotest, test_service_perf);
> > --
> > 2.34.1
> >
ping.
> -----Original Message-----
> From: David Marchand <david.marchand@redhat.com>
> Sent: Monday, February 27, 2023 4:09 PM
> To: Van Haaren, Harry <harry.van.haaren@intel.com>
> Cc: dev@dpdk.org; dpdklab@iol.unh.edu; ci@dpdk.org;
> Honnappa.Nagarahalli@arm.com; mattias.ronnblom
> <mattias.ronnblom@ericsson.com>; mb@smartsharesystems.com;
> roretzla@linux.microsoft.com; aconole@redhat.com; Richardson, Bruce
> <bruce.richardson@intel.com>
> Subject: Re: [PATCH] service: split tests to perf and autotest to avoid spurious CI
> failures
>
> Hello,
>
> On Fri, Feb 24, 2023 at 7:04 PM Harry van Haaren
> <harry.van.haaren@intel.com> wrote:
> >
> > On some CI runs, some service-cores tests spuriously fail as the
> > service lcore thread is not actually scheduled by the OS in the
> > given amount of time.
> >
> > Increasing timeouts has not resolved the issue in the CI, so the
> > solution in this patch is to move them to a separate perf test
> > suite.
> >
> > Signed-off-by: Harry van Haaren <harry.van.haaren@intel.com>
> >
> > ---
> >
> > See DPDK ML discussion in this thread:
> > http://mails.dpdk.org/archives/dev/2023-February/263523.html
> > ---
> > app/test/meson.build | 1 +
> > app/test/test_service_cores.c | 32 +++++++++++++++++++++++++++-----
> > 2 files changed, 28 insertions(+), 5 deletions(-)
> >
> > diff --git a/app/test/meson.build b/app/test/meson.build
> > index f34d19e3c3..2db5ccf4ff 100644
> > --- a/app/test/meson.build
> > +++ b/app/test/meson.build
> > @@ -287,6 +287,7 @@ perf_test_names = [
> > 'pie_perf',
> > 'distributor_perf_autotest',
> > 'pmd_perf_autotest',
> > + 'service_perf_autotest',
> > 'stack_perf_autotest',
> > 'stack_lf_perf_autotest',
> > 'rand_perf_autotest',
> > diff --git a/app/test/test_service_cores.c b/app/test/test_service_cores.c
> > index 637fcd7cf9..06653dfdef 100644
> > --- a/app/test/test_service_cores.c
> > +++ b/app/test/test_service_cores.c
> > @@ -1022,17 +1022,12 @@ static struct unit_test_suite service_tests = {
> > TEST_CASE_ST(dummy_register, NULL, service_name),
> > TEST_CASE_ST(dummy_register, NULL, service_get_by_name),
> > TEST_CASE_ST(dummy_register, NULL, service_dump),
> > - TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> > - TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> > TEST_CASE_ST(dummy_register, NULL, service_probe_capability),
> > TEST_CASE_ST(dummy_register, NULL, service_start_stop),
> > TEST_CASE_ST(dummy_register, NULL, service_lcore_add_del),
> > - TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
> > TEST_CASE_ST(dummy_register, NULL, service_lcore_en_dis_able),
> > TEST_CASE_ST(dummy_register, NULL, service_mt_unsafe_poll),
> > TEST_CASE_ST(dummy_register, NULL, service_mt_safe_poll),
> > - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
> > - TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
> > TEST_CASE_ST(dummy_register, NULL, service_may_be_active),
> > TEST_CASE_ST(dummy_register, NULL, service_active_two_cores),
> > TEST_CASES_END() /**< NULL terminate unit test array */
> > @@ -1046,3 +1041,30 @@ test_service_common(void)
> > }
> >
> > REGISTER_TEST_COMMAND(service_autotest, test_service_common);
> > +
> > +
> > +/* The tests below have been split from the auto-test suite, as the
>
> What is the auto-test suite?
> Plus "as the when" reads strange.
>
> In the end, I don't think it helps much to have this comment in the code.
> The commitlog is supposed to tell the story, so I would simply remove
> this comment.
OK, will remove comment.
> > + * when they are run in a cloud CI environment they can give false-positive
> > + * errors, due to the service-cores not being scheduled by the OS.
> > + */
> > +static struct unit_test_suite service_perf_tests = {
> > + .suite_name = "service core test suite",
>
> Maybe add "performance" in the name, so we have a uniquely named
> testsuite object.
Ack.
> > + .setup = testsuite_setup,
> > + .teardown = testsuite_teardown,
> > + .unit_test_cases = {
> > + TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> > + TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> > + TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
>
> Looking at service_lcore_running_check(), don't you think
> service_may_be_active() and service_active_two_cores() are also
> subject to race?
Perhaps, but those haven't *actually* been failing in any of the reports.
I'd prefer leave tests running if they're not causing issues in the CI.
<snip>
V2 patch on the way.
On Fri, Mar 3, 2023 at 11:59 AM Van Haaren, Harry
<harry.van.haaren@intel.com> wrote:
> > > + .setup = testsuite_setup,
> > > + .teardown = testsuite_teardown,
> > > + .unit_test_cases = {
> > > + TEST_CASE_ST(dummy_register, NULL, service_attr_get),
> > > + TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
> > > + TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
> >
> > Looking at service_lcore_running_check(), don't you think
> > service_may_be_active() and service_active_two_cores() are also
> > subject to race?
>
> Perhaps, but those haven't *actually* been failing in any of the reports.
> I'd prefer leave tests running if they're not causing issues in the CI.
service_may_be_active did fail in the near past (report from October
that triggered the discussion and the timeout extension patch).
So my fear is that we will see some ocurrences.
Time will tell.
@@ -287,6 +287,7 @@ perf_test_names = [
'pie_perf',
'distributor_perf_autotest',
'pmd_perf_autotest',
+ 'service_perf_autotest',
'stack_perf_autotest',
'stack_lf_perf_autotest',
'rand_perf_autotest',
@@ -1022,17 +1022,12 @@ static struct unit_test_suite service_tests = {
TEST_CASE_ST(dummy_register, NULL, service_name),
TEST_CASE_ST(dummy_register, NULL, service_get_by_name),
TEST_CASE_ST(dummy_register, NULL, service_dump),
- TEST_CASE_ST(dummy_register, NULL, service_attr_get),
- TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
TEST_CASE_ST(dummy_register, NULL, service_probe_capability),
TEST_CASE_ST(dummy_register, NULL, service_start_stop),
TEST_CASE_ST(dummy_register, NULL, service_lcore_add_del),
- TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
TEST_CASE_ST(dummy_register, NULL, service_lcore_en_dis_able),
TEST_CASE_ST(dummy_register, NULL, service_mt_unsafe_poll),
TEST_CASE_ST(dummy_register, NULL, service_mt_safe_poll),
- TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
- TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
TEST_CASE_ST(dummy_register, NULL, service_may_be_active),
TEST_CASE_ST(dummy_register, NULL, service_active_two_cores),
TEST_CASES_END() /**< NULL terminate unit test array */
@@ -1046,3 +1041,30 @@ test_service_common(void)
}
REGISTER_TEST_COMMAND(service_autotest, test_service_common);
+
+
+/* The tests below have been split from the auto-test suite, as the
+ * when they are run in a cloud CI environment they can give false-positive
+ * errors, due to the service-cores not being scheduled by the OS.
+ */
+static struct unit_test_suite service_perf_tests = {
+ .suite_name = "service core test suite",
+ .setup = testsuite_setup,
+ .teardown = testsuite_teardown,
+ .unit_test_cases = {
+ TEST_CASE_ST(dummy_register, NULL, service_attr_get),
+ TEST_CASE_ST(dummy_register, NULL, service_lcore_attr_get),
+ TEST_CASE_ST(dummy_register, NULL, service_lcore_start_stop),
+ TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_safe),
+ TEST_CASE_ST(dummy_register, NULL, service_app_lcore_mt_unsafe),
+ TEST_CASES_END() /**< NULL terminate unit test array */
+ }
+};
+
+static int
+test_service_perf(void)
+{
+ return unit_test_suite_runner(&service_perf_tests);
+}
+
+REGISTER_TEST_COMMAND(service_perf_autotest, test_service_perf);