[v4] testpmd: cleanup cleanly from signal

Message ID 20221109041046.199840-1-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Delegated to: Andrew Rybchenko
Headers
Series [v4] testpmd: cleanup cleanly from signal |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/github-robot: build fail github build: failed
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-x86_64-compile-testing fail Testing issues
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS

Commit Message

Stephen Hemminger Nov. 9, 2022, 4:10 a.m. UTC
  Do a clean shutdown of testpmd when a signal is received,
instead of having testpmd kill itself.
This fixes a problem where a signal could be received
while executing inside a PMD, and the signal handler would then call
the PMD's close routine, which could cause a deadlock.

An added benefit is that it gets rid of Windows-specific code.

Fixes: d9a191a00e81 ("app/testpmd: fix quitting in container")
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
v4 - use select() because it is available on Windows, whereas the
     alternatives poll() and sigaction() are not.

 app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 29 deletions(-)
  

Comments

Mattias Rönnblom Nov. 9, 2022, 9:46 p.m. UTC | #1
On 2022-11-09 05:10, Stephen Hemminger wrote:
> Do a clean shutdown of testpmd when a signal is received;
> instead of having testpmd kill itself.
> This fixes problem where a signal could be received
> in the middle of a PMD and then the signal handler would call
> PMD's close routine which could cause a deadlock.
> 
> Added benefit is it gets rid of Windows specific code.
> 
> Fixes: d9a191a00e81 ("app/testpmd: fix quitting in container")
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
> v4 - use select() because that is available on Windows; and other
>       functions poll() and sigaction() are not.
> 
>   app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++-------------------
>   1 file changed, 34 insertions(+), 29 deletions(-)
> 
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index cf5942d0c422..274e96cac2d4 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -12,6 +12,7 @@
>   #ifndef RTE_EXEC_ENV_WINDOWS
>   #include <sys/mman.h>
>   #endif
> +#include <sys/select.h>
>   #include <sys/types.h>
>   #include <errno.h>
>   #include <stdbool.h>
> @@ -4251,26 +4252,11 @@ print_stats(void)
>   static void
>   signal_handler(int signum)
>   {
> -	if (signum == SIGINT || signum == SIGTERM) {
> -		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
> -			signum);
> -#ifdef RTE_LIB_PDUMP
> -		/* uninitialize packet capture framework */
> -		rte_pdump_uninit();
> -#endif
> -#ifdef RTE_LIB_LATENCYSTATS
> -		if (latencystats_enabled != 0)
> -			rte_latencystats_uninit();
> -#endif
> -		force_quit();
> -		/* Set flag to indicate the force termination. */
> -		f_quit = 1;
> -		/* exit with the expected status */
> -#ifndef RTE_EXEC_ENV_WINDOWS
> -		signal(signum, SIG_DFL);
> -		kill(getpid(), signum);
> -#endif
> -	}
> +	fprintf(stderr, "\nSignal %d %s received, preparing to exit...\n",
> +		signum, strsignal(signum));

fprintf() is not async-signal-safe, and neither is strsignal().

This is not a regression introduced by this patch, but I thought it 
might be worth fixing.

> +
> +	/* Set flag to indicate the force termination. */
> +	f_quit = 1;
>   }
>   
>   int
> @@ -4449,9 +4435,6 @@ main(int argc, char** argv)
>   	} else
>   #endif
>   	{
> -		char c;
> -		int rc;
> -
>   		f_quit = 0;
>   
>   		printf("No commandline core given, start packet forwarding\n");
> @@ -4476,15 +4459,37 @@ main(int argc, char** argv)
>   				prev_time = cur_time;
>   				rte_delay_us_sleep(US_PER_S);
>   			}
> -		}
> +		} else {
> +			char c;
> +			fd_set fds;
>   
> -		printf("Press enter to exit\n");
> -		rc = read(0, &c, 1);
> -		pmd_test_exit();
> -		if (rc < 0)
> -			return 1;
> +			printf("Press enter to exit\n");
> +
> +			FD_ZERO(&fds);
> +			FD_SET(0, &fds);
> +
> +			if (select(1, &fds, NULL, NULL, NULL) <= 0) {
> +				fprintf(stderr, "Select failed: %s\n",
> +					strerror(errno));

Why is select() needed? Wouldn't a blocking read suffice? Or getchar().

> +			} else if (read(0, &c, 1) <= 0) {
> +				fprintf(stderr,
> +					"Read stdin failed: %s\n",
> +					strerror(errno));
> +			}
> +		}
> +		stop_packet_forwarding();
> +		force_quit();
>   	}
>   
> +#ifdef RTE_LIB_PDUMP
> +	/* uninitialize packet capture framework */
> +	rte_pdump_uninit();
> +#endif
> +#ifdef RTE_LIB_LATENCYSTATS
> +	if (latencystats_enabled != 0)
> +		rte_latencystats_uninit();
> +#endif
> +
>   	ret = rte_eal_cleanup();
>   	if (ret != 0)
>   		rte_exit(EXIT_FAILURE,
  
Stephen Hemminger Nov. 9, 2022, 10:53 p.m. UTC | #2
On Wed, 9 Nov 2022 22:46:55 +0100
Mattias Rönnblom <hofors@lysator.liu.se> wrote:

> On 2022-11-09 05:10, Stephen Hemminger wrote:
> > Do a clean shutdown of testpmd when a signal is received;
> > instead of having testpmd kill itself.
> > This fixes problem where a signal could be received
> > in the middle of a PMD and then the signal handler would call
> > PMD's close routine which could cause a deadlock.
> > 
> > Added benefit is it gets rid of Windows specific code.
> > 
> > Fixes: d9a191a00e81 ("app/testpmd: fix quitting in container")
> > Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> > ---
> > v4 - use select() because that is available on Windows; and other
> >       functions poll() and sigaction() are not.
> > 
> >   app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++-------------------
> >   1 file changed, 34 insertions(+), 29 deletions(-)
> > 
> > diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> > index cf5942d0c422..274e96cac2d4 100644
> > --- a/app/test-pmd/testpmd.c
> > +++ b/app/test-pmd/testpmd.c
> > @@ -12,6 +12,7 @@
> >   #ifndef RTE_EXEC_ENV_WINDOWS
> >   #include <sys/mman.h>
> >   #endif
> > +#include <sys/select.h>
> >   #include <sys/types.h>
> >   #include <errno.h>
> >   #include <stdbool.h>
> > @@ -4251,26 +4252,11 @@ print_stats(void)
> >   static void
> >   signal_handler(int signum)
> >   {
> > -	if (signum == SIGINT || signum == SIGTERM) {
> > -		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
> > -			signum);
> > -#ifdef RTE_LIB_PDUMP
> > -		/* uninitialize packet capture framework */
> > -		rte_pdump_uninit();
> > -#endif
> > -#ifdef RTE_LIB_LATENCYSTATS
> > -		if (latencystats_enabled != 0)
> > -			rte_latencystats_uninit();
> > -#endif
> > -		force_quit();
> > -		/* Set flag to indicate the force termination. */
> > -		f_quit = 1;
> > -		/* exit with the expected status */
> > -#ifndef RTE_EXEC_ENV_WINDOWS
> > -		signal(signum, SIG_DFL);
> > -		kill(getpid(), signum);
> > -#endif
> > -	}
> > +	fprintf(stderr, "\nSignal %d %s received, preparing to exit...\n",
> > +		signum, strsignal(signum));  
> 
> fprintf() is not async signal safe, and neither is strsignal().
> 
> This is not a regression introduced by this patch, but I thought it 
> might be worth fixing.
> 
> > +
> > +	/* Set flag to indicate the force termination. */
> > +	f_quit = 1;
> >   }
> >   
> >   int
> > @@ -4449,9 +4435,6 @@ main(int argc, char** argv)
> >   	} else
> >   #endif
> >   	{
> > -		char c;
> > -		int rc;
> > -
> >   		f_quit = 0;
> >   
> >   		printf("No commandline core given, start packet forwarding\n");
> > @@ -4476,15 +4459,37 @@ main(int argc, char** argv)
> >   				prev_time = cur_time;
> >   				rte_delay_us_sleep(US_PER_S);
> >   			}
> > -		}
> > +		} else {
> > +			char c;
> > +			fd_set fds;
> >   
> > -		printf("Press enter to exit\n");
> > -		rc = read(0, &c, 1);
> > -		pmd_test_exit();
> > -		if (rc < 0)
> > -			return 1;
> > +			printf("Press enter to exit\n");
> > +
> > +			FD_ZERO(&fds);
> > +			FD_SET(0, &fds);
> > +
> > +			if (select(1, &fds, NULL, NULL, NULL) <= 0) {
> > +				fprintf(stderr, "Select failed: %s\n",
> > +					strerror(errno));  
> 
> Why is select() needed? Wouldn't a blocking read suffice? Or getchar().

On Linux, signal() sets SA_RESTART, so a simple read() is not interrupted.
One option was to use sigaction(), which allows controlling the flags, but that
won't work on Windows.  Using select() works on both.
  
Mattias Rönnblom Nov. 10, 2022, 7:50 a.m. UTC | #3
On 2022-11-09 23:53, Stephen Hemminger wrote:
> On Wed, 9 Nov 2022 22:46:55 +0100
> Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> 
>> On 2022-11-09 05:10, Stephen Hemminger wrote:
>>> Do a clean shutdown of testpmd when a signal is received;
>>> instead of having testpmd kill itself.
>>> This fixes problem where a signal could be received
>>> in the middle of a PMD and then the signal handler would call
>>> PMD's close routine which could cause a deadlock.
>>>
>>> Added benefit is it gets rid of Windows specific code.
>>>
>>> Fixes: d9a191a00e81 ("app/testpmd: fix quitting in container")
>>> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>>> ---
>>> v4 - use select() because that is available on Windows; and other
>>>        functions poll() and sigaction() are not.
>>>
>>>    app/test-pmd/testpmd.c | 63 +++++++++++++++++++++++-------------------
>>>    1 file changed, 34 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
>>> index cf5942d0c422..274e96cac2d4 100644
>>> --- a/app/test-pmd/testpmd.c
>>> +++ b/app/test-pmd/testpmd.c
>>> @@ -12,6 +12,7 @@
>>>    #ifndef RTE_EXEC_ENV_WINDOWS
>>>    #include <sys/mman.h>
>>>    #endif
>>> +#include <sys/select.h>
>>>    #include <sys/types.h>
>>>    #include <errno.h>
>>>    #include <stdbool.h>
>>> @@ -4251,26 +4252,11 @@ print_stats(void)
>>>    static void
>>>    signal_handler(int signum)
>>>    {
>>> -	if (signum == SIGINT || signum == SIGTERM) {
>>> -		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
>>> -			signum);
>>> -#ifdef RTE_LIB_PDUMP
>>> -		/* uninitialize packet capture framework */
>>> -		rte_pdump_uninit();
>>> -#endif
>>> -#ifdef RTE_LIB_LATENCYSTATS
>>> -		if (latencystats_enabled != 0)
>>> -			rte_latencystats_uninit();
>>> -#endif
>>> -		force_quit();
>>> -		/* Set flag to indicate the force termination. */
>>> -		f_quit = 1;
>>> -		/* exit with the expected status */
>>> -#ifndef RTE_EXEC_ENV_WINDOWS
>>> -		signal(signum, SIG_DFL);
>>> -		kill(getpid(), signum);
>>> -#endif
>>> -	}
>>> +	fprintf(stderr, "\nSignal %d %s received, preparing to exit...\n",
>>> +		signum, strsignal(signum));
>>
>> fprintf() is not async signal safe, and neither is strsignal().
>>
>> This is not a regression introduced by this patch, but I thought it
>> might be worth fixing.
>>
>>> +
>>> +	/* Set flag to indicate the force termination. */
>>> +	f_quit = 1;
>>>    }
>>>    
>>>    int
>>> @@ -4449,9 +4435,6 @@ main(int argc, char** argv)
>>>    	} else
>>>    #endif
>>>    	{
>>> -		char c;
>>> -		int rc;
>>> -
>>>    		f_quit = 0;
>>>    
>>>    		printf("No commandline core given, start packet forwarding\n");
>>> @@ -4476,15 +4459,37 @@ main(int argc, char** argv)
>>>    				prev_time = cur_time;
>>>    				rte_delay_us_sleep(US_PER_S);
>>>    			}
>>> -		}
>>> +		} else {
>>> +			char c;
>>> +			fd_set fds;
>>>    
>>> -		printf("Press enter to exit\n");
>>> -		rc = read(0, &c, 1);
>>> -		pmd_test_exit();
>>> -		if (rc < 0)
>>> -			return 1;
>>> +			printf("Press enter to exit\n");
>>> +
>>> +			FD_ZERO(&fds);
>>> +			FD_SET(0, &fds);
>>> +
>>> +			if (select(1, &fds, NULL, NULL, NULL) <= 0) {
>>> +				fprintf(stderr, "Select failed: %s\n",
>>> +					strerror(errno));
>>
>> Why is select() needed? Wouldn't a blocking read suffice? Or getchar().
> 
> On Linux, signal set SA_RESTART so a simple read is not interrupted.
> One option was to use sigaction() which allows controlling flags, but that
> won't work on Windows.  Using select() works on both.
> 

OK, so select() is used because a signal might interrupt read() on Windows?

while (read(0, &c, 1) == -1 && errno == EINTR)
         ;

Would that work?

(select() won't return 0 since you don't have a timeout.)
  
Stephen Hemminger Nov. 10, 2022, 4:14 p.m. UTC | #4
On Thu, 10 Nov 2022 08:50:40 +0100
Mattias Rönnblom <hofors@lysator.liu.se> wrote:

> >>
> >> Why is select() needed? Wouldn't a blocking read suffice? Or getchar().  
> > 
> > On Linux, signal set SA_RESTART so a simple read is not interrupted.
> > One option was to use sigaction() which allows controlling flags, but that
> > won't work on Windows.  Using select() works on both.
> >   
> 
> OK, so select() is used because a signal might interrupt read() on Windows?
> 
> while (read(0, &c, 1) == -1 && errno == EINTR)
>          ;
> 
> Would that work?

Try it. On Linux the read never gets interrupted.
  
Mattias Rönnblom Nov. 10, 2022, 10:06 p.m. UTC | #5
On 2022-11-10 17:14, Stephen Hemminger wrote:
> On Thu, 10 Nov 2022 08:50:40 +0100
> Mattias Rönnblom <hofors@lysator.liu.se> wrote:
> 
>>>>
>>>> Why is select() needed? Wouldn't a blocking read suffice? Or getchar().
>>>
>>> On Linux, signal set SA_RESTART so a simple read is not interrupted.
>>> One option was to use sigaction() which allows controlling flags, but that
>>> won't work on Windows.  Using select() works on both.
>>>    
>>
>> OK, so select() is used because a signal might interrupt read() on Windows?
>>
>> while (read(0, &c, 1) == -1 && errno == EINTR)
>>           ;
>>
>> Would that work?
> 
> Try it. On Linux the read never gets interrupted.

I had no doubts about that, but I misunderstood the code and thought 
that was the required behavior.
  

Patch

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index cf5942d0c422..274e96cac2d4 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -12,6 +12,7 @@ 
 #ifndef RTE_EXEC_ENV_WINDOWS
 #include <sys/mman.h>
 #endif
+#include <sys/select.h>
 #include <sys/types.h>
 #include <errno.h>
 #include <stdbool.h>
@@ -4251,26 +4252,11 @@  print_stats(void)
 static void
 signal_handler(int signum)
 {
-	if (signum == SIGINT || signum == SIGTERM) {
-		fprintf(stderr, "\nSignal %d received, preparing to exit...\n",
-			signum);
-#ifdef RTE_LIB_PDUMP
-		/* uninitialize packet capture framework */
-		rte_pdump_uninit();
-#endif
-#ifdef RTE_LIB_LATENCYSTATS
-		if (latencystats_enabled != 0)
-			rte_latencystats_uninit();
-#endif
-		force_quit();
-		/* Set flag to indicate the force termination. */
-		f_quit = 1;
-		/* exit with the expected status */
-#ifndef RTE_EXEC_ENV_WINDOWS
-		signal(signum, SIG_DFL);
-		kill(getpid(), signum);
-#endif
-	}
+	fprintf(stderr, "\nSignal %d %s received, preparing to exit...\n",
+		signum, strsignal(signum));
+
+	/* Set flag to indicate the force termination. */
+	f_quit = 1;
 }
 
 int
@@ -4449,9 +4435,6 @@  main(int argc, char** argv)
 	} else
 #endif
 	{
-		char c;
-		int rc;
-
 		f_quit = 0;
 
 		printf("No commandline core given, start packet forwarding\n");
@@ -4476,15 +4459,37 @@  main(int argc, char** argv)
 				prev_time = cur_time;
 				rte_delay_us_sleep(US_PER_S);
 			}
-		}
+		} else {
+			char c;
+			fd_set fds;
 
-		printf("Press enter to exit\n");
-		rc = read(0, &c, 1);
-		pmd_test_exit();
-		if (rc < 0)
-			return 1;
+			printf("Press enter to exit\n");
+
+			FD_ZERO(&fds);
+			FD_SET(0, &fds);
+
+			if (select(1, &fds, NULL, NULL, NULL) <= 0) {
+				fprintf(stderr, "Select failed: %s\n",
+					strerror(errno));
+			} else if (read(0, &c, 1) <= 0) {
+				fprintf(stderr,
+					"Read stdin failed: %s\n",
+					strerror(errno));
+			}
+		}
+		stop_packet_forwarding();
+		force_quit();
 	}
 
+#ifdef RTE_LIB_PDUMP
+	/* uninitialize packet capture framework */
+	rte_pdump_uninit();
+#endif
+#ifdef RTE_LIB_LATENCYSTATS
+	if (latencystats_enabled != 0)
+		rte_latencystats_uninit();
+#endif
+
 	ret = rte_eal_cleanup();
 	if (ret != 0)
 		rte_exit(EXIT_FAILURE,