[dpdk-dev,v2] Change alarm cancel function to thread-safe:

Message ID 1411649768-8084-1-git-send-email-michalx.k.jastrzebski@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Commit Message

Michal Jastrzebski Sept. 25, 2014, 12:56 p.m. UTC
  Change alarm cancel function to thread-safe.
    It eliminates a race between threads using rte_alarm_cancel and
    rte_alarm_set.

Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
Reviewed-by: Michal Jastrzebski <michalx.k.jastrzebski@intel.com>

---
 lib/librte_eal/common/include/rte_alarm.h |    3 +-
 lib/librte_eal/linuxapp/eal/eal_alarm.c   |   68 ++++++++++++++++++-----------
 2 files changed, 45 insertions(+), 26 deletions(-)
  

Comments

Ananyev, Konstantin Sept. 25, 2014, 1:11 p.m. UTC | #1
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Michal Jastrzebski
> Sent: Thursday, September 25, 2014 1:56 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2] Change alarm cancel function to thread-safe:
> 
>     Change alarm cancel function to thread-safe.
>     It eliminates a race between threads using rte_alarm_cancel and
>     rte_alarm_set.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> Reviewed-by: Michal Jastrzebski <michalx.k.jastrzebski@intel.com>
> 
> ---
>  lib/librte_eal/common/include/rte_alarm.h |    3 +-
>  lib/librte_eal/linuxapp/eal/eal_alarm.c   |   68 ++++++++++++++++++-----------
>  2 files changed, 45 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
> index d451522..e7cbaef 100644
> --- a/lib/librte_eal/common/include/rte_alarm.h
> +++ b/lib/librte_eal/common/include/rte_alarm.h
> @@ -76,7 +76,8 @@ typedef void (*rte_eal_alarm_callback)(void *arg);
>  int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
> 
>  /**
> - * Function to cancel an alarm callback which has been registered before.
> + * Function to cancel an alarm callback which has been registered before. If
> + * used outside alarm callback it wait for all callbacks to finish its execution.
>   *
>   * @param cb_fn
>   *  alarm callback
> diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> index 480f0cb..ea8dfb4 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> @@ -69,7 +69,8 @@ struct alarm_entry {
>  	struct timeval time;
>  	rte_eal_alarm_callback cb_fn;
>  	void *cb_arg;
> -	volatile int executing;
> +	volatile uint8_t executing;
> +	volatile pthread_t executing_id;
>  };
> 
>  static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
> @@ -108,11 +109,13 @@ eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
>  			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
>  						ap->time.tv_usec <= now.tv_usec))){
>  		ap->executing = 1;
> +		ap->executing_id = pthread_self();
>  		rte_spinlock_unlock(&alarm_list_lk);
> 
>  		ap->cb_fn(ap->cb_arg);
> 
>  		rte_spinlock_lock(&alarm_list_lk);
> +
>  		LIST_REMOVE(ap, next);
>  		rte_free(ap);
>  	}
> @@ -145,7 +148,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
>  	if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
>  		return -EINVAL;
> 
> -	new_alarm = rte_malloc(NULL, sizeof(*new_alarm), 0);
> +	new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
>  	if (new_alarm == NULL)
>  		return -ENOMEM;
> 
> @@ -156,7 +159,6 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
>  	new_alarm->cb_arg = cb_arg;
>  	new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
>  	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
> -	new_alarm->executing = 0;
> 
>  	rte_spinlock_lock(&alarm_list_lk);
>  	if (!handler_registered) {
> @@ -202,34 +204,50 @@ rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
>  {
>  	struct alarm_entry *ap, *ap_prev;
>  	int count = 0;
> +	int executing;
> 
>  	if (!cb_fn)
>  		return -1;
> 
> -	rte_spinlock_lock(&alarm_list_lk);
> -	/* remove any matches at the start of the list */
> -	while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
> -			cb_fn == ap->cb_fn && ap->executing == 0 &&
> -			(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
> -		LIST_REMOVE(ap, next);
> -		rte_free(ap);
> -		count++;
> -	}
> -	ap_prev = ap;
> -
> -	/* now go through list, removing entries not at start */
> -	LIST_FOREACH(ap, &alarm_list, next) {
> -		/* this won't be true first time through */
> -		if (cb_fn == ap->cb_fn &&  ap->executing == 0 &&
> +	do {
> +		executing = 0;
> +		rte_spinlock_lock(&alarm_list_lk);
> +		/* remove any matches at the start of the list */
> +		while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
> +				cb_fn == ap->cb_fn &&
>  				(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
> -			LIST_REMOVE(ap,next);
> -			rte_free(ap);
> -			count++;
> -			ap = ap_prev;
> +
> +			if (ap->executing == 0) {
> +				LIST_REMOVE(ap, next);
> +				rte_free(ap);
> +				count++;
> +			} else {
> +				if (pthread_equal(ap->executing_id, pthread_self()) == 0)
> +					executing++;
> +
> +				break;
> +			}
>  		}
>  		ap_prev = ap;
> -	}
> -	rte_spinlock_unlock(&alarm_list_lk);
> +
> +		/* now go through list, removing entries not at start */
> +		LIST_FOREACH(ap, &alarm_list, next) {
> +			/* this won't be true first time through */
> +			if (cb_fn == ap->cb_fn &&
> +					(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
> +
> +				if (ap->executing == 0) {
> +					LIST_REMOVE(ap,next);
> +					rte_free(ap);
> +					count++;
> +					ap = ap_prev;
> +				} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
> +					executing++;
> +			}
> +			ap_prev = ap;
> +		}
> +		rte_spinlock_unlock(&alarm_list_lk);
> +	} while (executing != 0);
> +
>  	return count;
>  }
> -
> --
> 1.7.9.5

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
  
Neil Horman Sept. 25, 2014, 3:08 p.m. UTC | #2
On Thu, Sep 25, 2014 at 01:56:08PM +0100, Michal Jastrzebski wrote:
>     Change alarm cancel function to thread-safe.
>     It eliminates a race between threads using rte_alarm_cancel and
>     rte_alarm_set.
> 
> Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> Reviewed-by: Michal Jastrzebski <michalx.k.jastrzebski@intel.com>
> 
> ---
>  lib/librte_eal/common/include/rte_alarm.h |    3 +-
>  lib/librte_eal/linuxapp/eal/eal_alarm.c   |   68 ++++++++++++++++++-----------
>  2 files changed, 45 insertions(+), 26 deletions(-)
> 

> diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
> index d451522..e7cbaef 100644
> --- a/lib/librte_eal/common/include/rte_alarm.h
> +++ b/lib/librte_eal/common/include/rte_alarm.h
> @@ -76,7 +76,8 @@ typedef void (*rte_eal_alarm_callback)(void *arg);
>  int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
>  
>  /**
> - * Function to cancel an alarm callback which has been registered before.
> + * Function to cancel an alarm callback which has been registered before. If
> + * used outside alarm callback it wait for all callbacks to finish its execution.
>   *
>   * @param cb_fn
>   *  alarm callback
> diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> index 480f0cb..ea8dfb4 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> @@ -69,7 +69,8 @@ struct alarm_entry {
>  	struct timeval time;
>  	rte_eal_alarm_callback cb_fn;
>  	void *cb_arg;
> -	volatile int executing;
> +	volatile uint8_t executing;
> +	volatile pthread_t executing_id;
>  };
>  
>  static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
> @@ -108,11 +109,13 @@ eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
>  			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
>  						ap->time.tv_usec <= now.tv_usec))){
>  		ap->executing = 1;
> +		ap->executing_id = pthread_self();
How exactly does this work?  From my read all alarm callbacks are handled by the
thread created in rte_eal_intr_init (which runs forever in
eal_intr_thread_main()).  So every assignment to the above executing_id value
will be from that thread.  As such, anytime rte_eal_alarm_cancel is called from
within a callback we are guaranteed that:
a) the ap->executing flag is set to 1
b) the ap->executing_id value should equal whatever is returned from
pthread_self()

That will cause the executing counter local to the cancel function to get
incremented, meaning we will deadlock withing that do { ... } while (executing
!= 0) loop, no?

>  		rte_spinlock_unlock(&alarm_list_lk);
>  
>  		ap->cb_fn(ap->cb_arg);
>  
>  		rte_spinlock_lock(&alarm_list_lk);
> +
>  		LIST_REMOVE(ap, next);
>  		rte_free(ap);
>  	}
> @@ -145,7 +148,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
>  	if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
>  		return -EINVAL;
>  
> -	new_alarm = rte_malloc(NULL, sizeof(*new_alarm), 0);
> +	new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
>  	if (new_alarm == NULL)
>  		return -ENOMEM;
>  
> @@ -156,7 +159,6 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
>  	new_alarm->cb_arg = cb_arg;
>  	new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
>  	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
> -	new_alarm->executing = 0;
>  
This removes the only place where ->executing is cleared again.  If there is
only one change to this bits state (which is the case after this patch), it
seems that you can just use the executing bit as the test in the alarm_cancel
function, and remove all the pthread_self mess.  We still need to address the
deadlock question, but it seems using this single flag is easier than using
pthread_self.

Neil
  
Ananyev, Konstantin Sept. 25, 2014, 4:03 p.m. UTC | #3
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Neil Horman
> Sent: Thursday, September 25, 2014 4:08 PM
> To: Jastrzebski, MichalX K
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v2] Change alarm cancel function to thread-safe:
> 
> On Thu, Sep 25, 2014 at 01:56:08PM +0100, Michal Jastrzebski wrote:
> >     Change alarm cancel function to thread-safe.
> >     It eliminates a race between threads using rte_alarm_cancel and
> >     rte_alarm_set.
> >
> > Signed-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>
> > Reviewed-by: Michal Jastrzebski <michalx.k.jastrzebski@intel.com>
> >
> > ---
> >  lib/librte_eal/common/include/rte_alarm.h |    3 +-
> >  lib/librte_eal/linuxapp/eal/eal_alarm.c   |   68 ++++++++++++++++++-----------
> >  2 files changed, 45 insertions(+), 26 deletions(-)
> >
> 
> > diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
> > index d451522..e7cbaef 100644
> > --- a/lib/librte_eal/common/include/rte_alarm.h
> > +++ b/lib/librte_eal/common/include/rte_alarm.h
> > @@ -76,7 +76,8 @@ typedef void (*rte_eal_alarm_callback)(void *arg);
> >  int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
> >
> >  /**
> > - * Function to cancel an alarm callback which has been registered before.
> > + * Function to cancel an alarm callback which has been registered before. If
> > + * used outside alarm callback it wait for all callbacks to finish its execution.
> >   *
> >   * @param cb_fn
> >   *  alarm callback
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> > index 480f0cb..ea8dfb4 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
> > @@ -69,7 +69,8 @@ struct alarm_entry {
> >  	struct timeval time;
> >  	rte_eal_alarm_callback cb_fn;
> >  	void *cb_arg;
> > -	volatile int executing;
> > +	volatile uint8_t executing;
> > +	volatile pthread_t executing_id;
> >  };
> >
> >  static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
> > @@ -108,11 +109,13 @@ eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
> >  			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
> >  						ap->time.tv_usec <= now.tv_usec))){
> >  		ap->executing = 1;
> > +		ap->executing_id = pthread_self();
> How exactly does this work?  From my read all alarm callbacks are handled by the
> thread created in rte_eal_intr_init (which runs forever in
> eal_intr_thread_main()). 

In current implementation - yes.

 So every assignment to the above executing_id value
> will be from that thread.  As such, anytime rte_eal_alarm_cancel is called from
> within a callback we are guaranteed that:
> a) the ap->executing flag is set to 1
> b) the ap->executing_id value should equal whatever is returned from
> pthread_self()

Yes

> 
> That will cause the executing counter local to the cancel function to get
> incremented, meaning we will deadlock withing that do { ... } while (executing
> != 0) loop, no?

No, as for the case when cancel is called from callback:
pthread_equal(ap->executing_id, pthread_self())
would return non-zero value (which means threads ids are equal), so executing will not be incremented. 

> 
> >  		rte_spinlock_unlock(&alarm_list_lk);
> >
> >  		ap->cb_fn(ap->cb_arg);
> >
> >  		rte_spinlock_lock(&alarm_list_lk);
> > +
> >  		LIST_REMOVE(ap, next);
> >  		rte_free(ap);
> >  	}
> > @@ -145,7 +148,7 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
> >  	if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
> >  		return -EINVAL;
> >
> > -	new_alarm = rte_malloc(NULL, sizeof(*new_alarm), 0);
> > +	new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
> >  	if (new_alarm == NULL)
> >  		return -ENOMEM;
> >
> > @@ -156,7 +159,6 @@ rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
> >  	new_alarm->cb_arg = cb_arg;
> >  	new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
> >  	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
> > -	new_alarm->executing = 0;
> >
> This removes the only place where ->executing is cleared again.  If there is
> only one change to this bits state (which is the case after this patch), it
> seems that you can just use the executing bit as the test in the alarm_cancel
> function, and remove all the pthread_self mess.

I believe we do need executing_id here.
It allows us to distinguish are we executing cancel from a callback or not.

> We still need to address the
> deadlock question, but it seems using this single flag is easier than using
> pthread_self.
> 
> Neil
  

Patch

diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h
index d451522..e7cbaef 100644
--- a/lib/librte_eal/common/include/rte_alarm.h
+++ b/lib/librte_eal/common/include/rte_alarm.h
@@ -76,7 +76,8 @@  typedef void (*rte_eal_alarm_callback)(void *arg);
 int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg);
 
 /**
- * Function to cancel an alarm callback which has been registered before.
+ * Function to cancel an alarm callback which has been registered before. If
+ * used outside alarm callback it wait for all callbacks to finish its execution.
  *
  * @param cb_fn
  *  alarm callback
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index 480f0cb..ea8dfb4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -69,7 +69,8 @@  struct alarm_entry {
 	struct timeval time;
 	rte_eal_alarm_callback cb_fn;
 	void *cb_arg;
-	volatile int executing;
+	volatile uint8_t executing;
+	volatile pthread_t executing_id;
 };
 
 static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
@@ -108,11 +109,13 @@  eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
 			(ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec &&
 						ap->time.tv_usec <= now.tv_usec))){
 		ap->executing = 1;
+		ap->executing_id = pthread_self();
 		rte_spinlock_unlock(&alarm_list_lk);
 
 		ap->cb_fn(ap->cb_arg);
 
 		rte_spinlock_lock(&alarm_list_lk);
+
 		LIST_REMOVE(ap, next);
 		rte_free(ap);
 	}
@@ -145,7 +148,7 @@  rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
 	if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL)
 		return -EINVAL;
 
-	new_alarm = rte_malloc(NULL, sizeof(*new_alarm), 0);
+	new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0);
 	if (new_alarm == NULL)
 		return -ENOMEM;
 
@@ -156,7 +159,6 @@  rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
 	new_alarm->cb_arg = cb_arg;
 	new_alarm->time.tv_usec = (now.tv_usec + us) % US_PER_S;
 	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_usec + us) / US_PER_S);
-	new_alarm->executing = 0;
 
 	rte_spinlock_lock(&alarm_list_lk);
 	if (!handler_registered) {
@@ -202,34 +204,50 @@  rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
 {
 	struct alarm_entry *ap, *ap_prev;
 	int count = 0;
+	int executing;
 
 	if (!cb_fn)
 		return -1;
 
-	rte_spinlock_lock(&alarm_list_lk);
-	/* remove any matches at the start of the list */
-	while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
-			cb_fn == ap->cb_fn && ap->executing == 0 &&
-			(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
-		LIST_REMOVE(ap, next);
-		rte_free(ap);
-		count++;
-	}
-	ap_prev = ap;
-
-	/* now go through list, removing entries not at start */
-	LIST_FOREACH(ap, &alarm_list, next) {
-		/* this won't be true first time through */
-		if (cb_fn == ap->cb_fn &&  ap->executing == 0 &&
+	do {
+		executing = 0;
+		rte_spinlock_lock(&alarm_list_lk);
+		/* remove any matches at the start of the list */
+		while ((ap = LIST_FIRST(&alarm_list)) != NULL &&
+				cb_fn == ap->cb_fn &&
 				(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
-			LIST_REMOVE(ap,next);
-			rte_free(ap);
-			count++;
-			ap = ap_prev;
+
+			if (ap->executing == 0) {
+				LIST_REMOVE(ap, next);
+				rte_free(ap);
+				count++;
+			} else {
+				if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+					executing++;
+
+				break;
+			}
 		}
 		ap_prev = ap;
-	}
-	rte_spinlock_unlock(&alarm_list_lk);
+
+		/* now go through list, removing entries not at start */
+		LIST_FOREACH(ap, &alarm_list, next) {
+			/* this won't be true first time through */
+			if (cb_fn == ap->cb_fn &&
+					(cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) {
+
+				if (ap->executing == 0) {
+					LIST_REMOVE(ap,next);
+					rte_free(ap);
+					count++;
+					ap = ap_prev;
+				} else if (pthread_equal(ap->executing_id, pthread_self()) == 0)
+					executing++;
+			}
+			ap_prev = ap;
+		}
+		rte_spinlock_unlock(&alarm_list_lk);
+	} while (executing != 0);
+
 	return count;
 }
-