[1/1] timer: fix race condition

Message ID 1543517626-142526-1-git-send-email-erik.g.carrillo@intel.com
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series
  • [1/1] timer: fix race condition
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/intel-Performance-Testing success Performance Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/Intel-compilation success Compilation OK

Commit Message

Carrillo, Erik G Nov. 29, 2018, 6:53 p.m.
rte_timer_manage() adds expired timers to a "run list", and walks the
list, transitioning each timer from the PENDING to the RUNNING state.
If another lcore resets or stops the timer at precisely this
moment, the timer state would instead be set to CONFIG by that other
lcore, which would cause rte_timer_manage() to skip over it. This is
expected behavior.

However, if a timer expires quickly enough, there exists the
following race condition that causes rte_timer_manage() to
misinterpret a timer in CONFIG state, resulting in lost timers:

- Thread A:
  - starts a timer with rte_timer_reset()
  - the timer is moved to CONFIG state
  - the spinlock associated with the appropriate skiplist is acquired
  - timer is inserted into the skiplist
  - the spinlock is released
- Thread B:
  - executes rte_timer_manage()
  - finds the above timer expired and adds it to the run list
  - walks the run list, sees the above timer still in CONFIG state,
    unlinks it from the run list and continues on
- Thread A:
  - moves the timer to PENDING state
  - returns from rte_timer_reset()
  - timer is now in PENDING state, but is not actually linked into the
    skiplist or the run list, so it will never get processed further
    by rte_timer_manage()

This commit fixes this race condition by only releasing the spinlock
after the timer state has been transitioned from CONFIG to PENDING,
which prevents rte_timer_manage() from seeing an incorrect state.

Fixes: 9b15ba895b9f ("timer: use a skip list")
Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
---
 lib/librte_timer/rte_timer.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

Comments

Thomas Monjalon Dec. 19, 2018, 3:36 a.m. | #1
Who could review this fix please?

29/11/2018 19:53, Erik Gabriel Carrillo:
> rte_timer_manage() adds expired timers to a "run list", and walks the
> list, transitioning each timer from the PENDING to the RUNNING state.
> If another lcore resets or stops the timer at precisely this
> moment, the timer state would instead be set to CONFIG by that other
> lcore, which would cause timer_manage() to skip over it. This is
> expected behavior.
> 
> However, if a timer expires quickly enough, there exists the
> following race condition that causes the timer_manage() routine to
> misinterpret a timer in CONFIG state, resulting in lost timers:
> 
> - Thread A:
>   - starts a timer with rte_timer_reset()
>   - the timer is moved to CONFIG state
>   - the spinlock associated with the appropriate skiplist is acquired
>   - timer is inserted into the skiplist
>   - the spinlock is released
> - Thread B:
>   - executes rte_timer_manage()
>   - find above timer as expired, add it to run list
>   - walk run list, see above timer still in CONFIG state, unlink it from
>     run list and continue on
> - Thread A:
>   - move timer to PENDING state
>   - return from rte_timer_reset()
>   - timer is now in PENDING state, but not actually linked into skiplist
>     and will never get processed further by rte_timer_manage()
> 
> This commit fixes this race condition by only releasing the spinlock
> after the timer state has been transitioned from CONFIG to PENDING,
> which prevents rte_timer_manage() from seeing an incorrect state.
> 
> Fixes: 9b15ba895b9f ("timer: use a skip list")
> Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
> ---
>  lib/librte_timer/rte_timer.c | 28 ++++++++++++++--------------
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
> index 590488c..30c7b0a 100644
> --- a/lib/librte_timer/rte_timer.c
> +++ b/lib/librte_timer/rte_timer.c
> @@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
>  	}
>  }
>  
> -/*
> - * add in list, lock if needed
> +/* call with lock held as necessary
> + * add in list
>   * timer must be in config state
>   * timer must not be in a list
>   */
>  static void
> -timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
> +timer_add(struct rte_timer *tim, unsigned int tim_lcore)
>  {
> -	unsigned lcore_id = rte_lcore_id();
>  	unsigned lvl;
>  	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
>  
> -	/* if timer needs to be scheduled on another core, we need to
> -	 * lock the list; if it is on local core, we need to lock if
> -	 * we are not called from rte_timer_manage() */
> -	if (tim_lcore != lcore_id || !local_is_locked)
> -		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> -
>  	/* find where exactly this element goes in the list of elements
>  	 * for each depth. */
>  	timer_get_prev_entries(tim->expire, tim_lcore, prev);
> @@ -282,9 +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
>  	 * NOTE: this is not atomic on 32-bit*/
>  	priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
>  			pending_head.sl_next[0]->expire;
> -
> -	if (tim_lcore != lcore_id || !local_is_locked)
> -		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
>  }
>  
>  /*
> @@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
>  	tim->f = fct;
>  	tim->arg = arg;
>  
> +	/* if timer needs to be scheduled on another core, we need to
> +	 * lock the destination list; if it is on local core, we need to lock if
> +	 * we are not called from rte_timer_manage()
> +	 */
> +	if (tim_lcore != lcore_id || !local_is_locked)
> +		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> +
>  	__TIMER_STAT_ADD(pending, 1);
> -	timer_add(tim, tim_lcore, local_is_locked);
> +	timer_add(tim, tim_lcore);
>  
>  	/* update state: as we are in CONFIG state, only us can modify
>  	 * the state so we don't need to use cmpset() here */
> @@ -389,6 +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
>  	status.owner = (int16_t)tim_lcore;
>  	tim->status.u32 = status.u32;
>  
> +	if (tim_lcore != lcore_id || !local_is_locked)
> +		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> +
>  	return 0;
>  }
>  
>
Gavin Hu Dec. 19, 2018, 7:57 a.m. | #2
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Thomas Monjalon
> Sent: Wednesday, December 19, 2018 11:37 AM
> To: dev@dpdk.org
> Cc: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>; rsanford@akamai.com;
> olivier.matz@6wind.com; stephen@networkplumber.org;
> bruce.richardson@intel.com
> Subject: Re: [dpdk-dev] [PATCH 1/1] timer: fix race condition
>
> Who could review this fix please?
>
> 29/11/2018 19:53, Erik Gabriel Carrillo:
> > rte_timer_manage() adds expired timers to a "run list", and walks the
> > list, transitioning each timer from the PENDING to the RUNNING state.
> > If another lcore resets or stops the timer at precisely this moment,
> > the timer state would instead be set to CONFIG by that other lcore,
> > which would cause timer_manage() to skip over it. This is expected
> > behavior.
> >
> > However, if a timer expires quickly enough, there exists the following
> > race condition that causes the timer_manage() routine to misinterpret
> > a timer in CONFIG state, resulting in lost timers:
> >
> > - Thread A:
> >   - starts a timer with rte_timer_reset()
> >   - the timer is moved to CONFIG state
> >   - the spinlock associated with the appropriate skiplist is acquired
> >   - timer is inserted into the skiplist
> >   - the spinlock is released
> > - Thread B:
> >   - executes rte_timer_manage()
> >   - find above timer as expired, add it to run list
> >   - walk run list, see above timer still in CONFIG state, unlink it from
> >     run list and continue on
> > - Thread A:
> >   - move timer to PENDING state
> >   - return from rte_timer_reset()
> >   - timer is now in PENDING state, but not actually linked into skiplist
Add "nor the run list"?

> >     and will never get processed further by rte_timer_manage()
> >
> > This commit fixes this race condition by only releasing the spinlock
> > after the timer state has been transitioned from CONFIG to PENDING,
> > which prevents rte_timer_manage() from seeing an incorrect state.
> >
> > Fixes: 9b15ba895b9f ("timer: use a skip list")
> > Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
> > ---
> >  lib/librte_timer/rte_timer.c | 28 ++++++++++++++--------------
> >  1 file changed, 14 insertions(+), 14 deletions(-)
> >
> > diff --git a/lib/librte_timer/rte_timer.c
> > b/lib/librte_timer/rte_timer.c index 590488c..30c7b0a 100644
> > --- a/lib/librte_timer/rte_timer.c
> > +++ b/lib/librte_timer/rte_timer.c
> > @@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct
> rte_timer *tim, unsigned tim_lcore,
> >  }
> >  }
> >
> > -/*
> > - * add in list, lock if needed
> > +/* call with lock held as necessary
> > + * add in list
> >   * timer must be in config state
> >   * timer must not be in a list
> >   */
> >  static void
> > -timer_add(struct rte_timer *tim, unsigned tim_lcore, int
> > local_is_locked)
> > +timer_add(struct rte_timer *tim, unsigned int tim_lcore)
> >  {
> > -unsigned lcore_id = rte_lcore_id();
> >  unsigned lvl;
> >  struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
> >
> > -/* if timer needs to be scheduled on another core, we need to
> > - * lock the list; if it is on local core, we need to lock if
> > - * we are not called from rte_timer_manage() */
> > -if (tim_lcore != lcore_id || !local_is_locked)
> > -rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> > -
> >  /* find where exactly this element goes in the list of elements
> >   * for each depth. */
> >  timer_get_prev_entries(tim->expire, tim_lcore, prev); @@ -282,9
> > +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int
> local_is_locked)
> >   * NOTE: this is not atomic on 32-bit*/
> >  priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
> >  pending_head.sl_next[0]->expire;
> > -
> > -if (tim_lcore != lcore_id || !local_is_locked)
> > -rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> >  }
> >
> >  /*
> > @@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim,
> uint64_t expire,
> >  tim->f = fct;
> >  tim->arg = arg;
> >
> > +/* if timer needs to be scheduled on another core, we need to
> > + * lock the destination list; if it is on local core, we need to lock if
> > + * we are not called from rte_timer_manage()
> > + */
> > +if (tim_lcore != lcore_id || !local_is_locked)
> > +rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
> > +
> >  __TIMER_STAT_ADD(pending, 1);
> > -timer_add(tim, tim_lcore, local_is_locked);
> > +timer_add(tim, tim_lcore);
> >
> >  /* update state: as we are in CONFIG state, only us can modify
> >   * the state so we don't need to use cmpset() here */ @@ -389,6
> > +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
> >  status.owner = (int16_t)tim_lcore;
> >  tim->status.u32 = status.u32;
> >
> > +if (tim_lcore != lcore_id || !local_is_locked)
> > +rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
> > +
> >  return 0;
> >  }
> >
Other than the minor comment,
Reviewed-by: Gavin Hu <gavin.hu@arm.com>

>
>
>
>

IMPORTANT NOTICE: The contents of this email and any attachments are confidential and may also be privileged. If you are not the intended recipient, please notify the sender immediately and do not disclose the contents to any other person, use it for any purpose, or store or copy the information in any medium. Thank you.
Carrillo, Erik G Dec. 19, 2018, 4:11 p.m. | #3
> > > However, if a timer expires quickly enough, there exists the
> > > following race condition that causes the timer_manage() routine to
> > > misinterpret a timer in CONFIG state, resulting in lost timers:
> > >
> > > - Thread A:
> > >   - starts a timer with rte_timer_reset()
> > >   - the timer is moved to CONFIG state
> > >   - the spinlock associated with the appropriate skiplist is acquired
> > >   - timer is inserted into the skiplist
> > >   - the spinlock is released
> > > - Thread B:
> > >   - executes rte_timer_manage()
> > >   - find above timer as expired, add it to run list
> > >   - walk run list, see above timer still in CONFIG state, unlink it from
> > >     run list and continue on
> > > - Thread A:
> > >   - move timer to PENDING state
> > >   - return from rte_timer_reset()
> > >   - timer is now in PENDING state, but not actually linked into
> > > skiplist
> Add "nor the run list"?

<...snipped...>

> Other than the minor comment,
> Reviewed-by: Gavin Hu <gavin.hu@arm.com>
> 

<...snipped...>

Thanks for the review, Gavin.  I've made the update and resubmitted.

Regards,
Erik

Patch

diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index 590488c..30c7b0a 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -241,24 +241,17 @@  timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore,
 	}
 }
 
-/*
- * add in list, lock if needed
+/* call with lock held as necessary
+ * add in list
  * timer must be in config state
  * timer must not be in a list
  */
 static void
-timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
+timer_add(struct rte_timer *tim, unsigned int tim_lcore)
 {
-	unsigned lcore_id = rte_lcore_id();
 	unsigned lvl;
 	struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1];
 
-	/* if timer needs to be scheduled on another core, we need to
-	 * lock the list; if it is on local core, we need to lock if
-	 * we are not called from rte_timer_manage() */
-	if (tim_lcore != lcore_id || !local_is_locked)
-		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
-
 	/* find where exactly this element goes in the list of elements
 	 * for each depth. */
 	timer_get_prev_entries(tim->expire, tim_lcore, prev);
@@ -282,9 +275,6 @@  timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked)
 	 * NOTE: this is not atomic on 32-bit*/
 	priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\
 			pending_head.sl_next[0]->expire;
-
-	if (tim_lcore != lcore_id || !local_is_locked)
-		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
 }
 
 /*
@@ -379,8 +369,15 @@  __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
 	tim->f = fct;
 	tim->arg = arg;
 
+	/* if timer needs to be scheduled on another core, we need to
+	 * lock the destination list; if it is on local core, we need to lock if
+	 * we are not called from rte_timer_manage()
+	 */
+	if (tim_lcore != lcore_id || !local_is_locked)
+		rte_spinlock_lock(&priv_timer[tim_lcore].list_lock);
+
 	__TIMER_STAT_ADD(pending, 1);
-	timer_add(tim, tim_lcore, local_is_locked);
+	timer_add(tim, tim_lcore);
 
 	/* update state: as we are in CONFIG state, only us can modify
 	 * the state so we don't need to use cmpset() here */
@@ -389,6 +386,9 @@  __rte_timer_reset(struct rte_timer *tim, uint64_t expire,
 	status.owner = (int16_t)tim_lcore;
 	tim->status.u32 = status.u32;
 
+	if (tim_lcore != lcore_id || !local_is_locked)
+		rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock);
+
 	return 0;
 }