[v2] timer: fix resource leak in finalize

Message ID 1556924082-22535-1-git-send-email-erik.g.carrillo@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v2] timer: fix resource leak in finalize |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/intel-Performance-Testing success Performance Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/Intel-compilation success Compilation OK

Commit Message

Carrillo, Erik G May 3, 2019, 10:54 p.m. UTC
  The finalize function should free the memzone created in the init
function, rather than freeing the allocation the memzone references,
otherwise a memzone descriptor can be leaked.

Fixes: c0749f7096c7 ("timer: allow management in shared memory")

Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
---
changes in v2:
 - Handle scenario where primary process exits before secondaries such
   that memzone is not freed early (Anatoly)

 lib/librte_timer/rte_timer.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
  

Comments

Burakov, Anatoly May 7, 2019, 11:03 a.m. UTC | #1
On 03-May-19 11:54 PM, Erik Gabriel Carrillo wrote:
> The finalize function should free the memzone created in the init
> function, rather than freeing the allocation the memzone references,
> otherwise a memzone descriptor can be leaked.
> 
> Fixes: c0749f7096c7 ("timer: allow management in shared memory")
> 
> Signed-off-by: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
> ---
> changes in v2:
>   - Handle scenario where primary process exits before secondaries such
>     that memzone is not freed early (Anatoly)
> 
>   lib/librte_timer/rte_timer.c | 20 +++++++++++++++++---
>   1 file changed, 17 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
> index eb46009..4771287 100644
> --- a/lib/librte_timer/rte_timer.c
> +++ b/lib/librte_timer/rte_timer.c
> @@ -60,6 +60,8 @@ struct rte_timer_data {
>   };
>   
>   #define RTE_MAX_DATA_ELS 64
> +static const struct rte_memzone *rte_timer_data_mz;
> +static rte_atomic16_t *rte_timer_mz_refcnt;
>   static struct rte_timer_data *rte_timer_data_arr;
>   static const uint32_t default_data_id;
>   static uint32_t rte_timer_subsystem_initialized;
> @@ -155,6 +157,7 @@ rte_timer_subsystem_init_v1905(void)
>   	struct rte_timer_data *data;
>   	int i, lcore_id;
>   	static const char *mz_name = "rte_timer_mz";
> +	size_t data_arr_size = RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);

nitpicking, but... const?

>   
>   	if (rte_timer_subsystem_initialized)
>   		return -EALREADY;
> @@ -164,10 +167,14 @@ rte_timer_subsystem_init_v1905(void)
>   		if (mz == NULL)
>   			return -EEXIST;
>   
> +		rte_timer_data_mz = mz;
>   		rte_timer_data_arr = mz->addr;
> +		rte_timer_mz_refcnt =
> +				(void *)((char *)mz->addr + data_arr_size);
>   
>   		rte_timer_data_arr[default_data_id].internal_flags |=
>   			FL_ALLOCATED;
> +		rte_atomic16_inc(rte_timer_mz_refcnt);
>   
>   		rte_timer_subsystem_initialized = 1;
>   
> @@ -175,12 +182,15 @@ rte_timer_subsystem_init_v1905(void)
>   	}
>   
>   	mz = rte_memzone_reserve_aligned(mz_name,
> -			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr),
> +			data_arr_size + sizeof(*rte_timer_mz_refcnt),
>   			SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
>   	if (mz == NULL)
>   		return -ENOMEM;
>   
> +	rte_timer_data_mz = mz;
>   	rte_timer_data_arr = mz->addr;
> +	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);
> +	rte_atomic16_init(rte_timer_mz_refcnt);
>   
>   	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
>   		data = &rte_timer_data_arr[i];
> @@ -193,6 +203,7 @@ rte_timer_subsystem_init_v1905(void)
>   	}
>   
>   	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
> +	rte_atomic16_inc(rte_timer_mz_refcnt);
>   
>   	rte_timer_subsystem_initialized = 1;
>   
> @@ -205,8 +216,11 @@ BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
>   void __rte_experimental
>   rte_timer_subsystem_finalize(void)
>   {
> -	if (rte_timer_data_arr)
> -		rte_free(rte_timer_data_arr);
> +	if (!rte_timer_subsystem_initialized)
> +		return;
> +
> +	if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
> +		rte_memzone_free(rte_timer_data_mz);

I think there's a race here. You may get preempted after test but before 
free, where another secondary could initialize. As far as i know, we 
also support a case when secondary initializes after primary stops running.

Let's even suppose that we allow secondary processes to initialize the 
timer subsystem by reserving memzone and checking rte_errno. You would 
still have a chance of two init/deinit conflicting, because there's a 
hole between memzone allocation and atomic increment.

I don't think this race can be resolved in a safe way, so we might just 
have to settle for a memory leak.

>   
>   	rte_timer_subsystem_initialized = 0;
>   }
>
  
Carrillo, Erik G May 7, 2019, 10:04 p.m. UTC | #2
Hi Anatoly,

Thanks for the review.  Comments in-line:

<...snipped...>

> >   #define RTE_MAX_DATA_ELS 64
> > +static const struct rte_memzone *rte_timer_data_mz; static
> > +rte_atomic16_t *rte_timer_mz_refcnt;
> >   static struct rte_timer_data *rte_timer_data_arr;
> >   static const uint32_t default_data_id;
> >   static uint32_t rte_timer_subsystem_initialized; @@ -155,6 +157,7 @@
> > rte_timer_subsystem_init_v1905(void)
> >   	struct rte_timer_data *data;
> >   	int i, lcore_id;
> >   	static const char *mz_name = "rte_timer_mz";
> > +	size_t data_arr_size = RTE_MAX_DATA_ELS *
> > +sizeof(*rte_timer_data_arr);
> 
> nitpicking, but... const?
> 

No problem - I'll make this change if this line persists into the next version.

<...snipped...>

> >
> > @@ -205,8 +216,11 @@
> BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
> >   void __rte_experimental
> >   rte_timer_subsystem_finalize(void)
> >   {
> > -	if (rte_timer_data_arr)
> > -		rte_free(rte_timer_data_arr);
> > +	if (!rte_timer_subsystem_initialized)
> > +		return;
> > +
> > +	if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
> > +		rte_memzone_free(rte_timer_data_mz);
> 
> I think there's a race here. You may get preempted after test but before
> free, where another secondary could initialize. As far as i know, we also

Indeed, thanks for catching this.

> support a case when secondary initializes after primary stops running.
> 
> Let's even suppose that we allow secondary processes to initialize the timer
> subsystem by reserving memzone and checking rte_errno. You would still
> have a chance of two init/deinit conflicting, because there's a hole between
> memzone allocation and atomic increment.
> 
> I don't think this race can be resolved in a safe way, so we might just have to
> settle for a memory leak.
> 

I don't see a solution here currently either.  I'll look at removing the memzone_free()
call and possibly the rte_timer_subsystem_finalize() API, since it seems like
there's no reason for it to exist if it can't free the allocations.

Regards,
Erik

> >
> >   	rte_timer_subsystem_initialized = 0;
> >   }
> >
> 
> 
> --
> Thanks,
> Anatoly
  
Burakov, Anatoly May 8, 2019, 8:49 a.m. UTC | #3
On 07-May-19 11:04 PM, Carrillo, Erik G wrote:
> Hi Anatoly,
> 
> Thanks for the review.  Comments in-line:
> 
> <...snipped...>
> 
>>>    #define RTE_MAX_DATA_ELS 64
>>> +static const struct rte_memzone *rte_timer_data_mz; static
>>> +rte_atomic16_t *rte_timer_mz_refcnt;
>>>    static struct rte_timer_data *rte_timer_data_arr;
>>>    static const uint32_t default_data_id;
>>>    static uint32_t rte_timer_subsystem_initialized; @@ -155,6 +157,7 @@
>>> rte_timer_subsystem_init_v1905(void)
>>>    	struct rte_timer_data *data;
>>>    	int i, lcore_id;
>>>    	static const char *mz_name = "rte_timer_mz";
>>> +	size_t data_arr_size = RTE_MAX_DATA_ELS *
>>> +sizeof(*rte_timer_data_arr);
>>
>> nitpicking, but... const?
>>
> 
> No problem - I'll make this change if this line persists into the next version.
> 
> <...snipped...>
> 
>>>
>>> @@ -205,8 +216,11 @@
>> BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
>>>    void __rte_experimental
>>>    rte_timer_subsystem_finalize(void)
>>>    {
>>> -	if (rte_timer_data_arr)
>>> -		rte_free(rte_timer_data_arr);
>>> +	if (!rte_timer_subsystem_initialized)
>>> +		return;
>>> +
>>> +	if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
>>> +		rte_memzone_free(rte_timer_data_mz);
>>
>> I think there's a race here. You may get preempted after test but before
>> free, where another secondary could initialize. As far as i know, we also
> 
> Indeed, thanks for catching this.
> 
>> support a case when secondary initializes after primary stops running.
>>
>> Let's even suppose that we allow secondary processes to initialize the timer
>> subsystem by reserving memzone and checking rte_errno. You would still
>> have a chance of two init/deinit conflicting, because there's a hole between
>> memzone allocation and atomic increment.
>>
>> I don't think this race can be resolved in a safe way, so we might just have to
>> settle for a memory leak.
>>
> 
> I don't see a solution here currently either.  I'll look at removing the memzone_free()
> call and possibly the rte_timer_subsystem_finalize() API, since it seems like
> there's no reason for it to exist if it can't free the allocations.

I wonder if there are other places in DPDK where this pattern is used.

Technically, this kind of thing /could/ be resolved by having something 
in our multiprocess shared memory outside of DPDK heap. I.e. store 
something in rte_eal_memconfig like some other things do. This change, 
however, would require an ABI break, so while changing this particular 
API won't need a deprecation notice, the change itself would.

> 
> Regards,
> Erik
> 
>>>
>>>    	rte_timer_subsystem_initialized = 0;
>>>    }
>>>
>>
>>
>> --
>> Thanks,
>> Anatoly
  
Carrillo, Erik G May 8, 2019, 11:01 p.m. UTC | #4
> -----Original Message-----
> From: Burakov, Anatoly
> Sent: Wednesday, May 8, 2019 3:50 AM
> To: Carrillo, Erik G <erik.g.carrillo@intel.com>; rsanford@akamai.com;
> thomas@monjalon.net
> Cc: dev@dpdk.org
> Subject: Re: [PATCH v2] timer: fix resource leak in finalize
> 
> On 07-May-19 11:04 PM, Carrillo, Erik G wrote:
> > Hi Anatoly,
> >
> > Thanks for the review.  Comments in-line:
> >
> > <...snipped...>
> >
> >>>    #define RTE_MAX_DATA_ELS 64
> >>> +static const struct rte_memzone *rte_timer_data_mz; static
> >>> +rte_atomic16_t *rte_timer_mz_refcnt;
> >>>    static struct rte_timer_data *rte_timer_data_arr;
> >>>    static const uint32_t default_data_id;
> >>>    static uint32_t rte_timer_subsystem_initialized; @@ -155,6 +157,7
> >>> @@
> >>> rte_timer_subsystem_init_v1905(void)
> >>>    	struct rte_timer_data *data;
> >>>    	int i, lcore_id;
> >>>    	static const char *mz_name = "rte_timer_mz";
> >>> +	size_t data_arr_size = RTE_MAX_DATA_ELS *
> >>> +sizeof(*rte_timer_data_arr);
> >>
> >> nitpicking, but... const?
> >>
> >
> > No problem - I'll make this change if this line persists into the next version.
> >
> > <...snipped...>
> >
> >>>
> >>> @@ -205,8 +216,11 @@
> >> BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
> >>>    void __rte_experimental
> >>>    rte_timer_subsystem_finalize(void)
> >>>    {
> >>> -	if (rte_timer_data_arr)
> >>> -		rte_free(rte_timer_data_arr);
> >>> +	if (!rte_timer_subsystem_initialized)
> >>> +		return;
> >>> +
> >>> +	if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
> >>> +		rte_memzone_free(rte_timer_data_mz);
> >>
> >> I think there's a race here. You may get preempted after test but
> >> before free, where another secondary could initialize. As far as i
> >> know, we also
> >
> > Indeed, thanks for catching this.
> >
> >> support a case when secondary initializes after primary stops running.
> >>
> >> Let's even suppose that we allow secondary processes to initialize
> >> the timer subsystem by reserving memzone and checking rte_errno. You
> >> would still have a chance of two init/deinit conflicting, because
> >> there's a hole between memzone allocation and atomic increment.
> >>
> >> I don't think this race can be resolved in a safe way, so we might
> >> just have to settle for a memory leak.
> >>
> >
> > I don't see a solution here currently either.  I'll look at removing
> > the memzone_free() call and possibly the
> > rte_timer_subsystem_finalize() API, since it seems like there's no reason
> for it to exist if it can't free the allocations.
> 
> I wonder if there are other places in DPDK where this pattern is used.
> 
> Technically, this kind of thing /could/ be resolved by having something in our
> multiprocess shared memory outside of DPDK heap. I.e. store something in
> rte_eal_memconfig like some other things do. This change, however, would
> require an ABI break, so while changing this particular API won't need a
> deprecation notice, the change itself would.

I like this idea; thanks for the suggestion.  I went ahead and submitted a new version 
of this patch with this approach.  It's dependent on one other new patch that allows
secondary processes to reserve the memzone.  I also submitted a deprecation
notice for the ABI break for the change to rte_mem_config.

Thomas, I suspect that I should mark v3 of this patch as "deferred", so I've gone ahead and
done that, but let me know if that's wrong.

Thanks,
Erik

> 
> >
> > Regards,
> > Erik
> >
> >>>
> >>>    	rte_timer_subsystem_initialized = 0;
> >>>    }
> >>>
> >>
> >>
> >> --
> >> Thanks,
> >> Anatoly
> 
> 
> --
> Thanks,
> Anatoly
  
Thomas Monjalon May 9, 2019, 7:44 a.m. UTC | #5
09/05/2019 01:01, Carrillo, Erik G:
> I like this idea; thanks for the suggestion.  I went ahead and submitted a new version 
> of this patch with this approach.  It's dependent on one other new patch that allows
> secondary processes to reserve the memzone.  I also submitted a deprecation
> notice for the ABI break for the change to rte_mem_config.
> 
> Thomas, I suspect that I should mark v3 of this patch as "deferred", so I've gone ahead and
> done that, but let me know if that's wrong.

Any patch targetting 19.08 should be marked as deferred, thanks.
  

Patch

diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c
index eb46009..4771287 100644
--- a/lib/librte_timer/rte_timer.c
+++ b/lib/librte_timer/rte_timer.c
@@ -60,6 +60,8 @@  struct rte_timer_data {
 };
 
 #define RTE_MAX_DATA_ELS 64
+static const struct rte_memzone *rte_timer_data_mz;
+static rte_atomic16_t *rte_timer_mz_refcnt;
 static struct rte_timer_data *rte_timer_data_arr;
 static const uint32_t default_data_id;
 static uint32_t rte_timer_subsystem_initialized;
@@ -155,6 +157,7 @@  rte_timer_subsystem_init_v1905(void)
 	struct rte_timer_data *data;
 	int i, lcore_id;
 	static const char *mz_name = "rte_timer_mz";
+	size_t data_arr_size = RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr);
 
 	if (rte_timer_subsystem_initialized)
 		return -EALREADY;
@@ -164,10 +167,14 @@  rte_timer_subsystem_init_v1905(void)
 		if (mz == NULL)
 			return -EEXIST;
 
+		rte_timer_data_mz = mz;
 		rte_timer_data_arr = mz->addr;
+		rte_timer_mz_refcnt =
+				(void *)((char *)mz->addr + data_arr_size);
 
 		rte_timer_data_arr[default_data_id].internal_flags |=
 			FL_ALLOCATED;
+		rte_atomic16_inc(rte_timer_mz_refcnt);
 
 		rte_timer_subsystem_initialized = 1;
 
@@ -175,12 +182,15 @@  rte_timer_subsystem_init_v1905(void)
 	}
 
 	mz = rte_memzone_reserve_aligned(mz_name,
-			RTE_MAX_DATA_ELS * sizeof(*rte_timer_data_arr),
+			data_arr_size + sizeof(*rte_timer_mz_refcnt),
 			SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
 	if (mz == NULL)
 		return -ENOMEM;
 
+	rte_timer_data_mz = mz;
 	rte_timer_data_arr = mz->addr;
+	rte_timer_mz_refcnt = (void *)((char *)mz->addr + data_arr_size);
+	rte_atomic16_init(rte_timer_mz_refcnt);
 
 	for (i = 0; i < RTE_MAX_DATA_ELS; i++) {
 		data = &rte_timer_data_arr[i];
@@ -193,6 +203,7 @@  rte_timer_subsystem_init_v1905(void)
 	}
 
 	rte_timer_data_arr[default_data_id].internal_flags |= FL_ALLOCATED;
+	rte_atomic16_inc(rte_timer_mz_refcnt);
 
 	rte_timer_subsystem_initialized = 1;
 
@@ -205,8 +216,11 @@  BIND_DEFAULT_SYMBOL(rte_timer_subsystem_init, _v1905, 19.05);
 void __rte_experimental
 rte_timer_subsystem_finalize(void)
 {
-	if (rte_timer_data_arr)
-		rte_free(rte_timer_data_arr);
+	if (!rte_timer_subsystem_initialized)
+		return;
+
+	if (rte_atomic16_dec_and_test(rte_timer_mz_refcnt))
+		rte_memzone_free(rte_timer_data_mz);
 
 	rte_timer_subsystem_initialized = 0;
 }