[v3,2/2] lib/mempool: distinguish debug counters from cache and pool

Message ID 20210420000800.1504-3-dharmik.thakkar@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series lib/mempool: add debug stats

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/travis-robot success travis build: passed
ci/github-robot success github build: passed
ci/iol-testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Dharmik Thakkar April 20, 2021, 12:08 a.m. UTC
  From: Joyce Kong <joyce.kong@arm.com>

If the cache is enabled, objects are retrieved from / put into the cache
first, and only then from / into the common pool. The debug stats
currently count objects retrieved from / put into the cache and the
common pool together; it is better to distinguish them.

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
 lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
 2 files changed, 57 insertions(+), 14 deletions(-)
  

Comments

Olivier Matz April 21, 2021, 4:29 p.m. UTC | #1
Hi Dharmik,

Please see some comments below.

On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote:
> From: Joyce Kong <joyce.kong@arm.com>
> 
> If the cache is enabled, objects are retrieved from / put into the cache
> first, and only then from / into the common pool. The debug stats
> currently count objects retrieved from / put into the cache and the
> common pool together; it is better to distinguish them.
> 
> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
>  lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
>  2 files changed, 57 insertions(+), 14 deletions(-)
> 
> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
> index afb1239c8d48..339f14455624 100644
> --- a/lib/librte_mempool/rte_mempool.c
> +++ b/lib/librte_mempool/rte_mempool.c
> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>  	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>  		sum.put_bulk += mp->stats[lcore_id].put_bulk;
>  		sum.put_objs += mp->stats[lcore_id].put_objs;
> +		sum.put_common_pool_bulk +=
> +			mp->stats[lcore_id].put_common_pool_bulk;
> +		sum.put_common_pool_objs +=
> +			mp->stats[lcore_id].put_common_pool_objs;
> +		sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
> +		sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
> +		sum.get_common_pool_bulk +=
> +			mp->stats[lcore_id].get_common_pool_bulk;
> +		sum.get_common_pool_objs +=
> +			mp->stats[lcore_id].get_common_pool_objs;
> +		sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
> +		sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
>  		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>  		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
>  		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>  	fprintf(f, "  stats:\n");
>  	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>  	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
> +	fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
> +						sum.put_common_pool_bulk);
> +	fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
> +						sum.put_common_pool_objs);
> +	fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
> +	fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
> +	fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
> +						sum.get_common_pool_bulk);
> +	fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
> +						sum.get_common_pool_objs);
> +	fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
> +	fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
>  	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>  	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
>  	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
> index 848a19226149..0959f8a3f367 100644
> --- a/lib/librte_mempool/rte_mempool.h
> +++ b/lib/librte_mempool/rte_mempool.h
> @@ -66,12 +66,20 @@ extern "C" {
>   * A structure that stores the mempool statistics (per-lcore).
>   */
>  struct rte_mempool_debug_stats {
> -	uint64_t put_bulk;         /**< Number of puts. */
> -	uint64_t put_objs;         /**< Number of objects successfully put. */
> -	uint64_t get_success_bulk; /**< Successful allocation number. */
> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
> +	uint64_t put_bulk;		  /**< Number of puts. */
> +	uint64_t put_objs;		  /**< Number of objects successfully put. */
> +	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
> +	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
> +	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
> +	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
> +	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
> +	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
> +	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
> +	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
> +	uint64_t get_success_bulk;	  /**< Successful allocation number. */
> +	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
> +	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
> +	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */

I missed it the first time, but this changes the size of the
rte_mempool_debug_stats structure. I think we don't care about this ABI
breakage because this structure is only defined if
RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.
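
(Editorial aside: the guard being referred to — the per-lcore stats array
only exists when RTE_LIBRTE_MEMPOOL_DEBUG is defined, so the size change is
invisible to default builds. Sketch from memory, illustrative only:)

    #ifdef RTE_LIBRTE_MEMPOOL_DEBUG
            /** Per-lcore statistics; compiled in only for debug builds,
             *  so this structure's size is not part of the default ABI. */
            struct rte_mempool_debug_stats stats[RTE_MAX_LCORE];
    #endif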

About the fields themselves, I'm not certain that there is an added value
to have stats for cache gets and puts. My feeling is that the important
stat to monitor is the access to common pool, because it is the one that
highlights a possible performance impact (contention). The cache stats
are more or less equal to "success + fail - common". Moreover, it will
simplify the patch and avoid risks of mistakes.

What do you think?
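
(Editorial illustration of why the common-pool counters are the ones worth
watching: the objs/bulk ratio on the backend gives the average burst size
hitting the shared ring, which is where lcores actually contend. The helper
below is hypothetical and only assumes stats summed as in rte_mempool_dump()
above:)

    /* Average objects moved per backend access; many bulks with a small
     * average means frequent small operations on the shared ring, i.e.
     * the contention referred to above.
     */
    static double
    avg_common_pool_burst(uint64_t common_pool_objs, uint64_t common_pool_bulk)
    {
            if (common_pool_bulk == 0)
                    return 0.0;
            return (double)common_pool_objs / (double)common_pool_bulk;
    }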

>  	/** Successful allocation number of contiguous blocks. */
>  	uint64_t get_success_blks;
>  	/** Failed allocation number of contiguous blocks. */
> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>  		void **obj_table, unsigned n)
>  {
>  	struct rte_mempool_ops *ops;
> +	int ret;
>  
>  	rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>  	ops = rte_mempool_get_ops(mp->ops_index);
> -	return ops->dequeue(mp, obj_table, n);
> +	ret = ops->dequeue(mp, obj_table, n);
> +	if (ret == 0) {
> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
> +		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> +		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> +	}
> +	return ret;
>  }
>  
>  /**
> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>  {
>  	struct rte_mempool_ops *ops;
>  
> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
>  	rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>  	ops = rte_mempool_get_ops(mp->ops_index);
>  	return ops->enqueue(mp, obj_table, n);
> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>  
>  	/* Add elements back into the cache */
>  	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
> -
>  	cache->len += n;
>  
> +	__MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
> +
>  	if (cache->len >= cache->flushthresh) {
> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
> +				   n - (cache->len - cache->size));
>  		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>  				cache->len - cache->size);
>  		cache->len = cache->size;
> -	}
> +	} else
> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
>  

In case we keep cache stats, I'd add {} after the else to be consistent
with the if().
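
(For reference, an editorial rendering of the hunk above with the suggested
braces:)

    if (cache->len >= cache->flushthresh) {
            __MEMPOOL_STAT_ADD(mp, put_cache_objs,
                               n - (cache->len - cache->size));
            rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
                            cache->len - cache->size);
            cache->len = cache->size;
    } else {
            __MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
    }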

>  	return;
>  
> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>  
>  	cache->len -= n;
>  
> -	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> -	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
> +	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
> +	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);

In case we keep cache stats, I don't think we should remove get_success
stats increment. Else, the success stats will never be incremented when
retrieving objects from the cache.
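
(Editorial sketch of the cache-hit path in __mempool_generic_get() with both
counters kept, as suggested here — not the posted patch:)

    cache->len -= n;

    /* Count the cache hit, and keep the overall success stats so that
     * get_success also covers gets served from the cache.
     */
    __MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
    __MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
    __MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
    __MEMPOOL_STAT_ADD(mp, get_success_objs, n);

    return 0;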


>  
>  	return 0;
>  
> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>  	if (ret < 0) {
>  		__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>  		__MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
> -	} else {
> -		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
> -		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>  	}
>  
>  	return ret;
> -- 
> 2.17.1
>
  
Dharmik Thakkar April 22, 2021, 9:27 p.m. UTC | #2
Hi Olivier,

Thank you for your comments!

> On Apr 21, 2021, at 11:29 AM, Olivier Matz <olivier.matz@6wind.com> wrote:
> 
> Hi Dharmik,
> 
> Please see some comments below.
> 
> On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote:
>> From: Joyce Kong <joyce.kong@arm.com>
>> 
>> If the cache is enabled, objects are retrieved from / put into the cache
>> first, and only then from / into the common pool. The debug stats
>> currently count objects retrieved from / put into the cache and the
>> common pool together; it is better to distinguish them.
>> 
>> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>> ---
>> lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
>> lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
>> 2 files changed, 57 insertions(+), 14 deletions(-)
>> 
>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> index afb1239c8d48..339f14455624 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>> 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>> 		sum.put_bulk += mp->stats[lcore_id].put_bulk;
>> 		sum.put_objs += mp->stats[lcore_id].put_objs;
>> +		sum.put_common_pool_bulk +=
>> +			mp->stats[lcore_id].put_common_pool_bulk;
>> +		sum.put_common_pool_objs +=
>> +			mp->stats[lcore_id].put_common_pool_objs;
>> +		sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
>> +		sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
>> +		sum.get_common_pool_bulk +=
>> +			mp->stats[lcore_id].get_common_pool_bulk;
>> +		sum.get_common_pool_objs +=
>> +			mp->stats[lcore_id].get_common_pool_objs;
>> +		sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
>> +		sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
>> 		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>> 		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
>> 		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
>> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>> 	fprintf(f, "  stats:\n");
>> 	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>> 	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
>> +	fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
>> +						sum.put_common_pool_bulk);
>> +	fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
>> +						sum.put_common_pool_objs);
>> +	fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
>> +	fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
>> +	fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
>> +						sum.get_common_pool_bulk);
>> +	fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
>> +						sum.get_common_pool_objs);
>> +	fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
>> +	fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
>> 	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>> 	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
>> 	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 848a19226149..0959f8a3f367 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -66,12 +66,20 @@ extern "C" {
>>  * A structure that stores the mempool statistics (per-lcore).
>>  */
>> struct rte_mempool_debug_stats {
>> -	uint64_t put_bulk;         /**< Number of puts. */
>> -	uint64_t put_objs;         /**< Number of objects successfully put. */
>> -	uint64_t get_success_bulk; /**< Successful allocation number. */
>> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
>> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
>> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
>> +	uint64_t put_bulk;		  /**< Number of puts. */
>> +	uint64_t put_objs;		  /**< Number of objects successfully put. */
>> +	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
>> +	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
>> +	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
>> +	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
>> +	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
>> +	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
>> +	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
>> +	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
>> +	uint64_t get_success_bulk;	  /**< Successful allocation number. */
>> +	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
>> +	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
>> +	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */
> 
> I missed it the first time, but this changes the size of the
> rte_mempool_debug_stats structure. I think we don't care about this ABI
> breakage because this structure is only defined if
> RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.

Agreed, thank you!

> 
> About the fields themselves, I'm not certain that there is an added value
> to have stats for cache gets and puts. My feeling is that the important
> stat to monitor is the access to common pool, because it is the one that
> highlights a possible performance impact (contention). The cache stats
> are more or less equal to "success + fail - common". Moreover, it will
> simplify the patch and avoid risks of mistakes.
> 
> What do you think?

Yes, I think the cache stats can be removed.
Also, please correct me if I’m wrong, but in my understanding,
the cache stats are equal to “success - common”. Is adding “fail” required?
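
(Editorial sketch of the identity being discussed, assuming get_success is
also incremented on cache hits as Olivier requests further down. The names
mirror the summed stats in rte_mempool_dump():)

    /* Every successful get increments get_success_objs; only gets that
     * reach the backend also increment get_common_pool_objs (see
     * rte_mempool_ops_dequeue_bulk). Objects served purely from the
     * cache are therefore:
     */
    uint64_t cache_objs = sum.get_success_objs - sum.get_common_pool_objs;
    /* get_fail_objs never contributes to get_success_objs, which is why
     * "fail" does not appear in the identity.
     */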

> 
>> 	/** Successful allocation number of contiguous blocks. */
>> 	uint64_t get_success_blks;
>> 	/** Failed allocation number of contiguous blocks. */
>> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>> 		void **obj_table, unsigned n)
>> {
>> 	struct rte_mempool_ops *ops;
>> +	int ret;
>> 
>> 	rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>> 	ops = rte_mempool_get_ops(mp->ops_index);
>> -	return ops->dequeue(mp, obj_table, n);
>> +	ret = ops->dequeue(mp, obj_table, n);
>> +	if (ret == 0) {
>> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
>> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
>> +		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> +		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +	}
>> +	return ret;
>> }
>> 
>> /**
>> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>> {
>> 	struct rte_mempool_ops *ops;
>> 
>> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
>> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
>> 	rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>> 	ops = rte_mempool_get_ops(mp->ops_index);
>> 	return ops->enqueue(mp, obj_table, n);
>> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>> 
>> 	/* Add elements back into the cache */
>> 	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
>> -
>> 	cache->len += n;
>> 
>> +	__MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
>> +
>> 	if (cache->len >= cache->flushthresh) {
>> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
>> +				   n - (cache->len - cache->size));
>> 		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>> 				cache->len - cache->size);
>> 		cache->len = cache->size;
>> -	}
>> +	} else
>> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
>> 
> 
> In case we keep cache stats, I'd add {} after the else to be consistent
> with the if().

Ack.

> 
>> 	return;
>> 
>> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>> 
>> 	cache->len -= n;
>> 
>> -	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
>> +	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
> 
> In case we keep cache stats, I don't think we should remove get_success
> stats increment. Else, the success stats will never be incremented when
> retrieving objects from the cache.
> 

Good catch. Thanks!

> 
>> 
>> 	return 0;
>> 
>> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>> 	if (ret < 0) {
>> 		__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>> 		__MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
>> -	} else {
>> -		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> 	}
>> 
>> 	return ret;
>> -- 
>> 2.17.1
  
Honnappa Nagarahalli April 22, 2021, 9:47 p.m. UTC | #3
<snip>

> >> diff --git a/lib/librte_mempool/rte_mempool.h
> >> b/lib/librte_mempool/rte_mempool.h
> >> index 848a19226149..0959f8a3f367 100644
> >> --- a/lib/librte_mempool/rte_mempool.h
> >> +++ b/lib/librte_mempool/rte_mempool.h
> >> @@ -66,12 +66,20 @@ extern "C" {
> >>  * A structure that stores the mempool statistics (per-lcore).
> >>  */
> >> struct rte_mempool_debug_stats {
> >> -	uint64_t put_bulk;         /**< Number of puts. */
> >> -	uint64_t put_objs;         /**< Number of objects successfully put. */
> >> -	uint64_t get_success_bulk; /**< Successful allocation number. */
> >> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
> >> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
> >> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
> >> +	uint64_t put_bulk;		  /**< Number of puts. */
> >> +	uint64_t put_objs;		  /**< Number of objects successfully put. */
> >> +	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
> >> +	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
> >> +	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
> >> +	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
> >> +	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
> >> +	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
> >> +	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
> >> +	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
> >> +	uint64_t get_success_bulk;	  /**< Successful allocation number. */
> >> +	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
> >> +	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
> >> +	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */
> >
> > I missed it the first time, but this changes the size of the
> > rte_mempool_debug_stats structure. I think we don't care about this
> > ABI breakage because this structure is only defined if
> > RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.
> 
> Agreed, thank you!
> 
> >
> > About the fields themselves, I'm not certain that there is an added
> > value to have stats for cache gets and puts. My feeling is that the
> > important stat to monitor is the access to common pool, because it is
> > the one that highlights a possible performance impact (contention).
> > The cache stats are more or less equal to "success + fail - common".
> > Moreover, it will simplify the patch and avoid risks of mistakes.
> >
> > What do you think?
Agree as well. Can you please add a comment making a note of this in the stats structure?
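
(A sketch of the kind of note being requested in the structure — hypothetical
wording, not from the posted patch:)

    /* Note: cache accesses are not counted separately. Objects served
     * from the per-lcore cache can be derived as
     * get_success_objs - get_common_pool_objs, because get_success_* is
     * updated on every successful get while get_common_pool_* is updated
     * only when the backend is accessed.
     */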

> 
> Yes, I think the cache stats can be removed.
> Also, please correct me if I’m wrong, but in my understanding, the cache stats
> are equal to “success - common”. Is adding “fail” required?
> 
> >
<snip>
  
Ray Kinsella April 23, 2021, 10:41 a.m. UTC | #4
On 21/04/2021 17:29, Olivier Matz wrote:
> Hi Dharmik,
> 
> Please see some comments below.
> 
> On Mon, Apr 19, 2021 at 07:08:00PM -0500, Dharmik Thakkar wrote:
>> From: Joyce Kong <joyce.kong@arm.com>
>>
>> If the cache is enabled, objects are retrieved from / put into the cache
>> first, and only then from / into the common pool. The debug stats
>> currently count objects retrieved from / put into the cache and the
>> common pool together; it is better to distinguish them.
>>
>> Signed-off-by: Joyce Kong <joyce.kong@arm.com>
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>> ---
>>  lib/librte_mempool/rte_mempool.c | 24 ++++++++++++++++
>>  lib/librte_mempool/rte_mempool.h | 47 ++++++++++++++++++++++----------
>>  2 files changed, 57 insertions(+), 14 deletions(-)
>>
>> diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
>> index afb1239c8d48..339f14455624 100644
>> --- a/lib/librte_mempool/rte_mempool.c
>> +++ b/lib/librte_mempool/rte_mempool.c
>> @@ -1244,6 +1244,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>>  	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
>>  		sum.put_bulk += mp->stats[lcore_id].put_bulk;
>>  		sum.put_objs += mp->stats[lcore_id].put_objs;
>> +		sum.put_common_pool_bulk +=
>> +			mp->stats[lcore_id].put_common_pool_bulk;
>> +		sum.put_common_pool_objs +=
>> +			mp->stats[lcore_id].put_common_pool_objs;
>> +		sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
>> +		sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
>> +		sum.get_common_pool_bulk +=
>> +			mp->stats[lcore_id].get_common_pool_bulk;
>> +		sum.get_common_pool_objs +=
>> +			mp->stats[lcore_id].get_common_pool_objs;
>> +		sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
>> +		sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
>>  		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
>>  		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
>>  		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
>> @@ -1254,6 +1266,18 @@ rte_mempool_dump(FILE *f, struct rte_mempool *mp)
>>  	fprintf(f, "  stats:\n");
>>  	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
>>  	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
>> +	fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
>> +						sum.put_common_pool_bulk);
>> +	fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
>> +						sum.put_common_pool_objs);
>> +	fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
>> +	fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
>> +	fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
>> +						sum.get_common_pool_bulk);
>> +	fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
>> +						sum.get_common_pool_objs);
>> +	fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
>> +	fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
>>  	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
>>  	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
>>  	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
>> diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
>> index 848a19226149..0959f8a3f367 100644
>> --- a/lib/librte_mempool/rte_mempool.h
>> +++ b/lib/librte_mempool/rte_mempool.h
>> @@ -66,12 +66,20 @@ extern "C" {
>>   * A structure that stores the mempool statistics (per-lcore).
>>   */
>>  struct rte_mempool_debug_stats {
>> -	uint64_t put_bulk;         /**< Number of puts. */
>> -	uint64_t put_objs;         /**< Number of objects successfully put. */
>> -	uint64_t get_success_bulk; /**< Successful allocation number. */
>> -	uint64_t get_success_objs; /**< Objects successfully allocated. */
>> -	uint64_t get_fail_bulk;    /**< Failed allocation number. */
>> -	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
>> +	uint64_t put_bulk;		  /**< Number of puts. */
>> +	uint64_t put_objs;		  /**< Number of objects successfully put. */
>> +	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
>> +	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
>> +	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
>> +	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
>> +	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
>> +	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
>> +	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
>> +	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
>> +	uint64_t get_success_bulk;	  /**< Successful allocation number. */
>> +	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
>> +	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
>> +	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */
> 
> I missed it the first time, but this changes the size of the
> rte_mempool_debug_stats structure. I think we don't care about this ABI
> breakage because this structure is only defined if
> RTE_LIBRTE_MEMPOOL_DEBUG is set. But just in case, adding Ray as Cc.

Agreed, if it is just a debugging non-default feature. 

> About the fields themselves, I'm not certain that there is an added value
> to have stats for cache gets and puts. My feeling is that the important
> stat to monitor is the access to common pool, because it is the one that
> highlights a possible performance impact (contention). The cache stats
> are more or less equal to "success + fail - common". Moreover, it will
> simplify the patch and avoid risks of mistakes.
> 
> What do you think?
> 
>>  	/** Successful allocation number of contiguous blocks. */
>>  	uint64_t get_success_blks;
>>  	/** Failed allocation number of contiguous blocks. */
>> @@ -699,10 +707,18 @@ rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
>>  		void **obj_table, unsigned n)
>>  {
>>  	struct rte_mempool_ops *ops;
>> +	int ret;
>>  
>>  	rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
>>  	ops = rte_mempool_get_ops(mp->ops_index);
>> -	return ops->dequeue(mp, obj_table, n);
>> +	ret = ops->dequeue(mp, obj_table, n);
>> +	if (ret == 0) {
>> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
>> +		__MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
>> +		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> +		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +	}
>> +	return ret;
>>  }
>>  
>>  /**
>> @@ -749,6 +765,8 @@ rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
>>  {
>>  	struct rte_mempool_ops *ops;
>>  
>> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
>> +	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
>>  	rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
>>  	ops = rte_mempool_get_ops(mp->ops_index);
>>  	return ops->enqueue(mp, obj_table, n);
>> @@ -1297,14 +1315,18 @@ __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
>>  
>>  	/* Add elements back into the cache */
>>  	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
>> -
>>  	cache->len += n;
>>  
>> +	__MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
>> +
>>  	if (cache->len >= cache->flushthresh) {
>> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
>> +				   n - (cache->len - cache->size));
>>  		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
>>  				cache->len - cache->size);
>>  		cache->len = cache->size;
>> -	}
>> +	} else
>> +		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
>>  
> 
> In case we keep cache stats, I'd add {} after the else to be consistent
> with the if().
> 
>>  	return;
>>  
>> @@ -1438,8 +1460,8 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>>  
>>  	cache->len -= n;
>>  
>> -	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>> +	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
>> +	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
> 
> In case we keep cache stats, I don't think we should remove get_success
> stats increment. Else, the success stats will never be incremented when
> retrieving objects from the cache.
> 
> 
>>  
>>  	return 0;
>>  
>> @@ -1451,9 +1473,6 @@ __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
>>  	if (ret < 0) {
>>  		__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
>>  		__MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
>> -	} else {
>> -		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
>> -		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
>>  	}
>>  
>>  	return ret;
>> -- 
>> 2.17.1
>>
  

Patch

diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index afb1239c8d48..339f14455624 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -1244,6 +1244,18 @@  rte_mempool_dump(FILE *f, struct rte_mempool *mp)
 	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
 		sum.put_bulk += mp->stats[lcore_id].put_bulk;
 		sum.put_objs += mp->stats[lcore_id].put_objs;
+		sum.put_common_pool_bulk +=
+			mp->stats[lcore_id].put_common_pool_bulk;
+		sum.put_common_pool_objs +=
+			mp->stats[lcore_id].put_common_pool_objs;
+		sum.put_cache_bulk += mp->stats[lcore_id].put_cache_bulk;
+		sum.put_cache_objs += mp->stats[lcore_id].put_cache_objs;
+		sum.get_common_pool_bulk +=
+			mp->stats[lcore_id].get_common_pool_bulk;
+		sum.get_common_pool_objs +=
+			mp->stats[lcore_id].get_common_pool_objs;
+		sum.get_cache_bulk += mp->stats[lcore_id].get_cache_bulk;
+		sum.get_cache_objs += mp->stats[lcore_id].get_cache_objs;
 		sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk;
 		sum.get_success_objs += mp->stats[lcore_id].get_success_objs;
 		sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk;
@@ -1254,6 +1266,18 @@  rte_mempool_dump(FILE *f, struct rte_mempool *mp)
 	fprintf(f, "  stats:\n");
 	fprintf(f, "    put_bulk=%"PRIu64"\n", sum.put_bulk);
 	fprintf(f, "    put_objs=%"PRIu64"\n", sum.put_objs);
+	fprintf(f, "    put_common_pool_bulk=%"PRIu64"\n",
+						sum.put_common_pool_bulk);
+	fprintf(f, "    put_common_pool_objs=%"PRIu64"\n",
+						sum.put_common_pool_objs);
+	fprintf(f, "    put_cache_bulk=%"PRIu64"\n", sum.put_cache_bulk);
+	fprintf(f, "    put_cache_objs=%"PRIu64"\n", sum.put_cache_objs);
+	fprintf(f, "    get_common_pool_bulk=%"PRIu64"\n",
+						sum.get_common_pool_bulk);
+	fprintf(f, "    get_common_pool_objs=%"PRIu64"\n",
+						sum.get_common_pool_objs);
+	fprintf(f, "    get_cache_bulk=%"PRIu64"\n", sum.get_cache_bulk);
+	fprintf(f, "    get_cache_objs=%"PRIu64"\n", sum.get_cache_objs);
 	fprintf(f, "    get_success_bulk=%"PRIu64"\n", sum.get_success_bulk);
 	fprintf(f, "    get_success_objs=%"PRIu64"\n", sum.get_success_objs);
 	fprintf(f, "    get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk);
diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h
index 848a19226149..0959f8a3f367 100644
--- a/lib/librte_mempool/rte_mempool.h
+++ b/lib/librte_mempool/rte_mempool.h
@@ -66,12 +66,20 @@  extern "C" {
  * A structure that stores the mempool statistics (per-lcore).
  */
 struct rte_mempool_debug_stats {
-	uint64_t put_bulk;         /**< Number of puts. */
-	uint64_t put_objs;         /**< Number of objects successfully put. */
-	uint64_t get_success_bulk; /**< Successful allocation number. */
-	uint64_t get_success_objs; /**< Objects successfully allocated. */
-	uint64_t get_fail_bulk;    /**< Failed allocation number. */
-	uint64_t get_fail_objs;    /**< Objects that failed to be allocated. */
+	uint64_t put_bulk;		  /**< Number of puts. */
+	uint64_t put_objs;		  /**< Number of objects successfully put. */
+	uint64_t put_common_pool_bulk;	  /**< Number of bulks enqueued in common pool. */
+	uint64_t put_common_pool_objs;	  /**< Number of objects enqueued in common pool. */
+	uint64_t put_cache_bulk;	  /**< Number of bulks enqueued in cache. */
+	uint64_t put_cache_objs;	  /**< Number of objects enqueued in cache. */
+	uint64_t get_common_pool_bulk;    /**< Number of bulks dequeued from common pool. */
+	uint64_t get_common_pool_objs;	  /**< Number of objects dequeued from common pool. */
+	uint64_t get_cache_bulk;	  /**< Number of bulks dequeued from cache. */
+	uint64_t get_cache_objs;	  /**< Number of objects dequeued from cache. */
+	uint64_t get_success_bulk;	  /**< Successful allocation number. */
+	uint64_t get_success_objs;	  /**< Objects successfully allocated. */
+	uint64_t get_fail_bulk;		  /**< Failed allocation number. */
+	uint64_t get_fail_objs;		  /**< Objects that failed to be allocated. */
 	/** Successful allocation number of contiguous blocks. */
 	uint64_t get_success_blks;
 	/** Failed allocation number of contiguous blocks. */
@@ -699,10 +707,18 @@  rte_mempool_ops_dequeue_bulk(struct rte_mempool *mp,
 		void **obj_table, unsigned n)
 {
 	struct rte_mempool_ops *ops;
+	int ret;
 
 	rte_mempool_trace_ops_dequeue_bulk(mp, obj_table, n);
 	ops = rte_mempool_get_ops(mp->ops_index);
-	return ops->dequeue(mp, obj_table, n);
+	ret = ops->dequeue(mp, obj_table, n);
+	if (ret == 0) {
+		__MEMPOOL_STAT_ADD(mp, get_common_pool_bulk, 1);
+		__MEMPOOL_STAT_ADD(mp, get_common_pool_objs, n);
+		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
+		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
+	}
+	return ret;
 }
 
 /**
@@ -749,6 +765,8 @@  rte_mempool_ops_enqueue_bulk(struct rte_mempool *mp, void * const *obj_table,
 {
 	struct rte_mempool_ops *ops;
 
+	__MEMPOOL_STAT_ADD(mp, put_common_pool_bulk, 1);
+	__MEMPOOL_STAT_ADD(mp, put_common_pool_objs, n);
 	rte_mempool_trace_ops_enqueue_bulk(mp, obj_table, n);
 	ops = rte_mempool_get_ops(mp->ops_index);
 	return ops->enqueue(mp, obj_table, n);
@@ -1297,14 +1315,18 @@  __mempool_generic_put(struct rte_mempool *mp, void * const *obj_table,
 
 	/* Add elements back into the cache */
 	rte_memcpy(&cache_objs[0], obj_table, sizeof(void *) * n);
-
 	cache->len += n;
 
+	__MEMPOOL_STAT_ADD(mp, put_cache_bulk, 1);
+
 	if (cache->len >= cache->flushthresh) {
+		__MEMPOOL_STAT_ADD(mp, put_cache_objs,
+				   n - (cache->len - cache->size));
 		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
 				cache->len - cache->size);
 		cache->len = cache->size;
-	}
+	} else
+		__MEMPOOL_STAT_ADD(mp, put_cache_objs, n);
 
 	return;
 
@@ -1438,8 +1460,8 @@  __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
 
 	cache->len -= n;
 
-	__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
-	__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
+	__MEMPOOL_STAT_ADD(mp, get_cache_bulk, 1);
+	__MEMPOOL_STAT_ADD(mp, get_cache_objs, n);
 
 	return 0;
 
@@ -1451,9 +1473,6 @@  __mempool_generic_get(struct rte_mempool *mp, void **obj_table,
 	if (ret < 0) {
 		__MEMPOOL_STAT_ADD(mp, get_fail_bulk, 1);
 		__MEMPOOL_STAT_ADD(mp, get_fail_objs, n);
-	} else {
-		__MEMPOOL_STAT_ADD(mp, get_success_bulk, 1);
-		__MEMPOOL_STAT_ADD(mp, get_success_objs, n);
 	}
 
 	return ret;