[v6,2/6] ethdev: add trace points for ethdev (part one)

Message ID 20230120084059.2926575-3-adwivedi@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series add trace points in ethdev library |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Ankur Dwivedi Jan. 20, 2023, 8:40 a.m. UTC
  Adds trace points for ethdev functions.
Moved the rte_ethdev_trace_rx_burst and rte_ethdev_trace_tx_burst to
a new file rte_ethdev_trace_fp_burst.h. This is needed to resolve
cyclic dependency between rte_ethdev.h and rte_ethdev_trace_fp.h.

Signed-off-by: Ankur Dwivedi <adwivedi@marvell.com>
---
 lib/ethdev/ethdev_private.c            |   5 +
 lib/ethdev/ethdev_trace_points.c       | 193 +++++++++++++++++
 lib/ethdev/meson.build                 |   1 +
 lib/ethdev/rte_ethdev.c                | 235 +++++++++++++++++---
 lib/ethdev/rte_ethdev.h                |   2 +-
 lib/ethdev/rte_ethdev_trace.h          | 285 +++++++++++++++++++++++++
 lib/ethdev/rte_ethdev_trace_fp.h       | 279 +++++++++++++++++++++++-
 lib/ethdev/rte_ethdev_trace_fp_burst.h |  44 ++++
 lib/ethdev/version.map                 |  66 ++++++
 9 files changed, 1075 insertions(+), 35 deletions(-)
 create mode 100644 lib/ethdev/rte_ethdev_trace_fp_burst.h
  

Comments

Ferruh Yigit Jan. 23, 2023, 5:28 p.m. UTC | #1
On 1/20/2023 8:40 AM, Ankur Dwivedi wrote:
> Adds trace points for ethdev functions.
> Moved the rte_ethdev_trace_rx_burst and rte_ethdev_trace_tx_burst to
> a new file rte_ethdev_trace_fp_burst.h. This is needed to resolve
> cyclic dependency between rte_ethdev.h and rte_ethdev_trace_fp.h.
> 
> Signed-off-by: Ankur Dwivedi <adwivedi@marvell.com>

<...>

> +RTE_TRACE_POINT_REGISTER(rte_eth_trace_rx_hairpin_queue_setup,
> +	lib.ethdev.rx.hairpin_queue_setup)
> +

s/rx.hairpin_queue_setup/rx_hairpin_queue_setup/ ?

> +RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_hairpin_queue_setup,
> +	lib.ethdev.tx.hairpin_queue_setup)
> +

s/tx.hairpin_queue_setup/tx_hairpin_queue_setup/ ?

<...>

> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
> index 39250b5da1..f5c0865023 100644
> --- a/lib/ethdev/meson.build
> +++ b/lib/ethdev/meson.build
> @@ -24,6 +24,7 @@ headers = files(
>          'rte_ethdev.h',
>          'rte_ethdev_trace.h',
>          'rte_ethdev_trace_fp.h',
> +        'rte_ethdev_trace_fp_burst.h',

Why these trace headers are public?
Aren't trace points only used by the APIs, so I expect them to be
internal, so applications shouldn't need them. Why they are expsed to user.

@David, what do you think?

<...>

> @@ -258,6 +259,7 @@ rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
>  
>  end:
>  	iter->cls = rte_class_find_by_name("eth");
> +	rte_eth_trace_iterator_init(devargs_str);
>  	rte_devargs_reset(&devargs);
>  	return 0;

Why not add trace call just before return, as a separate block, like:
``
  end:
  	iter->cls = rte_class_find_by_name("eth");
  	rte_devargs_reset(&devargs);

 	rte_eth_trace_iterator_init(devargs_str);

  	return 0;
``

>  
> @@ -274,6 +276,8 @@ rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
>  uint16_t
>  rte_eth_iterator_next(struct rte_dev_iterator *iter)
>  {
> +	uint16_t id;
> +
>  	if (iter == NULL) {
>  		RTE_ETHDEV_LOG(ERR,
>  			"Cannot get next device from NULL iterator\n");
> @@ -297,8 +301,11 @@ rte_eth_iterator_next(struct rte_dev_iterator *iter)
>  		/* A device is matching bus part, need to check ethdev part. */
>  		iter->class_device = iter->cls->dev_iterate(
>  				iter->class_device, iter->cls_str, iter);
> -		if (iter->class_device != NULL)
> -			return eth_dev_to_id(iter->class_device); /* match */
> +		if (iter->class_device != NULL) {
> +			id = eth_dev_to_id(iter->class_device);
> +			rte_eth_trace_iterator_next(iter, id);
> +			return id; /* match */

please move 'id' declaration within the if block, and again put trace
call into separate block to highlight it, otherwise easy to miss, like:

``
if (iter->class_device != NULL) {
	uint16_t id = eth_dev_to_id(iter->class_device);

	rte_eth_trace_iterator_next(iter, id);

	return id; /* match */
}


> +		}
>  	} while (iter->bus != NULL); /* need to try next rte_device */
>  
>  	/* No more ethdev port to iterate. */
> @@ -316,6 +323,7 @@ rte_eth_iterator_cleanup(struct rte_dev_iterator *iter)
>  
>  	if (iter->bus_str == NULL)
>  		return; /* nothing to free in pure class filter */
> +	rte_eth_trace_iterator_cleanup(iter);

Can you please make trace call a separate block, I won't comment same
for bellow, can you please apply this to all.

>  	free(RTE_CAST_FIELD(iter, bus_str, char *)); /* workaround const */
>  	free(RTE_CAST_FIELD(iter, cls_str, char *)); /* workaround const */
>  	memset(iter, 0, sizeof(*iter));
> @@ -324,12 +332,18 @@ rte_eth_iterator_cleanup(struct rte_dev_iterator *iter)
>  uint16_t
>  rte_eth_find_next(uint16_t port_id)
>  {
> +	rte_eth_trace_find_next(port_id);
> +

Why tracing previous port_id, and other one below records next port_id,
won't it be confusing to have both with same name.

I suggest just keep last one, (next port_id).

>  	while (port_id < RTE_MAX_ETHPORTS &&
>  			rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)
>  		port_id++;
>  
> -	if (port_id >= RTE_MAX_ETHPORTS)
> +	if (port_id >= RTE_MAX_ETHPORTS) {
> +		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);

Is there a specific reason to trace all paths, why not just keep the
last one?

>  		return RTE_MAX_ETHPORTS;
> +	}
> +
> +	rte_eth_trace_find_next(port_id);
>  
>  	return port_id;
>  }
> @@ -347,10 +361,15 @@ uint16_t
>  rte_eth_find_next_of(uint16_t port_id, const struct rte_device *parent)
>  {
>  	port_id = rte_eth_find_next(port_id);
> +
> +	rte_eth_trace_find_next_of(port_id);
> +
>  	while (port_id < RTE_MAX_ETHPORTS &&
>  			rte_eth_devices[port_id].device != parent)
>  		port_id = rte_eth_find_next(port_id + 1);
>  
> +	rte_eth_trace_find_next_of(port_id);
> +

Same here, lets keep only the last one.

>  	return port_id;
>  }
>  
> @@ -358,6 +377,9 @@ uint16_t
>  rte_eth_find_next_sibling(uint16_t port_id, uint16_t ref_port_id)
>  {
>  	RTE_ETH_VALID_PORTID_OR_ERR_RET(ref_port_id, RTE_MAX_ETHPORTS);
> +
> +	rte_eth_trace_find_next_sibling(port_id, ref_port_id);
> +

This doesn't record the return values, function should be updated to
keep the interim return value of 'rte_eth_find_next_of()' and trace
functions should record it.

>  	return rte_eth_find_next_of(port_id,
>  			rte_eth_devices[ref_port_id].device);
>  }
> @@ -372,10 +394,13 @@ int
>  rte_eth_dev_is_valid_port(uint16_t port_id)
>  {
>  	if (port_id >= RTE_MAX_ETHPORTS ||
> -	    (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED))
> +	    (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) {
> +		rte_ethdev_trace_is_valid_port(port_id, 0);
>  		return 0;
> -	else
> +	} else {
> +		rte_ethdev_trace_is_valid_port(port_id, 1);
>  		return 1;
> +	}

What about to create an interim 'is_valid' variable and record it with
single trace call?

<...>

>  uint32_t
>  rte_eth_speed_bitflag(uint32_t speed, int duplex)
>  {
> +	rte_eth_trace_speed_bitflag(speed, duplex);

Let's create an interim return value and record it for the trace
function, and please move trace function to the bottom of the function.

<...>

> @@ -2433,6 +2529,7 @@ void
>  rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
>  		void *userdata __rte_unused)
>  {
> +	rte_eth_trace_tx_buffer_drop_callback(pkts, unsent);

Since only pointer value is recorded, function can be moved down, please
put emtpy line in between.

<...>

> @@ -2495,7 +2598,12 @@ rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt)
>  	/* Call driver to free pending mbufs. */
>  	ret = (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id],
>  					       free_cnt);
> -	return eth_err(port_id, ret);
> +
> +	ret = eth_err(port_id, ret);

Please don't add new empty line _before_ this functions.

<...>

> @@ -2700,6 +2834,8 @@ rte_eth_link_to_str(char *str, size_t len, const struct rte_eth_link *eth_link)
>  		return -EINVAL;
>  	}
>  
> +	rte_eth_trace_link_to_str(len, eth_link);
> +

Why not record return value and move trace function to the end of the
function and record 'str' too.

>  	if (eth_link->link_status == RTE_ETH_LINK_DOWN)
>  		return snprintf(str, len, "Link down");
>  	else
> @@ -2715,6 +2851,7 @@ int
>  rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
>  {
>  	struct rte_eth_dev *dev;
> +	int ret;
>  
>  	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>  	dev = &rte_eth_devices[port_id];
> @@ -2730,7 +2867,12 @@ rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
>  	if (*dev->dev_ops->stats_get == NULL)
>  		return -ENOTSUP;
>  	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
> -	return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));
> +
> +	ret = eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));

Pleaes don't add empyt line above.

<...>

> @@ -3229,13 +3384,19 @@ int
>  rte_eth_xstats_reset(uint16_t port_id)
>  {
>  	struct rte_eth_dev *dev;
> +	int ret;
>  
>  	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>  	dev = &rte_eth_devices[port_id];
>  
>  	/* implemented by the driver */
> -	if (dev->dev_ops->xstats_reset != NULL)
> -		return eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
> +	if (dev->dev_ops->xstats_reset != NULL) {
> +		ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));

Can you please move 'ret' decleration in if block.

``
int ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
``

> +
> +		rte_eth_trace_xstats_reset(port_id, ret);
> +
> +		return ret;
> +	}
>  
>  	/* fallback to default */
>  	return rte_eth_stats_reset(port_id);
> @@ -3268,24 +3429,37 @@ int
>  rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id,
>  		uint8_t stat_idx)
>  {
> -	return eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
> +	int ret;
> +
> +	ret = eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
>  						tx_queue_id,
>  						stat_idx, STAT_QMAP_TX));
> +
> +	rte_ethdev_trace_set_tx_queue_stats_mapping(port_id, tx_queue_id, stat_idx, ret);
> +

In below function 'rte_ethdev_trace_set_rx_queue_stats_mapping()' call
wrapped to fit 80 lines, but this one not, please be consistent and if
possible break both to fit as done below.

<...>

> +RTE_TRACE_POINT(
> +	rte_eth_trace_iterator_next,
> +	RTE_TRACE_POINT_ARGS(struct rte_dev_iterator *iter, uint16_t id),
> +	rte_trace_point_emit_ptr(iter);
> +	rte_trace_point_emit_string(iter->bus_str);
> +	rte_trace_point_emit_string(iter->cls_str);
> +	rte_trace_point_emit_u16(id);
> +)
> +

Trace functions don't chage parameters, what do you think to update all
parameters with 'const' keywords for this:
``
RTE_TRACE_POINT(
	rte_eth_trace_iterator_next,
	RTE_TRACE_POINT_ARGS(const struct rte_dev_iterator *iter, uint16_t id),
	rte_trace_point_emit_ptr(iter);
	rte_trace_point_emit_string(iter->bus_str);
	rte_trace_point_emit_string(iter->cls_str);
	rte_trace_point_emit_u16(id);
)
``

This comment is not just for this trace point, but for *all* trace points.

<...>

> +RTE_TRACE_POINT(
> +	rte_eth_trace_rx_hairpin_queue_setup,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
> +		uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf,
> +		int ret),
> +	uint32_t peer_count = conf->peer_count;
> +	uint32_t tx_explicit = conf->tx_explicit;
> +	uint32_t manual_bind = conf->manual_bind;
> +	uint32_t use_locked_device_memory = conf->use_locked_device_memory;
> +	uint32_t use_rte_memory = conf->use_rte_memory;
> +	uint32_t force_memory = conf->force_memory;
> +
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_u16(rx_queue_id);
> +	rte_trace_point_emit_u16(nb_rx_desc);
> +	rte_trace_point_emit_ptr(conf);
> +	rte_trace_point_emit_u32(peer_count);
> +	rte_trace_point_emit_u32(tx_explicit);
> +	rte_trace_point_emit_u32(manual_bind);
> +	rte_trace_point_emit_u32(use_locked_device_memory);
> +	rte_trace_point_emit_u32(use_rte_memory);
> +	rte_trace_point_emit_u32(force_memory);
> +	rte_trace_point_emit_int(ret);

Do we need temporary variables like 'peer_count', why not directly use them:
``
rte_trace_point_emit_u32(conf->peer_count);
``

> +)
> +
> +RTE_TRACE_POINT(
> +	rte_eth_trace_tx_hairpin_queue_setup,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id,
> +		uint16_t nb_tx_desc, const struct rte_eth_hairpin_conf *conf,
> +		int ret),
> +	uint32_t peer_count = conf->peer_count;
> +	uint32_t tx_explicit = conf->tx_explicit;
> +	uint32_t manual_bind = conf->manual_bind;
> +	uint32_t use_locked_device_memory = conf->use_locked_device_memory;
> +	uint32_t use_rte_memory = conf->use_rte_memory;
> +	uint32_t force_memory = conf->force_memory;
> +
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_u16(tx_queue_id);
> +	rte_trace_point_emit_u16(nb_tx_desc);
> +	rte_trace_point_emit_ptr(conf);
> +	rte_trace_point_emit_u32(peer_count);
> +	rte_trace_point_emit_u32(tx_explicit);
> +	rte_trace_point_emit_u32(manual_bind);
> +	rte_trace_point_emit_u32(use_locked_device_memory);
> +	rte_trace_point_emit_u32(use_rte_memory);
> +	rte_trace_point_emit_u32(force_memory);
> +	rte_trace_point_emit_int(ret);
> +)

Same as above.

<...>

> +RTE_TRACE_POINT(
> +	rte_eth_trace_allmulticast_disable,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, int all_multicast, int diag),
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_int(all_multicast);
> +	rte_trace_point_emit_int(diag);
> +)
> +

Can you replace 'diag' with 'ret' for consistency, same for
'*promiscuous/allmulticast_enable/disable'.

<...>

> +RTE_TRACE_POINT_FP(
> +	rte_eth_trace_find_next,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
> +	rte_trace_point_emit_u16(port_id);
> +)
> +

Why 'rte_eth_trace_find_next' added as fast path?
Can you please add comment for all why it is added as fast path, this
help to evaluate/review this later.

> +RTE_TRACE_POINT_FP(
> +	rte_eth_trace_find_next_of,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
> +	rte_trace_point_emit_u16(port_id);
> +)
> +

Why not record second parameter of the API, "struct rte_device *parent" too?

<...>

> +RTE_TRACE_POINT_FP(
> +	rte_eth_trace_hairpin_get_peer_ports,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t *peer_ports,
> +		size_t len, uint32_t direction, int ret),
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_ptr(peer_ports);
> +	rte_trace_point_emit_size_t(len);
> +	rte_trace_point_emit_u32(direction);
> +	rte_trace_point_emit_int(ret);
> +)
> +

Some of these functions I am not clear why fast path trace point is
used, 'rte_eth_trace_hairpin_get_peer_ports' is one of them, can you
please comment the justification to the code as suggested above.

<...>

> +RTE_TRACE_POINT_FP(
> +	rte_eth_trace_link_get,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_link *link),
> +	uint16_t link_duplex = link->link_duplex;
> +	uint16_t link_autoneg = link->link_autoneg;
> +	uint16_t link_status = link->link_status;
> +
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_u32(link->link_speed);
> +	rte_trace_point_emit_u16(link_duplex);
> +	rte_trace_point_emit_u16(link_autoneg);
> +	rte_trace_point_emit_u16(link_status);
> +)

Why creating varible for 'link_duplex' for 'link->link_duplex' but using
'link->link_speed', why not use all as 'link->xxx'?


<...>

> +RTE_TRACE_POINT_FP(
> +	rte_eth_trace_xstats_get_names,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id,
> +		struct rte_eth_xstat_name *xstats_names,
> +		unsigned int size, int cnt_used_entries),
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_string(xstats_names->name);
> +	rte_trace_point_emit_u32(size);
> +	rte_trace_point_emit_int(cnt_used_entries);
> +)
> +

'xstats_names' is an array, whose size is 'cnt_used_entries', just
printing 'xstats_names->name' for first element can be misleading,
can print all values as done in 'rte_eth_xstats_get()'

<...>

> +RTE_TRACE_POINT_FP(
> +	rte_eth_trace_xstats_get,
> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_xstat xstats,
> +		int i),
> +	rte_trace_point_emit_u16(port_id);
> +	rte_trace_point_emit_u64(xstats.id);
> +	rte_trace_point_emit_u64(xstats.value);
> +	rte_trace_point_emit_u32(i);
> +)
> +

as far as I can see "id == i", so maybe 'i' can be dropped.

<...>

> @@ -298,6 +298,72 @@ EXPERIMENTAL {
>  	rte_flow_get_q_aged_flows;
>  	rte_mtr_meter_policy_get;
>  	rte_mtr_meter_profile_get;
> +
> +	# added in 23.03
> +	__rte_eth_trace_allmulticast_disable;
> +	__rte_eth_trace_allmulticast_enable;
> +	__rte_eth_trace_allmulticast_get;
> +	__rte_eth_trace_call_rx_callbacks;
> +	__rte_eth_trace_call_tx_callbacks;
> +	__rte_eth_trace_find_next;
> +	__rte_eth_trace_find_next_of;
> +	__rte_eth_trace_find_next_owned_by;
> +	__rte_eth_trace_find_next_sibling;
> +	__rte_eth_trace_hairpin_bind;
> +	__rte_eth_trace_hairpin_get_peer_ports;
> +	__rte_eth_trace_hairpin_unbind;
> +	__rte_eth_trace_iterator_cleanup;
> +	__rte_eth_trace_iterator_init;
> +	__rte_eth_trace_iterator_next;
> +	__rte_eth_trace_link_get;
> +	__rte_eth_trace_link_get_nowait;
> +	__rte_eth_trace_link_speed_to_str;
> +	__rte_eth_trace_link_to_str;
> +	__rte_eth_trace_promiscuous_disable;
> +	__rte_eth_trace_promiscuous_enable;
> +	__rte_eth_trace_promiscuous_get;
> +	__rte_eth_trace_rx_hairpin_queue_setup;
> +	__rte_eth_trace_speed_bitflag;
> +	__rte_eth_trace_stats_get;
> +	__rte_eth_trace_stats_reset;
> +	__rte_eth_trace_tx_buffer_count_callback;
> +	__rte_eth_trace_tx_buffer_drop_callback;
> +	__rte_eth_trace_tx_buffer_init;
> +	__rte_eth_trace_tx_buffer_set_err_callback;
> +	__rte_eth_trace_tx_done_cleanup;
> +	__rte_eth_trace_tx_hairpin_queue_setup;
> +	__rte_eth_trace_xstats_get;
> +	__rte_eth_trace_xstats_get_by_id;
> +	__rte_eth_trace_xstats_get_id_by_name;
> +	__rte_eth_trace_xstats_get_names;
> +	__rte_eth_trace_xstats_get_names_by_id;
> +	__rte_eth_trace_xstats_reset;
> +	__rte_ethdev_trace_capability_name;
> +	__rte_ethdev_trace_count_avail;
> +	__rte_ethdev_trace_count_total;
> +	__rte_ethdev_trace_fw_version_get;
> +	__rte_ethdev_trace_get_name_by_port;
> +	__rte_ethdev_trace_get_port_by_name;
> +	__rte_ethdev_trace_get_sec_ctx;
> +	__rte_ethdev_trace_is_removed;
> +	__rte_ethdev_trace_is_valid_port;
> +	__rte_ethdev_trace_owner_delete;
> +	__rte_ethdev_trace_owner_get;
> +	__rte_ethdev_trace_owner_new;
> +	__rte_ethdev_trace_owner_set;
> +	__rte_ethdev_trace_owner_unset;
> +	__rte_ethdev_trace_reset;
> +	__rte_ethdev_trace_rx_offload_name;
> +	__rte_ethdev_trace_rx_queue_start;
> +	__rte_ethdev_trace_rx_queue_stop;
> +	__rte_ethdev_trace_set_link_down;
> +	__rte_ethdev_trace_set_link_up;
> +	__rte_ethdev_trace_set_rx_queue_stats_mapping;
> +	__rte_ethdev_trace_set_tx_queue_stats_mapping;
> +	__rte_ethdev_trace_socket_id;
> +	__rte_ethdev_trace_tx_offload_name;
> +	__rte_ethdev_trace_tx_queue_start;
> +	__rte_ethdev_trace_tx_queue_stop;

Can you please drop these trace objects?
  
Ankur Dwivedi Jan. 30, 2023, 4:01 p.m. UTC | #2
>-----Original Message-----
>From: Ferruh Yigit <ferruh.yigit@amd.com>
>Sent: Monday, January 23, 2023 10:59 PM
>To: Ankur Dwivedi <adwivedi@marvell.com>; dev@dpdk.org; David
>Marchand <david.marchand@redhat.com>
>Cc: thomas@monjalon.net; david.marchand@redhat.com; mdr@ashroe.eu;
>orika@nvidia.com; chas3@att.com; humin29@huawei.com;
>linville@tuxdriver.com; ciara.loftus@intel.com; qi.z.zhang@intel.com;
>mw@semihalf.com; mk@semihalf.com; shaibran@amazon.com;
>evgenys@amazon.com; igorch@amazon.com; chandu@amd.com; Igor
>Russkikh <irusskikh@marvell.com>; shepard.siegel@atomicrules.com;
>ed.czeck@atomicrules.com; john.miller@atomicrules.com;
>ajit.khaparde@broadcom.com; somnath.kotur@broadcom.com; Jerin Jacob
>Kollanukkaran <jerinj@marvell.com>; Maciej Czekaj [C]
><mczekaj@marvell.com>; Shijith Thotton <sthotton@marvell.com>;
>Srisivasubramanian Srinivasan <srinivasan@marvell.com>; Harman Kalra
><hkalra@marvell.com>; rahul.lakkireddy@chelsio.com; johndale@cisco.com;
>hyonkim@cisco.com; liudongdong3@huawei.com;
>yisen.zhuang@huawei.com; xuanziyang2@huawei.com;
>cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
>simei.su@intel.com; wenjun1.wu@intel.com; qiming.yang@intel.com;
>Yuying.Zhang@intel.com; beilei.xing@intel.com; xiao.w.wang@intel.com;
>jingjing.wu@intel.com; junfeng.guo@intel.com; rosen.xu@intel.com; Nithin
>Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar Kokkilagadda
><kirankumark@marvell.com>; Sunil Kumar Kori <skori@marvell.com>; Satha
>Koteswara Rao Kottidi <skoteshwar@marvell.com>; Liron Himi
><lironh@marvell.com>; zr@semihalf.com; Radha Chintakuntla
><radhac@marvell.com>; Veerasenareddy Burru <vburru@marvell.com>;
>Sathesh B Edara <sedara@marvell.com>; matan@nvidia.com;
>viacheslavo@nvidia.com; longli@microsoft.com; spinler@cesnet.cz;
>chaoyong.he@corigine.com; niklas.soderlund@corigine.com;
>hemant.agrawal@nxp.com; sachin.saxena@oss.nxp.com; g.singh@nxp.com;
>apeksha.gupta@nxp.com; sachin.saxena@nxp.com; aboyer@pensando.io;
>Rasesh Mody <rmody@marvell.com>; Shahed Shaikh
><shshaikh@marvell.com>; Devendra Singh Rawat
><dsinghrawat@marvell.com>; andrew.rybchenko@oktetlabs.ru;
>jiawenwu@trustnetic.com; jianwang@trustnetic.com;
>jbehrens@vmware.com; maxime.coquelin@redhat.com;
>chenbo.xia@intel.com; steven.webster@windriver.com;
>matt.peters@windriver.com; bruce.richardson@intel.com;
>mtetsuyah@gmail.com; grive@u256.net; jasvinder.singh@intel.com;
>cristian.dumitrescu@intel.com; jgrajcia@cisco.com;
>mb@smartsharesystems.com
>Subject: [EXT] Re: [PATCH v6 2/6] ethdev: add trace points for ethdev (part
>one)
>
>External Email
>
>----------------------------------------------------------------------
>On 1/20/2023 8:40 AM, Ankur Dwivedi wrote:
>> Adds trace points for ethdev functions.
>> Moved the rte_ethdev_trace_rx_burst and rte_ethdev_trace_tx_burst to a
>> new file rte_ethdev_trace_fp_burst.h. This is needed to resolve cyclic
>> dependency between rte_ethdev.h and rte_ethdev_trace_fp.h.
>>
>> Signed-off-by: Ankur Dwivedi <adwivedi@marvell.com>
>
><...>
>
>> +RTE_TRACE_POINT_REGISTER(rte_eth_trace_rx_hairpin_queue_setup,
>> +	lib.ethdev.rx.hairpin_queue_setup)
>> +
>
>s/rx.hairpin_queue_setup/rx_hairpin_queue_setup/ ?
>
>> +RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_hairpin_queue_setup,
>> +	lib.ethdev.tx.hairpin_queue_setup)
>> +
>
>s/tx.hairpin_queue_setup/tx_hairpin_queue_setup/ ?

Ok.
>
><...>
>
>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>> 39250b5da1..f5c0865023 100644
>> --- a/lib/ethdev/meson.build
>> +++ b/lib/ethdev/meson.build
>> @@ -24,6 +24,7 @@ headers = files(
>>          'rte_ethdev.h',
>>          'rte_ethdev_trace.h',
>>          'rte_ethdev_trace_fp.h',
>> +        'rte_ethdev_trace_fp_burst.h',
>
>Why these trace headers are public?
>Aren't trace points only used by the APIs, so I expect them to be internal, so
>applications shouldn't need them. Why they are expsed to user.
'rte_ethdev_trace.h' can be made as internal. Not sure about 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints in fast path may be called from public inline functions.
>
>@David, what do you think?
>
><...>
>
>> @@ -258,6 +259,7 @@ rte_eth_iterator_init(struct rte_dev_iterator
>> *iter, const char *devargs_str)
>>
>>  end:
>>  	iter->cls = rte_class_find_by_name("eth");
>> +	rte_eth_trace_iterator_init(devargs_str);
>>  	rte_devargs_reset(&devargs);
>>  	return 0;
>
>Why not add trace call just before return, as a separate block, like:

Ok.
>``
>  end:
>  	iter->cls = rte_class_find_by_name("eth");
>  	rte_devargs_reset(&devargs);
>
> 	rte_eth_trace_iterator_init(devargs_str);
>
>  	return 0;
>``
>
>>
>> @@ -274,6 +276,8 @@ rte_eth_iterator_init(struct rte_dev_iterator
>> *iter, const char *devargs_str)  uint16_t
>> rte_eth_iterator_next(struct rte_dev_iterator *iter)  {
>> +	uint16_t id;
>> +
>>  	if (iter == NULL) {
>>  		RTE_ETHDEV_LOG(ERR,
>>  			"Cannot get next device from NULL iterator\n"); @@ -
>297,8 +301,11
>> @@ rte_eth_iterator_next(struct rte_dev_iterator *iter)
>>  		/* A device is matching bus part, need to check ethdev part. */
>>  		iter->class_device = iter->cls->dev_iterate(
>>  				iter->class_device, iter->cls_str, iter);
>> -		if (iter->class_device != NULL)
>> -			return eth_dev_to_id(iter->class_device); /* match */
>> +		if (iter->class_device != NULL) {
>> +			id = eth_dev_to_id(iter->class_device);
>> +			rte_eth_trace_iterator_next(iter, id);
>> +			return id; /* match */
>
>please move 'id' declaration within the if block, and again put trace call into
>separate block to highlight it, otherwise easy to miss, like:

Ok.
>
>``
>if (iter->class_device != NULL) {
>	uint16_t id = eth_dev_to_id(iter->class_device);
>
>	rte_eth_trace_iterator_next(iter, id);
>
>	return id; /* match */
>}
>
>
>> +		}
>>  	} while (iter->bus != NULL); /* need to try next rte_device */
>>
>>  	/* No more ethdev port to iterate. */ @@ -316,6 +323,7 @@
>> rte_eth_iterator_cleanup(struct rte_dev_iterator *iter)
>>
>>  	if (iter->bus_str == NULL)
>>  		return; /* nothing to free in pure class filter */
>> +	rte_eth_trace_iterator_cleanup(iter);
>
>Can you please make trace call a separate block, I won't comment same for
>bellow, can you please apply this to all.

Have missed to make few functions as separate block. Majority of the functions are made as separate block. Will make these as separate block.
>
>>  	free(RTE_CAST_FIELD(iter, bus_str, char *)); /* workaround const */
>>  	free(RTE_CAST_FIELD(iter, cls_str, char *)); /* workaround const */
>>  	memset(iter, 0, sizeof(*iter));
>> @@ -324,12 +332,18 @@ rte_eth_iterator_cleanup(struct rte_dev_iterator
>> *iter)  uint16_t  rte_eth_find_next(uint16_t port_id)  {
>> +	rte_eth_trace_find_next(port_id);
>> +
>
>Why tracing previous port_id, and other one below records next port_id,
>won't it be confusing to have both with same name.
>
>I suggest just keep last one, (next port_id).
OK.
>
>>  	while (port_id < RTE_MAX_ETHPORTS &&
>>  			rte_eth_devices[port_id].state ==
>RTE_ETH_DEV_UNUSED)
>>  		port_id++;
>>
>> -	if (port_id >= RTE_MAX_ETHPORTS)
>> +	if (port_id >= RTE_MAX_ETHPORTS) {
>> +		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);
>
>Is there a specific reason to trace all paths, why not just keep the last one?
This can be kept as the function returns with RTE_MAX_ETHPORTS here.
>
>>  		return RTE_MAX_ETHPORTS;
>> +	}
>> +
>> +	rte_eth_trace_find_next(port_id);
This can be also kept.
>>
>>  	return port_id;
>>  }
>> @@ -347,10 +361,15 @@ uint16_t
>>  rte_eth_find_next_of(uint16_t port_id, const struct rte_device
>> *parent)  {
>>  	port_id = rte_eth_find_next(port_id);
>> +
>> +	rte_eth_trace_find_next_of(port_id);
>> +
>>  	while (port_id < RTE_MAX_ETHPORTS &&
>>  			rte_eth_devices[port_id].device != parent)
>>  		port_id = rte_eth_find_next(port_id + 1);
>>
>> +	rte_eth_trace_find_next_of(port_id);
>> +
>
>Same here, lets keep only the last one.
Ok.
>
>>  	return port_id;
>>  }
>>
>> @@ -358,6 +377,9 @@ uint16_t
>>  rte_eth_find_next_sibling(uint16_t port_id, uint16_t ref_port_id)  {
>>  	RTE_ETH_VALID_PORTID_OR_ERR_RET(ref_port_id,
>RTE_MAX_ETHPORTS);
>> +
>> +	rte_eth_trace_find_next_sibling(port_id, ref_port_id);
>> +
>
>This doesn't record the return values, function should be updated to keep the
>interim return value of 'rte_eth_find_next_of()' and trace functions should
>record it.
Will make this change.
>
>>  	return rte_eth_find_next_of(port_id,
>>  			rte_eth_devices[ref_port_id].device);
>>  }
>> @@ -372,10 +394,13 @@ int
>>  rte_eth_dev_is_valid_port(uint16_t port_id)  {
>>  	if (port_id >= RTE_MAX_ETHPORTS ||
>> -	    (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED))
>> +	    (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) {
>> +		rte_ethdev_trace_is_valid_port(port_id, 0);
>>  		return 0;
>> -	else
>> +	} else {
>> +		rte_ethdev_trace_is_valid_port(port_id, 1);
>>  		return 1;
>> +	}
>
>What about to create an interim 'is_valid' variable and record it with single
>trace call?
Ok.
>
><...>
>
>>  uint32_t
>>  rte_eth_speed_bitflag(uint32_t speed, int duplex)  {
>> +	rte_eth_trace_speed_bitflag(speed, duplex);
>
>Let's create an interim return value and record it for the trace function, and
>please move trace function to the bottom of the function.
Ok.
>
><...>
>
>> @@ -2433,6 +2529,7 @@ void
>>  rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
>>  		void *userdata __rte_unused)
>>  {
>> +	rte_eth_trace_tx_buffer_drop_callback(pkts, unsent);
>
>Since only pointer value is recorded, function can be moved down, please put
>emtpy line in between.
Ok.
>
><...>
>
>> @@ -2495,7 +2598,12 @@ rte_eth_tx_done_cleanup(uint16_t port_id,
>uint16_t queue_id, uint32_t free_cnt)
>>  	/* Call driver to free pending mbufs. */
>>  	ret = (*dev->dev_ops->tx_done_cleanup)(dev->data-
>>tx_queues[queue_id],
>>  					       free_cnt);
>> -	return eth_err(port_id, ret);
>> +
>> +	ret = eth_err(port_id, ret);
>
>Please don't add new empty line _before_ this functions.
Ok.
>
><...>
>
>> @@ -2700,6 +2834,8 @@ rte_eth_link_to_str(char *str, size_t len, const
>struct rte_eth_link *eth_link)
>>  		return -EINVAL;
>>  	}
>>
>> +	rte_eth_trace_link_to_str(len, eth_link);
>> +
>
>Why not record return value and move trace function to the end of the
>function and record 'str' too.
Ok.
>
>>  	if (eth_link->link_status == RTE_ETH_LINK_DOWN)
>>  		return snprintf(str, len, "Link down");
>>  	else
>> @@ -2715,6 +2851,7 @@ int
>>  rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)  {
>>  	struct rte_eth_dev *dev;
>> +	int ret;
>>
>>  	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>>  	dev = &rte_eth_devices[port_id];
>> @@ -2730,7 +2867,12 @@ rte_eth_stats_get(uint16_t port_id, struct
>rte_eth_stats *stats)
>>  	if (*dev->dev_ops->stats_get == NULL)
>>  		return -ENOTSUP;
>>  	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
>> -	return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));
>> +
>> +	ret = eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));
>
>Pleaes don't add empyt line above.
Ok.
>
><...>
>
>> @@ -3229,13 +3384,19 @@ int
>>  rte_eth_xstats_reset(uint16_t port_id)  {
>>  	struct rte_eth_dev *dev;
>> +	int ret;
>>
>>  	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
>>  	dev = &rte_eth_devices[port_id];
>>
>>  	/* implemented by the driver */
>> -	if (dev->dev_ops->xstats_reset != NULL)
>> -		return eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
>> +	if (dev->dev_ops->xstats_reset != NULL) {
>> +		ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
>
>Can you please move 'ret' decleration in if block.
Ok.
>
>``
>int ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
>``
>
>> +
>> +		rte_eth_trace_xstats_reset(port_id, ret);
>> +
>> +		return ret;
>> +	}
>>
>>  	/* fallback to default */
>>  	return rte_eth_stats_reset(port_id); @@ -3268,24 +3429,37 @@ int
>> rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t
>tx_queue_id,
>>  		uint8_t stat_idx)
>>  {
>> -	return eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
>> +	int ret;
>> +
>> +	ret = eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
>>  						tx_queue_id,
>>  						stat_idx, STAT_QMAP_TX));
>> +
>> +	rte_ethdev_trace_set_tx_queue_stats_mapping(port_id, tx_queue_id,
>> +stat_idx, ret);
>> +
>
>In below function 'rte_ethdev_trace_set_rx_queue_stats_mapping()' call
>wrapped to fit 80 lines, but this one not, please be consistent and if possible
>break both to fit as done below.
Ok.
>
><...>
>
>> +RTE_TRACE_POINT(
>> +	rte_eth_trace_iterator_next,
>> +	RTE_TRACE_POINT_ARGS(struct rte_dev_iterator *iter, uint16_t id),
>> +	rte_trace_point_emit_ptr(iter);
>> +	rte_trace_point_emit_string(iter->bus_str);
>> +	rte_trace_point_emit_string(iter->cls_str);
>> +	rte_trace_point_emit_u16(id);
>> +)
>> +
>
>Trace functions don't chage parameters, what do you think to update all
>parameters with 'const' keywords for this:
Yes will make it as const.
>``
>RTE_TRACE_POINT(
>	rte_eth_trace_iterator_next,
>	RTE_TRACE_POINT_ARGS(const struct rte_dev_iterator *iter, uint16_t
>id),
>	rte_trace_point_emit_ptr(iter);
>	rte_trace_point_emit_string(iter->bus_str);
>	rte_trace_point_emit_string(iter->cls_str);
>	rte_trace_point_emit_u16(id);
>)
>``
>
>This comment is not just for this trace point, but for *all* trace points.
>
><...>
>
>> +RTE_TRACE_POINT(
>> +	rte_eth_trace_rx_hairpin_queue_setup,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
>> +		uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf,
>> +		int ret),
>> +	uint32_t peer_count = conf->peer_count;
>> +	uint32_t tx_explicit = conf->tx_explicit;
>> +	uint32_t manual_bind = conf->manual_bind;
>> +	uint32_t use_locked_device_memory = conf-
>>use_locked_device_memory;
>> +	uint32_t use_rte_memory = conf->use_rte_memory;
>> +	uint32_t force_memory = conf->force_memory;
>> +
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_u16(rx_queue_id);
>> +	rte_trace_point_emit_u16(nb_rx_desc);
>> +	rte_trace_point_emit_ptr(conf);
>> +	rte_trace_point_emit_u32(peer_count);
>> +	rte_trace_point_emit_u32(tx_explicit);
>> +	rte_trace_point_emit_u32(manual_bind);
>> +	rte_trace_point_emit_u32(use_locked_device_memory);
>> +	rte_trace_point_emit_u32(use_rte_memory);
>> +	rte_trace_point_emit_u32(force_memory);
>> +	rte_trace_point_emit_int(ret);
>
>Do we need temporary variables like 'peer_count', why not directly use them:
Temporary variables are needed where the actual variable is a bitfield.
For example in struct rte_eth_hairpin_conf:
struct rte_eth_hairpin_conf {
uint32_t peer_count:16;
....
uint32_t tx_explicit:1
....
};

Otherwise there is a compilation error.
../lib/ethdev/rte_ethdev_trace.h:253:9: note: in expansion of macro ‘rte_trace_point_emit_u16’
  253 |         rte_trace_point_emit_u16(conf->peer_count);
      |         ^~~~~~~~~~~~~~~~~~~~~~~~
../lib/eal/include/rte_trace_point.h:369:38: error: ‘sizeof’ applied to a bit-field
  369 |         mem = RTE_PTR_ADD(mem, sizeof(in)); \

>``
>rte_trace_point_emit_u32(conf->peer_count);
>``
>
>> +)
>> +
>> +RTE_TRACE_POINT(
>> +	rte_eth_trace_tx_hairpin_queue_setup,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id,
>> +		uint16_t nb_tx_desc, const struct rte_eth_hairpin_conf *conf,
>> +		int ret),
>> +	uint32_t peer_count = conf->peer_count;
>> +	uint32_t tx_explicit = conf->tx_explicit;
>> +	uint32_t manual_bind = conf->manual_bind;
>> +	uint32_t use_locked_device_memory = conf-
>>use_locked_device_memory;
>> +	uint32_t use_rte_memory = conf->use_rte_memory;
>> +	uint32_t force_memory = conf->force_memory;
>> +
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_u16(tx_queue_id);
>> +	rte_trace_point_emit_u16(nb_tx_desc);
>> +	rte_trace_point_emit_ptr(conf);
>> +	rte_trace_point_emit_u32(peer_count);
>> +	rte_trace_point_emit_u32(tx_explicit);
>> +	rte_trace_point_emit_u32(manual_bind);
>> +	rte_trace_point_emit_u32(use_locked_device_memory);
>> +	rte_trace_point_emit_u32(use_rte_memory);
>> +	rte_trace_point_emit_u32(force_memory);
>> +	rte_trace_point_emit_int(ret);
>> +)
>
>Same as above.
>
><...>
>
>> +RTE_TRACE_POINT(
>> +	rte_eth_trace_allmulticast_disable,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, int all_multicast, int diag),
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_int(all_multicast);
>> +	rte_trace_point_emit_int(diag);
>> +)
>> +
>
>Can you replace 'diag' with 'ret' for consistency, same for
>'*promiscuous/allmulticast_enable/disable'.
Ok.
>
><...>
>
>> +RTE_TRACE_POINT_FP(
>> +	rte_eth_trace_find_next,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>> +	rte_trace_point_emit_u16(port_id);
>> +)
>> +
>
>Why 'rte_eth_trace_find_next' added as fast path?
>Can you please add comment for all why it is added as fast path, this help to
>evaluate/review this later.

There were many functions for which I was not sure about whether they should be slow path or fast path. I made the following assumption:

For slow path I have chosen the function which do some setup, configure or write some configuration. For an example rte_eth_trace_tx_hairpin_queue_setup, rte_eth_trace_tx_buffer_set_err_callback, rte_eth_trace_promiscuous_enable are slow path functions. 

The functions which read data are made as fastpath functions. Also for functions for which I was not sure I made it as fastpath.

For an example rte_ethdev_trace_owner_get, rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are made as fastpath.

But there are few exceptions. Function like *_get_capability are made as slowpath. Also rte_ethdev_trace_info_get is slowpath. 

The slowpath and fastpath functions are in separate files. rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath). 

Please let me  know if any function needs to be swapped. I will make that change.

>
>> +RTE_TRACE_POINT_FP(
>> +	rte_eth_trace_find_next_of,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>> +	rte_trace_point_emit_u16(port_id);
>> +)
>> +
>
>Why not record second parameter of the API, "struct rte_device *parent" too?
Ok.
>
><...>
>
>> +RTE_TRACE_POINT_FP(
>> +	rte_eth_trace_hairpin_get_peer_ports,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t *peer_ports,
>> +		size_t len, uint32_t direction, int ret),
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_ptr(peer_ports);
>> +	rte_trace_point_emit_size_t(len);
>> +	rte_trace_point_emit_u32(direction);
>> +	rte_trace_point_emit_int(ret);
>> +)
>> +
>
>Some of these functions I am not clear why fast path trace point is used,
>'rte_eth_trace_hairpin_get_peer_ports' is one of them, can you please
>comment the justification to the code as suggested above.
>
><...>
>
>> +RTE_TRACE_POINT_FP(
>> +	rte_eth_trace_link_get,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_link *link),
>> +	uint16_t link_duplex = link->link_duplex;
>> +	uint16_t link_autoneg = link->link_autoneg;
>> +	uint16_t link_status = link->link_status;
>> +
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_u32(link->link_speed);
>> +	rte_trace_point_emit_u16(link_duplex);
>> +	rte_trace_point_emit_u16(link_autoneg);
>> +	rte_trace_point_emit_u16(link_status);
>> +)
>
>Why creating varible for 'link_duplex' for 'link->link_duplex' but using 'link-
>>link_speed', why not use all as 'link->xxx'?

nink_duplex, link_autoneg and link_status are bit fields. Same reason as mentioned in earlier comment.
>
>
><...>
>
>> +RTE_TRACE_POINT_FP(
>> +	rte_eth_trace_xstats_get_names,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id,
>> +		struct rte_eth_xstat_name *xstats_names,
>> +		unsigned int size, int cnt_used_entries),
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_string(xstats_names->name);
>> +	rte_trace_point_emit_u32(size);
>> +	rte_trace_point_emit_int(cnt_used_entries);
>> +)
>> +
>
>'xstats_names' is an array, whose size is 'cnt_used_entries', just printing
>'xstats_names->name' for first element can be misleading, can print all values
>as done in 'rte_eth_xstats_get()'
Ok.
>
><...>
>
>> +RTE_TRACE_POINT_FP(
>> +	rte_eth_trace_xstats_get,
>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_xstat xstats,
>> +		int i),
>> +	rte_trace_point_emit_u16(port_id);
>> +	rte_trace_point_emit_u64(xstats.id);
>> +	rte_trace_point_emit_u64(xstats.value);
>> +	rte_trace_point_emit_u32(i);
>> +)
>> +
>
>as far as I can see "id == i", so maybe 'i' can be dropped.
Ok.
>
><...>
>
>> @@ -298,6 +298,72 @@ EXPERIMENTAL {
>>  	rte_flow_get_q_aged_flows;
>>  	rte_mtr_meter_policy_get;
>>  	rte_mtr_meter_profile_get;
>> +
>> +	# added in 23.03
>> +	__rte_eth_trace_allmulticast_disable;
>> +	__rte_eth_trace_allmulticast_enable;
>> +	__rte_eth_trace_allmulticast_get;
>> +	__rte_eth_trace_call_rx_callbacks;
>> +	__rte_eth_trace_call_tx_callbacks;
>> +	__rte_eth_trace_find_next;
>> +	__rte_eth_trace_find_next_of;
>> +	__rte_eth_trace_find_next_owned_by;
>> +	__rte_eth_trace_find_next_sibling;
>> +	__rte_eth_trace_hairpin_bind;
>> +	__rte_eth_trace_hairpin_get_peer_ports;
>> +	__rte_eth_trace_hairpin_unbind;
>> +	__rte_eth_trace_iterator_cleanup;
>> +	__rte_eth_trace_iterator_init;
>> +	__rte_eth_trace_iterator_next;
>> +	__rte_eth_trace_link_get;
>> +	__rte_eth_trace_link_get_nowait;
>> +	__rte_eth_trace_link_speed_to_str;
>> +	__rte_eth_trace_link_to_str;
>> +	__rte_eth_trace_promiscuous_disable;
>> +	__rte_eth_trace_promiscuous_enable;
>> +	__rte_eth_trace_promiscuous_get;
>> +	__rte_eth_trace_rx_hairpin_queue_setup;
>> +	__rte_eth_trace_speed_bitflag;
>> +	__rte_eth_trace_stats_get;
>> +	__rte_eth_trace_stats_reset;
>> +	__rte_eth_trace_tx_buffer_count_callback;
>> +	__rte_eth_trace_tx_buffer_drop_callback;
>> +	__rte_eth_trace_tx_buffer_init;
>> +	__rte_eth_trace_tx_buffer_set_err_callback;
>> +	__rte_eth_trace_tx_done_cleanup;
>> +	__rte_eth_trace_tx_hairpin_queue_setup;
>> +	__rte_eth_trace_xstats_get;
>> +	__rte_eth_trace_xstats_get_by_id;
>> +	__rte_eth_trace_xstats_get_id_by_name;
>> +	__rte_eth_trace_xstats_get_names;
>> +	__rte_eth_trace_xstats_get_names_by_id;
>> +	__rte_eth_trace_xstats_reset;
>> +	__rte_ethdev_trace_capability_name;
>> +	__rte_ethdev_trace_count_avail;
>> +	__rte_ethdev_trace_count_total;
>> +	__rte_ethdev_trace_fw_version_get;
>> +	__rte_ethdev_trace_get_name_by_port;
>> +	__rte_ethdev_trace_get_port_by_name;
>> +	__rte_ethdev_trace_get_sec_ctx;
>> +	__rte_ethdev_trace_is_removed;
>> +	__rte_ethdev_trace_is_valid_port;
>> +	__rte_ethdev_trace_owner_delete;
>> +	__rte_ethdev_trace_owner_get;
>> +	__rte_ethdev_trace_owner_new;
>> +	__rte_ethdev_trace_owner_set;
>> +	__rte_ethdev_trace_owner_unset;
>> +	__rte_ethdev_trace_reset;
>> +	__rte_ethdev_trace_rx_offload_name;
>> +	__rte_ethdev_trace_rx_queue_start;
>> +	__rte_ethdev_trace_rx_queue_stop;
>> +	__rte_ethdev_trace_set_link_down;
>> +	__rte_ethdev_trace_set_link_up;
>> +	__rte_ethdev_trace_set_rx_queue_stats_mapping;
>> +	__rte_ethdev_trace_set_tx_queue_stats_mapping;
>> +	__rte_ethdev_trace_socket_id;
>> +	__rte_ethdev_trace_tx_offload_name;
>> +	__rte_ethdev_trace_tx_queue_start;
>> +	__rte_ethdev_trace_tx_queue_stop;
>
>Can you please drop these trace objects?
Yes these can be dropped.
>
  
Ferruh Yigit Jan. 31, 2023, 6:38 p.m. UTC | #3
On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:

<...>

>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>>> 39250b5da1..f5c0865023 100644
>>> --- a/lib/ethdev/meson.build
>>> +++ b/lib/ethdev/meson.build
>>> @@ -24,6 +24,7 @@ headers = files(
>>>          'rte_ethdev.h',
>>>          'rte_ethdev_trace.h',
>>>          'rte_ethdev_trace_fp.h',
>>> +        'rte_ethdev_trace_fp_burst.h',
>>
>> Why these trace headers are public?
>> Aren't trace points only used by the APIs, so I expect them to be internal, so
>> applications shouldn't need them. Why they are expsed to user.
> 'rte_ethdev_trace.h' can be made as internal. Not sure about 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints in fast path may be called from public inline functions.

Trace calls used by inline functions needs to be public, in this case at
least 'rte_ethdev_trace_fp_burst.h' needs to be public.

Can you please at least move all trace points that are called by inline
functions to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce
number of the header files to make public? Feel free to rename header if
required.

Meanwhile not sure about adding new header as dependency to end user.
@Jerin, @Andrew, what do you think
1) to move these trace points to 'rte_ethdev_core.h'
OR
2) disable trace calls in inline functions on compile time, possibly
with existing 'RTE_ETHDEV_DEBUG_*' macro

<...>

>>> -	if (port_id >= RTE_MAX_ETHPORTS)
>>> +	if (port_id >= RTE_MAX_ETHPORTS) {
>>> +		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);
>>
>> Is there a specific reason to trace all paths, why not just keep the last one?
> This can be kept as the function returns with RTE_MAX_ETHPORTS here.

'RTE_MAX_ETHPORTS' more like error path, that is returned when no more
valid port left, since most of the trace doesn't record error return
values, I thought this can be dropped as well unless there is an
explicit need for it.

<...>

>>> +RTE_TRACE_POINT(
>>> +	rte_eth_trace_rx_hairpin_queue_setup,
>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
>>> +		uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf,
>>> +		int ret),
>>> +	uint32_t peer_count = conf->peer_count;
>>> +	uint32_t tx_explicit = conf->tx_explicit;
>>> +	uint32_t manual_bind = conf->manual_bind;
>>> +	uint32_t use_locked_device_memory = conf-
>>> use_locked_device_memory;
>>> +	uint32_t use_rte_memory = conf->use_rte_memory;
>>> +	uint32_t force_memory = conf->force_memory;
>>> +
>>> +	rte_trace_point_emit_u16(port_id);
>>> +	rte_trace_point_emit_u16(rx_queue_id);
>>> +	rte_trace_point_emit_u16(nb_rx_desc);
>>> +	rte_trace_point_emit_ptr(conf);
>>> +	rte_trace_point_emit_u32(peer_count);
>>> +	rte_trace_point_emit_u32(tx_explicit);
>>> +	rte_trace_point_emit_u32(manual_bind);
>>> +	rte_trace_point_emit_u32(use_locked_device_memory);
>>> +	rte_trace_point_emit_u32(use_rte_memory);
>>> +	rte_trace_point_emit_u32(force_memory);
>>> +	rte_trace_point_emit_int(ret);
>>
>> Do we need temporary variables like 'peer_count', why not directly use them:
> Temporary variables are needed where the actual variable is a bitfield.
> For example in struct rte_eth_hairpin_conf:
> struct rte_eth_hairpin_conf {
> uint32_t peer_count:16;
> ....
> uint32_t tx_explicit:1
> ....
> };
> 
> Otherwise there is a compilation error.
> ../lib/ethdev/rte_ethdev_trace.h:253:9: note: in expansion of macro ‘rte_trace_point_emit_u16’
>   253 |         rte_trace_point_emit_u16(conf->peer_count);
>       |         ^~~~~~~~~~~~~~~~~~~~~~~~
> ../lib/eal/include/rte_trace_point.h:369:38: error: ‘sizeof’ applied to a bit-field
>   369 |         mem = RTE_PTR_ADD(mem, sizeof(in)); \
> 

Got it, that is because of bitfields. But as I look the
'rte_trace_point_emit_u16' macro, it is like:

```
#define rte_trace_point_emit_u16(in) __rte_trace_point_emit(in, uint16_t)

#define __rte_trace_point_emit(in, type) \
do { \
        memcpy(mem, &(in), sizeof(in)); \
        mem = RTE_PTR_ADD(mem, sizeof(in)); \
} while (0)
```

Here, in '__rte_trace_point_emit' macro 'type' is not used at all, if so
what is the point of passing type?

If 'sizeof(type)' used, instead of 'sizeof(in)', it would be possible to
use bitfields directly, what do you think?



Also, as for the "struct rte_eth_hairpin_conf" sample, some of the
bitfields are 1 bit length, does 'uint32_t' really needed for them?

<...>

>>> +RTE_TRACE_POINT_FP(
>>> +	rte_eth_trace_find_next,
>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>> +	rte_trace_point_emit_u16(port_id);
>>> +)
>>> +
>>
>> Why 'rte_eth_trace_find_next' added as fast path?
>> Can you please add comment for all why it is added as fast path, this help to
>> evaluate/review this later.
> 
> There were many functions for which I was not sure about whether they should be slow path or fast path. I made the following assumption:
> 
> For slow path I have chosen the function which do some setup, configure or write some configuration. For an example rte_eth_trace_tx_hairpin_queue_setup, rte_eth_trace_tx_buffer_set_err_callback, rte_eth_trace_promiscuous_enable are slow path functions. 
> 
> The functions which read data are made as fastpath functions. Also for functions for which I was not sure I made it as fastpath.
> 
> For an example rte_ethdev_trace_owner_get, rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are made as fastpath.
> 
> But there are few exceptions. Function like *_get_capability are made as slowpath. Also rte_ethdev_trace_info_get is slowpath. 
> 
> The slowpath and fastpath functions are in separate files. rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath). 
> 
> Please let me  know if any function needs to be swapped. I will make that change.
> 

Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
are for control/helper functions like: 'rte_ethdev_trace_count_avail',
'rte_ethdev_trace_get_port_by_name', 'rte_eth_trace_promiscuous_get' ...

I thought you did based on some analysis, that is why I asked to add
that reasoning as code comment.


I think we can generalize as:

1) Anything called by ethdev static inline functions are datapath, and
must be 'RTE_TRACE_POINT_FP', like 'rte_eth_trace_call_[rt]x_callbacks',
'rte_ethdev_trace_[rt]x_burst',

2) Anything that is called in endless loop in application/sample that
has potential impact although it may not really be datapath

3) Rest are regular trace points, RTE_TRACE_POINT.


Item (2) requires some analysis and whenever you found one of them
please comment the reasoning in the code where trace point is defined.
  
Jerin Jacob Jan. 31, 2023, 6:46 p.m. UTC | #4
On Wed, Feb 1, 2023 at 12:09 AM Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>
> On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
>
> <...>
>
> >>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
> >>> 39250b5da1..f5c0865023 100644
> >>> --- a/lib/ethdev/meson.build
> >>> +++ b/lib/ethdev/meson.build
> >>> @@ -24,6 +24,7 @@ headers = files(
> >>>          'rte_ethdev.h',
> >>>          'rte_ethdev_trace.h',
> >>>          'rte_ethdev_trace_fp.h',
> >>> +        'rte_ethdev_trace_fp_burst.h',
> >>
> >> Why these trace headers are public?
> >> Aren't trace points only used by the APIs, so I expect them to be internal, so
> >> applications shouldn't need them. Why they are expsed to user.
> > 'rte_ethdev_trace.h' can be made as internal. Not sure about 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints in fast path may be called from public inline functions.
>
> Trace calls used by inline functions needs to be public, in this case at
> least 'rte_ethdev_trace_fp_burst.h' needs to be public.
>
> Can you please at least move all trace points that are called by inline
> functions to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce
> number of the header files to make public? Feel free to rename header if
> required.
>
> Meanwhile not sure about adding new header as dependency to end user.
> @Jerin, @Andrew, what do you think

rte_ethdev_trace_fp_burst.h will be installed through ninja install
and application does not need to directly include this. So this scheme
is OK. Right? I dont see any downside.


> 1) to move these trace points to 'rte_ethdev_core.h'
> OR
> 2) disable trace calls in inline functions on compile time, possibly
> with existing 'RTE_ETHDEV_DEBUG_*' macro
  
Ferruh Yigit Jan. 31, 2023, 10:20 p.m. UTC | #5
On 1/31/2023 6:46 PM, Jerin Jacob wrote:
> On Wed, Feb 1, 2023 at 12:09 AM Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>
>> On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
>>
>> <...>
>>
>>>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>>>>> 39250b5da1..f5c0865023 100644
>>>>> --- a/lib/ethdev/meson.build
>>>>> +++ b/lib/ethdev/meson.build
>>>>> @@ -24,6 +24,7 @@ headers = files(
>>>>>          'rte_ethdev.h',
>>>>>          'rte_ethdev_trace.h',
>>>>>          'rte_ethdev_trace_fp.h',
>>>>> +        'rte_ethdev_trace_fp_burst.h',
>>>>
>>>> Why these trace headers are public?
>>>> Aren't trace points only used by the APIs, so I expect them to be internal, so
>>>> applications shouldn't need them. Why they are expsed to user.
>>> 'rte_ethdev_trace.h' can be made as internal. Not sure about 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints in fast path may be called from public inline functions.
>>
>> Trace calls used by inline functions needs to be public, in this case at
>> least 'rte_ethdev_trace_fp_burst.h' needs to be public.
>>
>> Can you please at least move all trace points that are called by inline
>> functions to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce
>> number of the header files to make public? Feel free to rename header if
>> required.
>>
>> Meanwhile not sure about adding new header as dependency to end user.
>> @Jerin, @Andrew, what do you think
> 
> rte_ethdev_trace_fp_burst.h will be installed through ninja install
> and application does not need to directly include this. So this scheme
> is OK. Right? I dont see any downside.
> 

Right. It is installed automatically with above meson change, and it is
included by 'rte_ethdev.h', so user doesn't need to include it
explicitly. Overall there is no functional problem here.

Only this header file needs to be included (directly or indirectly) by
every application that use ethdev. I would much prefer to have an
internal header but not able to because of technical reasons (inline
functions).
After lots of effort we did to hide as much ethdev internals as we can,
now we are exposing some new trace stuff to user.

As we can't prevent header to be public, I am just questioning below
options to reduce exposure of this header, hoping that it may lead a
better solution.

> 
>> 1) to move these trace points to 'rte_ethdev_core.h'
>> OR
>> 2) disable trace calls in inline functions on compile time, possibly
>> with existing 'RTE_ETHDEV_DEBUG_*' macro
  
Jerin Jacob Feb. 1, 2023, 8:31 a.m. UTC | #6
On Wed, Feb 1, 2023 at 3:50 AM Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>
> On 1/31/2023 6:46 PM, Jerin Jacob wrote:
> > On Wed, Feb 1, 2023 at 12:09 AM Ferruh Yigit <ferruh.yigit@amd.com> wrote:
> >>
> >> On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
> >>
> >> <...>
> >>
> >>>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
> >>>>> 39250b5da1..f5c0865023 100644
> >>>>> --- a/lib/ethdev/meson.build
> >>>>> +++ b/lib/ethdev/meson.build
> >>>>> @@ -24,6 +24,7 @@ headers = files(
> >>>>>          'rte_ethdev.h',
> >>>>>          'rte_ethdev_trace.h',
> >>>>>          'rte_ethdev_trace_fp.h',
> >>>>> +        'rte_ethdev_trace_fp_burst.h',
> >>>>
> >>>> Why these trace headers are public?
> >>>> Aren't trace points only used by the APIs, so I expect them to be internal, so
> >>>> applications shouldn't need them. Why they are expsed to user.
> >>> 'rte_ethdev_trace.h' can be made as internal. Not sure about 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints in fast path may be called from public inline functions.
> >>
> >> Trace calls used by inline functions needs to be public, in this case at
> >> least 'rte_ethdev_trace_fp_burst.h' needs to be public.
> >>
> >> Can you please at least move all trace points that are called by inline
> >> functions to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce
> >> number of the header files to make public? Feel free to rename header if
> >> required.
> >>
> >> Meanwhile not sure about adding new header as dependency to end user.
> >> @Jerin, @Andrew, what do you think
> >
> > rte_ethdev_trace_fp_burst.h will be installed through ninja install
> > and application does not need to directly include this. So this scheme
> > is OK. Right? I dont see any downside.
> >
>
> Right. It is installed automatically with above meson change, and it is
> included by 'rte_ethdev.h', so user doesn't need to include it
> explicitly. Overall there is no functional problem here.
>
> Only this header file needs to be included (directly or indirectly) by
> every application that use ethdev. I would much prefer to have an
> internal header but not able to because of technical reasons (inline
> functions).
> After lots of effort we did to hide as much ethdev internals as we can,
> now we are exposing some new trace stuff to user.
>
> As we can't prevent header to be public, I am just questioning below
> options to reduce exposure of this header, hoping that it may lead a
> better solution.

Yes. All non-inline function can goto internal header file. I think,
it make sense
to have separated header file _fp functions using inline functions to avoid
cluttering main 'rte_ethdev.h' file.

> disable trace calls in inline functions on compile time, possibly
> with existing 'RTE_ETHDEV_DEBUG_*' macro

Disabling trace calls to inline functions, already possible with
"enable_trace_fp"
build option. So it will be possible to wrap around that to virtually to
disable

>
> >
> >> 1) to move these trace points to 'rte_ethdev_core.h'
> >> OR
> >> 2) disable trace calls in inline functions on compile time, possibly
> >> with existing 'RTE_ETHDEV_DEBUG_*' macro
>
  
Ferruh Yigit Feb. 1, 2023, 10:50 a.m. UTC | #7
On 2/1/2023 8:31 AM, Jerin Jacob wrote:
> On Wed, Feb 1, 2023 at 3:50 AM Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>
>> On 1/31/2023 6:46 PM, Jerin Jacob wrote:
>>> On Wed, Feb 1, 2023 at 12:09 AM Ferruh Yigit <ferruh.yigit@amd.com> wrote:
>>>>
>>>> On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
>>>>
>>>> <...>
>>>>
>>>>>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>>>>>>> 39250b5da1..f5c0865023 100644
>>>>>>> --- a/lib/ethdev/meson.build
>>>>>>> +++ b/lib/ethdev/meson.build
>>>>>>> @@ -24,6 +24,7 @@ headers = files(
>>>>>>>          'rte_ethdev.h',
>>>>>>>          'rte_ethdev_trace.h',
>>>>>>>          'rte_ethdev_trace_fp.h',
>>>>>>> +        'rte_ethdev_trace_fp_burst.h',
>>>>>>
>>>>>> Why these trace headers are public?
>>>>>> Aren't trace points only used by the APIs, so I expect them to be internal, so
>>>>>> applications shouldn't need them. Why they are expsed to user.
>>>>> 'rte_ethdev_trace.h' can be made as internal. Not sure about 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints in fast path may be called from public inline functions.
>>>>
>>>> Trace calls used by inline functions needs to be public, in this case at
>>>> least 'rte_ethdev_trace_fp_burst.h' needs to be public.
>>>>
>>>> Can you please at least move all trace points that are called by inline
>>>> functions to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce
>>>> number of the header files to make public? Feel free to rename header if
>>>> required.
>>>>
>>>> Meanwhile not sure about adding new header as dependency to end user.
>>>> @Jerin, @Andrew, what do you think
>>>
>>> rte_ethdev_trace_fp_burst.h will be installed through ninja install
>>> and application does not need to directly include this. So this scheme
>>> is OK. Right? I dont see any downside.
>>>
>>
>> Right. It is installed automatically with above meson change, and it is
>> included by 'rte_ethdev.h', so user doesn't need to include it
>> explicitly. Overall there is no functional problem here.
>>
>> Only this header file needs to be included (directly or indirectly) by
>> every application that use ethdev. I would much prefer to have an
>> internal header but not able to because of technical reasons (inline
>> functions).
>> After lots of effort we did to hide as much ethdev internals as we can,
>> now we are exposing some new trace stuff to user.
>>
>> As we can't prevent header to be public, I am just questioning below
>> options to reduce exposure of this header, hoping that it may lead a
>> better solution.
> 
> Yes. All non-inline function can goto internal header file. I think,
> it make sense
> to have separated header file _fp functions using inline functions to avoid
> cluttering main 'rte_ethdev.h' file.
> 
>> disable trace calls in inline functions on compile time, possibly
>> with existing 'RTE_ETHDEV_DEBUG_*' macro
> 
> Disabling trace calls to inline functions, already possible with
> "enable_trace_fp"
> build option. So it will be possible to wrap around that to virtually to
> disable
> 

With "enable_trace_fp" build option "rte_ethdev_trace_fp.h" dependency
is still there, but wrapping with DEBUG macro can prevent it for
non-debug use cases.

Anyway, "rte_ethdev_trace_fp.h" dependency is already there before this
patch, so OK to not change it, and I am OK to move forward by making all
trace points and trace related header internal as much as possible.

>>
>>>
>>>> 1) to move these trace points to 'rte_ethdev_core.h'
>>>> OR
>>>> 2) disable trace calls in inline functions on compile time, possibly
>>>> with existing 'RTE_ETHDEV_DEBUG_*' macro
>>
  
Ankur Dwivedi Feb. 1, 2023, 3:42 p.m. UTC | #8
>-----Original Message-----
>From: Ferruh Yigit <ferruh.yigit@amd.com>
>Sent: Wednesday, February 1, 2023 12:08 AM
>To: Ankur Dwivedi <adwivedi@marvell.com>; dev@dpdk.org; David
>Marchand <david.marchand@redhat.com>; Jerin Jacob Kollanukkaran
><jerinj@marvell.com>; Andrew Rybchenko
><andrew.rybchenko@oktetlabs.ru>
>Cc: thomas@monjalon.net; mdr@ashroe.eu; orika@nvidia.com;
>chas3@att.com; humin29@huawei.com; linville@tuxdriver.com;
>ciara.loftus@intel.com; qi.z.zhang@intel.com; mw@semihalf.com;
>mk@semihalf.com; shaibran@amazon.com; evgenys@amazon.com;
>igorch@amazon.com; chandu@amd.com; Igor Russkikh
><irusskikh@marvell.com>; shepard.siegel@atomicrules.com;
>ed.czeck@atomicrules.com; john.miller@atomicrules.com;
>ajit.khaparde@broadcom.com; somnath.kotur@broadcom.com; Jerin Jacob
>Kollanukkaran <jerinj@marvell.com>; Maciej Czekaj [C]
><mczekaj@marvell.com>; Shijith Thotton <sthotton@marvell.com>;
>Srisivasubramanian Srinivasan <srinivasan@marvell.com>; Harman Kalra
><hkalra@marvell.com>; rahul.lakkireddy@chelsio.com; johndale@cisco.com;
>hyonkim@cisco.com; liudongdong3@huawei.com;
>yisen.zhuang@huawei.com; xuanziyang2@huawei.com;
>cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
>simei.su@intel.com; wenjun1.wu@intel.com; qiming.yang@intel.com;
>Yuying.Zhang@intel.com; beilei.xing@intel.com; xiao.w.wang@intel.com;
>jingjing.wu@intel.com; junfeng.guo@intel.com; rosen.xu@intel.com; Nithin
>Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar Kokkilagadda
><kirankumark@marvell.com>; Sunil Kumar Kori <skori@marvell.com>; Satha
>Koteswara Rao Kottidi <skoteshwar@marvell.com>; Liron Himi
><lironh@marvell.com>; zr@semihalf.com; Radha Chintakuntla
><radhac@marvell.com>; Veerasenareddy Burru <vburru@marvell.com>;
>Sathesh B Edara <sedara@marvell.com>; matan@nvidia.com;
>viacheslavo@nvidia.com; longli@microsoft.com; spinler@cesnet.cz;
>chaoyong.he@corigine.com; niklas.soderlund@corigine.com;
>hemant.agrawal@nxp.com; sachin.saxena@oss.nxp.com; g.singh@nxp.com;
>apeksha.gupta@nxp.com; sachin.saxena@nxp.com; aboyer@pensando.io;
>Rasesh Mody <rmody@marvell.com>; Shahed Shaikh
><shshaikh@marvell.com>; Devendra Singh Rawat
><dsinghrawat@marvell.com>; andrew.rybchenko@oktetlabs.ru;
>jiawenwu@trustnetic.com; jianwang@trustnetic.com;
>jbehrens@vmware.com; maxime.coquelin@redhat.com;
>chenbo.xia@intel.com; steven.webster@windriver.com;
>matt.peters@windriver.com; bruce.richardson@intel.com;
>mtetsuyah@gmail.com; grive@u256.net; jasvinder.singh@intel.com;
>cristian.dumitrescu@intel.com; jgrajcia@cisco.com;
>mb@smartsharesystems.com
>Subject: Re: [EXT] Re: [PATCH v6 2/6] ethdev: add trace points for ethdev (part
>one)
>
>On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
>
><...>
>
>>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>>>> 39250b5da1..f5c0865023 100644
>>>> --- a/lib/ethdev/meson.build
>>>> +++ b/lib/ethdev/meson.build
>>>> @@ -24,6 +24,7 @@ headers = files(
>>>>          'rte_ethdev.h',
>>>>          'rte_ethdev_trace.h',
>>>>          'rte_ethdev_trace_fp.h',
>>>> +        'rte_ethdev_trace_fp_burst.h',
>>>
>>> Why these trace headers are public?
>>> Aren't trace points only used by the APIs, so I expect them to be
>>> internal, so applications shouldn't need them. Why they are expsed to
>user.
>> 'rte_ethdev_trace.h' can be made as internal. Not sure about
>'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints
>in fast path may be called from public inline functions.
>
>Trace calls used by inline functions needs to be public, in this case at least
>'rte_ethdev_trace_fp_burst.h' needs to be public.
>
>Can you please at least move all trace points that are called by inline functions
>to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce number of the
>header files to make public? Feel free to rename header if required.
>
>Meanwhile not sure about adding new header as dependency to end user.
>@Jerin, @Andrew, what do you think
>1) to move these trace points to 'rte_ethdev_core.h'
>OR
>2) disable trace calls in inline functions on compile time, possibly with existing
>'RTE_ETHDEV_DEBUG_*' macro
>
><...>
>
>>>> -	if (port_id >= RTE_MAX_ETHPORTS)
>>>> +	if (port_id >= RTE_MAX_ETHPORTS) {
>>>> +		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);
>>>
>>> Is there a specific reason to trace all paths, why not just keep the last one?
>> This can be kept as the function returns with RTE_MAX_ETHPORTS here.
>
>'RTE_MAX_ETHPORTS' more like error path, that is returned when no more
>valid port left, since most of the trace doesn't record error return values, I
>thought this can be dropped as well unless there is an explicit need for it.
Ok.
>
><...>
>
>>>> +RTE_TRACE_POINT(
>>>> +	rte_eth_trace_rx_hairpin_queue_setup,
>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
>>>> +		uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf,
>>>> +		int ret),
>>>> +	uint32_t peer_count = conf->peer_count;
>>>> +	uint32_t tx_explicit = conf->tx_explicit;
>>>> +	uint32_t manual_bind = conf->manual_bind;
>>>> +	uint32_t use_locked_device_memory = conf-
>>>> use_locked_device_memory;
>>>> +	uint32_t use_rte_memory = conf->use_rte_memory;
>>>> +	uint32_t force_memory = conf->force_memory;
>>>> +
>>>> +	rte_trace_point_emit_u16(port_id);
>>>> +	rte_trace_point_emit_u16(rx_queue_id);
>>>> +	rte_trace_point_emit_u16(nb_rx_desc);
>>>> +	rte_trace_point_emit_ptr(conf);
>>>> +	rte_trace_point_emit_u32(peer_count);
>>>> +	rte_trace_point_emit_u32(tx_explicit);
>>>> +	rte_trace_point_emit_u32(manual_bind);
>>>> +	rte_trace_point_emit_u32(use_locked_device_memory);
>>>> +	rte_trace_point_emit_u32(use_rte_memory);
>>>> +	rte_trace_point_emit_u32(force_memory);
>>>> +	rte_trace_point_emit_int(ret);
>>>
>>> Do we need temporary variables like 'peer_count', why not directly use
>them:
>> Temporary variables are needed where the actual variable is a bitfield.
>> For example in struct rte_eth_hairpin_conf:
>> struct rte_eth_hairpin_conf {
>> uint32_t peer_count:16;
>> ....
>> uint32_t tx_explicit:1
>> ....
>> };
>>
>> Otherwise there is a compilation error.
>> ../lib/ethdev/rte_ethdev_trace.h:253:9: note: in expansion of macro
>‘rte_trace_point_emit_u16’
>>   253 |         rte_trace_point_emit_u16(conf->peer_count);
>>       |         ^~~~~~~~~~~~~~~~~~~~~~~~
>> ../lib/eal/include/rte_trace_point.h:369:38: error: ‘sizeof’ applied to a bit-
>field
>>   369 |         mem = RTE_PTR_ADD(mem, sizeof(in)); \
>>
>
>Got it, that is because of bitfields. But as I look the 'rte_trace_point_emit_u16'
>macro, it is like:
>
>```
>#define rte_trace_point_emit_u16(in) __rte_trace_point_emit(in, uint16_t)
>
>#define __rte_trace_point_emit(in, type) \ do { \
>        memcpy(mem, &(in), sizeof(in)); \
>        mem = RTE_PTR_ADD(mem, sizeof(in)); \ } while (0) ```
>
>Here, in '__rte_trace_point_emit' macro 'type' is not used at all, if so what is
>the point of passing type?
>
>If 'sizeof(type)' used, instead of 'sizeof(in)', it would be possible to use
>bitfields directly, what do you think?

After using 'sizeof(type)', the following compile time error is observed. 
In file included from ../lib/ethdev/rte_ethdev_trace_fp_burst.h:18,
                 from ../lib/ethdev/rte_ethdev.h:175,
                 from ../lib/ethdev/rte_tm.c:8:
../lib/ethdev/rte_ethdev_trace.h: In function ‘rte_eth_trace_rx_hairpin_queue_setup’:
../lib/eal/include/rte_trace_point.h:378:21: error: cannot take address of bit-field ‘peer_count’
  378 |         memcpy(mem, &(in), sizeof(type)); \
          |                                     ^
>
>
>
>Also, as for the "struct rte_eth_hairpin_conf" sample, some of the bitfields are
>1 bit length, does 'uint32_t' really needed for them?

uint8_t should suffice.
>
><...>
>
>>>> +RTE_TRACE_POINT_FP(
>>>> +	rte_eth_trace_find_next,
>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>> +	rte_trace_point_emit_u16(port_id);
>>>> +)
>>>> +
>>>
>>> Why 'rte_eth_trace_find_next' added as fast path?
>>> Can you please add comment for all why it is added as fast path, this
>>> help to evaluate/review this later.
>>
>> There were many functions for which I was not sure about whether they
>should be slow path or fast path. I made the following assumption:
>>
>> For slow path I have chosen the function which do some setup, configure or
>write some configuration. For an example
>rte_eth_trace_tx_hairpin_queue_setup,
>rte_eth_trace_tx_buffer_set_err_callback,
>rte_eth_trace_promiscuous_enable are slow path functions.
>>
>> The functions which read data are made as fastpath functions. Also for
>functions for which I was not sure I made it as fastpath.
>>
>> For an example rte_ethdev_trace_owner_get,
>rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are made
>as fastpath.
>>
>> But there are few exceptions. Function like *_get_capability are made as
>slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>
>> The slowpath and fastpath functions are in separate files.
>rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>
>> Please let me  know if any function needs to be swapped. I will make that
>change.
>>
>
>Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>are for control/helper functions like: 'rte_ethdev_trace_count_avail',
>'rte_ethdev_trace_get_port_by_name', 'rte_eth_trace_promiscuous_get' ...
>
>I thought you did based on some analysis, that is why I asked to add that
>reasoning as code comment.
>
>
>I think we can generalize as:
>
>1) Anything called by ethdev static inline functions are datapath, and must be
>'RTE_TRACE_POINT_FP', like 'rte_eth_trace_call_[rt]x_callbacks',
>'rte_ethdev_trace_[rt]x_burst',

In this category the following functions come:
rte_eth_rx_burst
rte_eth_tx_burst
rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
rte_eth_tx_buffer_count_callback (registered as error callback, called from rte_eth_tx_buffer_flush)
rte_eth_tx_buffer_drop_callback (registered as error callback)
>
>2) Anything that is called in endless loop in application/sample that has
>potential impact although it may not really be datapath

Apart from functions in category [1], I have observed the following functions in ethdev library, called in some while loop in app/examples.
rte_eth_stats_get (called in while loop in examples/qos_sched and examples/distributor)
rte_eth_macaddr_get (called in while loop in examples/bond and examples/ethtool)
rte_eth_link_get (called in for loop in examples/ip_pipeline)
rte_eth_dev_get_mtu (called in for loop in examples/ip_pipeline)
rte_eth_link_speed_to_str (called in for loop in examples/ip_pipeline)
rte_eth_dev_rx_intr_enable ( called in loop in examples/l3fwd-power)
rte_eth_dev_rx_intr_disable ( called in loop in examples/l3fwd-power)
rte_eth_timesync_read_rx_timestamp (called in loop in examples/ptpclient)
rte_eth_timesync_read_tx_timestamp (called in loop in examples/ptpclient)
rte_eth_timesync_adjust_time (called in loop in examples/ptpclient)
rte_eth_timesync_read_time (called in loop in examples/ptpclient)
rte_flow_classifier_query (called in examples/flow_classify)
rte_mtr_create (in app/test-flow-perf loop)
rte_mtr_destroy (in app/test-flow-perf loop)
rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
rte_flow_create (in app/test-flow-perf)
rte_flow_destroy (in app/test-flow-perf)

Apart from the above can all other functions be moved to slowpath tracepoints?
I think it was discussed in tech board meeting on 2022-11-30 that the functions for which the slowpath or fastpath
distinction is not clear, they should be fastpath tracepoints.

>
>3) Rest are regular trace points, RTE_TRACE_POINT.
>
>
>Item (2) requires some analysis and whenever you found one of them please
>comment the reasoning in the code where trace point is defined.
  
Ferruh Yigit Feb. 2, 2023, 8:56 a.m. UTC | #9
On 2/1/2023 3:42 PM, Ankur Dwivedi wrote:
> 
> 
>> -----Original Message-----
>> From: Ferruh Yigit <ferruh.yigit@amd.com>
>> Sent: Wednesday, February 1, 2023 12:08 AM
>> To: Ankur Dwivedi <adwivedi@marvell.com>; dev@dpdk.org; David
>> Marchand <david.marchand@redhat.com>; Jerin Jacob Kollanukkaran
>> <jerinj@marvell.com>; Andrew Rybchenko
>> <andrew.rybchenko@oktetlabs.ru>
>> Cc: thomas@monjalon.net; mdr@ashroe.eu; orika@nvidia.com;
>> chas3@att.com; humin29@huawei.com; linville@tuxdriver.com;
>> ciara.loftus@intel.com; qi.z.zhang@intel.com; mw@semihalf.com;
>> mk@semihalf.com; shaibran@amazon.com; evgenys@amazon.com;
>> igorch@amazon.com; chandu@amd.com; Igor Russkikh
>> <irusskikh@marvell.com>; shepard.siegel@atomicrules.com;
>> ed.czeck@atomicrules.com; john.miller@atomicrules.com;
>> ajit.khaparde@broadcom.com; somnath.kotur@broadcom.com; Jerin Jacob
>> Kollanukkaran <jerinj@marvell.com>; Maciej Czekaj [C]
>> <mczekaj@marvell.com>; Shijith Thotton <sthotton@marvell.com>;
>> Srisivasubramanian Srinivasan <srinivasan@marvell.com>; Harman Kalra
>> <hkalra@marvell.com>; rahul.lakkireddy@chelsio.com; johndale@cisco.com;
>> hyonkim@cisco.com; liudongdong3@huawei.com;
>> yisen.zhuang@huawei.com; xuanziyang2@huawei.com;
>> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
>> simei.su@intel.com; wenjun1.wu@intel.com; qiming.yang@intel.com;
>> Yuying.Zhang@intel.com; beilei.xing@intel.com; xiao.w.wang@intel.com;
>> jingjing.wu@intel.com; junfeng.guo@intel.com; rosen.xu@intel.com; Nithin
>> Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar Kokkilagadda
>> <kirankumark@marvell.com>; Sunil Kumar Kori <skori@marvell.com>; Satha
>> Koteswara Rao Kottidi <skoteshwar@marvell.com>; Liron Himi
>> <lironh@marvell.com>; zr@semihalf.com; Radha Chintakuntla
>> <radhac@marvell.com>; Veerasenareddy Burru <vburru@marvell.com>;
>> Sathesh B Edara <sedara@marvell.com>; matan@nvidia.com;
>> viacheslavo@nvidia.com; longli@microsoft.com; spinler@cesnet.cz;
>> chaoyong.he@corigine.com; niklas.soderlund@corigine.com;
>> hemant.agrawal@nxp.com; sachin.saxena@oss.nxp.com; g.singh@nxp.com;
>> apeksha.gupta@nxp.com; sachin.saxena@nxp.com; aboyer@pensando.io;
>> Rasesh Mody <rmody@marvell.com>; Shahed Shaikh
>> <shshaikh@marvell.com>; Devendra Singh Rawat
>> <dsinghrawat@marvell.com>; andrew.rybchenko@oktetlabs.ru;
>> jiawenwu@trustnetic.com; jianwang@trustnetic.com;
>> jbehrens@vmware.com; maxime.coquelin@redhat.com;
>> chenbo.xia@intel.com; steven.webster@windriver.com;
>> matt.peters@windriver.com; bruce.richardson@intel.com;
>> mtetsuyah@gmail.com; grive@u256.net; jasvinder.singh@intel.com;
>> cristian.dumitrescu@intel.com; jgrajcia@cisco.com;
>> mb@smartsharesystems.com
>> Subject: Re: [EXT] Re: [PATCH v6 2/6] ethdev: add trace points for ethdev (part
>> one)
>>
>> On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
>>
>> <...>
>>
>>>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>>>>> 39250b5da1..f5c0865023 100644
>>>>> --- a/lib/ethdev/meson.build
>>>>> +++ b/lib/ethdev/meson.build
>>>>> @@ -24,6 +24,7 @@ headers = files(
>>>>>          'rte_ethdev.h',
>>>>>          'rte_ethdev_trace.h',
>>>>>          'rte_ethdev_trace_fp.h',
>>>>> +        'rte_ethdev_trace_fp_burst.h',
>>>>
>>>> Why these trace headers are public?
>>>> Aren't trace points only used by the APIs, so I expect them to be
>>>> internal, so applications shouldn't need them. Why they are expsed to
>> user.
>>> 'rte_ethdev_trace.h' can be made as internal. Not sure about
>> 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the tracepoints
>> in fast path may be called from public inline functions.
>>
>> Trace calls used by inline functions needs to be public, in this case at least
>> 'rte_ethdev_trace_fp_burst.h' needs to be public.
>>
>> Can you please at least move all trace points that are called by inline functions
>> to the same file, 'rte_ethdev_trace_fp_burst.h', to reduce number of the
>> header files to make public? Feel free to rename header if required.
>>
>> Meanwhile not sure about adding new header as dependency to end user.
>> @Jerin, @Andrew, what do you think
>> 1) to move these trace points to 'rte_ethdev_core.h'
>> OR
>> 2) disable trace calls in inline functions on compile time, possibly with existing
>> 'RTE_ETHDEV_DEBUG_*' macro
>>
>> <...>
>>
>>>>> -	if (port_id >= RTE_MAX_ETHPORTS)
>>>>> +	if (port_id >= RTE_MAX_ETHPORTS) {
>>>>> +		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);
>>>>
>>>> Is there a specific reason to trace all paths, why not just keep the last one?
>>> This can be kept as the function returns with RTE_MAX_ETHPORTS here.
>>
>> 'RTE_MAX_ETHPORTS' more like error path, that is returned when no more
>> valid port left, since most of the trace doesn't record error return values, I
>> thought this can be dropped as well unless there is an explicit need for it.
> Ok.
>>
>> <...>
>>
>>>>> +RTE_TRACE_POINT(
>>>>> +	rte_eth_trace_rx_hairpin_queue_setup,
>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
>>>>> +		uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf,
>>>>> +		int ret),
>>>>> +	uint32_t peer_count = conf->peer_count;
>>>>> +	uint32_t tx_explicit = conf->tx_explicit;
>>>>> +	uint32_t manual_bind = conf->manual_bind;
>>>>> +	uint32_t use_locked_device_memory = conf-
>>>>> use_locked_device_memory;
>>>>> +	uint32_t use_rte_memory = conf->use_rte_memory;
>>>>> +	uint32_t force_memory = conf->force_memory;
>>>>> +
>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>> +	rte_trace_point_emit_u16(rx_queue_id);
>>>>> +	rte_trace_point_emit_u16(nb_rx_desc);
>>>>> +	rte_trace_point_emit_ptr(conf);
>>>>> +	rte_trace_point_emit_u32(peer_count);
>>>>> +	rte_trace_point_emit_u32(tx_explicit);
>>>>> +	rte_trace_point_emit_u32(manual_bind);
>>>>> +	rte_trace_point_emit_u32(use_locked_device_memory);
>>>>> +	rte_trace_point_emit_u32(use_rte_memory);
>>>>> +	rte_trace_point_emit_u32(force_memory);
>>>>> +	rte_trace_point_emit_int(ret);
>>>>
>>>> Do we need temporary variables like 'peer_count', why not directly use
>> them:
>>> Temporary variables are needed where the actual variable is a bitfield.
>>> For example in struct rte_eth_hairpin_conf:
>>> struct rte_eth_hairpin_conf {
>>> uint32_t peer_count:16;
>>> ....
>>> uint32_t tx_explicit:1
>>> ....
>>> };
>>>
>>> Otherwise there is a compilation error.
>>> ../lib/ethdev/rte_ethdev_trace.h:253:9: note: in expansion of macro
>> ‘rte_trace_point_emit_u16’
>>>   253 |         rte_trace_point_emit_u16(conf->peer_count);
>>>       |         ^~~~~~~~~~~~~~~~~~~~~~~~
>>> ../lib/eal/include/rte_trace_point.h:369:38: error: ‘sizeof’ applied to a bit-
>> field
>>>   369 |         mem = RTE_PTR_ADD(mem, sizeof(in)); \
>>>
>>
>> Got it, that is because of bitfields. But as I look the 'rte_trace_point_emit_u16'
>> macro, it is like:
>>
>> ```
>> #define rte_trace_point_emit_u16(in) __rte_trace_point_emit(in, uint16_t)
>>
>> #define __rte_trace_point_emit(in, type) \ do { \
>>        memcpy(mem, &(in), sizeof(in)); \
>>        mem = RTE_PTR_ADD(mem, sizeof(in)); \ } while (0) ```
>>
>> Here, in '__rte_trace_point_emit' macro 'type' is not used at all, if so what is
>> the point of passing type?
>>
>> If 'sizeof(type)' used, instead of 'sizeof(in)', it would be possible to use
>> bitfields directly, what do you think?
> 
> After using 'sizeof(type)', the following compile time error is observed. 
> In file included from ../lib/ethdev/rte_ethdev_trace_fp_burst.h:18,
>                  from ../lib/ethdev/rte_ethdev.h:175,
>                  from ../lib/ethdev/rte_tm.c:8:
> ../lib/ethdev/rte_ethdev_trace.h: In function ‘rte_eth_trace_rx_hairpin_queue_setup’:
> ../lib/eal/include/rte_trace_point.h:378:21: error: cannot take address of bit-field ‘peer_count’
>   378 |         memcpy(mem, &(in), sizeof(type)); \
>           |                                     ^

Right, you can't memcpy to bitfiled, so temporary variables are
required, OK.

Still, is it OK to ignore 'type' in the '__rte_trace_point_emit()'?
If variable (in) is uint8_t, it won't matter if
'rte_trace_point_emit_u8', 'rte_trace_point_emit_u16' or
'rte_trace_point_emit_u32' used, they will all give same result, right?


>>
>>
>>
>> Also, as for the "struct rte_eth_hairpin_conf" sample, some of the bitfields are
>> 1 bit length, does 'uint32_t' really needed for them?
> 
> uint8_t should suffice.

ack

>>
>> <...>
>>
>>>>> +RTE_TRACE_POINT_FP(
>>>>> +	rte_eth_trace_find_next,
>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>> +)
>>>>> +
>>>>
>>>> Why 'rte_eth_trace_find_next' added as fast path?
>>>> Can you please add comment for all why it is added as fast path, this
>>>> help to evaluate/review this later.
>>>
>>> There were many functions for which I was not sure about whether they
>> should be slow path or fast path. I made the following assumption:
>>>
>>> For slow path I have chosen the function which do some setup, configure or
>> write some configuration. For an example
>> rte_eth_trace_tx_hairpin_queue_setup,
>> rte_eth_trace_tx_buffer_set_err_callback,
>> rte_eth_trace_promiscuous_enable are slow path functions.
>>>
>>> The functions which read data are made as fastpath functions. Also for
>> functions for which I was not sure I made it as fastpath.
>>>
>>> For an example rte_ethdev_trace_owner_get,
>> rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are made
>> as fastpath.
>>>
>>> But there are few exceptions. Function like *_get_capability are made as
>> slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>>
>>> The slowpath and fastpath functions are in separate files.
>> rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>>
>>> Please let me  know if any function needs to be swapped. I will make that
>> change.
>>>
>>
>> Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>> are for control/helper functions like: 'rte_ethdev_trace_count_avail',
>> 'rte_ethdev_trace_get_port_by_name', 'rte_eth_trace_promiscuous_get' ...
>>
>> I thought you did based on some analysis, that is why I asked to add that
>> reasoning as code comment.
>>
>>
>> I think we can generalize as:
>>
>> 1) Anything called by ethdev static inline functions are datapath, and must be
>> 'RTE_TRACE_POINT_FP', like 'rte_eth_trace_call_[rt]x_callbacks',
>> 'rte_ethdev_trace_[rt]x_burst',
> 
> In this category the following functions come:
> rte_eth_rx_burst
> rte_eth_tx_burst
> rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
> rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
> rte_eth_tx_buffer_count_callback (registered as error callback, called from rte_eth_tx_buffer_flush)
> rte_eth_tx_buffer_drop_callback (registered as error callback)

ack

>>
>> 2) Anything that is called in endless loop in application/sample that has
>> potential impact although it may not really be datapath
> 
> Apart from functions in category [1], I have observed the following functions in ethdev library, called in some while loop in app/examples.
> rte_eth_stats_get (called in while loop in examples/qos_sched and examples/distributor)
> rte_eth_macaddr_get (called in while loop in examples/bond and examples/ethtool)
> rte_eth_link_get (called in for loop in examples/ip_pipeline)
> rte_eth_dev_get_mtu (called in for loop in examples/ip_pipeline)
> rte_eth_link_speed_to_str (called in for loop in examples/ip_pipeline)
> rte_eth_dev_rx_intr_enable ( called in loop in examples/l3fwd-power)
> rte_eth_dev_rx_intr_disable ( called in loop in examples/l3fwd-power)
> rte_eth_timesync_read_rx_timestamp (called in loop in examples/ptpclient)
> rte_eth_timesync_read_tx_timestamp (called in loop in examples/ptpclient)
> rte_eth_timesync_adjust_time (called in loop in examples/ptpclient)
> rte_eth_timesync_read_time (called in loop in examples/ptpclient)
> rte_flow_classifier_query (called in examples/flow_classify)
> rte_mtr_create (in app/test-flow-perf loop)
> rte_mtr_destroy (in app/test-flow-perf loop)
> rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
> rte_flow_create (in app/test-flow-perf)
> rte_flow_destroy (in app/test-flow-perf)
> 

Ack, and can you please add the note within the parenthesis as a
comment, whoever visits these later knows why there trace points added
as fast path trace point?

> Apart from the above can all other functions be moved to slowpath tracepoints?

I think yes, we can start with this.
At least this gives us a logic to follow.

And does trace point and fast path trace points needs to be in separate
header files?
Can you please put first group into a header an expose it to the user,
rest (as a mixture) to another header and keep it internal to ethdev?

Thanks.

> I think it was discussed in tech board meeting on 2022-11-30 that the functions for which the slowpath or fastpath
> distinction is not clear, they should be fastpath tracepoints.
> 
>>
>> 3) Rest are regular trace points, RTE_TRACE_POINT.
>>
>>
>> Item (2) requires some analysis and whenever you found one of them please
>> comment the reasoning in the code where trace point is defined.
>
  
Ankur Dwivedi Feb. 2, 2023, 10:20 a.m. UTC | #10
>-----Original Message-----
>From: Ferruh Yigit <ferruh.yigit@amd.com>
>Sent: Thursday, February 2, 2023 2:27 PM
>To: Ankur Dwivedi <adwivedi@marvell.com>; dev@dpdk.org; David
>Marchand <david.marchand@redhat.com>; Jerin Jacob Kollanukkaran
><jerinj@marvell.com>; Andrew Rybchenko
><andrew.rybchenko@oktetlabs.ru>
>Cc: thomas@monjalon.net; mdr@ashroe.eu; orika@nvidia.com;
>chas3@att.com; humin29@huawei.com; linville@tuxdriver.com;
>ciara.loftus@intel.com; qi.z.zhang@intel.com; mw@semihalf.com;
>mk@semihalf.com; shaibran@amazon.com; evgenys@amazon.com;
>igorch@amazon.com; chandu@amd.com; Igor Russkikh
><irusskikh@marvell.com>; shepard.siegel@atomicrules.com;
>ed.czeck@atomicrules.com; john.miller@atomicrules.com;
>ajit.khaparde@broadcom.com; somnath.kotur@broadcom.com; Maciej
>Czekaj [C] <mczekaj@marvell.com>; Shijith Thotton <sthotton@marvell.com>;
>Srisivasubramanian Srinivasan <srinivasan@marvell.com>; Harman Kalra
><hkalra@marvell.com>; rahul.lakkireddy@chelsio.com; johndale@cisco.com;
>hyonkim@cisco.com; liudongdong3@huawei.com;
>yisen.zhuang@huawei.com; xuanziyang2@huawei.com;
>cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
>simei.su@intel.com; wenjun1.wu@intel.com; qiming.yang@intel.com;
>Yuying.Zhang@intel.com; beilei.xing@intel.com; xiao.w.wang@intel.com;
>jingjing.wu@intel.com; junfeng.guo@intel.com; rosen.xu@intel.com; Nithin
>Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar Kokkilagadda
><kirankumark@marvell.com>; Sunil Kumar Kori <skori@marvell.com>; Satha
>Koteswara Rao Kottidi <skoteshwar@marvell.com>; Liron Himi
><lironh@marvell.com>; zr@semihalf.com; Radha Chintakuntla
><radhac@marvell.com>; Veerasenareddy Burru <vburru@marvell.com>;
>Sathesh B Edara <sedara@marvell.com>; matan@nvidia.com;
>viacheslavo@nvidia.com; longli@microsoft.com; spinler@cesnet.cz;
>chaoyong.he@corigine.com; niklas.soderlund@corigine.com;
>hemant.agrawal@nxp.com; sachin.saxena@oss.nxp.com; g.singh@nxp.com;
>apeksha.gupta@nxp.com; sachin.saxena@nxp.com; aboyer@pensando.io;
>Rasesh Mody <rmody@marvell.com>; Shahed Shaikh
><shshaikh@marvell.com>; Devendra Singh Rawat
><dsinghrawat@marvell.com>; jiawenwu@trustnetic.com;
>jianwang@trustnetic.com; jbehrens@vmware.com;
>maxime.coquelin@redhat.com; chenbo.xia@intel.com;
>steven.webster@windriver.com; matt.peters@windriver.com;
>bruce.richardson@intel.com; mtetsuyah@gmail.com; grive@u256.net;
>jasvinder.singh@intel.com; cristian.dumitrescu@intel.com;
>jgrajcia@cisco.com; mb@smartsharesystems.com
>Subject: Re: [EXT] Re: [PATCH v6 2/6] ethdev: add trace points for ethdev (part
>one)
>
>On 2/1/2023 3:42 PM, Ankur Dwivedi wrote:
>>
>>
>>> -----Original Message-----
>>> From: Ferruh Yigit <ferruh.yigit@amd.com>
>>> Sent: Wednesday, February 1, 2023 12:08 AM
>>> To: Ankur Dwivedi <adwivedi@marvell.com>; dev@dpdk.org; David
>>> Marchand <david.marchand@redhat.com>; Jerin Jacob Kollanukkaran
>>> <jerinj@marvell.com>; Andrew Rybchenko
>>> <andrew.rybchenko@oktetlabs.ru>
>>> Cc: thomas@monjalon.net; mdr@ashroe.eu; orika@nvidia.com;
>>> chas3@att.com; humin29@huawei.com; linville@tuxdriver.com;
>>> ciara.loftus@intel.com; qi.z.zhang@intel.com; mw@semihalf.com;
>>> mk@semihalf.com; shaibran@amazon.com; evgenys@amazon.com;
>>> igorch@amazon.com; chandu@amd.com; Igor Russkikh
>>> <irusskikh@marvell.com>; shepard.siegel@atomicrules.com;
>>> ed.czeck@atomicrules.com; john.miller@atomicrules.com;
>>> ajit.khaparde@broadcom.com; somnath.kotur@broadcom.com; Jerin
>Jacob
>>> Kollanukkaran <jerinj@marvell.com>; Maciej Czekaj [C]
>>> <mczekaj@marvell.com>; Shijith Thotton <sthotton@marvell.com>;
>>> Srisivasubramanian Srinivasan <srinivasan@marvell.com>; Harman Kalra
>>> <hkalra@marvell.com>; rahul.lakkireddy@chelsio.com;
>>> johndale@cisco.com; hyonkim@cisco.com; liudongdong3@huawei.com;
>>> yisen.zhuang@huawei.com; xuanziyang2@huawei.com;
>>> cloud.wangxiaoyun@huawei.com; zhouguoyang@huawei.com;
>>> simei.su@intel.com; wenjun1.wu@intel.com; qiming.yang@intel.com;
>>> Yuying.Zhang@intel.com; beilei.xing@intel.com; xiao.w.wang@intel.com;
>>> jingjing.wu@intel.com; junfeng.guo@intel.com; rosen.xu@intel.com;
>>> Nithin Kumar Dabilpuram <ndabilpuram@marvell.com>; Kiran Kumar
>>> Kokkilagadda <kirankumark@marvell.com>; Sunil Kumar Kori
>>> <skori@marvell.com>; Satha Koteswara Rao Kottidi
>>> <skoteshwar@marvell.com>; Liron Himi <lironh@marvell.com>;
>>> zr@semihalf.com; Radha Chintakuntla <radhac@marvell.com>;
>>> Veerasenareddy Burru <vburru@marvell.com>; Sathesh B Edara
>>> <sedara@marvell.com>; matan@nvidia.com; viacheslavo@nvidia.com;
>>> longli@microsoft.com; spinler@cesnet.cz; chaoyong.he@corigine.com;
>>> niklas.soderlund@corigine.com; hemant.agrawal@nxp.com;
>>> sachin.saxena@oss.nxp.com; g.singh@nxp.com; apeksha.gupta@nxp.com;
>>> sachin.saxena@nxp.com; aboyer@pensando.io; Rasesh Mody
>>> <rmody@marvell.com>; Shahed Shaikh <shshaikh@marvell.com>;
>Devendra
>>> Singh Rawat <dsinghrawat@marvell.com>;
>andrew.rybchenko@oktetlabs.ru;
>>> jiawenwu@trustnetic.com; jianwang@trustnetic.com;
>>> jbehrens@vmware.com; maxime.coquelin@redhat.com;
>>> chenbo.xia@intel.com; steven.webster@windriver.com;
>>> matt.peters@windriver.com; bruce.richardson@intel.com;
>>> mtetsuyah@gmail.com; grive@u256.net; jasvinder.singh@intel.com;
>>> cristian.dumitrescu@intel.com; jgrajcia@cisco.com;
>>> mb@smartsharesystems.com
>>> Subject: Re: [EXT] Re: [PATCH v6 2/6] ethdev: add trace points for
>>> ethdev (part
>>> one)
>>>
>>> On 1/30/2023 4:01 PM, Ankur Dwivedi wrote:
>>>
>>> <...>
>>>
>>>>>> diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build index
>>>>>> 39250b5da1..f5c0865023 100644
>>>>>> --- a/lib/ethdev/meson.build
>>>>>> +++ b/lib/ethdev/meson.build
>>>>>> @@ -24,6 +24,7 @@ headers = files(
>>>>>>          'rte_ethdev.h',
>>>>>>          'rte_ethdev_trace.h',
>>>>>>          'rte_ethdev_trace_fp.h',
>>>>>> +        'rte_ethdev_trace_fp_burst.h',
>>>>>
>>>>> Why these trace headers are public?
>>>>> Aren't trace points only used by the APIs, so I expect them to be
>>>>> internal, so applications shouldn't need them. Why they are expsed
>>>>> to
>>> user.
>>>> 'rte_ethdev_trace.h' can be made as internal. Not sure about
>>> 'rte_ethdev_trace_fp.h' and 'rte_ethdev_trace_fp_burst.h' as the
>>> tracepoints in fast path may be called from public inline functions.
>>>
>>> Trace calls used by inline functions needs to be public, in this case
>>> at least 'rte_ethdev_trace_fp_burst.h' needs to be public.
>>>
>>> Can you please at least move all trace points that are called by
>>> inline functions to the same file, 'rte_ethdev_trace_fp_burst.h', to
>>> reduce number of the header files to make public? Feel free to rename
>header if required.
>>>
>>> Meanwhile not sure about adding new header as dependency to end user.
>>> @Jerin, @Andrew, what do you think
>>> 1) to move these trace points to 'rte_ethdev_core.h'
>>> OR
>>> 2) disable trace calls in inline functions on compile time, possibly
>>> with existing 'RTE_ETHDEV_DEBUG_*' macro
>>>
>>> <...>
>>>
>>>>>> -	if (port_id >= RTE_MAX_ETHPORTS)
>>>>>> +	if (port_id >= RTE_MAX_ETHPORTS) {
>>>>>> +		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);
>>>>>
>>>>> Is there a specific reason to trace all paths, why not just keep the last
>one?
>>>> This can be kept as the function returns with RTE_MAX_ETHPORTS here.
>>>
>>> 'RTE_MAX_ETHPORTS' more like error path, that is returned when no
>>> more valid port left, since most of the trace doesn't record error
>>> return values, I thought this can be dropped as well unless there is an
>explicit need for it.
>> Ok.
>>>
>>> <...>
>>>
>>>>>> +RTE_TRACE_POINT(
>>>>>> +	rte_eth_trace_rx_hairpin_queue_setup,
>>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t
>rx_queue_id,
>>>>>> +		uint16_t nb_rx_desc, const struct
>rte_eth_hairpin_conf *conf,
>>>>>> +		int ret),
>>>>>> +	uint32_t peer_count = conf->peer_count;
>>>>>> +	uint32_t tx_explicit = conf->tx_explicit;
>>>>>> +	uint32_t manual_bind = conf->manual_bind;
>>>>>> +	uint32_t use_locked_device_memory = conf-
>>>>>> use_locked_device_memory;
>>>>>> +	uint32_t use_rte_memory = conf->use_rte_memory;
>>>>>> +	uint32_t force_memory = conf->force_memory;
>>>>>> +
>>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>>> +	rte_trace_point_emit_u16(rx_queue_id);
>>>>>> +	rte_trace_point_emit_u16(nb_rx_desc);
>>>>>> +	rte_trace_point_emit_ptr(conf);
>>>>>> +	rte_trace_point_emit_u32(peer_count);
>>>>>> +	rte_trace_point_emit_u32(tx_explicit);
>>>>>> +	rte_trace_point_emit_u32(manual_bind);
>>>>>> +	rte_trace_point_emit_u32(use_locked_device_memory);
>>>>>> +	rte_trace_point_emit_u32(use_rte_memory);
>>>>>> +	rte_trace_point_emit_u32(force_memory);
>>>>>> +	rte_trace_point_emit_int(ret);
>>>>>
>>>>> Do we need temporary variables like 'peer_count', why not directly
>>>>> use
>>> them:
>>>> Temporary variables are needed where the actual variable is a bitfield.
>>>> For example in struct rte_eth_hairpin_conf:
>>>> struct rte_eth_hairpin_conf {
>>>> uint32_t peer_count:16;
>>>> ....
>>>> uint32_t tx_explicit:1
>>>> ....
>>>> };
>>>>
>>>> Otherwise there is a compilation error.
>>>> ../lib/ethdev/rte_ethdev_trace.h:253:9: note: in expansion of macro
>>> ‘rte_trace_point_emit_u16’
>>>>   253 |         rte_trace_point_emit_u16(conf->peer_count);
>>>>       |         ^~~~~~~~~~~~~~~~~~~~~~~~
>>>> ../lib/eal/include/rte_trace_point.h:369:38: error: ‘sizeof’ applied
>>>> to a bit-
>>> field
>>>>   369 |         mem = RTE_PTR_ADD(mem, sizeof(in)); \
>>>>
>>>
>>> Got it, that is because of bitfields. But as I look the
>'rte_trace_point_emit_u16'
>>> macro, it is like:
>>>
>>> ```
>>> #define rte_trace_point_emit_u16(in) __rte_trace_point_emit(in,
>>> uint16_t)
>>>
>>> #define __rte_trace_point_emit(in, type) \ do { \
>>>        memcpy(mem, &(in), sizeof(in)); \
>>>        mem = RTE_PTR_ADD(mem, sizeof(in)); \ } while (0) ```
>>>
>>> Here, in '__rte_trace_point_emit' macro 'type' is not used at all, if
>>> so what is the point of passing type?
>>>
>>> If 'sizeof(type)' used, instead of 'sizeof(in)', it would be possible
>>> to use bitfields directly, what do you think?
>>
>> After using 'sizeof(type)', the following compile time error is observed.
>> In file included from ../lib/ethdev/rte_ethdev_trace_fp_burst.h:18,
>>                  from ../lib/ethdev/rte_ethdev.h:175,
>>                  from ../lib/ethdev/rte_tm.c:8:
>> ../lib/ethdev/rte_ethdev_trace.h: In function
>‘rte_eth_trace_rx_hairpin_queue_setup’:
>> ../lib/eal/include/rte_trace_point.h:378:21: error: cannot take address of
>bit-field ‘peer_count’
>>   378 |         memcpy(mem, &(in), sizeof(type)); \
>>           |                                     ^
>
>Right, you can't memcpy to bitfiled, so temporary variables are required, OK.

Yes.
>
>Still, is it OK to ignore 'type' in the '__rte_trace_point_emit()'?
>If variable (in) is uint8_t, it won't matter if 'rte_trace_point_emit_u8',
>'rte_trace_point_emit_u16' or 'rte_trace_point_emit_u32' used, they will all
>give same result, right?
Calling (in) which is uint8_t is not allowed in 'rte_trace_point_emit_u16' because there is other __rte_trace_point_emit
in lib/eal/include/rte_trace_point_register.h called during trace point register which prevents this. 

There would be compilation error because of this check in '__rte_trace_point_emit()'
RTE_BUILD_BUG_ON(sizeof(type) != sizeof(typeof(in))); 

So type can be ignored in '__rte_trace_point_emit()' in lib/eal/include/rte_trace_point.h.
>
>
>>>
>>>
>>>
>>> Also, as for the "struct rte_eth_hairpin_conf" sample, some of the
>>> bitfields are
>>> 1 bit length, does 'uint32_t' really needed for them?
>>
>> uint8_t should suffice.
>
>ack
>
>>>
>>> <...>
>>>
>>>>>> +RTE_TRACE_POINT_FP(
>>>>>> +	rte_eth_trace_find_next,
>>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>>> +)
>>>>>> +
>>>>>
>>>>> Why 'rte_eth_trace_find_next' added as fast path?
>>>>> Can you please add comment for all why it is added as fast path,
>>>>> this help to evaluate/review this later.
>>>>
>>>> There were many functions for which I was not sure about whether
>>>> they
>>> should be slow path or fast path. I made the following assumption:
>>>>
>>>> For slow path I have chosen the function which do some setup,
>>>> configure or
>>> write some configuration. For an example
>>> rte_eth_trace_tx_hairpin_queue_setup,
>>> rte_eth_trace_tx_buffer_set_err_callback,
>>> rte_eth_trace_promiscuous_enable are slow path functions.
>>>>
>>>> The functions which read data are made as fastpath functions. Also
>>>> for
>>> functions for which I was not sure I made it as fastpath.
>>>>
>>>> For an example rte_ethdev_trace_owner_get,
>>> rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are
>>> made as fastpath.
>>>>
>>>> But there are few exceptions. Function like *_get_capability are
>>>> made as
>>> slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>>>
>>>> The slowpath and fastpath functions are in separate files.
>>> rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>>>
>>>> Please let me  know if any function needs to be swapped. I will make
>>>> that
>>> change.
>>>>
>>>
>>> Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>>> are for control/helper functions like:
>>> 'rte_ethdev_trace_count_avail', 'rte_ethdev_trace_get_port_by_name',
>'rte_eth_trace_promiscuous_get' ...
>>>
>>> I thought you did based on some analysis, that is why I asked to add
>>> that reasoning as code comment.
>>>
>>>
>>> I think we can generalize as:
>>>
>>> 1) Anything called by ethdev static inline functions are datapath,
>>> and must be 'RTE_TRACE_POINT_FP', like
>>> 'rte_eth_trace_call_[rt]x_callbacks',
>>> 'rte_ethdev_trace_[rt]x_burst',
>>
>> In this category the following functions come:
>> rte_eth_rx_burst
>> rte_eth_tx_burst
>> rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
>> rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
>> rte_eth_tx_buffer_count_callback (registered as error callback, called
>> from rte_eth_tx_buffer_flush) rte_eth_tx_buffer_drop_callback
>> (registered as error callback)
>
>ack
>
>>>
>>> 2) Anything that is called in endless loop in application/sample that
>>> has potential impact although it may not really be datapath
>>
>> Apart from functions in category [1], I have observed the following functions
>in ethdev library, called in some while loop in app/examples.
>> rte_eth_stats_get (called in while loop in examples/qos_sched and
>> examples/distributor) rte_eth_macaddr_get (called in while loop in
>> examples/bond and examples/ethtool) rte_eth_link_get (called in for
>> loop in examples/ip_pipeline) rte_eth_dev_get_mtu (called in for loop
>> in examples/ip_pipeline) rte_eth_link_speed_to_str (called in for loop
>> in examples/ip_pipeline) rte_eth_dev_rx_intr_enable ( called in loop
>> in examples/l3fwd-power) rte_eth_dev_rx_intr_disable ( called in loop
>> in examples/l3fwd-power) rte_eth_timesync_read_rx_timestamp (called in
>> loop in examples/ptpclient) rte_eth_timesync_read_tx_timestamp (called
>> in loop in examples/ptpclient) rte_eth_timesync_adjust_time (called in
>> loop in examples/ptpclient) rte_eth_timesync_read_time (called in loop
>> in examples/ptpclient) rte_flow_classifier_query (called in
>> examples/flow_classify) rte_mtr_create (in app/test-flow-perf loop)
>> rte_mtr_destroy (in app/test-flow-perf loop)
>> rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
>> rte_flow_create (in app/test-flow-perf) rte_flow_destroy (in
>> app/test-flow-perf)
>>
>
>Ack, and can you please add the note within the parenthesis as a comment,
>whoever visits these later knows why there trace points added as fast path
>trace point?
>
>> Apart from the above can all other functions be moved to slowpath
>tracepoints?
>
>I think yes, we can start with this.
>At least this gives us a logic to follow.
>
>And does trace point and fast path trace points needs to be in separate
>header files?

I do not think separate header files is a requirement, but it is easy  to segregate 
slowpath/fastpath if they are in separate files. What do you think ?
>Can you please put first group into a header an expose it to the user, rest (as a
>mixture) to another header and keep it internal to ethdev?
Ok.
>
>Thanks.
>
>> I think it was discussed in tech board meeting on 2022-11-30 that the
>> functions for which the slowpath or fastpath distinction is not clear, they
>should be fastpath tracepoints.
>>
>>>
>>> 3) Rest are regular trace points, RTE_TRACE_POINT.
>>>
>>>
>>> Item (2) requires some analysis and whenever you found one of them
>>> please comment the reasoning in the code where trace point is defined.
>>
  
Ferruh Yigit Feb. 2, 2023, 12:52 p.m. UTC | #11
On 2/2/2023 10:20 AM, Ankur Dwivedi wrote:

>>>>>>> +RTE_TRACE_POINT_FP(
>>>>>>> +	rte_eth_trace_find_next,
>>>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>>>> +)
>>>>>>> +
>>>>>>
>>>>>> Why 'rte_eth_trace_find_next' added as fast path?
>>>>>> Can you please add comment for all why it is added as fast path,
>>>>>> this help to evaluate/review this later.
>>>>>
>>>>> There were many functions for which I was not sure about whether
>>>>> they
>>>> should be slow path or fast path. I made the following assumption:
>>>>>
>>>>> For slow path I have chosen the function which do some setup,
>>>>> configure or
>>>> write some configuration. For an example
>>>> rte_eth_trace_tx_hairpin_queue_setup,
>>>> rte_eth_trace_tx_buffer_set_err_callback,
>>>> rte_eth_trace_promiscuous_enable are slow path functions.
>>>>>
>>>>> The functions which read data are made as fastpath functions. Also
>>>>> for
>>>> functions for which I was not sure I made it as fastpath.
>>>>>
>>>>> For an example rte_ethdev_trace_owner_get,
>>>> rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are
>>>> made as fastpath.
>>>>>
>>>>> But there are few exceptions. Function like *_get_capability are
>>>>> made as
>>>> slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>>>>
>>>>> The slowpath and fastpath functions are in separate files.
>>>> rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>>>>
>>>>> Please let me  know if any function needs to be swapped. I will make
>>>>> that
>>>> change.
>>>>>
>>>>
>>>> Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>>>> are for control/helper functions like:
>>>> 'rte_ethdev_trace_count_avail', 'rte_ethdev_trace_get_port_by_name',
>> 'rte_eth_trace_promiscuous_get' ...
>>>>
>>>> I thought you did based on some analysis, that is why I asked to add
>>>> that reasoning as code comment.
>>>>
>>>>
>>>> I think we can generalize as:
>>>>
>>>> 1) Anything called by ethdev static inline functions are datapath,
>>>> and must be 'RTE_TRACE_POINT_FP', like
>>>> 'rte_eth_trace_call_[rt]x_callbacks',
>>>> 'rte_ethdev_trace_[rt]x_burst',
>>>
>>> In this category the following functions come:
>>> rte_eth_rx_burst
>>> rte_eth_tx_burst
>>> rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
>>> rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
>>> rte_eth_tx_buffer_count_callback (registered as error callback, called
>>> from rte_eth_tx_buffer_flush) rte_eth_tx_buffer_drop_callback
>>> (registered as error callback)
>>
>> ack
>>
>>>>
>>>> 2) Anything that is called in endless loop in application/sample that
>>>> has potential impact although it may not really be datapath
>>>
>>> Apart from functions in category [1], I have observed the following functions
>> in ethdev library, called in some while loop in app/examples.
>>> rte_eth_stats_get (called in while loop in examples/qos_sched and
>>> examples/distributor) rte_eth_macaddr_get (called in while loop in
>>> examples/bond and examples/ethtool) rte_eth_link_get (called in for
>>> loop in examples/ip_pipeline) rte_eth_dev_get_mtu (called in for loop
>>> in examples/ip_pipeline) rte_eth_link_speed_to_str (called in for loop
>>> in examples/ip_pipeline) rte_eth_dev_rx_intr_enable ( called in loop
>>> in examples/l3fwd-power) rte_eth_dev_rx_intr_disable ( called in loop
>>> in examples/l3fwd-power) rte_eth_timesync_read_rx_timestamp (called in
>>> loop in examples/ptpclient) rte_eth_timesync_read_tx_timestamp (called
>>> in loop in examples/ptpclient) rte_eth_timesync_adjust_time (called in
>>> loop in examples/ptpclient) rte_eth_timesync_read_time (called in loop
>>> in examples/ptpclient) rte_flow_classifier_query (called in
>>> examples/flow_classify) rte_mtr_create (in app/test-flow-perf loop)
>>> rte_mtr_destroy (in app/test-flow-perf loop)
>>> rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
>>> rte_flow_create (in app/test-flow-perf) rte_flow_destroy (in
>>> app/test-flow-perf)
>>>
>>
>> Ack, and can you please add the note within the parenthesis as a comment,
>> whoever visits these later knows why there trace points added as fast path
>> trace point?
>>
>>> Apart from the above can all other functions be moved to slowpath
>> tracepoints?
>>
>> I think yes, we can start with this.
>> At least this gives us a logic to follow.
>>
>> And does trace point and fast path trace points needs to be in separate
>> header files?
> 
> I do not think separate header files is a requirement, but it is easy  to segregate 
> slowpath/fastpath if they are in separate files. What do you think ?

I think it is not good to expose trace points to user more than we have
to, that is why I think better to segregate as public/internal.

It is possible to group and comment them as slowpath/fastpath within the
internal header.
  
Ankur Dwivedi Feb. 2, 2023, 1:40 p.m. UTC | #12
>
>On 2/2/2023 10:20 AM, Ankur Dwivedi wrote:
>
>>>>>>>> +RTE_TRACE_POINT_FP(
>>>>>>>> +	rte_eth_trace_find_next,
>>>>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>>>>> +)
>>>>>>>> +
>>>>>>>
>>>>>>> Why 'rte_eth_trace_find_next' added as fast path?
>>>>>>> Can you please add comment for all why it is added as fast path,
>>>>>>> this help to evaluate/review this later.
>>>>>>
>>>>>> There were many functions for which I was not sure about whether
>>>>>> they
>>>>> should be slow path or fast path. I made the following assumption:
>>>>>>
>>>>>> For slow path I have chosen the function which do some setup,
>>>>>> configure or
>>>>> write some configuration. For an example
>>>>> rte_eth_trace_tx_hairpin_queue_setup,
>>>>> rte_eth_trace_tx_buffer_set_err_callback,
>>>>> rte_eth_trace_promiscuous_enable are slow path functions.
>>>>>>
>>>>>> The functions which read data are made as fastpath functions. Also
>>>>>> for
>>>>> functions for which I was not sure I made it as fastpath.
>>>>>>
>>>>>> For an example rte_ethdev_trace_owner_get,
>>>>> rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are
>>>>> made as fastpath.
>>>>>>
>>>>>> But there are few exceptions. Function like *_get_capability are
>>>>>> made as
>>>>> slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>>>>>
>>>>>> The slowpath and fastpath functions are in separate files.
>>>>> rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>>>>>
>>>>>> Please let me  know if any function needs to be swapped. I will
>>>>>> make that
>>>>> change.
>>>>>>
>>>>>
>>>>> Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>>>>> are for control/helper functions like:
>>>>> 'rte_ethdev_trace_count_avail',
>>>>> 'rte_ethdev_trace_get_port_by_name',
>>> 'rte_eth_trace_promiscuous_get' ...
>>>>>
>>>>> I thought you did based on some analysis, that is why I asked to
>>>>> add that reasoning as code comment.
>>>>>
>>>>>
>>>>> I think we can generalize as:
>>>>>
>>>>> 1) Anything called by ethdev static inline functions are datapath,
>>>>> and must be 'RTE_TRACE_POINT_FP', like
>>>>> 'rte_eth_trace_call_[rt]x_callbacks',
>>>>> 'rte_ethdev_trace_[rt]x_burst',
>>>>
>>>> In this category the following functions come:
>>>> rte_eth_rx_burst
>>>> rte_eth_tx_burst
>>>> rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
>>>> rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
>>>> rte_eth_tx_buffer_count_callback (registered as error callback,
>>>> called from rte_eth_tx_buffer_flush) rte_eth_tx_buffer_drop_callback
>>>> (registered as error callback)
>>>
>>> ack
>>>
>>>>>
>>>>> 2) Anything that is called in endless loop in application/sample
>>>>> that has potential impact although it may not really be datapath
>>>>
>>>> Apart from functions in category [1], I have observed the following
>>>> functions
>>> in ethdev library, called in some while loop in app/examples.
>>>> rte_eth_stats_get (called in while loop in examples/qos_sched and
>>>> examples/distributor) rte_eth_macaddr_get (called in while loop in
>>>> examples/bond and examples/ethtool) rte_eth_link_get (called in for
>>>> loop in examples/ip_pipeline) rte_eth_dev_get_mtu (called in for
>>>> loop in examples/ip_pipeline) rte_eth_link_speed_to_str (called in
>>>> for loop in examples/ip_pipeline) rte_eth_dev_rx_intr_enable (
>>>> called in loop in examples/l3fwd-power) rte_eth_dev_rx_intr_disable
>>>> ( called in loop in examples/l3fwd-power)
>>>> rte_eth_timesync_read_rx_timestamp (called in loop in
>>>> examples/ptpclient) rte_eth_timesync_read_tx_timestamp (called in
>>>> loop in examples/ptpclient) rte_eth_timesync_adjust_time (called in
>>>> loop in examples/ptpclient) rte_eth_timesync_read_time (called in
>>>> loop in examples/ptpclient) rte_flow_classifier_query (called in
>>>> examples/flow_classify) rte_mtr_create (in app/test-flow-perf loop)
>>>> rte_mtr_destroy (in app/test-flow-perf loop)
>>>> rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
>>>> rte_flow_create (in app/test-flow-perf) rte_flow_destroy (in
>>>> app/test-flow-perf)
>>>>
>>>
>>> Ack, and can you please add the note within the parenthesis as a
>>> comment, whoever visits these later knows why there trace points
>>> added as fast path trace point?
>>>
>>>> Apart from the above can all other functions be moved to slowpath
>>> tracepoints?
>>>
>>> I think yes, we can start with this.
>>> At least this gives us a logic to follow.
>>>
>>> And does trace point and fast path trace points needs to be in
>>> separate header files?
>>
>> I do not think separate header files is a requirement, but it is easy
>> to segregate slowpath/fastpath if they are in separate files. What do you
>think ?
>
>I think it is not good to expose trace points to user more than we have to, that
>is why I think better to segregate as public/internal.

In my earlier comment I was thinking of something like this:
- Functions in category [1] (fastpath tracepoints) can be in public header named rte_ethdev_trace_fp.h(exposed to the user).

- Functions in category [2] (fastpath tracepoints)can be in internal header named ethdev_trace_fp.h
- Functions in category [3] (slowpath tracepoints) can be in internal header ethdev_trace.h
>
>It is possible to group and comment them as slowpath/fastpath within the
>internal header.
  
Ferruh Yigit Feb. 2, 2023, 1:44 p.m. UTC | #13
On 2/2/2023 1:40 PM, Ankur Dwivedi wrote:
>>
>> On 2/2/2023 10:20 AM, Ankur Dwivedi wrote:
>>
>>>>>>>>> +RTE_TRACE_POINT_FP(
>>>>>>>>> +	rte_eth_trace_find_next,
>>>>>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>>>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>>>>>> +)
>>>>>>>>> +
>>>>>>>>
>>>>>>>> Why 'rte_eth_trace_find_next' added as fast path?
>>>>>>>> Can you please add comment for all why it is added as fast path,
>>>>>>>> this help to evaluate/review this later.
>>>>>>>
>>>>>>> There were many functions for which I was not sure about whether
>>>>>>> they
>>>>>> should be slow path or fast path. I made the following assumption:
>>>>>>>
>>>>>>> For slow path I have chosen the function which do some setup,
>>>>>>> configure or
>>>>>> write some configuration. For an example
>>>>>> rte_eth_trace_tx_hairpin_queue_setup,
>>>>>> rte_eth_trace_tx_buffer_set_err_callback,
>>>>>> rte_eth_trace_promiscuous_enable are slow path functions.
>>>>>>>
>>>>>>> The functions which read data are made as fastpath functions. Also
>>>>>>> for
>>>>>> functions for which I was not sure I made it as fastpath.
>>>>>>>
>>>>>>> For an example rte_ethdev_trace_owner_get,
>>>>>> rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get are
>>>>>> made as fastpath.
>>>>>>>
>>>>>>> But there are few exceptions. Function like *_get_capability are
>>>>>>> made as
>>>>>> slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>>>>>>
>>>>>>> The slowpath and fastpath functions are in separate files.
>>>>>> rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>>>>>>
>>>>>>> Please let me  know if any function needs to be swapped. I will
>>>>>>> make that
>>>>>> change.
>>>>>>>
>>>>>>
>>>>>> Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>>>>>> are for control/helper functions like:
>>>>>> 'rte_ethdev_trace_count_avail',
>>>>>> 'rte_ethdev_trace_get_port_by_name',
>>>> 'rte_eth_trace_promiscuous_get' ...
>>>>>>
>>>>>> I thought you did based on some analysis, that is why I asked to
>>>>>> add that reasoning as code comment.
>>>>>>
>>>>>>
>>>>>> I think we can generalize as:
>>>>>>
>>>>>> 1) Anything called by ethdev static inline functions are datapath,
>>>>>> and must be 'RTE_TRACE_POINT_FP', like
>>>>>> 'rte_eth_trace_call_[rt]x_callbacks',
>>>>>> 'rte_ethdev_trace_[rt]x_burst',
>>>>>
>>>>> In this category the following functions come:
>>>>> rte_eth_rx_burst
>>>>> rte_eth_tx_burst
>>>>> rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
>>>>> rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
>>>>> rte_eth_tx_buffer_count_callback (registered as error callback,
>>>>> called from rte_eth_tx_buffer_flush) rte_eth_tx_buffer_drop_callback
>>>>> (registered as error callback)
>>>>
>>>> ack
>>>>
>>>>>>
>>>>>> 2) Anything that is called in endless loop in application/sample
>>>>>> that has potential impact although it may not really be datapath
>>>>>
>>>>> Apart from functions in category [1], I have observed the following
>>>>> functions
>>>> in ethdev library, called in some while loop in app/examples.
>>>>> rte_eth_stats_get (called in while loop in examples/qos_sched and
>>>>> examples/distributor) rte_eth_macaddr_get (called in while loop in
>>>>> examples/bond and examples/ethtool) rte_eth_link_get (called in for
>>>>> loop in examples/ip_pipeline) rte_eth_dev_get_mtu (called in for
>>>>> loop in examples/ip_pipeline) rte_eth_link_speed_to_str (called in
>>>>> for loop in examples/ip_pipeline) rte_eth_dev_rx_intr_enable (
>>>>> called in loop in examples/l3fwd-power) rte_eth_dev_rx_intr_disable
>>>>> ( called in loop in examples/l3fwd-power)
>>>>> rte_eth_timesync_read_rx_timestamp (called in loop in
>>>>> examples/ptpclient) rte_eth_timesync_read_tx_timestamp (called in
>>>>> loop in examples/ptpclient) rte_eth_timesync_adjust_time (called in
>>>>> loop in examples/ptpclient) rte_eth_timesync_read_time (called in
>>>>> loop in examples/ptpclient) rte_flow_classifier_query (called in
>>>>> examples/flow_classify) rte_mtr_create (in app/test-flow-perf loop)
>>>>> rte_mtr_destroy (in app/test-flow-perf loop)
>>>>> rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
>>>>> rte_flow_create (in app/test-flow-perf) rte_flow_destroy (in
>>>>> app/test-flow-perf)
>>>>>
>>>>
>>>> Ack, and can you please add the note within the parenthesis as a
>>>> comment, whoever visits these later knows why there trace points
>>>> added as fast path trace point?
>>>>
>>>>> Apart from the above can all other functions be moved to slowpath
>>>> tracepoints?
>>>>
>>>> I think yes, we can start with this.
>>>> At least this gives us a logic to follow.
>>>>
>>>> And does trace point and fast path trace points needs to be in
>>>> separate header files?
>>>
>>> I do not think separate header files is a requirement, but it is easy
>>> to segregate slowpath/fastpath if they are in separate files. What do you
>> think ?
>>
>> I think it is not good to expose trace points to user more than we have to, that
>> is why I think better to segregate as public/internal.
> 
> In my earlier comment I was thinking of something like this:
> - Functions in category [1] (fastpath tracepoints) can be in public header named rte_ethdev_trace_fp.h(exposed to the user).
> 
> - Functions in category [2] (fastpath tracepoints)can be in internal header named ethdev_trace_fp.h
> - Functions in category [3] (slowpath tracepoints) can be in internal header ethdev_trace.h

Do we need three headers for trace? What is the downside to merge [2]
and [3] but group them withing the same header?
  
Ankur Dwivedi Feb. 2, 2023, 1:53 p.m. UTC | #14
>
>On 2/2/2023 1:40 PM, Ankur Dwivedi wrote:
>>>
>>> On 2/2/2023 10:20 AM, Ankur Dwivedi wrote:
>>>
>>>>>>>>>> +RTE_TRACE_POINT_FP(
>>>>>>>>>> +	rte_eth_trace_find_next,
>>>>>>>>>> +	RTE_TRACE_POINT_ARGS(uint16_t port_id),
>>>>>>>>>> +	rte_trace_point_emit_u16(port_id);
>>>>>>>>>> +)
>>>>>>>>>> +
>>>>>>>>>
>>>>>>>>> Why 'rte_eth_trace_find_next' added as fast path?
>>>>>>>>> Can you please add comment for all why it is added as fast
>>>>>>>>> path, this help to evaluate/review this later.
>>>>>>>>
>>>>>>>> There were many functions for which I was not sure about whether
>>>>>>>> they
>>>>>>> should be slow path or fast path. I made the following assumption:
>>>>>>>>
>>>>>>>> For slow path I have chosen the function which do some setup,
>>>>>>>> configure or
>>>>>>> write some configuration. For an example
>>>>>>> rte_eth_trace_tx_hairpin_queue_setup,
>>>>>>> rte_eth_trace_tx_buffer_set_err_callback,
>>>>>>> rte_eth_trace_promiscuous_enable are slow path functions.
>>>>>>>>
>>>>>>>> The functions which read data are made as fastpath functions.
>>>>>>>> Also for
>>>>>>> functions for which I was not sure I made it as fastpath.
>>>>>>>>
>>>>>>>> For an example rte_ethdev_trace_owner_get,
>>>>>>> rte_eth_trace_hairpin_get_peer_port, rte_eth_trace_macaddr_get
>>>>>>> are made as fastpath.
>>>>>>>>
>>>>>>>> But there are few exceptions. Function like *_get_capability are
>>>>>>>> made as
>>>>>>> slowpath. Also rte_ethdev_trace_info_get is slowpath.
>>>>>>>>
>>>>>>>> The slowpath and fastpath functions are in separate files.
>>>>>>> rte_ethdev_trace.h (slowpath) and rte_ethdev_trace_fp.h (fastpath).
>>>>>>>>
>>>>>>>> Please let me  know if any function needs to be swapped. I will
>>>>>>>> make that
>>>>>>> change.
>>>>>>>>
>>>>>>>
>>>>>>> Got it, I think most of the trace points in the 'rte_ethdev_trace_fp.h'
>>>>>>> are for control/helper functions like:
>>>>>>> 'rte_ethdev_trace_count_avail',
>>>>>>> 'rte_ethdev_trace_get_port_by_name',
>>>>> 'rte_eth_trace_promiscuous_get' ...
>>>>>>>
>>>>>>> I thought you did based on some analysis, that is why I asked to
>>>>>>> add that reasoning as code comment.
>>>>>>>
>>>>>>>
>>>>>>> I think we can generalize as:
>>>>>>>
>>>>>>> 1) Anything called by ethdev static inline functions are
>>>>>>> datapath, and must be 'RTE_TRACE_POINT_FP', like
>>>>>>> 'rte_eth_trace_call_[rt]x_callbacks',
>>>>>>> 'rte_ethdev_trace_[rt]x_burst',
>>>>>>
>>>>>> In this category the following functions come:
>>>>>> rte_eth_rx_burst
>>>>>> rte_eth_tx_burst
>>>>>> rte_eth_call_rx_callbacks (called from rte_eth_rx_burst)
>>>>>> rte_eth_call_tx_callbacks (called from rte_eth_tx_burst)
>>>>>> rte_eth_tx_buffer_count_callback (registered as error callback,
>>>>>> called from rte_eth_tx_buffer_flush)
>>>>>> rte_eth_tx_buffer_drop_callback (registered as error callback)
>>>>>
>>>>> ack
>>>>>
>>>>>>>
>>>>>>> 2) Anything that is called in endless loop in application/sample
>>>>>>> that has potential impact although it may not really be datapath
>>>>>>
>>>>>> Apart from functions in category [1], I have observed the
>>>>>> following functions
>>>>> in ethdev library, called in some while loop in app/examples.
>>>>>> rte_eth_stats_get (called in while loop in examples/qos_sched and
>>>>>> examples/distributor) rte_eth_macaddr_get (called in while loop in
>>>>>> examples/bond and examples/ethtool) rte_eth_link_get (called in
>>>>>> for loop in examples/ip_pipeline) rte_eth_dev_get_mtu (called in
>>>>>> for loop in examples/ip_pipeline) rte_eth_link_speed_to_str
>>>>>> (called in for loop in examples/ip_pipeline)
>>>>>> rte_eth_dev_rx_intr_enable ( called in loop in
>>>>>> examples/l3fwd-power) rte_eth_dev_rx_intr_disable ( called in loop
>>>>>> in examples/l3fwd-power) rte_eth_timesync_read_rx_timestamp
>>>>>> (called in loop in
>>>>>> examples/ptpclient) rte_eth_timesync_read_tx_timestamp (called in
>>>>>> loop in examples/ptpclient) rte_eth_timesync_adjust_time (called
>>>>>> in loop in examples/ptpclient) rte_eth_timesync_read_time (called
>>>>>> in loop in examples/ptpclient) rte_flow_classifier_query (called
>>>>>> in
>>>>>> examples/flow_classify) rte_mtr_create (in app/test-flow-perf
>>>>>> loop) rte_mtr_destroy (in app/test-flow-perf loop)
>>>>>> rte_mtr_meter_policy_delete ((in app/test-flow-perf loop)
>>>>>> rte_flow_create (in app/test-flow-perf) rte_flow_destroy (in
>>>>>> app/test-flow-perf)
>>>>>>
>>>>>
>>>>> Ack, and can you please add the note within the parenthesis as a
>>>>> comment, whoever visits these later knows why there trace points
>>>>> added as fast path trace point?
>>>>>
>>>>>> Apart from the above can all other functions be moved to slowpath
>>>>> tracepoints?
>>>>>
>>>>> I think yes, we can start with this.
>>>>> At least this gives us a logic to follow.
>>>>>
>>>>> And does trace point and fast path trace points needs to be in
>>>>> separate header files?
>>>>
>>>> I do not think separate header files is a requirement, but it is
>>>> easy to segregate slowpath/fastpath if they are in separate files.
>>>> What do you
>>> think ?
>>>
>>> I think it is not good to expose trace points to user more than we
>>> have to, that is why I think better to segregate as public/internal.
>>
>> In my earlier comment I was thinking of something like this:
>> - Functions in category [1] (fastpath tracepoints) can be in public header
>named rte_ethdev_trace_fp.h(exposed to the user).
>>
>> - Functions in category [2] (fastpath tracepoints)can be in internal
>> header named ethdev_trace_fp.h
>> - Functions in category [3] (slowpath tracepoints) can be in internal
>> header ethdev_trace.h
>
>Do we need three headers for trace? What is the downside to merge [2] and
>[3] but group them withing the same header?

There is no downside.
Will merge [2] and [3], with file named as ethdev_trace.h.
  

Patch

diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index 48090c879a..fd16b25e55 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -5,6 +5,7 @@ 
 #include <rte_debug.h>
 
 #include "rte_ethdev.h"
+#include "rte_ethdev_trace_fp.h"
 #include "ethdev_driver.h"
 #include "ethdev_private.h"
 
@@ -297,6 +298,8 @@  rte_eth_call_rx_callbacks(uint16_t port_id, uint16_t queue_id,
 		cb = cb->next;
 	}
 
+	rte_eth_trace_call_rx_callbacks(port_id, queue_id, rx_pkts, nb_rx, nb_pkts);
+
 	return nb_rx;
 }
 
@@ -312,6 +315,8 @@  rte_eth_call_tx_callbacks(uint16_t port_id, uint16_t queue_id,
 		cb = cb->next;
 	}
 
+	rte_eth_trace_call_tx_callbacks(port_id, queue_id, tx_pkts, nb_pkts);
+
 	return nb_pkts;
 }
 
diff --git a/lib/ethdev/ethdev_trace_points.c b/lib/ethdev/ethdev_trace_points.c
index 2919409a15..4fea76e0ff 100644
--- a/lib/ethdev/ethdev_trace_points.c
+++ b/lib/ethdev/ethdev_trace_points.c
@@ -5,6 +5,7 @@ 
 #include <rte_trace_point_register.h>
 
 #include <rte_ethdev_trace.h>
+#include <rte_ethdev_trace_fp.h>
 
 RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_configure,
 	lib.ethdev.configure)
@@ -29,3 +30,195 @@  RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_rx_burst,
 
 RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_tx_burst,
 	lib.ethdev.tx.burst)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_call_rx_callbacks,
+	lib.ethdev.call_rx_callbacks)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_call_tx_callbacks,
+	lib.ethdev.call_tx_callbacks)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_iterator_init,
+	lib.ethdev.iterator_init)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_iterator_next,
+	lib.ethdev.iterator_next)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_iterator_cleanup,
+	lib.ethdev.iterator_cleanup)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_find_next,
+	lib.ethdev.find_next)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_find_next_of,
+	lib.ethdev.find_next_of)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_find_next_sibling,
+	lib.ethdev.find_next_sibling)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_is_valid_port,
+	lib.ethdev.is_valid_port)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_find_next_owned_by,
+	lib.ethdev.find_next_owned_by)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_owner_new,
+	lib.ethdev.owner_new)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_owner_set,
+	lib.ethdev.owner_set)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_owner_unset,
+	lib.ethdev.owner_unset)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_owner_delete,
+	lib.ethdev.owner_delete)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_owner_get,
+	lib.ethdev.owner_get)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_socket_id,
+	lib.ethdev.socket_id)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_get_sec_ctx,
+	lib.ethdev.get_sec_ctx)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_count_avail,
+	lib.ethdev.count_avail)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_count_total,
+	lib.ethdev.count_total)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_get_name_by_port,
+	lib.ethdev.get_name_by_port)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_get_port_by_name,
+	lib.ethdev.get_port_by_name)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_rx_queue_start,
+	lib.ethdev.rx_queue_start)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_rx_queue_stop,
+	lib.ethdev.rx_queue_stop)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_tx_queue_start,
+	lib.ethdev.tx_queue_start)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_tx_queue_stop,
+	lib.ethdev.tx_queue_stop)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_speed_bitflag,
+	lib.ethdev.speed_bitflag)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_rx_offload_name,
+	lib.ethdev.rx_offload_name)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_tx_offload_name,
+	lib.ethdev.tx_offload_name)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_capability_name,
+	lib.ethdev.capability_name)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_set_link_up,
+	lib.ethdev.set_link_up)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_set_link_down,
+	lib.ethdev.set_link_down)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_reset,
+	lib.ethdev.reset)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_is_removed,
+	lib.ethdev.is_removed)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_rx_hairpin_queue_setup,
+	lib.ethdev.rx.hairpin_queue_setup)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_hairpin_queue_setup,
+	lib.ethdev.tx.hairpin_queue_setup)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_hairpin_bind,
+	lib.ethdev.hairpin_bind)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_hairpin_unbind,
+	lib.ethdev.hairpin_unbind)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_hairpin_get_peer_ports,
+	lib.ethdev.hairpin_get_peer_ports)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_buffer_drop_callback,
+	lib.ethdev.tx_buffer_drop_callback)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_buffer_count_callback,
+	lib.ethdev.tx_buffer_count_callback)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_buffer_set_err_callback,
+	lib.ethdev.tx_buffer_set_err_callback)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_buffer_init,
+	lib.ethdev.tx_buffer_init)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_tx_done_cleanup,
+	lib.ethdev.tx_done_cleanup)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_promiscuous_enable,
+	lib.ethdev.promiscuous_enable)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_promiscuous_disable,
+	lib.ethdev.promiscuous_disable)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_promiscuous_get,
+	lib.ethdev.promiscuous_get)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_allmulticast_enable,
+	lib.ethdev.allmulticast_enable)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_allmulticast_disable,
+	lib.ethdev.allmulticast_disable)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_allmulticast_get,
+	lib.ethdev.allmulticast_get)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_link_get,
+	lib.ethdev.link_get)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_link_get_nowait,
+	lib.ethdev.link_get_nowait)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_link_speed_to_str,
+	lib.ethdev.link_speed_to_str)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_link_to_str,
+	lib.ethdev.link_to_str)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_stats_get,
+	lib.ethdev.stats_get)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_stats_reset,
+	lib.ethdev.stats_reset)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_xstats_get_id_by_name,
+	lib.ethdev.xstats_get_id_by_name)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_xstats_get_names_by_id,
+	lib.ethdev.xstats_get_names_by_id)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_xstats_get_names,
+	lib.ethdev.xstats_get_names)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_xstats_get_by_id,
+	lib.ethdev.xstats_get_by_id)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_xstats_get,
+	lib.ethdev.xstats_get)
+
+RTE_TRACE_POINT_REGISTER(rte_eth_trace_xstats_reset,
+	lib.ethdev.xstats_reset)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_set_tx_queue_stats_mapping,
+	lib.ethdev.set_tx_queue_stats_mapping)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_set_rx_queue_stats_mapping,
+	lib.ethdev.set_rx_queue_stats_mapping)
+
+RTE_TRACE_POINT_REGISTER(rte_ethdev_trace_fw_version_get,
+	lib.ethdev.fw_version_get)
diff --git a/lib/ethdev/meson.build b/lib/ethdev/meson.build
index 39250b5da1..f5c0865023 100644
--- a/lib/ethdev/meson.build
+++ b/lib/ethdev/meson.build
@@ -24,6 +24,7 @@  headers = files(
         'rte_ethdev.h',
         'rte_ethdev_trace.h',
         'rte_ethdev_trace_fp.h',
+        'rte_ethdev_trace_fp_burst.h',
         'rte_dev_info.h',
         'rte_flow.h',
         'rte_flow_driver.h',
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 5d5e18db1e..40897ad94d 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -27,8 +27,9 @@ 
 #include <rte_ether.h>
 #include <rte_telemetry.h>
 
-#include "rte_ethdev_trace.h"
 #include "rte_ethdev.h"
+#include "rte_ethdev_trace.h"
+#include "rte_ethdev_trace_fp.h"
 #include "ethdev_driver.h"
 #include "ethdev_profile.h"
 #include "ethdev_private.h"
@@ -258,6 +259,7 @@  rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
 
 end:
 	iter->cls = rte_class_find_by_name("eth");
+	rte_eth_trace_iterator_init(devargs_str);
 	rte_devargs_reset(&devargs);
 	return 0;
 
@@ -274,6 +276,8 @@  rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
 uint16_t
 rte_eth_iterator_next(struct rte_dev_iterator *iter)
 {
+	uint16_t id;
+
 	if (iter == NULL) {
 		RTE_ETHDEV_LOG(ERR,
 			"Cannot get next device from NULL iterator\n");
@@ -297,8 +301,11 @@  rte_eth_iterator_next(struct rte_dev_iterator *iter)
 		/* A device is matching bus part, need to check ethdev part. */
 		iter->class_device = iter->cls->dev_iterate(
 				iter->class_device, iter->cls_str, iter);
-		if (iter->class_device != NULL)
-			return eth_dev_to_id(iter->class_device); /* match */
+		if (iter->class_device != NULL) {
+			id = eth_dev_to_id(iter->class_device);
+			rte_eth_trace_iterator_next(iter, id);
+			return id; /* match */
+		}
 	} while (iter->bus != NULL); /* need to try next rte_device */
 
 	/* No more ethdev port to iterate. */
@@ -316,6 +323,7 @@  rte_eth_iterator_cleanup(struct rte_dev_iterator *iter)
 
 	if (iter->bus_str == NULL)
 		return; /* nothing to free in pure class filter */
+	rte_eth_trace_iterator_cleanup(iter);
 	free(RTE_CAST_FIELD(iter, bus_str, char *)); /* workaround const */
 	free(RTE_CAST_FIELD(iter, cls_str, char *)); /* workaround const */
 	memset(iter, 0, sizeof(*iter));
@@ -324,12 +332,18 @@  rte_eth_iterator_cleanup(struct rte_dev_iterator *iter)
 uint16_t
 rte_eth_find_next(uint16_t port_id)
 {
+	rte_eth_trace_find_next(port_id);
+
 	while (port_id < RTE_MAX_ETHPORTS &&
 			rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)
 		port_id++;
 
-	if (port_id >= RTE_MAX_ETHPORTS)
+	if (port_id >= RTE_MAX_ETHPORTS) {
+		rte_eth_trace_find_next(RTE_MAX_ETHPORTS);
 		return RTE_MAX_ETHPORTS;
+	}
+
+	rte_eth_trace_find_next(port_id);
 
 	return port_id;
 }
@@ -347,10 +361,15 @@  uint16_t
 rte_eth_find_next_of(uint16_t port_id, const struct rte_device *parent)
 {
 	port_id = rte_eth_find_next(port_id);
+
+	rte_eth_trace_find_next_of(port_id);
+
 	while (port_id < RTE_MAX_ETHPORTS &&
 			rte_eth_devices[port_id].device != parent)
 		port_id = rte_eth_find_next(port_id + 1);
 
+	rte_eth_trace_find_next_of(port_id);
+
 	return port_id;
 }
 
@@ -358,6 +377,9 @@  uint16_t
 rte_eth_find_next_sibling(uint16_t port_id, uint16_t ref_port_id)
 {
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(ref_port_id, RTE_MAX_ETHPORTS);
+
+	rte_eth_trace_find_next_sibling(port_id, ref_port_id);
+
 	return rte_eth_find_next_of(port_id,
 			rte_eth_devices[ref_port_id].device);
 }
@@ -372,10 +394,13 @@  int
 rte_eth_dev_is_valid_port(uint16_t port_id)
 {
 	if (port_id >= RTE_MAX_ETHPORTS ||
-	    (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED))
+	    (rte_eth_devices[port_id].state == RTE_ETH_DEV_UNUSED)) {
+		rte_ethdev_trace_is_valid_port(port_id, 0);
 		return 0;
-	else
+	} else {
+		rte_ethdev_trace_is_valid_port(port_id, 1);
 		return 1;
+	}
 }
 
 static int
@@ -395,6 +420,7 @@  rte_eth_find_next_owned_by(uint16_t port_id, const uint64_t owner_id)
 			rte_eth_devices[port_id].data->owner.id != owner_id)
 		port_id = rte_eth_find_next(port_id + 1);
 
+	rte_eth_trace_find_next_owned_by(port_id, owner_id);
 	return port_id;
 }
 
@@ -413,6 +439,7 @@  rte_eth_dev_owner_new(uint64_t *owner_id)
 	*owner_id = eth_dev_shared_data->next_owner_id++;
 
 	rte_spinlock_unlock(&eth_dev_shared_data->ownership_lock);
+	rte_ethdev_trace_owner_new(*owner_id);
 	return 0;
 }
 
@@ -476,6 +503,7 @@  rte_eth_dev_owner_set(const uint16_t port_id,
 	ret = eth_dev_owner_set(port_id, RTE_ETH_DEV_NO_OWNER, owner);
 
 	rte_spinlock_unlock(&eth_dev_shared_data->ownership_lock);
+	rte_ethdev_trace_owner_set(port_id, owner, ret);
 	return ret;
 }
 
@@ -493,6 +521,7 @@  rte_eth_dev_owner_unset(const uint16_t port_id, const uint64_t owner_id)
 	ret = eth_dev_owner_set(port_id, owner_id, &new_owner);
 
 	rte_spinlock_unlock(&eth_dev_shared_data->ownership_lock);
+	rte_ethdev_trace_owner_unset(port_id, owner_id, ret);
 	return ret;
 }
 
@@ -526,6 +555,7 @@  rte_eth_dev_owner_delete(const uint64_t owner_id)
 
 	rte_spinlock_unlock(&eth_dev_shared_data->ownership_lock);
 
+	rte_ethdev_trace_owner_delete(owner_id, ret);
 	return ret;
 }
 
@@ -555,6 +585,7 @@  rte_eth_dev_owner_get(const uint16_t port_id, struct rte_eth_dev_owner *owner)
 	rte_memcpy(owner, &ethdev->data->owner, sizeof(*owner));
 	rte_spinlock_unlock(&eth_dev_shared_data->ownership_lock);
 
+	rte_ethdev_trace_owner_get(port_id, owner);
 	return 0;
 }
 
@@ -570,14 +601,21 @@  rte_eth_dev_socket_id(uint16_t port_id)
 		if (socket_id == SOCKET_ID_ANY)
 			rte_errno = 0;
 	}
+	rte_ethdev_trace_socket_id(port_id, socket_id);
 	return socket_id;
 }
 
 void *
 rte_eth_dev_get_sec_ctx(uint16_t port_id)
 {
+	void *ctx;
+
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
-	return rte_eth_devices[port_id].security_ctx;
+	ctx = rte_eth_devices[port_id].security_ctx;
+
+	rte_ethdev_trace_get_sec_ctx(port_id, ctx);
+
+	return ctx;
 }
 
 uint16_t
@@ -591,6 +629,7 @@  rte_eth_dev_count_avail(void)
 	RTE_ETH_FOREACH_DEV(p)
 		count++;
 
+	rte_ethdev_trace_count_avail(count);
 	return count;
 }
 
@@ -602,6 +641,7 @@  rte_eth_dev_count_total(void)
 	RTE_ETH_FOREACH_VALID_DEV(port)
 		count++;
 
+	rte_ethdev_trace_count_total(count);
 	return count;
 }
 
@@ -622,6 +662,7 @@  rte_eth_dev_get_name_by_port(uint16_t port_id, char *name)
 	 * because it might be overwritten by VDEV PMD */
 	tmp = eth_dev_shared_data->data[port_id].name;
 	strcpy(name, tmp);
+	rte_ethdev_trace_get_name_by_port(port_id, name);
 	return 0;
 }
 
@@ -644,6 +685,7 @@  rte_eth_dev_get_port_by_name(const char *name, uint16_t *port_id)
 	RTE_ETH_FOREACH_VALID_DEV(pid)
 		if (!strcmp(name, eth_dev_shared_data->data[pid].name)) {
 			*port_id = pid;
+			rte_ethdev_trace_get_port_by_name(name, *port_id);
 			return 0;
 		}
 
@@ -745,7 +787,11 @@  rte_eth_dev_rx_queue_start(uint16_t port_id, uint16_t rx_queue_id)
 		return 0;
 	}
 
-	return eth_err(port_id, dev->dev_ops->rx_queue_start(dev, rx_queue_id));
+	ret = eth_err(port_id, dev->dev_ops->rx_queue_start(dev, rx_queue_id));
+
+	rte_ethdev_trace_rx_queue_start(port_id, rx_queue_id, ret);
+
+	return ret;
 }
 
 int
@@ -778,7 +824,11 @@  rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id)
 		return 0;
 	}
 
-	return eth_err(port_id, dev->dev_ops->rx_queue_stop(dev, rx_queue_id));
+	ret = eth_err(port_id, dev->dev_ops->rx_queue_stop(dev, rx_queue_id));
+
+	rte_ethdev_trace_rx_queue_stop(port_id, rx_queue_id, ret);
+
+	return ret;
 }
 
 int
@@ -818,7 +868,11 @@  rte_eth_dev_tx_queue_start(uint16_t port_id, uint16_t tx_queue_id)
 		return 0;
 	}
 
-	return eth_err(port_id, dev->dev_ops->tx_queue_start(dev, tx_queue_id));
+	ret = eth_err(port_id, dev->dev_ops->tx_queue_start(dev, tx_queue_id));
+
+	rte_ethdev_trace_tx_queue_start(port_id, tx_queue_id, ret);
+
+	return ret;
 }
 
 int
@@ -851,12 +905,17 @@  rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id)
 		return 0;
 	}
 
-	return eth_err(port_id, dev->dev_ops->tx_queue_stop(dev, tx_queue_id));
+	ret = eth_err(port_id, dev->dev_ops->tx_queue_stop(dev, tx_queue_id));
+
+	rte_ethdev_trace_tx_queue_stop(port_id, tx_queue_id, ret);
+
+	return ret;
 }
 
 uint32_t
 rte_eth_speed_bitflag(uint32_t speed, int duplex)
 {
+	rte_eth_trace_speed_bitflag(speed, duplex);
 	switch (speed) {
 	case RTE_ETH_SPEED_NUM_10M:
 		return duplex ? RTE_ETH_LINK_SPEED_10M : RTE_ETH_LINK_SPEED_10M_HD;
@@ -902,6 +961,8 @@  rte_eth_dev_rx_offload_name(uint64_t offload)
 		}
 	}
 
+	rte_ethdev_trace_rx_offload_name(offload, name);
+
 	return name;
 }
 
@@ -918,6 +979,8 @@  rte_eth_dev_tx_offload_name(uint64_t offload)
 		}
 	}
 
+	rte_ethdev_trace_tx_offload_name(offload, name);
+
 	return name;
 }
 
@@ -934,6 +997,8 @@  rte_eth_dev_capability_name(uint64_t capability)
 		}
 	}
 
+	rte_ethdev_trace_capability_name(capability, name);
+
 	return name;
 }
 
@@ -1554,26 +1619,36 @@  int
 rte_eth_dev_set_link_up(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
+	int ret;
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
 
 	if (*dev->dev_ops->dev_set_link_up == NULL)
 		return -ENOTSUP;
-	return eth_err(port_id, (*dev->dev_ops->dev_set_link_up)(dev));
+	ret = eth_err(port_id, (*dev->dev_ops->dev_set_link_up)(dev));
+
+	rte_ethdev_trace_set_link_up(port_id, ret);
+
+	return ret;
 }
 
 int
 rte_eth_dev_set_link_down(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
+	int ret;
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
 
 	if (*dev->dev_ops->dev_set_link_down == NULL)
 		return -ENOTSUP;
-	return eth_err(port_id, (*dev->dev_ops->dev_set_link_down)(dev));
+	ret = eth_err(port_id, (*dev->dev_ops->dev_set_link_down)(dev));
+
+	rte_ethdev_trace_set_link_down(port_id, ret);
+
+	return ret;
 }
 
 int
@@ -1628,9 +1703,12 @@  rte_eth_dev_reset(uint16_t port_id)
 			"Failed to stop device (port %u) before reset: %s - ignore\n",
 			port_id, rte_strerror(-ret));
 	}
-	ret = dev->dev_ops->dev_reset(dev);
 
-	return eth_err(port_id, ret);
+	ret = eth_err(port_id, dev->dev_ops->dev_reset(dev));
+
+	rte_ethdev_trace_reset(port_id, ret);
+
+	return ret;
 }
 
 int
@@ -1653,6 +1731,8 @@  rte_eth_dev_is_removed(uint16_t port_id)
 		/* Device is physically removed. */
 		dev->state = RTE_ETH_DEV_REMOVED;
 
+	rte_ethdev_trace_is_removed(port_id, ret);
+
 	return ret;
 }
 
@@ -2151,7 +2231,13 @@  rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 	if (ret == 0)
 		dev->data->rx_queue_state[rx_queue_id] =
 			RTE_ETH_QUEUE_STATE_HAIRPIN;
-	return eth_err(port_id, ret);
+
+	ret = eth_err(port_id, ret);
+
+	rte_eth_trace_rx_hairpin_queue_setup(port_id, rx_queue_id, nb_rx_desc,
+					     conf, ret);
+
+	return ret;
 }
 
 int
@@ -2340,7 +2426,12 @@  rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
 	if (ret == 0)
 		dev->data->tx_queue_state[tx_queue_id] =
 			RTE_ETH_QUEUE_STATE_HAIRPIN;
-	return eth_err(port_id, ret);
+
+	ret = eth_err(port_id, ret);
+
+	rte_eth_trace_tx_hairpin_queue_setup(port_id, tx_queue_id, nb_tx_desc, conf, ret);
+
+	return ret;
 }
 
 int
@@ -2365,6 +2456,8 @@  rte_eth_hairpin_bind(uint16_t tx_port, uint16_t rx_port)
 			       " to Rx %d (%d - all ports)\n",
 			       tx_port, rx_port, RTE_MAX_ETHPORTS);
 
+	rte_eth_trace_hairpin_bind(tx_port, rx_port, ret);
+
 	return ret;
 }
 
@@ -2390,6 +2483,7 @@  rte_eth_hairpin_unbind(uint16_t tx_port, uint16_t rx_port)
 			       " from Rx %d (%d - all ports)\n",
 			       tx_port, rx_port, RTE_MAX_ETHPORTS);
 
+	rte_eth_trace_hairpin_unbind(tx_port, rx_port, ret);
 	return ret;
 }
 
@@ -2426,6 +2520,8 @@  rte_eth_hairpin_get_peer_ports(uint16_t port_id, uint16_t *peer_ports,
 		RTE_ETHDEV_LOG(ERR, "Failed to get %d hairpin peer %s ports\n",
 			       port_id, direction ? "Rx" : "Tx");
 
+	rte_eth_trace_hairpin_get_peer_ports(port_id, peer_ports, len, direction, ret);
+
 	return ret;
 }
 
@@ -2433,6 +2529,7 @@  void
 rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent,
 		void *userdata __rte_unused)
 {
+	rte_eth_trace_tx_buffer_drop_callback(pkts, unsent);
 	rte_pktmbuf_free_bulk(pkts, unsent);
 }
 
@@ -2444,6 +2541,7 @@  rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent,
 
 	rte_pktmbuf_free_bulk(pkts, unsent);
 	*count += unsent;
+	rte_eth_trace_tx_buffer_count_callback(pkts, unsent, *count);
 }
 
 int
@@ -2458,6 +2556,9 @@  rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer,
 
 	buffer->error_callback = cbfn;
 	buffer->error_userdata = userdata;
+
+	rte_eth_trace_tx_buffer_set_err_callback(buffer);
+
 	return 0;
 }
 
@@ -2477,6 +2578,8 @@  rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size)
 			buffer, rte_eth_tx_buffer_drop_callback, NULL);
 	}
 
+	rte_eth_trace_tx_buffer_init(buffer, size, ret);
+
 	return ret;
 }
 
@@ -2495,7 +2598,12 @@  rte_eth_tx_done_cleanup(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt)
 	/* Call driver to free pending mbufs. */
 	ret = (*dev->dev_ops->tx_done_cleanup)(dev->data->tx_queues[queue_id],
 					       free_cnt);
-	return eth_err(port_id, ret);
+
+	ret = eth_err(port_id, ret);
+
+	rte_eth_trace_tx_done_cleanup(port_id, queue_id, free_cnt, ret);
+
+	return ret;
 }
 
 int
@@ -2516,7 +2624,11 @@  rte_eth_promiscuous_enable(uint16_t port_id)
 	diag = (*dev->dev_ops->promiscuous_enable)(dev);
 	dev->data->promiscuous = (diag == 0) ? 1 : 0;
 
-	return eth_err(port_id, diag);
+	diag = eth_err(port_id, diag);
+
+	rte_eth_trace_promiscuous_enable(port_id, dev->data->promiscuous, diag);
+
+	return diag;
 }
 
 int
@@ -2539,7 +2651,11 @@  rte_eth_promiscuous_disable(uint16_t port_id)
 	if (diag != 0)
 		dev->data->promiscuous = 1;
 
-	return eth_err(port_id, diag);
+	diag = eth_err(port_id, diag);
+
+	rte_eth_trace_promiscuous_disable(port_id, dev->data->promiscuous, diag);
+
+	return diag;
 }
 
 int
@@ -2550,6 +2666,8 @@  rte_eth_promiscuous_get(uint16_t port_id)
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
 
+	rte_eth_trace_promiscuous_get(port_id, dev->data->promiscuous);
+
 	return dev->data->promiscuous;
 }
 
@@ -2570,7 +2688,11 @@  rte_eth_allmulticast_enable(uint16_t port_id)
 	diag = (*dev->dev_ops->allmulticast_enable)(dev);
 	dev->data->all_multicast = (diag == 0) ? 1 : 0;
 
-	return eth_err(port_id, diag);
+	diag = eth_err(port_id, diag);
+
+	rte_eth_trace_allmulticast_enable(port_id, dev->data->all_multicast, diag);
+
+	return diag;
 }
 
 int
@@ -2592,7 +2714,11 @@  rte_eth_allmulticast_disable(uint16_t port_id)
 	if (diag != 0)
 		dev->data->all_multicast = 1;
 
-	return eth_err(port_id, diag);
+	diag = eth_err(port_id, diag);
+
+	rte_eth_trace_allmulticast_disable(port_id, dev->data->all_multicast, diag);
+
+	return diag;
 }
 
 int
@@ -2603,6 +2729,8 @@  rte_eth_allmulticast_get(uint16_t port_id)
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
 
+	rte_eth_trace_allmulticast_get(port_id, dev->data->all_multicast);
+
 	return dev->data->all_multicast;
 }
 
@@ -2629,6 +2757,8 @@  rte_eth_link_get(uint16_t port_id, struct rte_eth_link *eth_link)
 		*eth_link = dev->data->dev_link;
 	}
 
+	rte_eth_trace_link_get(port_id, eth_link);
+
 	return 0;
 }
 
@@ -2655,12 +2785,16 @@  rte_eth_link_get_nowait(uint16_t port_id, struct rte_eth_link *eth_link)
 		*eth_link = dev->data->dev_link;
 	}
 
+	rte_eth_trace_link_get_nowait(port_id, eth_link);
+
 	return 0;
 }
 
 const char *
 rte_eth_link_speed_to_str(uint32_t link_speed)
 {
+	rte_eth_trace_link_speed_to_str(link_speed);
+
 	switch (link_speed) {
 	case RTE_ETH_SPEED_NUM_NONE: return "None";
 	case RTE_ETH_SPEED_NUM_10M:  return "10 Mbps";
@@ -2700,6 +2834,8 @@  rte_eth_link_to_str(char *str, size_t len, const struct rte_eth_link *eth_link)
 		return -EINVAL;
 	}
 
+	rte_eth_trace_link_to_str(len, eth_link);
+
 	if (eth_link->link_status == RTE_ETH_LINK_DOWN)
 		return snprintf(str, len, "Link down");
 	else
@@ -2715,6 +2851,7 @@  int
 rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
 {
 	struct rte_eth_dev *dev;
+	int ret;
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
@@ -2730,7 +2867,12 @@  rte_eth_stats_get(uint16_t port_id, struct rte_eth_stats *stats)
 	if (*dev->dev_ops->stats_get == NULL)
 		return -ENOTSUP;
 	stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed;
-	return eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));
+
+	ret = eth_err(port_id, (*dev->dev_ops->stats_get)(dev, stats));
+
+	rte_eth_trace_stats_get(port_id, stats, ret);
+
+	return ret;
 }
 
 int
@@ -2750,6 +2892,8 @@  rte_eth_stats_reset(uint16_t port_id)
 
 	dev->data->rx_mbuf_alloc_failed = 0;
 
+	rte_eth_trace_stats_reset(port_id);
+
 	return 0;
 }
 
@@ -2833,6 +2977,7 @@  rte_eth_xstats_get_id_by_name(uint16_t port_id, const char *xstat_name,
 	for (idx_xstat = 0; idx_xstat < cnt_xstats; idx_xstat++) {
 		if (!strcmp(xstats_names[idx_xstat].name, xstat_name)) {
 			*id = idx_xstat;
+			rte_eth_trace_xstats_get_id_by_name(port_id, xstat_name, *id);
 			return 0;
 		};
 	}
@@ -2986,6 +3131,8 @@  rte_eth_xstats_get_names_by_id(uint16_t port_id,
 			return -1;
 		}
 		xstats_names[i] = xstats_names_copy[ids[i]];
+		rte_eth_trace_xstats_get_names_by_id(port_id, &xstats_names[i],
+						     ids[i]);
 	}
 
 	free(xstats_names_copy);
@@ -3025,6 +3172,8 @@  rte_eth_xstats_get_names(uint16_t port_id,
 		cnt_used_entries += cnt_driver_entries;
 	}
 
+	rte_eth_trace_xstats_get_names(port_id, xstats_names, size, cnt_used_entries);
+
 	return cnt_used_entries;
 }
 
@@ -3174,6 +3323,9 @@  rte_eth_xstats_get_by_id(uint16_t port_id, const uint64_t *ids,
 		}
 		values[i] = xstats[ids[i]].value;
 	}
+
+	rte_eth_trace_xstats_get_by_id(port_id, ids, values, size);
+
 	return size;
 }
 
@@ -3221,6 +3373,9 @@  rte_eth_xstats_get(uint16_t port_id, struct rte_eth_xstat *xstats,
 	for ( ; i < count + xcount; i++)
 		xstats[i].id += count;
 
+	for (i = 0; i < n; i++)
+		rte_eth_trace_xstats_get(port_id, xstats[i], i);
+
 	return count + xcount;
 }
 
@@ -3229,13 +3384,19 @@  int
 rte_eth_xstats_reset(uint16_t port_id)
 {
 	struct rte_eth_dev *dev;
+	int ret;
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
 
 	/* implemented by the driver */
-	if (dev->dev_ops->xstats_reset != NULL)
-		return eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
+	if (dev->dev_ops->xstats_reset != NULL) {
+		ret = eth_err(port_id, (*dev->dev_ops->xstats_reset)(dev));
+
+		rte_eth_trace_xstats_reset(port_id, ret);
+
+		return ret;
+	}
 
 	/* fallback to default */
 	return rte_eth_stats_reset(port_id);
@@ -3268,24 +3429,37 @@  int
 rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id, uint16_t tx_queue_id,
 		uint8_t stat_idx)
 {
-	return eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
+	int ret;
+
+	ret = eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
 						tx_queue_id,
 						stat_idx, STAT_QMAP_TX));
+
+	rte_ethdev_trace_set_tx_queue_stats_mapping(port_id, tx_queue_id, stat_idx, ret);
+
+	return ret;
 }
 
 int
 rte_eth_dev_set_rx_queue_stats_mapping(uint16_t port_id, uint16_t rx_queue_id,
 		uint8_t stat_idx)
 {
-	return eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
+	int ret;
+	ret = eth_err(port_id, eth_dev_set_queue_stats_mapping(port_id,
 						rx_queue_id,
 						stat_idx, STAT_QMAP_RX));
+
+	rte_ethdev_trace_set_rx_queue_stats_mapping(port_id, rx_queue_id,
+						    stat_idx, ret);
+
+	return ret;
 }
 
 int
 rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size)
 {
 	struct rte_eth_dev *dev;
+	int ret;
 
 	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
 	dev = &rte_eth_devices[port_id];
@@ -3299,8 +3473,13 @@  rte_eth_dev_fw_version_get(uint16_t port_id, char *fw_version, size_t fw_size)
 
 	if (*dev->dev_ops->fw_version_get == NULL)
 		return -ENOTSUP;
-	return eth_err(port_id, (*dev->dev_ops->fw_version_get)(dev,
+
+	ret = eth_err(port_id, (*dev->dev_ops->fw_version_get)(dev,
 							fw_version, fw_size));
+
+	rte_ethdev_trace_fw_version_get(port_id, fw_version, fw_size, ret);
+
+	return ret;
 }
 
 int
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 96d0650d0c..6340a84c10 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -172,7 +172,7 @@  extern "C" {
 #include <rte_config.h>
 #include <rte_power_intrinsics.h>
 
-#include "rte_ethdev_trace_fp.h"
+#include "rte_ethdev_trace_fp_burst.h"
 #include "rte_dev_info.h"
 
 extern int rte_eth_dev_logtype;
diff --git a/lib/ethdev/rte_ethdev_trace.h b/lib/ethdev/rte_ethdev_trace.h
index 1491c815c3..bc3b3d2a1b 100644
--- a/lib/ethdev/rte_ethdev_trace.h
+++ b/lib/ethdev/rte_ethdev_trace.h
@@ -88,6 +88,291 @@  RTE_TRACE_POINT(
 	rte_trace_point_emit_u16(port_id);
 )
 
+RTE_TRACE_POINT(
+	rte_eth_trace_iterator_init,
+	RTE_TRACE_POINT_ARGS(const char *devargs),
+	rte_trace_point_emit_string(devargs);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_iterator_next,
+	RTE_TRACE_POINT_ARGS(struct rte_dev_iterator *iter, uint16_t id),
+	rte_trace_point_emit_ptr(iter);
+	rte_trace_point_emit_string(iter->bus_str);
+	rte_trace_point_emit_string(iter->cls_str);
+	rte_trace_point_emit_u16(id);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_iterator_cleanup,
+	RTE_TRACE_POINT_ARGS(struct rte_dev_iterator *iter),
+	rte_trace_point_emit_ptr(iter);
+	rte_trace_point_emit_string(iter->bus_str);
+	rte_trace_point_emit_string(iter->cls_str);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_owner_new,
+	RTE_TRACE_POINT_ARGS(uint64_t owner_id),
+	rte_trace_point_emit_u64(owner_id);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_owner_set,
+	RTE_TRACE_POINT_ARGS(const uint16_t port_id,
+		const struct rte_eth_dev_owner *owner, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u64(owner->id);
+	rte_trace_point_emit_string(owner->name);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_owner_unset,
+	RTE_TRACE_POINT_ARGS(const uint16_t port_id,
+		const uint64_t owner_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u64(owner_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_owner_delete,
+	RTE_TRACE_POINT_ARGS(const uint64_t owner_id, int ret),
+	rte_trace_point_emit_u64(owner_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_socket_id,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int socket_id),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(socket_id);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_rx_queue_start,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(rx_queue_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_rx_queue_stop,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(rx_queue_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_tx_queue_start,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(tx_queue_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_tx_queue_stop,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(tx_queue_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_speed_bitflag,
+	RTE_TRACE_POINT_ARGS(uint32_t speed, int duplex),
+	rte_trace_point_emit_u32(speed);
+	rte_trace_point_emit_int(duplex);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_rx_offload_name,
+	RTE_TRACE_POINT_ARGS(uint64_t offload, const char *name),
+	rte_trace_point_emit_u64(offload);
+	rte_trace_point_emit_string(name);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_tx_offload_name,
+	RTE_TRACE_POINT_ARGS(uint64_t offload, const char *name),
+	rte_trace_point_emit_u64(offload);
+	rte_trace_point_emit_string(name);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_capability_name,
+	RTE_TRACE_POINT_ARGS(uint64_t capability, const char *name),
+	rte_trace_point_emit_u64(capability);
+	rte_trace_point_emit_string(name);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_set_link_up,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_set_link_down,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_reset,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_rx_hairpin_queue_setup,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
+		uint16_t nb_rx_desc, const struct rte_eth_hairpin_conf *conf,
+		int ret),
+	uint32_t peer_count = conf->peer_count;
+	uint32_t tx_explicit = conf->tx_explicit;
+	uint32_t manual_bind = conf->manual_bind;
+	uint32_t use_locked_device_memory = conf->use_locked_device_memory;
+	uint32_t use_rte_memory = conf->use_rte_memory;
+	uint32_t force_memory = conf->force_memory;
+
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(rx_queue_id);
+	rte_trace_point_emit_u16(nb_rx_desc);
+	rte_trace_point_emit_ptr(conf);
+	rte_trace_point_emit_u32(peer_count);
+	rte_trace_point_emit_u32(tx_explicit);
+	rte_trace_point_emit_u32(manual_bind);
+	rte_trace_point_emit_u32(use_locked_device_memory);
+	rte_trace_point_emit_u32(use_rte_memory);
+	rte_trace_point_emit_u32(force_memory);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_tx_hairpin_queue_setup,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id,
+		uint16_t nb_tx_desc, const struct rte_eth_hairpin_conf *conf,
+		int ret),
+	uint32_t peer_count = conf->peer_count;
+	uint32_t tx_explicit = conf->tx_explicit;
+	uint32_t manual_bind = conf->manual_bind;
+	uint32_t use_locked_device_memory = conf->use_locked_device_memory;
+	uint32_t use_rte_memory = conf->use_rte_memory;
+	uint32_t force_memory = conf->force_memory;
+
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(tx_queue_id);
+	rte_trace_point_emit_u16(nb_tx_desc);
+	rte_trace_point_emit_ptr(conf);
+	rte_trace_point_emit_u32(peer_count);
+	rte_trace_point_emit_u32(tx_explicit);
+	rte_trace_point_emit_u32(manual_bind);
+	rte_trace_point_emit_u32(use_locked_device_memory);
+	rte_trace_point_emit_u32(use_rte_memory);
+	rte_trace_point_emit_u32(force_memory);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_hairpin_bind,
+	RTE_TRACE_POINT_ARGS(uint16_t tx_port, uint16_t rx_port, int ret),
+	rte_trace_point_emit_u16(tx_port);
+	rte_trace_point_emit_u16(rx_port);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_hairpin_unbind,
+	RTE_TRACE_POINT_ARGS(uint16_t tx_port, uint16_t rx_port, int ret),
+	rte_trace_point_emit_u16(tx_port);
+	rte_trace_point_emit_u16(rx_port);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_tx_buffer_set_err_callback,
+	RTE_TRACE_POINT_ARGS(struct rte_eth_dev_tx_buffer *buffer),
+	rte_trace_point_emit_ptr(buffer);
+	rte_trace_point_emit_ptr(buffer->error_callback);
+	rte_trace_point_emit_ptr(buffer->error_userdata);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_promiscuous_enable,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int promiscuous, int diag),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(promiscuous);
+	rte_trace_point_emit_int(diag);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_promiscuous_disable,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int promiscuous, int diag),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(promiscuous);
+	rte_trace_point_emit_int(diag);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_allmulticast_enable,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int all_multicast, int diag),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(all_multicast);
+	rte_trace_point_emit_int(diag);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_allmulticast_disable,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int all_multicast, int diag),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(all_multicast);
+	rte_trace_point_emit_int(diag);
+)
+
+RTE_TRACE_POINT(
+	rte_eth_trace_link_speed_to_str,
+	RTE_TRACE_POINT_ARGS(uint32_t link_speed),
+	rte_trace_point_emit_u32(link_speed);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_set_rx_queue_stats_mapping,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t rx_queue_id,
+		uint8_t stat_idx, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(rx_queue_id);
+	rte_trace_point_emit_u8(stat_idx);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_set_tx_queue_stats_mapping,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t tx_queue_id,
+		uint8_t stat_idx, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(tx_queue_id);
+	rte_trace_point_emit_u8(stat_idx);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT(
+	rte_ethdev_trace_fw_version_get,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, char *fw_version, size_t fw_size,
+		int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_string(fw_version);
+	rte_trace_point_emit_size_t(fw_size);
+	rte_trace_point_emit_int(ret);
+)
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/rte_ethdev_trace_fp.h b/lib/ethdev/rte_ethdev_trace_fp.h
index 40084d1929..9f1d3d5a1b 100644
--- a/lib/ethdev/rte_ethdev_trace_fp.h
+++ b/lib/ethdev/rte_ethdev_trace_fp.h
@@ -17,26 +17,293 @@  extern "C" {
 
 #include <rte_trace_point.h>
 
+#include "rte_ethdev.h"
+
 RTE_TRACE_POINT_FP(
-	rte_ethdev_trace_rx_burst,
+	rte_eth_trace_call_rx_callbacks,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
-		void **pkt_tbl, uint16_t nb_rx),
+		struct rte_mbuf **rx_pkts, uint16_t nb_rx,
+		uint16_t nb_pkts),
 	rte_trace_point_emit_u16(port_id);
 	rte_trace_point_emit_u16(queue_id);
-	rte_trace_point_emit_ptr(pkt_tbl);
+	rte_trace_point_emit_ptr(rx_pkts);
 	rte_trace_point_emit_u16(nb_rx);
+	rte_trace_point_emit_u16(nb_pkts);
 )
 
 RTE_TRACE_POINT_FP(
-	rte_ethdev_trace_tx_burst,
+	rte_eth_trace_call_tx_callbacks,
 	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
-		void **pkts_tbl, uint16_t nb_pkts),
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts),
 	rte_trace_point_emit_u16(port_id);
 	rte_trace_point_emit_u16(queue_id);
-	rte_trace_point_emit_ptr(pkts_tbl);
+	rte_trace_point_emit_ptr(tx_pkts);
 	rte_trace_point_emit_u16(nb_pkts);
 )
 
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_find_next,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id),
+	rte_trace_point_emit_u16(port_id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_find_next_of,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id),
+	rte_trace_point_emit_u16(port_id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_find_next_sibling,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id_start, uint16_t ref_port_id),
+	rte_trace_point_emit_u16(port_id_start);
+	rte_trace_point_emit_u16(ref_port_id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_is_valid_port,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int is_valid),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(is_valid);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_find_next_owned_by,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id,
+		const uint64_t owner_id),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u64(owner_id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_owner_get,
+	RTE_TRACE_POINT_ARGS(const uint16_t port_id,
+		struct rte_eth_dev_owner *owner),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u64(owner->id);
+	rte_trace_point_emit_string(owner->name);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_get_sec_ctx,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, void *ctx),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_ptr(ctx);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_count_avail,
+	RTE_TRACE_POINT_ARGS(uint16_t count),
+	rte_trace_point_emit_u16(count);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_count_total,
+	RTE_TRACE_POINT_ARGS(uint16_t count),
+	rte_trace_point_emit_u16(count);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_get_name_by_port,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, char *name),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_string(name);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_get_port_by_name,
+	RTE_TRACE_POINT_ARGS(const char *name, uint16_t port_id),
+	rte_trace_point_emit_string(name);
+	rte_trace_point_emit_u16(port_id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_is_removed,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_hairpin_get_peer_ports,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t *peer_ports,
+		size_t len, uint32_t direction, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_ptr(peer_ports);
+	rte_trace_point_emit_size_t(len);
+	rte_trace_point_emit_u32(direction);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_tx_buffer_drop_callback,
+	RTE_TRACE_POINT_ARGS(struct rte_mbuf **pkts, uint16_t unsent),
+	rte_trace_point_emit_ptr(pkts);
+	rte_trace_point_emit_u16(unsent);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_tx_buffer_count_callback,
+	RTE_TRACE_POINT_ARGS(struct rte_mbuf **pkts, uint16_t unsent,
+		uint64_t count),
+	rte_trace_point_emit_ptr(pkts);
+	rte_trace_point_emit_u16(unsent);
+	rte_trace_point_emit_u64(count);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_tx_buffer_init,
+	RTE_TRACE_POINT_ARGS(struct rte_eth_dev_tx_buffer *buffer, uint16_t size,
+		int ret),
+	rte_trace_point_emit_ptr(buffer);
+	rte_trace_point_emit_u16(size);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_tx_done_cleanup,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id, uint32_t free_cnt,
+		int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(queue_id);
+	rte_trace_point_emit_u32(free_cnt);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_promiscuous_get,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int promiscuous),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(promiscuous);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_allmulticast_get,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int all_multicast),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(all_multicast);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_link_get,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_link *link),
+	uint16_t link_duplex = link->link_duplex;
+	uint16_t link_autoneg = link->link_autoneg;
+	uint16_t link_status = link->link_status;
+
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u32(link->link_speed);
+	rte_trace_point_emit_u16(link_duplex);
+	rte_trace_point_emit_u16(link_autoneg);
+	rte_trace_point_emit_u16(link_status);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_link_get_nowait,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_link *link),
+	uint16_t link_duplex = link->link_duplex;
+	uint16_t link_autoneg = link->link_autoneg;
+	uint16_t link_status = link->link_status;
+
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u32(link->link_speed);
+	rte_trace_point_emit_u16(link_duplex);
+	rte_trace_point_emit_u16(link_autoneg);
+	rte_trace_point_emit_u16(link_status);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_link_to_str,
+	RTE_TRACE_POINT_ARGS(size_t len, const struct rte_eth_link *link),
+	uint16_t link_duplex = link->link_duplex;
+	uint16_t link_autoneg = link->link_autoneg;
+	uint16_t link_status = link->link_status;
+
+	rte_trace_point_emit_size_t(len);
+	rte_trace_point_emit_u32(link->link_speed);
+	rte_trace_point_emit_u16(link_duplex);
+	rte_trace_point_emit_u16(link_autoneg);
+	rte_trace_point_emit_u16(link_status);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_stats_get,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_stats *stats, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_ptr(stats);
+	rte_trace_point_emit_u64(stats->rx_nombuf);
+	rte_trace_point_emit_u64(stats->ipackets);
+	rte_trace_point_emit_u64(stats->opackets);
+	rte_trace_point_emit_u64(stats->ibytes);
+	rte_trace_point_emit_u64(stats->obytes);
+	rte_trace_point_emit_u64(stats->imissed);
+	rte_trace_point_emit_u64(stats->ierrors);
+	rte_trace_point_emit_u64(stats->oerrors);
+	rte_trace_point_emit_int(ret);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_stats_reset,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id),
+	rte_trace_point_emit_u16(port_id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_xstats_get_id_by_name,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, const char *xstat_name,
+		uint64_t id),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_string(xstat_name);
+	rte_trace_point_emit_u64(id);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_xstats_get_names_by_id,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id,
+		struct rte_eth_xstat_name *xstats_names, uint64_t ids),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_string(xstats_names->name);
+	rte_trace_point_emit_u64(ids);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_xstats_get_names,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id,
+		struct rte_eth_xstat_name *xstats_names,
+		unsigned int size, int cnt_used_entries),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_string(xstats_names->name);
+	rte_trace_point_emit_u32(size);
+	rte_trace_point_emit_int(cnt_used_entries);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_xstats_get_by_id,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, const uint64_t *ids,
+		uint64_t *values, unsigned int size),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_ptr(ids);
+	rte_trace_point_emit_ptr(values);
+	rte_trace_point_emit_u32(size);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_xstats_get,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, struct rte_eth_xstat xstats,
+		int i),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u64(xstats.id);
+	rte_trace_point_emit_u64(xstats.value);
+	rte_trace_point_emit_u32(i);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_eth_trace_xstats_reset,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, int ret),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_int(ret);
+)
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/ethdev/rte_ethdev_trace_fp_burst.h b/lib/ethdev/rte_ethdev_trace_fp_burst.h
new file mode 100644
index 0000000000..899b4ed070
--- /dev/null
+++ b/lib/ethdev/rte_ethdev_trace_fp_burst.h
@@ -0,0 +1,44 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2022 Marvell International Ltd.
+ */
+
+#ifndef _RTE_ETHDEV_TRACE_FP_BURST_H_
+#define _RTE_ETHDEV_TRACE_FP_BURST_H_
+
+/**
+ * @file
+ *
+ * API for ethdev burst trace support
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_trace_point.h>
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_rx_burst,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
+		void **pkt_tbl, uint16_t nb_rx),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(queue_id);
+	rte_trace_point_emit_ptr(pkt_tbl);
+	rte_trace_point_emit_u16(nb_rx);
+)
+
+RTE_TRACE_POINT_FP(
+	rte_ethdev_trace_tx_burst,
+	RTE_TRACE_POINT_ARGS(uint16_t port_id, uint16_t queue_id,
+		void **pkts_tbl, uint16_t nb_pkts),
+	rte_trace_point_emit_u16(port_id);
+	rte_trace_point_emit_u16(queue_id);
+	rte_trace_point_emit_ptr(pkts_tbl);
+	rte_trace_point_emit_u16(nb_pkts);
+)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHDEV_TRACE_FP_BURST_H_ */
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 17201fbe0f..c7ba2e3dc8 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -298,6 +298,72 @@  EXPERIMENTAL {
 	rte_flow_get_q_aged_flows;
 	rte_mtr_meter_policy_get;
 	rte_mtr_meter_profile_get;
+
+	# added in 23.03
+	__rte_eth_trace_allmulticast_disable;
+	__rte_eth_trace_allmulticast_enable;
+	__rte_eth_trace_allmulticast_get;
+	__rte_eth_trace_call_rx_callbacks;
+	__rte_eth_trace_call_tx_callbacks;
+	__rte_eth_trace_find_next;
+	__rte_eth_trace_find_next_of;
+	__rte_eth_trace_find_next_owned_by;
+	__rte_eth_trace_find_next_sibling;
+	__rte_eth_trace_hairpin_bind;
+	__rte_eth_trace_hairpin_get_peer_ports;
+	__rte_eth_trace_hairpin_unbind;
+	__rte_eth_trace_iterator_cleanup;
+	__rte_eth_trace_iterator_init;
+	__rte_eth_trace_iterator_next;
+	__rte_eth_trace_link_get;
+	__rte_eth_trace_link_get_nowait;
+	__rte_eth_trace_link_speed_to_str;
+	__rte_eth_trace_link_to_str;
+	__rte_eth_trace_promiscuous_disable;
+	__rte_eth_trace_promiscuous_enable;
+	__rte_eth_trace_promiscuous_get;
+	__rte_eth_trace_rx_hairpin_queue_setup;
+	__rte_eth_trace_speed_bitflag;
+	__rte_eth_trace_stats_get;
+	__rte_eth_trace_stats_reset;
+	__rte_eth_trace_tx_buffer_count_callback;
+	__rte_eth_trace_tx_buffer_drop_callback;
+	__rte_eth_trace_tx_buffer_init;
+	__rte_eth_trace_tx_buffer_set_err_callback;
+	__rte_eth_trace_tx_done_cleanup;
+	__rte_eth_trace_tx_hairpin_queue_setup;
+	__rte_eth_trace_xstats_get;
+	__rte_eth_trace_xstats_get_by_id;
+	__rte_eth_trace_xstats_get_id_by_name;
+	__rte_eth_trace_xstats_get_names;
+	__rte_eth_trace_xstats_get_names_by_id;
+	__rte_eth_trace_xstats_reset;
+	__rte_ethdev_trace_capability_name;
+	__rte_ethdev_trace_count_avail;
+	__rte_ethdev_trace_count_total;
+	__rte_ethdev_trace_fw_version_get;
+	__rte_ethdev_trace_get_name_by_port;
+	__rte_ethdev_trace_get_port_by_name;
+	__rte_ethdev_trace_get_sec_ctx;
+	__rte_ethdev_trace_is_removed;
+	__rte_ethdev_trace_is_valid_port;
+	__rte_ethdev_trace_owner_delete;
+	__rte_ethdev_trace_owner_get;
+	__rte_ethdev_trace_owner_new;
+	__rte_ethdev_trace_owner_set;
+	__rte_ethdev_trace_owner_unset;
+	__rte_ethdev_trace_reset;
+	__rte_ethdev_trace_rx_offload_name;
+	__rte_ethdev_trace_rx_queue_start;
+	__rte_ethdev_trace_rx_queue_stop;
+	__rte_ethdev_trace_set_link_down;
+	__rte_ethdev_trace_set_link_up;
+	__rte_ethdev_trace_set_rx_queue_stats_mapping;
+	__rte_ethdev_trace_set_tx_queue_stats_mapping;
+	__rte_ethdev_trace_socket_id;
+	__rte_ethdev_trace_tx_offload_name;
+	__rte_ethdev_trace_tx_queue_start;
+	__rte_ethdev_trace_tx_queue_stop;
 };
 
 INTERNAL {