[[PATCH,v3,4/4] net/pcap: support hardware Tx timestamps

Message ID 20200724202315.19533-5-patrick.keroulas@radio-canada.ca (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers
Series pdump HW Rx timestamps for mlx5 |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/travis-robot warning Travis build: failed

Commit Message

Patrick Keroulas July 24, 2020, 8:23 p.m. UTC
  From: Vivien Didelot <vivien.didelot@gmail.com>

When hardware timestamping is enabled on Rx path, system time should
no longer be used to calculate the timestamps when dumping packets.

Instead, use the value stored by the driver in mbuf->timestamp
and assume it is already converted to nanoseconds (otherwise the
application may edit the packet headers itself afterwards).

Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Patrick Keroulas <patrick.keroulas@radio-canada.ca>
---
 doc/guides/rel_notes/release_20_08.rst |  1 +
 drivers/net/pcap/rte_eth_pcap.c        | 32 +++++++++++++-----------
 lib/librte_pdump/rte_pdump.c           | 34 ++++++++++++++------------
 3 files changed, 37 insertions(+), 30 deletions(-)
  

Comments

Tom Barbette July 25, 2020, 8:35 a.m. UTC | #1
Nice patch!

However I would use a multiply and shift approach instead of a division 
in the fast path. When we did that it was really hurting the performance.

Cheers,

Tom

Le 24/07/2020 à 22:23, Patrick Keroulas a écrit :
> From: Vivien Didelot <vivien.didelot@gmail.com>
> 
> When hardware timestamping is enabled on Rx path, system time should
> no longer be used to calculate the timestamps when dumping packets.
> 
> Instead, use the value stored by the driver in mbuf->timestamp
> and assume it is already converted to nanoseconds (otherwise the
> application may edit the packet headers itself afterwards).
> 
> Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
> Signed-off-by: Patrick Keroulas <patrick.keroulas@radio-canada.ca>
> ---
>   doc/guides/rel_notes/release_20_08.rst |  1 +
>   drivers/net/pcap/rte_eth_pcap.c        | 32 +++++++++++++-----------
>   lib/librte_pdump/rte_pdump.c           | 34 ++++++++++++++------------
>   3 files changed, 37 insertions(+), 30 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_20_08.rst b/doc/guides/rel_notes/release_20_08.rst
> index 89822bcb8d..bda2f7567a 100644
> --- a/doc/guides/rel_notes/release_20_08.rst
> +++ b/doc/guides/rel_notes/release_20_08.rst
> @@ -105,6 +105,7 @@ New Features
>     Updated PCAP driver with new features and improvements, including:
>   
>     * Support software Tx nanosecond timestamps precision.
> +  * Support hardware Tx timestamps.
>   
>   * **Updated Broadcom bnxt driver.**
>   
> diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
> index 668cbd1fc7..45775951f7 100644
> --- a/drivers/net/pcap/rte_eth_pcap.c
> +++ b/drivers/net/pcap/rte_eth_pcap.c
> @@ -290,19 +290,23 @@ eth_null_rx(void *queue __rte_unused,
>   #define NSEC_PER_SEC	1000000000L
>   
>   static inline void
> -calculate_timestamp(struct timeval *ts) {
> -	uint64_t cycles;
> -	struct timeval cur_time;
> -
> -	cycles = rte_get_timer_cycles() - start_cycles;
> -	cur_time.tv_sec = cycles / hz;
> -	cur_time.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz;
> -
> -	ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
> -	ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
> -	if (ts->tv_usec >= NSEC_PER_SEC) {
> -		ts->tv_usec -= NSEC_PER_SEC;
> -		ts->tv_sec += 1;
> +calculate_timestamp(const struct rte_mbuf *mbuf, struct timeval *ts) {
> +	if (mbuf->ol_flags & PKT_RX_TIMESTAMP) {
> +		ts->tv_sec = mbuf->timestamp / NSEC_PER_SEC;
> +		ts->tv_usec = mbuf->timestamp % NSEC_PER_SEC;
> +	} else {
> +		uint64_t cycles = rte_get_timer_cycles() - start_cycles;
> +		struct timeval cur_time = {
> +			.tv_sec = cycles / hz,
> +			.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz,
> +		};
> +
> +		ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
> +		ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
> +		if (ts->tv_usec >= NSEC_PER_SEC) {
> +			ts->tv_usec -= NSEC_PER_SEC;
> +			ts->tv_sec += 1;
> +		}
>   	}
>   }
>   
> @@ -339,7 +343,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   			caplen = sizeof(temp_data);
>   		}
>   
> -		calculate_timestamp(&header.ts);
> +		calculate_timestamp(mbuf, &header.ts);
>   		header.len = len;
>   		header.caplen = caplen;
>   		/* rte_pktmbuf_read() returns a pointer to the data directly
> diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
> index 8fd7e13af5..e8963e9dd8 100644
> --- a/lib/librte_pdump/rte_pdump.c
> +++ b/lib/librte_pdump/rte_pdump.c
> @@ -71,21 +71,6 @@ static struct pdump_rxtx_cbs {
>   } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
>   tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
>   
> -static uint64_t hz;
> -static uint64_t start_time;
> -static uint64_t start_cycles;
> -
> -static inline void
> -pdump_ts_to_ns(struct rte_mbuf **pkts, uint16_t nb_pkts)
> -{
> -	unsigned int i;
> -
> -	for (i = 0; i < nb_pkts; i++) {
> -		if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) && hz)
> -			pkts[i]->timestamp = start_time +
> -				 (pkts[i]->timestamp - start_cycles) * NS_PER_S / hz;
> -	}
> -}
>   
>   static inline void
>   pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
> @@ -118,6 +103,23 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
>   	}
>   }
>   
> +#define NSEC_PER_SEC 1000000000L
> +static uint64_t hz;
> +static uint64_t start_time;
> +static uint64_t start_cycles;
> +
> +static inline void
> +pdump_ts_to_ns(struct rte_mbuf **pkts, uint16_t nb_pkts)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < nb_pkts; i++) {
> +		if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) && hz)
> +			pkts[i]->timestamp = start_time +
> +				 (pkts[i]->timestamp - start_cycles) * NSEC_PER_SEC / hz;
> +	}
> +}
> +
>   static uint16_t
>   pdump_rx(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
>   	struct rte_mbuf **pkts, uint16_t nb_pkts,
> @@ -154,7 +156,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
>   			rte_eth_read_clock(port, &start_cycles);
>   			rte_eth_get_clock_freq(port, &hz);
>   			gettimeofday(&now, NULL);
> -			start_time = now.tv_sec * NS_PER_S + now.tv_usec * 1000;
> +			start_time = now.tv_sec * NSEC_PER_SEC + now.tv_usec * 1000;
>   
>   			if (cbs->cb) {
>   				PDUMP_LOG(ERR,
>
  
Stephen Hemminger July 25, 2020, 11:51 a.m. UTC | #2
On Sat, 25 Jul 2020 10:35:48 +0200
Tom Barbette <barbette@kth.se> wrote:

> Nice patch!
> 
> However I would use a multiply and shift approach instead of a division 
> in the fast path. When we did that it was really hurting the performance.


See rte_reciprocal
  
Stephen Hemminger July 26, 2020, 10:17 p.m. UTC | #3
On Fri, 24 Jul 2020 16:23:15 -0400
Patrick Keroulas <patrick.keroulas@radio-canada.ca> wrote:

> From: Vivien Didelot <vivien.didelot@gmail.com>
> 
> When hardware timestamping is enabled on Rx path, system time should
> no longer be used to calculate the timestamps when dumping packets.
> 
> Instead, use the value stored by the driver in mbuf->timestamp
> and assume it is already converted to nanoseconds (otherwise the
> application may edit the packet headers itself afterwards).
> 
> Signed-off-by: Vivien Didelot <vivien.didelot@gmail.com>
> Signed-off-by: Patrick Keroulas <patrick.keroulas@radio-canada.ca>
> ---
>  doc/guides/rel_notes/release_20_08.rst |  1 +
>  drivers/net/pcap/rte_eth_pcap.c        | 32 +++++++++++++-----------
>  lib/librte_pdump/rte_pdump.c           | 34 ++++++++++++++------------
>  3 files changed, 37 insertions(+), 30 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_20_08.rst b/doc/guides/rel_notes/release_20_08.rst
> index 89822bcb8d..bda2f7567a 100644
> --- a/doc/guides/rel_notes/release_20_08.rst
> +++ b/doc/guides/rel_notes/release_20_08.rst
> @@ -105,6 +105,7 @@ New Features
>    Updated PCAP driver with new features and improvements, including:
>  
>    * Support software Tx nanosecond timestamps precision.
> +  * Support hardware Tx timestamps.
>  
>  * **Updated Broadcom bnxt driver.**
>  
> diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
> index 668cbd1fc7..45775951f7 100644
> --- a/drivers/net/pcap/rte_eth_pcap.c
> +++ b/drivers/net/pcap/rte_eth_pcap.c
> @@ -290,19 +290,23 @@ eth_null_rx(void *queue __rte_unused,
>  #define NSEC_PER_SEC	1000000000L
>  
>  static inline void
> -calculate_timestamp(struct timeval *ts) {
> -	uint64_t cycles;
> -	struct timeval cur_time;
> -
> -	cycles = rte_get_timer_cycles() - start_cycles;
> -	cur_time.tv_sec = cycles / hz;
> -	cur_time.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz;
> -
> -	ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
> -	ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
> -	if (ts->tv_usec >= NSEC_PER_SEC) {
> -		ts->tv_usec -= NSEC_PER_SEC;
> -		ts->tv_sec += 1;
> +calculate_timestamp(const struct rte_mbuf *mbuf, struct timeval *ts) {

DPDK style is to have { on seperate line

> +	if (mbuf->ol_flags & PKT_RX_TIMESTAMP) {
> +		ts->tv_sec = mbuf->timestamp / NSEC_PER_SEC;
> +		ts->tv_usec = mbuf->timestamp % NSEC_PER_SEC;
> +	} else {
> +		uint64_t cycles = rte_get_timer_cycles() - start_cycles;
> +		struct timeval cur_time = {
> +			.tv_sec = cycles / hz,
> +			.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz,
> +		};
> +
> +		ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
> +		ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
> +		if (ts->tv_usec >= NSEC_PER_SEC) {
> +			ts->tv_usec -= NSEC_PER_SEC;
> +			ts->tv_sec += 1;
> +		}
>  	}
>  }
>  
> @@ -339,7 +343,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  			caplen = sizeof(temp_data);
>  		}
>  
> -		calculate_timestamp(&header.ts);
> +		calculate_timestamp(mbuf, &header.ts);

On Tx pcap you don't want the hardware RX timestamp.
You should update mbuf with software timestamp.

>  		header.len = len;
>  		header.caplen = caplen;
>  		/* rte_pktmbuf_read() returns a pointer to the data directly
> diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
> index 8fd7e13af5..e8963e9dd8 100644
> --- a/lib/librte_pdump/rte_pdump.c
> +++ b/lib/librte_pdump/rte_pdump.c
> @@ -71,21 +71,6 @@ static struct pdump_rxtx_cbs {
>  } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
>  tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
>  
> -static uint64_t hz;
> -static uint64_t start_time;
> -static uint64_t start_cycles;
> -
> -static inline void
> -pdump_ts_to_ns(struct rte_mbuf **pkts, uint16_t nb_pkts)
> -{
> -	unsigned int i;
> -
> -	for (i = 0; i < nb_pkts; i++) {
> -		if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) && hz)
> -			pkts[i]->timestamp = start_time +
> -				 (pkts[i]->timestamp - start_cycles) * NS_PER_S / hz;
> -	}
> -}
>  
>  static inline void
>  pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
> @@ -118,6 +103,23 @@ pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
>  	}
>  }
>  
> +#define NSEC_PER_SEC 1000000000L
> +static uint64_t hz;
> +static uint64_t start_time;
> +static uint64_t start_cycles;
> +
> +static inline void
> +pdump_ts_to_ns(struct rte_mbuf **pkts, uint16_t nb_pkts)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < nb_pkts; i++) {
> +		if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) && hz)
> +			pkts[i]->timestamp = start_time +
> +				 (pkts[i]->timestamp - start_cycles) * NSEC_PER_SEC / hz;
> +	}
> +}
> +
>  static uint16_t
>  pdump_rx(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
>  	struct rte_mbuf **pkts, uint16_t nb_pkts,
> @@ -154,7 +156,7 @@ pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
>  			rte_eth_read_clock(port, &start_cycles);
>  			rte_eth_get_clock_freq(port, &hz);
>  			gettimeofday(&now, NULL);
> -			start_time = now.tv_sec * NS_PER_S + now.tv_usec * 1000;
> +			start_time = now.tv_sec * NSEC_PER_SEC + now.tv_usec * 1000;
>  
>  			if (cbs->cb) {
>  				PDUMP_LOG(ERR,


Actually using the current way through rte_eth_pcap adds a lot
of overhead. I dropped it when doing pcapng.

Perhaps we should work together on the pcapng version.
  

Patch

diff --git a/doc/guides/rel_notes/release_20_08.rst b/doc/guides/rel_notes/release_20_08.rst
index 89822bcb8d..bda2f7567a 100644
--- a/doc/guides/rel_notes/release_20_08.rst
+++ b/doc/guides/rel_notes/release_20_08.rst
@@ -105,6 +105,7 @@  New Features
   Updated PCAP driver with new features and improvements, including:
 
   * Support software Tx nanosecond timestamps precision.
+  * Support hardware Tx timestamps.
 
 * **Updated Broadcom bnxt driver.**
 
diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 668cbd1fc7..45775951f7 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -290,19 +290,23 @@  eth_null_rx(void *queue __rte_unused,
 #define NSEC_PER_SEC	1000000000L
 
 static inline void
-calculate_timestamp(struct timeval *ts) {
-	uint64_t cycles;
-	struct timeval cur_time;
-
-	cycles = rte_get_timer_cycles() - start_cycles;
-	cur_time.tv_sec = cycles / hz;
-	cur_time.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz;
-
-	ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
-	ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
-	if (ts->tv_usec >= NSEC_PER_SEC) {
-		ts->tv_usec -= NSEC_PER_SEC;
-		ts->tv_sec += 1;
+calculate_timestamp(const struct rte_mbuf *mbuf, struct timeval *ts) {
+	if (mbuf->ol_flags & PKT_RX_TIMESTAMP) {
+		ts->tv_sec = mbuf->timestamp / NSEC_PER_SEC;
+		ts->tv_usec = mbuf->timestamp % NSEC_PER_SEC;
+	} else {
+		uint64_t cycles = rte_get_timer_cycles() - start_cycles;
+		struct timeval cur_time = {
+			.tv_sec = cycles / hz,
+			.tv_usec = (cycles % hz) * NSEC_PER_SEC / hz,
+		};
+
+		ts->tv_sec = start_time.tv_sec + cur_time.tv_sec;
+		ts->tv_usec = start_time.tv_usec + cur_time.tv_usec;
+		if (ts->tv_usec >= NSEC_PER_SEC) {
+			ts->tv_usec -= NSEC_PER_SEC;
+			ts->tv_sec += 1;
+		}
 	}
 }
 
@@ -339,7 +343,7 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			caplen = sizeof(temp_data);
 		}
 
-		calculate_timestamp(&header.ts);
+		calculate_timestamp(mbuf, &header.ts);
 		header.len = len;
 		header.caplen = caplen;
 		/* rte_pktmbuf_read() returns a pointer to the data directly
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index 8fd7e13af5..e8963e9dd8 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -71,21 +71,6 @@  static struct pdump_rxtx_cbs {
 } rx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT],
 tx_cbs[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT];
 
-static uint64_t hz;
-static uint64_t start_time;
-static uint64_t start_cycles;
-
-static inline void
-pdump_ts_to_ns(struct rte_mbuf **pkts, uint16_t nb_pkts)
-{
-	unsigned int i;
-
-	for (i = 0; i < nb_pkts; i++) {
-		if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) && hz)
-			pkts[i]->timestamp = start_time +
-				 (pkts[i]->timestamp - start_cycles) * NS_PER_S / hz;
-	}
-}
 
 static inline void
 pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
@@ -118,6 +103,23 @@  pdump_copy(struct rte_mbuf **pkts, uint16_t nb_pkts, void *user_params)
 	}
 }
 
+#define NSEC_PER_SEC 1000000000L
+static uint64_t hz;
+static uint64_t start_time;
+static uint64_t start_cycles;
+
+static inline void
+pdump_ts_to_ns(struct rte_mbuf **pkts, uint16_t nb_pkts)
+{
+	unsigned int i;
+
+	for (i = 0; i < nb_pkts; i++) {
+		if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) && hz)
+			pkts[i]->timestamp = start_time +
+				 (pkts[i]->timestamp - start_cycles) * NSEC_PER_SEC / hz;
+	}
+}
+
 static uint16_t
 pdump_rx(uint16_t port __rte_unused, uint16_t qidx __rte_unused,
 	struct rte_mbuf **pkts, uint16_t nb_pkts,
@@ -154,7 +156,7 @@  pdump_register_rx_callbacks(uint16_t end_q, uint16_t port, uint16_t queue,
 			rte_eth_read_clock(port, &start_cycles);
 			rte_eth_get_clock_freq(port, &hz);
 			gettimeofday(&now, NULL);
-			start_time = now.tv_sec * NS_PER_S + now.tv_usec * 1000;
+			start_time = now.tv_sec * NSEC_PER_SEC + now.tv_usec * 1000;
 
 			if (cbs->cb) {
 				PDUMP_LOG(ERR,