[v5] app/testpmd: txonly multiflow port change support

Message ID 20230421232022.342081-1-joshwash@google.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series [v5] app/testpmd: txonly multiflow port change support

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-unit-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Functional success Functional PASS

Commit Message

Joshua Washington April 21, 2023, 11:20 p.m. UTC
  Google cloud routes traffic using IP addresses without the support of MAC
addresses, so changing source IP address for txonly-multi-flow can have
negative performance implications for net/gve when using testpmd. This
patch updates txonly multiflow mode to modify source ports instead of
source IP addresses.

The change can be tested with the following command:
dpdk-testpmd -- --forward-mode=txonly --txonly-multi-flow \
    --tx-ip=<SRC>,<DST>

Signed-off-by: Joshua Washington <joshwash@google.com>
Reviewed-by: Rushil Gupta <rushilg@google.com>
---
 app/test-pmd/txonly.c | 39 +++++++++++++++++++++++----------------
 1 file changed, 23 insertions(+), 16 deletions(-)
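
For reference, the source-port pattern produced by the patch can be reproduced with a small standalone sketch (the lcore ID and iteration count below are made up for illustration; this is not testpmd code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t src_var = 0;		/* stands in for RTE_PER_LCORE(_src_var) */
	unsigned int lcore_id = 3;	/* made-up lcore ID */

	for (int i = 0; i < 5; i++) {
		uint16_t port = ((((src_var++) % (0xFF - 0xC0) + 0xC0) & 0xFF) << 8)
				+ lcore_id;
		/* prints 0xc003, 0xc103, 0xc203, ...: the high byte varies per
		 * packet, the low byte carries the lcore ID */
		printf("src_port = 0x%04x\n", port);
	}
	return 0;
}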
  

Comments

Joshua Washington April 24, 2023, 5:55 p.m. UTC | #1
After updating the patch, it seems that the `lcores_autotest` unit test now
times out on Windows Server 2019. I looked at the test logs, but they were
identical as far as I could tell, with the timed out test even printing "Test
OK" to stdout. Is this a flake? Or is there any other way to get extra
information about why the test timed out or run the test with extra
debugging information?

Thanks,
Josh

On Fri, Apr 21, 2023 at 4:20 PM Joshua Washington <joshwash@google.com>
wrote:

> Google cloud routes traffic using IP addresses without the support of MAC
> addresses, so changing source IP address for txonly-multi-flow can have
> negative performance implications for net/gve when using testpmd. This
> patch updates txonly multiflow mode to modify source ports instead of
> source IP addresses.
>
> The change can be tested with the following command:
> dpdk-testpmd -- --forward-mode=txonly --txonly-multi-flow \
>     --tx-ip=<SRC>,<DST>
>
> Signed-off-by: Joshua Washington <joshwash@google.com>
> Reviewed-by: Rushil Gupta <rushilg@google.com>
> ---
>  app/test-pmd/txonly.c | 39 +++++++++++++++++++++++----------------
>  1 file changed, 23 insertions(+), 16 deletions(-)
>
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index b3d6873104..f79e0e5d0b 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -56,7 +56,7 @@ uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2;
>  #define IP_DEFTTL  64   /* from RFC 1340. */
>
>  static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */
> -RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
> +RTE_DEFINE_PER_LCORE(uint8_t, _src_var); /**< Source port variation */
>  static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */
>
>  static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
> @@ -230,28 +230,35 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
>         copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
>         copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
>                         sizeof(struct rte_ether_hdr));
> +       copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> +                       sizeof(struct rte_ether_hdr) +
> +                       sizeof(struct rte_ipv4_hdr));
>         if (txonly_multi_flow) {
> -               uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> -               struct rte_ipv4_hdr *ip_hdr;
> -               uint32_t addr;
> +               uint16_t src_var = RTE_PER_LCORE(_src_var);
> +               struct rte_udp_hdr *udp_hdr;
> +               uint16_t port;
>
> -               ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> -                               struct rte_ipv4_hdr *,
> -                               sizeof(struct rte_ether_hdr));
> +               udp_hdr = rte_pktmbuf_mtod_offset(pkt,
> +                               struct rte_udp_hdr *,
> +                               sizeof(struct rte_ether_hdr) +
> +                               sizeof(struct rte_ipv4_hdr));
>                 /*
> -                * Generate multiple flows by varying IP src addr. This
> -                * enables packets are well distributed by RSS in
> +                * Generate multiple flows by varying UDP source port.
> +                * This enables packets are well distributed by RSS in
>                  * receiver side if any and txonly mode can be a decent
>                  * packet generator for developer's quick performance
>                  * regression test.
> +                *
> +                * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF)
> +                * will be used, with the least significant byte representing
> +                * the lcore ID. As such, the most significant byte will cycle
> +                * through 0xC0 and 0xFF.
>                  */
> -               addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
> -               ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> -               RTE_PER_LCORE(_ip_var) = ip_var;
> +               port = ((((src_var++) % (0xFF - 0xC0) + 0xC0) & 0xFF) << 8)
> +                               + rte_lcore_id();
> +               udp_hdr->src_port = rte_cpu_to_be_16(port);
> +               RTE_PER_LCORE(_src_var) = src_var;
>         }
> -       copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> -                       sizeof(struct rte_ether_hdr) +
> -                       sizeof(struct rte_ipv4_hdr));
>
>         if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) || txonly_multi_flow)
>                 update_pkt_header(pkt, pkt_len);
> @@ -393,7 +400,7 @@ pkt_burst_transmit(struct fwd_stream *fs)
>         nb_tx = common_fwd_stream_transmit(fs, pkts_burst, nb_pkt);
>
>         if (txonly_multi_flow)
> -               RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx;
> +               RTE_PER_LCORE(_src_var) -= nb_pkt - nb_tx;
>
>         if (unlikely(nb_tx < nb_pkt)) {
>                 if (verbose_level > 0 && fs->fwd_dropped == 0)
> --
> 2.40.0.634.g4ca3ef3211-goog
>
>
  
David Marchand April 25, 2023, 6:56 a.m. UTC | #2
Hello Joshua,

On Mon, Apr 24, 2023 at 7:56 PM Joshua Washington <joshwash@google.com> wrote:
>
> After updating the patch, it seems that the `lcores_autotest` unit test now times out on Windows Server 2019. I looked at the test logs, but they were identical as far as I could tell, with the timed out test even printing "Test OK" to stdout. Is this a flake? Or is there any other way to get extra information about why the test timed out or run the test with extra debugging information?

In general, the UNH dashboard provides an archive with logs for each
report, like for example:
https://lab.dpdk.org/results/dashboard/patchsets/26090/
https://lab.dpdk.org/results/dashboard/results/results-uploads/test_runs/b5a6a2743665426b937603587850aa6d/log_upload_file/2023/4/dpdk_5f34cc454df4_26090_2023-04-22_02-36-52_NA.zip

This timeout is something I have not noticed so far; Ccing the UNH guys for info.


Regarding your patch, the CI passes fine on the current main branch.
And there is no relation between testpmd and the EAL unit tests.
So this report is very likely a false positive.

I triggered a retest on your patch.
  
Singh, Aman Deep April 26, 2023, 7:24 a.m. UTC | #3
On 4/22/2023 4:50 AM, Joshua Washington wrote:
> Google cloud routes traffic using IP addresses without the support of MAC
> addresses, so changing source IP address for txonly-multi-flow can have
> negative performance implications for net/gve when using testpmd. This
> patch updates txonly multiflow mode to modify source ports instead of
> source IP addresses.
>
> The change can be tested with the following command:
> dpdk-testpmd -- --forward-mode=txonly --txonly-multi-flow \
>      --tx-ip=<SRC>,<DST>
>
> Signed-off-by: Joshua Washington <joshwash@google.com>
> Reviewed-by: Rushil Gupta <rushilg@google.com>

Acked-by: Aman Singh <aman.deep.singh@intel.com>

<snip>
  
Joshua Washington May 15, 2023, 6:11 p.m. UTC | #4
Does anything else need to be done before this patch can be merged? I'm
hoping to get it merged as part of the 23.07 release.

Thanks,
 Josh
  
Ferruh Yigit May 15, 2023, 10:26 p.m. UTC | #5
On 4/22/2023 12:20 AM, Joshua Washington wrote:
> Google cloud routes traffic using IP addresses without the support of MAC
> addresses, so changing source IP address for txonly-multi-flow can have
> negative performance implications for net/gve when using testpmd. This
> patch updates txonly multiflow mode to modify source ports instead of
> source IP addresses.
> 
> The change can be tested with the following command:
> dpdk-testpmd -- --forward-mode=txonly --txonly-multi-flow \
>     --tx-ip=<SRC>,<DST>
> 
> Signed-off-by: Joshua Washington <joshwash@google.com>
> Reviewed-by: Rushil Gupta <rushilg@google.com>

Hi Joshua, Aman,

The reason to support multi-flow in Tx-only mode is to test RSS on the
Rx side.
When used in a guest, in a hypervisor infrastructure, what is the use case
for multi-flow, why not use the default single IP?

And if there is a need to support multi-flow without updating the source IP,
what about supporting various modes as "--txonly-multi-flow=XXX", where
XXX can be src_ip, dst_ip, src_port or dst_port?
If possible, keeping the parameterless '--txonly-multi-flow' the same as the
current usage (src_ip) for backward compatibility.

This would extend the testing capability and let different platforms select
a different config based on their needs.
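
A rough sketch of how such a mode argument could look (hypothetical code, not existing testpmd internals; the enum, variable and function names below are invented for illustration):

/* Hypothetical sketch only: one way a mode argument for --txonly-multi-flow
 * could be represented and parsed. Names are invented and do not exist in
 * testpmd today. */
#include <string.h>

enum txonly_flow_mode {
	TXONLY_FLOW_SRC_IP,	/* current behaviour, default for the bare flag */
	TXONLY_FLOW_DST_IP,
	TXONLY_FLOW_SRC_PORT,
	TXONLY_FLOW_DST_PORT,
};

static enum txonly_flow_mode txonly_flow_mode = TXONLY_FLOW_SRC_IP;

static int
parse_txonly_multi_flow(const char *arg)
{
	if (arg == NULL || strcmp(arg, "src_ip") == 0)
		txonly_flow_mode = TXONLY_FLOW_SRC_IP;	/* backward compatible */
	else if (strcmp(arg, "dst_ip") == 0)
		txonly_flow_mode = TXONLY_FLOW_DST_IP;
	else if (strcmp(arg, "src_port") == 0)
		txonly_flow_mode = TXONLY_FLOW_SRC_PORT;
	else if (strcmp(arg, "dst_port") == 0)
		txonly_flow_mode = TXONLY_FLOW_DST_PORT;
	else
		return -1;	/* unknown mode */
	return 0;
}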
  
Joshua Washington May 16, 2023, 5:32 p.m. UTC | #6
Hello,

I believe my original solution was something closer to what you are
suggesting for backward compatibility. I originally had a flag that enabled
changing the source port instead of the source IP address, but I received
feedback from Stephen that adding an extra flag was complicating things too
much.

On a VM, the purpose of using multi-flow is similar to that of bare metal:
to test RSS on the RX side. However, generating traffic by changing the
source IP address can cause inconsistencies in performance due to
protections in cloud infrastructure against sending packets from a source
IP address other than the one provisioned for the VM. Changing the source
UDP port to test RSS should be functionally equivalent while allowing VMs
to send traffic from a single source IP address.
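
As a rough illustration of that equivalence (standalone sketch, not part of the patch; it uses DPDK's software Toeplitz helper rte_softrss() from rte_thash.h, and the key, addresses and ports below are placeholders):

#include <stdio.h>
#include <stdint.h>
#include <rte_ip.h>
#include <rte_thash.h>

static uint8_t rss_key[40] = {	/* default Toeplitz key, 40 bytes */
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};

int main(void)
{
	struct rte_ipv4_tuple tuple = {
		.src_addr = RTE_IPV4(198, 18, 0, 1),	/* fixed source IP */
		.dst_addr = RTE_IPV4(198, 18, 0, 2),
		.dport = 9,				/* placeholder destination port */
	};

	for (uint16_t sport = 0xC000; sport < 0xC004; sport++) {
		tuple.sport = sport;
		uint32_t hash = rte_softrss((uint32_t *)&tuple,
					    RTE_THASH_V4_L4_LEN, rss_key);
		/* a different source port yields a different RSS hash, so the
		 * receiver still spreads the flows across queues */
		printf("sport 0x%04x -> RSS hash 0x%08x\n", sport, hash);
	}
	return 0;
}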

If everyone agrees that adding --txonly-multi-flow as an option as well as
keeping the flag is an acceptable way of moving forward, I can do that.

Thanks,
Josh
  
Ferruh Yigit May 17, 2023, 10:10 a.m. UTC | #7
On 5/16/2023 6:32 PM, Joshua Washington wrote:
> Hello,
> 
> I believe my original solution was something closer to what you are
> suggesting for backward compatibility. I originally had a flag that
> enabled changing the source port instead of the source IP address, but I
> received feedback from Stephen that adding an extra flag was complicating
> things too much.
> 
> On a VM, the purpose of using multi-flow is similar to that of bare
> metal: to test RSS on the RX side. However, generating traffic by
> changing the source IP address can cause inconsistencies in performance
> due to protections in cloud infrastructure against sending packets from a
> source IP address other than the one provisioned for the VM. Changing the
> source UDP port to test RSS should be functionally equivalent while
> allowing VMs to send traffic from a single source IP address.
> 
> If everyone agrees that adding --txonly-multi-flow as an option as well
> as keeping the flag is an acceptable way of moving forward, I can do that.
> 

Hi Joshua,

I missed the first version of the patch and the comment there. Thinking
twice, the comment has a point; I was thinking some users may do RSS based
on IP, but probably all support RSS based on UDP too. Better to keep it simple.

Let's proceed with the latest version, I will put some comments on it.

Thanks,
Ferruh
  
Ferruh Yigit May 17, 2023, 10:34 a.m. UTC | #8
On 4/22/2023 12:20 AM, Joshua Washington wrote:
> Google cloud routes traffic using IP addresses without the support of MAC
> addresses, so changing source IP address for txonly-multi-flow can have
> negative performance implications for net/gve when using testpmd. This
> patch updates txonly multiflow mode to modify source ports instead of
> source IP addresses.
> 
> The change can be tested with the following command:
> dpdk-testpmd -- --forward-mode=txonly --txonly-multi-flow \
>     --tx-ip=<SRC>,<DST>
> 
> Signed-off-by: Joshua Washington <joshwash@google.com>
> Reviewed-by: Rushil Gupta <rushilg@google.com>
> ---
>  app/test-pmd/txonly.c | 39 +++++++++++++++++++++++----------------
>  1 file changed, 23 insertions(+), 16 deletions(-)
> 
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
> index b3d6873104..f79e0e5d0b 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -56,7 +56,7 @@ uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2;
>  #define IP_DEFTTL  64   /* from RFC 1340. */
>  
>  static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */
> -RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
> +RTE_DEFINE_PER_LCORE(uint8_t, _src_var); /**< Source port variation */

What about '_src_port_var' as variable name?

>  static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */
>  
>  static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
> @@ -230,28 +230,35 @@ pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
>  	copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
>  	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
>  			sizeof(struct rte_ether_hdr));
> +	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> +			sizeof(struct rte_ether_hdr) +
> +			sizeof(struct rte_ipv4_hdr));
>  	if (txonly_multi_flow) {
> -		uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
> -		struct rte_ipv4_hdr *ip_hdr;
> -		uint32_t addr;
> +		uint16_t src_var = RTE_PER_LCORE(_src_var);
> +		struct rte_udp_hdr *udp_hdr;
> +		uint16_t port;

'port' is used in testpmd for port_id in many places, what do you think
about renaming the variable to 'src_port'?

>  
> -		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
> -				struct rte_ipv4_hdr *,
> -				sizeof(struct rte_ether_hdr));
> +		udp_hdr = rte_pktmbuf_mtod_offset(pkt,
> +				struct rte_udp_hdr *,
> +				sizeof(struct rte_ether_hdr) +
> +				sizeof(struct rte_ipv4_hdr));
>  		/*
> -		 * Generate multiple flows by varying IP src addr. This
> -		 * enables packets are well distributed by RSS in
> +		 * Generate multiple flows by varying UDP source port.
> +		 * This enables packets are well distributed by RSS in
>  		 * receiver side if any and txonly mode can be a decent
>  		 * packet generator for developer's quick performance
>  		 * regression test.
> +		 *
> +		 * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF)
> +		 * will be used, with the least significant byte representing
> +		 * the lcore ID. As such, the most significant byte will cycle
> +		 * through 0xC0 and 0xFF.
>  		 */
> -		addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
> -		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
> -		RTE_PER_LCORE(_ip_var) = ip_var;
> +		port = ((((src_var++) % (0xFF - 0xC0) + 0xC0) & 0xFF) << 8)
> +				+ rte_lcore_id();

To prevent '%' in the datapath, I think the following does the same thing:
port = ((src_var++ | 0xC0) << 8) + rte_lcore_id();

Although it is not a big deal, in the original version you never get 0xFFXX
values:
3E % 3F + C0 = FE
3F % 3F + C0 = C0
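
A quick standalone comparison of the two expressions (illustrative only; the lcore ID is made up):

/* Illustrative only: compares the patch's modulo expression with the
 * OR-based variant suggested above. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int lcore_id = 0;
	uint16_t a = 0, b = 0;	/* per-lcore counters */

	for (int i = 0; i < 0x42; i++) {
		uint16_t mod_port = ((((a++) % (0xFF - 0xC0) + 0xC0) & 0xFF) << 8)
				+ lcore_id;
		uint16_t or_port = (uint16_t)(((b++ | 0xC0) << 8) + lcore_id);

		/* high byte: the modulo form cycles 0xC0..0xFE (never 0xFF),
		 * the OR form covers 0xC0..0xFF before wrapping */
		printf("mod 0x%02x  or 0x%02x\n", mod_port >> 8, or_port >> 8);
	}
	return 0;
}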
  

Patch

diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index b3d6873104..f79e0e5d0b 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -56,7 +56,7 @@  uint32_t tx_ip_dst_addr = (198U << 24) | (18 << 16) | (0 << 8) | 2;
 #define IP_DEFTTL  64   /* from RFC 1340. */
 
 static struct rte_ipv4_hdr pkt_ip_hdr; /**< IP header of transmitted packets. */
-RTE_DEFINE_PER_LCORE(uint8_t, _ip_var); /**< IP address variation */
+RTE_DEFINE_PER_LCORE(uint8_t, _src_var); /**< Source port variation */
 static struct rte_udp_hdr pkt_udp_hdr; /**< UDP header of tx packets. */
 
 static uint64_t timestamp_mask; /**< Timestamp dynamic flag mask */
@@ -230,28 +230,35 @@  pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
 	copy_buf_to_pkt(eth_hdr, sizeof(*eth_hdr), pkt, 0);
 	copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
 			sizeof(struct rte_ether_hdr));
+	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+			sizeof(struct rte_ether_hdr) +
+			sizeof(struct rte_ipv4_hdr));
 	if (txonly_multi_flow) {
-		uint8_t  ip_var = RTE_PER_LCORE(_ip_var);
-		struct rte_ipv4_hdr *ip_hdr;
-		uint32_t addr;
+		uint16_t src_var = RTE_PER_LCORE(_src_var);
+		struct rte_udp_hdr *udp_hdr;
+		uint16_t port;
 
-		ip_hdr = rte_pktmbuf_mtod_offset(pkt,
-				struct rte_ipv4_hdr *,
-				sizeof(struct rte_ether_hdr));
+		udp_hdr = rte_pktmbuf_mtod_offset(pkt,
+				struct rte_udp_hdr *,
+				sizeof(struct rte_ether_hdr) +
+				sizeof(struct rte_ipv4_hdr));
 		/*
-		 * Generate multiple flows by varying IP src addr. This
-		 * enables packets are well distributed by RSS in
+		 * Generate multiple flows by varying UDP source port.
+		 * This enables packets are well distributed by RSS in
 		 * receiver side if any and txonly mode can be a decent
 		 * packet generator for developer's quick performance
 		 * regression test.
+		 *
+		 * Only ports in the range 49152 (0xC000) and 65535 (0xFFFF)
+		 * will be used, with the least significant byte representing
+		 * the lcore ID. As such, the most significant byte will cycle
+		 * through 0xC0 and 0xFF.
 		 */
-		addr = (tx_ip_dst_addr | (ip_var++ << 8)) + rte_lcore_id();
-		ip_hdr->src_addr = rte_cpu_to_be_32(addr);
-		RTE_PER_LCORE(_ip_var) = ip_var;
+		port = ((((src_var++) % (0xFF - 0xC0) + 0xC0) & 0xFF) << 8)
+				+ rte_lcore_id();
+		udp_hdr->src_port = rte_cpu_to_be_16(port);
+		RTE_PER_LCORE(_src_var) = src_var;
 	}
-	copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
-			sizeof(struct rte_ether_hdr) +
-			sizeof(struct rte_ipv4_hdr));
 
 	if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND) || txonly_multi_flow)
 		update_pkt_header(pkt, pkt_len);
@@ -393,7 +400,7 @@  pkt_burst_transmit(struct fwd_stream *fs)
 	nb_tx = common_fwd_stream_transmit(fs, pkts_burst, nb_pkt);
 
 	if (txonly_multi_flow)
-		RTE_PER_LCORE(_ip_var) -= nb_pkt - nb_tx;
+		RTE_PER_LCORE(_src_var) -= nb_pkt - nb_tx;
 
 	if (unlikely(nb_tx < nb_pkt)) {
 		if (verbose_level > 0 && fs->fwd_dropped == 0)