[v3,7/8] app/testpmd: improve forwarding cache miss

Message ID 20210917080121.329373-8-xuemingl@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series: ethdev: introduce shared Rx queue

Checks

Context         Check     Description
ci/checkpatch   success   coding style OK

Commit Message

Xueming Li Sept. 17, 2021, 8:01 a.m. UTC
  To minimize cache misses, add the flags and burst size used in
forwarding to the stream structure, and move the condition tests in the
forwarding path to check the per-stream flags.

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
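Illustrative only, not part of the patch: a minimal, self-contained C
sketch of the pattern this change applies, where globals consulted in
the hot forwarding loop are copied once into the per-stream structure
at configuration time, so the data path reads a single, already-hot
cache line. The names (stream_set_common(), g_burst_size, ...) are
hypothetical stand-ins for testpmd's fwd_stream_set_common() and its
globals, and __builtin_expect() stands in for DPDK's unlikely().

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical globals standing in for testpmd's configuration. */
static uint16_t g_burst_size = 32;
static int g_record_stats = 1;

struct stream {
	uint16_t burst_size;          /* cached copy of g_burst_size */
	unsigned int record_stats:1;  /* cached copy of g_record_stats */
	uint64_t stats[64];
};

/* Done once at setup, mirroring fwd_stream_set_common(). */
static void
stream_set_common(struct stream *s)
{
	s->burst_size = g_burst_size;
	s->record_stats = !!g_record_stats;
}

/* Hot path: reads only the stream, never the scattered globals. */
static void
forward(struct stream *s)
{
	uint16_t i;

	for (i = 0; i < s->burst_size; i++)
		if (__builtin_expect(s->record_stats, 0)) /* unlikely() */
			s->stats[i % 64]++;
}

int
main(void)
{
	struct stream s = {0};

	stream_set_common(&s);
	forward(&s);
	printf("stats[0]=%" PRIu64 "\n", s.stats[0]);
	return 0;
}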
 app/test-pmd/config.c    | 18 ++++++++++++++----
 app/test-pmd/flowgen.c   |  6 +++---
 app/test-pmd/noisy_vnf.c |  2 +-
 app/test-pmd/testpmd.h   | 21 ++++++++++++---------
 app/test-pmd/txonly.c    |  8 ++++----
 5 files changed, 34 insertions(+), 21 deletions(-)
  

Patch

diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index 035247c33f..5cdf8fa082 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -3050,6 +3050,16 @@  fwd_topology_tx_port_get(portid_t rxp)
 	}
 }
 
+static void
+fwd_stream_set_common(struct fwd_stream *fs)
+{
+	fs->nb_pkt_per_burst = nb_pkt_per_burst;
+	fs->record_burst_stats = !!record_burst_stats;
+	fs->record_core_cycles = !!record_core_cycles;
+	fs->retry_enabled = !!retry_enabled;
+	fs->rxq_share = !!rxq_share;
+}
+
 static void
 simple_fwd_config_setup(void)
 {
@@ -3079,7 +3089,7 @@  simple_fwd_config_setup(void)
 				fwd_ports_ids[fwd_topology_tx_port_get(i)];
 		fwd_streams[i]->tx_queue  = 0;
 		fwd_streams[i]->peer_addr = fwd_streams[i]->tx_port;
-		fwd_streams[i]->retry_enabled = retry_enabled;
+		fwd_stream_set_common(fwd_streams[i]);
 	}
 }
 
@@ -3140,7 +3150,7 @@  rss_fwd_config_setup(void)
 		fs->tx_port = fwd_ports_ids[txp];
 		fs->tx_queue = rxq;
 		fs->peer_addr = fs->tx_port;
-		fs->retry_enabled = retry_enabled;
+		fwd_stream_set_common(fs);
 		rxp++;
 		if (rxp < nb_fwd_ports)
 			continue;
@@ -3255,7 +3265,7 @@  dcb_fwd_config_setup(void)
 				fs->tx_port = fwd_ports_ids[txp];
 				fs->tx_queue = txq + j % nb_tx_queue;
 				fs->peer_addr = fs->tx_port;
-				fs->retry_enabled = retry_enabled;
+				fwd_stream_set_common(fs);
 			}
 			fwd_lcores[lc_id]->stream_nb +=
 				rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue;
@@ -3326,7 +3336,7 @@  icmp_echo_config_setup(void)
 			fs->tx_port = fs->rx_port;
 			fs->tx_queue = rxq;
 			fs->peer_addr = fs->tx_port;
-			fs->retry_enabled = retry_enabled;
+			fwd_stream_set_common(fs);
 			if (verbose_level > 0)
 				printf("  stream=%d port=%d rxq=%d txq=%d\n",
 				       sm_id, fs->rx_port, fs->rx_queue,
diff --git a/app/test-pmd/flowgen.c b/app/test-pmd/flowgen.c
index aa45948b4c..c282f3bcb1 100644
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -97,12 +97,12 @@  flow_gen_stream(struct fwd_stream *fs, uint16_t nb_rx,
 	if (tx_offloads	& DEV_TX_OFFLOAD_MACSEC_INSERT)
 		ol_flags |= PKT_TX_MACSEC;
 
-	for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+	for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
 		if (!nb_pkt || !nb_clones) {
 			nb_clones = nb_pkt_flowgen_clones;
 			/* Logic limitation */
-			if (nb_clones > nb_pkt_per_burst)
-				nb_clones = nb_pkt_per_burst;
+			if (nb_clones > fs->nb_pkt_per_burst)
+				nb_clones = fs->nb_pkt_per_burst;
 
 			pkt = rte_mbuf_raw_alloc(mbp);
 			if (!pkt)
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
index 382a4c2aae..56bf6a4e70 100644
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -153,7 +153,7 @@  pkt_burst_noisy_vnf(struct fwd_stream *fs)
 	uint64_t now;
 
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
-			pkts_burst, nb_pkt_per_burst);
+			pkts_burst, fs->nb_pkt_per_burst);
 	inc_rx_burst_stats(fs, nb_rx);
 	if (unlikely(nb_rx == 0))
 		goto flush;
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 4792bef03b..3b8796a7a5 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -128,12 +128,17 @@  struct fwd_stream {
 	queueid_t  tx_queue;  /**< TX queue to send forwarded packets */
 	streamid_t peer_addr; /**< index of peer ethernet address of packets */
 
-	unsigned int retry_enabled;
+	uint16_t nb_pkt_per_burst;
+	unsigned int record_burst_stats:1;
+	unsigned int record_core_cycles:1;
+	unsigned int retry_enabled:1;
+	unsigned int rxq_share:1;
 
 	/* "read-write" results */
 	uint64_t rx_packets;  /**< received packets */
 	uint64_t tx_packets;  /**< received packets transmitted */
 	uint64_t fwd_dropped; /**< received packets not forwarded */
+	uint64_t core_cycles; /**< used for RX and TX processing */
 	uint64_t rx_bad_ip_csum ; /**< received packets has bad ip checksum */
 	uint64_t rx_bad_l4_csum ; /**< received packets has bad l4 checksum */
 	uint64_t rx_bad_outer_l4_csum;
@@ -141,7 +146,6 @@  struct fwd_stream {
 	uint64_t rx_bad_outer_ip_csum;
 	/**< received packets having bad outer ip checksum */
 	unsigned int gro_times;	/**< GRO operation times */
-	uint64_t     core_cycles; /**< used for RX and TX processing */
 	struct pkt_burst_stats rx_burst_stats;
 	struct pkt_burst_stats tx_burst_stats;
 	struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -750,28 +754,27 @@  port_pci_reg_write(struct rte_port *port, uint32_t reg_off, uint32_t reg_v)
 static inline void
 get_start_cycles(uint64_t *start_tsc)
 {
-	if (record_core_cycles)
-		*start_tsc = rte_rdtsc();
+	*start_tsc = rte_rdtsc();
 }
 
 static inline void
 get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
 {
-	if (record_core_cycles)
+	if (unlikely(fs->record_core_cycles))
 		fs->core_cycles += rte_rdtsc() - start_tsc;
 }
 
 static inline void
 inc_rx_burst_stats(struct fwd_stream *fs, uint16_t nb_rx)
 {
-	if (record_burst_stats)
+	if (unlikely(fs->record_burst_stats))
 		fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
 }
 
 static inline void
 inc_tx_burst_stats(struct fwd_stream *fs, uint16_t nb_tx)
 {
-	if (record_burst_stats)
+	if (unlikely(fs->record_burst_stats))
 		fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
 }
 
@@ -1032,13 +1035,13 @@  int update_jumbo_frame_offload(portid_t portid);
 static void                                                     \
 pkt_burst_fwd(struct fwd_stream *fs)                            \
 {                                                               \
-	struct rte_mbuf *pkts_burst[nb_pkt_per_burst];          \
+	struct rte_mbuf *pkts_burst[fs->nb_pkt_per_burst];      \
 	uint16_t nb_rx;                                         \
 	uint64_t start_tsc = 0;                                 \
 								\
 	get_start_cycles(&start_tsc);                           \
 	nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,     \
-			pkts_burst, nb_pkt_per_burst);          \
+			pkts_burst, fs->nb_pkt_per_burst);      \
 	inc_rx_burst_stats(fs, nb_rx);                          \
 	if (unlikely(nb_rx == 0))                               \
 		return;                                         \
diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c
index aed820f5d3..db6130421c 100644
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -367,8 +367,8 @@  pkt_burst_transmit(struct fwd_stream *fs)
 	eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
 
 	if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
-				nb_pkt_per_burst) == 0) {
-		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+				fs->nb_pkt_per_burst) == 0) {
+		for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
 			if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
 							&eth_hdr, vlan_tci,
 							vlan_tci_outer,
@@ -376,12 +376,12 @@  pkt_burst_transmit(struct fwd_stream *fs)
 							nb_pkt, fs))) {
 				rte_mempool_put_bulk(mbp,
 						(void **)&pkts_burst[nb_pkt],
-						nb_pkt_per_burst - nb_pkt);
+						fs->nb_pkt_per_burst - nb_pkt);
 				break;
 			}
 		}
 	} else {
-		for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+		for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
 			pkt = rte_mbuf_raw_alloc(mbp);
 			if (pkt == NULL)
 				break;