[2/4] examples/qos_sched: remove TX buffering

Message ID 20230203100533.10377-3-bruce.richardson@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Series: small fixes and improvements for qos_sched example

Checks

Context | Check | Description
ci/checkpatch | success | coding style OK

Commit Message

Bruce Richardson Feb. 3, 2023, 10:05 a.m. UTC
  Since the qos_sched app does batch dequeues from the QoS block, there is
little point in trying to batch further in the app - just send out the
full burst of packets that were received from the QoS block. With modern
CPUs and write-combining doorbells, the cost of doing smaller TX's is
reduced anyway for the worst case.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
 examples/qos_sched/app_thread.c | 94 ++++-----------------------------
 examples/qos_sched/main.c       | 12 -----
 examples/qos_sched/main.h       |  6 ---
 3 files changed, 9 insertions(+), 103 deletions(-)
  
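For illustration, the direct-TX idiom this patch adopts reduces to the
sketch below (tx_burst_or_drop is a hypothetical helper name used here
for explanation only, not code from the patch):

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	/* Hand the full burst dequeued from the QoS block straight to the
	 * NIC, and free whatever the hardware did not accept instead of
	 * buffering it for a later retry.
	 */
	static inline void
	tx_burst_or_drop(uint16_t port_id, uint16_t queue_id,
			struct rte_mbuf **mbufs, uint16_t nb_pkts)
	{
		uint16_t nb_tx = rte_eth_tx_burst(port_id, queue_id, mbufs, nb_pkts);

		/* free from the first unsent mbuf onwards */
		if (nb_tx < nb_pkts)
			rte_pktmbuf_free_bulk(&mbufs[nb_tx], nb_pkts - nb_tx);
	}

This matches the commit message's point: with write-combining doorbells
on modern CPUs, even sub-maximal bursts carry little extra per-call cost,
so dropping the intermediate buffering loses little in the worst case.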

Comments

Cristian Dumitrescu Feb. 17, 2023, 4:19 p.m. UTC | #1
> -----Original Message-----
> From: Richardson, Bruce <bruce.richardson@intel.com>
> Sent: Friday, February 3, 2023 10:06 AM
> To: dev@dpdk.org
> Cc: Singh, Jasvinder <jasvinder.singh@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>
> Subject: [PATCH 2/4] examples/qos_sched: remove TX buffering
> 
> Since the qos_sched app does batch dequeues from the QoS block, there is
> little point in trying to batch further in the app - just send out the
> full burst of packets that were received from the QoS block. With modern
> CPUs and write-combining doorbells, the cost of doing smaller TX's is
> reduced anyway for the worst case.
> 
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  examples/qos_sched/app_thread.c | 94 ++++-----------------------------
Acked-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
  
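A further detail worth noting in the diff below: the TX thread's ring
dequeue changes from rte_ring_sc_dequeue_bulk(), which is all-or-nothing
(it dequeues either the full requested count or nothing), to
rte_ring_sc_dequeue_burst(), which returns however many packets are
available, up to the requested count. A two-line contrast, assuming a
ring and mbuf array as in the app:

	/* bulk: returns burst_conf.qos_dequeue packets or 0 */
	n = rte_ring_sc_dequeue_bulk(ring, (void **)mbufs,
			burst_conf.qos_dequeue, NULL);

	/* burst: returns anything from 0 up to burst_conf.qos_dequeue */
	n = rte_ring_sc_dequeue_burst(ring, (void **)mbufs,
			burst_conf.qos_dequeue, NULL);

The burst variant lets the thread immediately transmit whatever is
available, which, together with freeing any unsent remainder, removes
the need for the drain-timeout logic the patch deletes.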

Patch

diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
index dbc878b553..1ea732aa91 100644
--- a/examples/qos_sched/app_thread.c
+++ b/examples/qos_sched/app_thread.c
@@ -104,82 +104,21 @@  app_rx_thread(struct thread_conf **confs)
 	}
 }
 
-
-
-/* Send the packet to an output interface
- * For performance reason function returns number of packets dropped, not sent,
- * so 0 means that all packets were sent successfully
- */
-
-static inline void
-app_send_burst(struct thread_conf *qconf)
-{
-	struct rte_mbuf **mbufs;
-	uint32_t n, ret;
-
-	mbufs = (struct rte_mbuf **)qconf->m_table;
-	n = qconf->n_mbufs;
-
-	do {
-		ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n);
-		/* we cannot drop the packets, so re-send */
-		/* update number of packets to be sent */
-		n -= ret;
-		mbufs = (struct rte_mbuf **)&mbufs[ret];
-	} while (n);
-}
-
-
-/* Send the packet to an output interface */
-static void
-app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt)
-{
-	uint32_t i, len;
-
-	len = qconf->n_mbufs;
-	for(i = 0; i < nb_pkt; i++) {
-		qconf->m_table[len] = mbufs[i];
-		len++;
-		/* enough pkts to be sent */
-		if (unlikely(len == burst_conf.tx_burst)) {
-			qconf->n_mbufs = len;
-			app_send_burst(qconf);
-			len = 0;
-		}
-	}
-
-	qconf->n_mbufs = len;
-}
-
 void
 app_tx_thread(struct thread_conf **confs)
 {
 	struct rte_mbuf *mbufs[burst_conf.qos_dequeue];
 	struct thread_conf *conf;
 	int conf_idx = 0;
-	int retval;
-	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+	int nb_pkts;
 
 	while ((conf = confs[conf_idx])) {
-		retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs,
+		nb_pkts = rte_ring_sc_dequeue_burst(conf->tx_ring, (void **)mbufs,
 					burst_conf.qos_dequeue, NULL);
-		if (likely(retval != 0)) {
-			app_send_packets(conf, mbufs, burst_conf.qos_dequeue);
-
-			conf->counter = 0; /* reset empty read loop counter */
-		}
-
-		conf->counter++;
-
-		/* drain ring and TX queues */
-		if (unlikely(conf->counter > drain_tsc)) {
-			/* now check is there any packets left to be transmitted */
-			if (conf->n_mbufs != 0) {
-				app_send_burst(conf);
-
-				conf->n_mbufs = 0;
-			}
-			conf->counter = 0;
+		if (likely(nb_pkts != 0)) {
+			uint16_t nb_tx = rte_eth_tx_burst(conf->tx_port, 0, mbufs, nb_pkts);
+			if (nb_pkts != nb_tx)
+				rte_pktmbuf_free_bulk(&mbufs[nb_tx], nb_pkts - nb_tx);
 		}
 
 		conf_idx++;
@@ -230,7 +169,6 @@  app_mixed_thread(struct thread_conf **confs)
 	struct rte_mbuf *mbufs[burst_conf.ring_burst];
 	struct thread_conf *conf;
 	int conf_idx = 0;
-	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
 
 	while ((conf = confs[conf_idx])) {
 		uint32_t nb_pkt;
@@ -250,23 +188,9 @@  app_mixed_thread(struct thread_conf **confs)
 		nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
 					burst_conf.qos_dequeue);
 		if (likely(nb_pkt > 0)) {
-			app_send_packets(conf, mbufs, nb_pkt);
-
-			conf->counter = 0; /* reset empty read loop counter */
-		}
-
-		conf->counter++;
-
-		/* drain ring and TX queues */
-		if (unlikely(conf->counter > drain_tsc)) {
-
-			/* now check is there any packets left to be transmitted */
-			if (conf->n_mbufs != 0) {
-				app_send_burst(conf);
-
-				conf->n_mbufs = 0;
-			}
-			conf->counter = 0;
+			uint16_t nb_tx = rte_eth_tx_burst(conf->tx_port, 0, mbufs, nb_pkt);
+			if (nb_tx != nb_pkt)
+				rte_pktmbuf_free_bulk(&mbufs[nb_tx], nb_pkt - nb_tx);
 		}
 
 		conf_idx++;
diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c
index dc6a17a646..b3c2c9ef23 100644
--- a/examples/qos_sched/main.c
+++ b/examples/qos_sched/main.c
@@ -105,12 +105,6 @@  app_main_loop(__rte_unused void *dummy)
 	}
 	else if (mode == (APP_TX_MODE | APP_WT_MODE)) {
 		for (i = 0; i < wt_idx; i++) {
-			wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *)
-					* burst_conf.tx_burst, RTE_CACHE_LINE_SIZE);
-
-			if (wt_confs[i]->m_table == NULL)
-				rte_panic("flow %u unable to allocate memory buffer\n", i);
-
 			RTE_LOG(INFO, APP,
 				"flow %u lcoreid %u sched+write port %u\n",
 					i, lcore_id, wt_confs[i]->tx_port);
@@ -120,12 +114,6 @@  app_main_loop(__rte_unused void *dummy)
 	}
 	else if (mode == APP_TX_MODE) {
 		for (i = 0; i < tx_idx; i++) {
-			tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *)
-					* burst_conf.tx_burst, RTE_CACHE_LINE_SIZE);
-
-			if (tx_confs[i]->m_table == NULL)
-				rte_panic("flow %u unable to allocate memory buffer\n", i);
-
 			RTE_LOG(INFO, APP, "flow%u lcoreid%u write port%u\n",
 					i, lcore_id, tx_confs[i]->tx_port);
 		}
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
index 76a68f585f..b9c301483a 100644
--- a/examples/qos_sched/main.h
+++ b/examples/qos_sched/main.h
@@ -37,8 +37,6 @@  extern "C" {
 #define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
 #define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */
 
-#define BURST_TX_DRAIN_US 100
-
 #ifndef APP_MAX_LCORE
 #if (RTE_MAX_LCORE > 64)
 #define APP_MAX_LCORE 64
@@ -75,10 +73,6 @@  struct thread_stat
 
 struct thread_conf
 {
-	uint32_t counter;
-	uint32_t n_mbufs;
-	struct rte_mbuf **m_table;
-
 	uint16_t rx_port;
 	uint16_t tx_port;
 	uint16_t rx_queue;