[v4,29/31] net/ena: support max large llq depth from the device

Message ID 20240312180716.8515-30-shaibran@amazon.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series net/ena: v2.9.0 driver release |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Brandes, Shai March 12, 2024, 6:07 p.m. UTC
From: Shai Brandes <shaibran@amazon.com>

Selected AWS instances from later generations enable
large LLQ by default, allowing the transmission of
packets with headers exceeding 96 bytes.

Due to the overall ENA memory BAR size limitation,
large LLQ has the side effect of halving the maximum
number of LLQ entries (from 1024 to 512).

ENA-Express, powered by AWS Scalable Reliable Datagram
(SRD) technology, requires a Tx queue with 1024 entries.
Selected AWS instances from upcoming generations will
have double the size of the ENA memory BAR, enabling ENA-Express
to work with a large LLQ of 1024 entries.

The initial default large LLQ size will remain 512.

Signed-off-by: Shai Brandes <shaibran@amazon.com>
Reviewed-by: Amit Bernstein <amitbern@amazon.com>
---
 doc/guides/rel_notes/release_24_03.rst        |  2 +
 .../net/ena/base/ena_defs/ena_admin_defs.h    |  4 +-
 drivers/net/ena/ena_ethdev.c                  | 38 ++++++++++++-------
 3 files changed, 29 insertions(+), 15 deletions(-)
  

Patch

diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst
index 58d092194e..bee2429ba0 100644
--- a/doc/guides/rel_notes/release_24_03.rst
+++ b/doc/guides/rel_notes/release_24_03.rst
@@ -109,6 +109,8 @@  New Features
   * Added support for sub-optimal configuration notifications from the device.
   * Restructured fast release of mbufs when RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE optimization is enabled.
   * Added `normal_llq_hdr` devarg that enforce normal llq header policy.
+  * Added support for LLQ header size recommendation from the device.
+  * Allowed large LLQ with 1024 entries when the device supports enlarged memory BAR.
 
 * **Updated Atomic Rules' Arkville driver.**
 
diff --git a/drivers/net/ena/base/ena_defs/ena_admin_defs.h b/drivers/net/ena/base/ena_defs/ena_admin_defs.h
index 2adce75ed3..cff6451c96 100644
--- a/drivers/net/ena/base/ena_defs/ena_admin_defs.h
+++ b/drivers/net/ena/base/ena_defs/ena_admin_defs.h
@@ -696,8 +696,8 @@  struct ena_admin_feature_llq_desc {
 	 */
 	uint8_t entry_size_recommended;
 
-	/* reserved */
-	uint8_t reserved1[2];
+	/* max depth of wide llq, or 0 for N/A */
+	uint16_t max_wide_llq_depth;
 
 	/* accelerated low latency queues requirement. driver needs to
 	 * support those requirements in order to use accelerated llq
diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c
index a640a3bc07..c7c2eef92f 100644
--- a/drivers/net/ena/ena_ethdev.c
+++ b/drivers/net/ena/ena_ethdev.c
@@ -42,6 +42,8 @@ 
 
 #define DECIMAL_BASE 10
 
+#define MAX_WIDE_LLQ_DEPTH_UNSUPPORTED 0
+
 /*
  * We should try to keep ENA_CLEANUP_BUF_SIZE lower than
  * RTE_MEMPOOL_CACHE_MAX_SIZE, so we can fit this in mempool local cache.
@@ -1067,7 +1069,7 @@  static int
 ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
 		       bool use_large_llq_hdr)
 {
-	struct ena_admin_feature_llq_desc *llq = &ctx->get_feat_ctx->llq;
+	struct ena_admin_feature_llq_desc *dev = &ctx->get_feat_ctx->llq;
 	struct ena_com_dev *ena_dev = ctx->ena_dev;
 	uint32_t max_tx_queue_size;
 	uint32_t max_rx_queue_size;
@@ -1082,7 +1084,7 @@  ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
 		if (ena_dev->tx_mem_queue_type ==
 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
-				llq->max_llq_depth);
+				dev->max_llq_depth);
 		} else {
 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
 				max_queue_ext->max_tx_sq_depth);
@@ -1102,7 +1104,7 @@  ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
 		if (ena_dev->tx_mem_queue_type ==
 		    ENA_ADMIN_PLACEMENT_POLICY_DEV) {
 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
-				llq->max_llq_depth);
+				dev->max_llq_depth);
 		} else {
 			max_tx_queue_size = RTE_MIN(max_tx_queue_size,
 				max_queues->max_sq_depth);
@@ -1118,18 +1120,28 @@  ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx,
 	max_rx_queue_size = rte_align32prevpow2(max_rx_queue_size);
 	max_tx_queue_size = rte_align32prevpow2(max_tx_queue_size);
 
-	if (use_large_llq_hdr) {
-		if ((llq->entry_size_ctrl_supported &
-		     ENA_ADMIN_LIST_ENTRY_SIZE_256B) &&
-		    (ena_dev->tx_mem_queue_type ==
-		     ENA_ADMIN_PLACEMENT_POLICY_DEV)) {
-			max_tx_queue_size /= 2;
-			PMD_INIT_LOG(INFO,
-				"Forcing large headers and decreasing maximum Tx queue size to %d\n",
+	if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV && use_large_llq_hdr) {
+		/* intersection between driver configuration and device capabilities */
+		if (dev->entry_size_ctrl_supported & ENA_ADMIN_LIST_ENTRY_SIZE_256B) {
+			if (dev->max_wide_llq_depth == MAX_WIDE_LLQ_DEPTH_UNSUPPORTED) {
+				/* Devices that do not support the double-sized ENA memory BAR will
+				 * report max_wide_llq_depth as 0. In such case, driver halves the
+				 * queue depth when working in large llq policy.
+				 */
+				max_tx_queue_size >>= 1;
+				PMD_INIT_LOG(INFO,
+					"large LLQ policy requires limiting Tx queue size to %u entries\n",
 				max_tx_queue_size);
+			} else if (dev->max_wide_llq_depth < max_tx_queue_size) {
+				/* In case the queue depth that the driver calculated exceeds
+				 * the maximal value that the device allows, it will be limited
+				 * to that maximal value
+				 */
+				max_tx_queue_size = dev->max_wide_llq_depth;
+			}
 		} else {
-			PMD_INIT_LOG(ERR,
-				"Forcing large headers failed: LLQ is disabled or device does not support large headers\n");
+			PMD_INIT_LOG(INFO,
+				"Forcing large LLQ headers failed since device lacks this support\n");
 		}
 	}