[dpdk-dev] mlx5: RETA query/update support

Message ID 1446485467-17847-1-git-send-email-adrien.mazarguil@6wind.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Adrien Mazarguil Nov. 2, 2015, 5:31 p.m. UTC
  From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>

ConnectX-4 is able to use indirection tables whose size is a power of two,
but with the current API it is impossible to predict that size, so to simplify,
for any query/update RETA command, the indirection table is modified to use
256 entries.

A port stop/start must be done to apply the new RETA configuration.

Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
 drivers/net/mlx5/mlx5.c        |   8 +-
 drivers/net/mlx5/mlx5.h        |   7 ++
 drivers/net/mlx5/mlx5_ethdev.c |  29 ++++++++
 drivers/net/mlx5/mlx5_rss.c    | 163 +++++++++++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rxq.c    |  53 ++------------
 drivers/net/mlx5/mlx5_utils.h  |  20 +++++
 6 files changed, 234 insertions(+), 46 deletions(-)
  

Comments

Adrien Mazarguil Nov. 2, 2015, 5:40 p.m. UTC | #1
On Mon, Nov 02, 2015 at 06:31:07PM +0100, Adrien Mazarguil wrote:
> From: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
> 
> ConnectX-4 is able to use indirection tables size of power of two, but
> with the current API it is impossible to predict its size, so to simplify,
> for any query/update RETA command, the indirection table is modified to use
> 256 entries.
> 
> A port stop/start must be done to apply the new RETA configuration.

Please ignore this patch, it's malformed and should be in the proper
thread. I'm going to send an update.
  

Patch

diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 9636588..5a95260 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -133,6 +133,8 @@  mlx5_dev_close(struct rte_eth_dev *dev)
 			rte_free((*priv->rss_conf)[i]);
 		rte_free(priv->rss_conf);
 	}
+	if (priv->reta_idx != NULL)
+		rte_free(priv->reta_idx);
 	priv_unlock(priv);
 	memset(priv, 0, sizeof(*priv));
 }
@@ -160,6 +162,8 @@  static const struct eth_dev_ops mlx5_dev_ops = {
 	.mac_addr_remove = mlx5_mac_addr_remove,
 	.mac_addr_add = mlx5_mac_addr_add,
 	.mtu_set = mlx5_dev_set_mtu,
+	.reta_update = mlx5_dev_rss_reta_update,
+	.reta_query = mlx5_dev_rss_reta_query,
 	.rss_hash_update = mlx5_rss_hash_update,
 	.rss_hash_conf_get = mlx5_rss_hash_conf_get,
 };
@@ -373,7 +377,9 @@  mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		DEBUG("L2 tunnel checksum offloads are %ssupported",
 		      (priv->hw_csum_l2tun ? "" : "not "));
 
-		priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
+		priv->ind_table_max_size =
+			RTE_MIN((unsigned int)RSS_INDIRECTION_TABLE_SIZE,
+				exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size);
 		DEBUG("maximum RX indirection table size is %u",
 		      priv->ind_table_max_size);
 
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 0daacc8..b84d31d 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -118,6 +118,8 @@  struct priv {
 	/* RSS configuration array indexed by hash RX queue type. */
 	struct rte_eth_rss_conf *(*rss_conf)[];
 	struct rte_intr_handle intr_handle; /* Interrupt handler. */
+	unsigned int (*reta_idx)[]; /* RETA index table. */
+	unsigned int reta_idx_n; /* RETA index size. */
 	rte_spinlock_t lock; /* Lock for control functions. */
 };
 
@@ -184,6 +186,11 @@  int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
 			      uint64_t);
 int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
 int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
+int priv_rss_reta_index_resize(struct priv *, unsigned int);
+int mlx5_dev_rss_reta_query(struct rte_eth_dev *,
+			    struct rte_eth_rss_reta_entry64 *, uint16_t);
+int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
+			     struct rte_eth_rss_reta_entry64 *, uint16_t);
 
 /* mlx5_rxmode.c */
 
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 84e877c..1159fa3 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -410,6 +410,9 @@  dev_configure(struct rte_eth_dev *dev)
 	struct priv *priv = dev->data->dev_private;
 	unsigned int rxqs_n = dev->data->nb_rx_queues;
 	unsigned int txqs_n = dev->data->nb_tx_queues;
+	unsigned int i;
+	unsigned int j;
+	unsigned int reta_idx_n;
 
 	priv->rxqs = (void *)dev->data->rx_queues;
 	priv->txqs = (void *)dev->data->tx_queues;
@@ -418,11 +421,31 @@  dev_configure(struct rte_eth_dev *dev)
 		     (void *)dev, priv->txqs_n, txqs_n);
 		priv->txqs_n = txqs_n;
 	}
+	if (rxqs_n > priv->ind_table_max_size) {
+		ERROR("cannot handle this many RX queues (%u)", rxqs_n);
+		return EINVAL;
+	}
 	if (rxqs_n == priv->rxqs_n)
 		return 0;
 	INFO("%p: RX queues number update: %u -> %u",
 	     (void *)dev, priv->rxqs_n, rxqs_n);
 	priv->rxqs_n = rxqs_n;
+	/* If the requested number of RX queues is not a power of two, use the
+	 * maximum indirection table size for better balancing.
+	 * The result is always rounded to the next power of two. */
+	reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ?
+				     priv->ind_table_max_size :
+				     rxqs_n));
+	if (priv_rss_reta_index_resize(priv, reta_idx_n))
+		return ENOMEM;
+	/* When the number of RX queues is not a power of two, the remaining
+	 * table entries are padded with reused WQs and hashes are not spread
+	 * uniformly. */
+	for (i = 0, j = 0; (i != reta_idx_n); ++i) {
+		(*priv->reta_idx)[i] = j;
+		if (++j == rxqs_n)
+			j = 0;
+	}
 	return 0;
 }
 
@@ -494,6 +517,12 @@  mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 		 0);
 	if (priv_get_ifname(priv, &ifname) == 0)
 		info->if_index = if_nametoindex(ifname);
+	/* FIXME: RETA update/query API expects the caller to know the size of
+	 * the indirection table, for this PMD the size varies depending on
+	 * the number of RX queues, it becomes impossible to find the correct
+	 * size if it is not fixed.
+	 * The API should be updated to solve this problem. */
+	info->reta_size = priv->ind_table_max_size;
 	priv_unlock(priv);
 }
 
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index bf19aca..7eb688a 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -211,3 +211,166 @@  mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
 	priv_unlock(priv);
 	return 0;
 }
+
+/**
+ * Allocate/reallocate RETA index table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param reta_size
+ *   The size of the array to allocate.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+int
+priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size)
+{
+	void *mem;
+	unsigned int old_size = priv->reta_idx_n;
+
+	if (priv->reta_idx_n == reta_size)
+		return 0;
+
+	mem = rte_realloc(priv->reta_idx,
+			  reta_size * sizeof((*priv->reta_idx)[0]), 0);
+	if (!mem)
+		return ENOMEM;
+	priv->reta_idx = mem;
+	priv->reta_idx_n = reta_size;
+
+	if (old_size < reta_size)
+		memset(&(*priv->reta_idx)[old_size], 0,
+		       (reta_size - old_size) *
+		       sizeof((*priv->reta_idx)[0]));
+	return 0;
+}
+
+/**
+ * Query RETA table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in, out] reta_conf
+ *   Pointer to the first RETA configuration structure.
+ * @param reta_size
+ *   Number of entries.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_dev_rss_reta_query(struct priv *priv,
+			struct rte_eth_rss_reta_entry64 *reta_conf,
+			unsigned int reta_size)
+{
+	unsigned int idx;
+	unsigned int i;
+	int ret;
+
+	/* See RETA comment in mlx5_dev_infos_get(). */
+	ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
+	if (ret)
+		return ret;
+
+	/* Fill each entry of the table even if its bit is not set. */
+	for (idx = 0, i = 0; (i != reta_size); ++i) {
+		idx = i / RTE_RETA_GROUP_SIZE;
+		reta_conf[idx].reta[i % RTE_RETA_GROUP_SIZE] =
+			(*priv->reta_idx)[i];
+	}
+	return 0;
+}
+
+/**
+ * Update RETA table.
+ *
+ * @param priv
+ *   Pointer to private structure.
+ * @param[in] reta_conf
+ *   Pointer to the first RETA configuration structure.
+ * @param reta_size
+ *   Number of entries.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+priv_dev_rss_reta_update(struct priv *priv,
+			 struct rte_eth_rss_reta_entry64 *reta_conf,
+			 unsigned int reta_size)
+{
+	unsigned int idx;
+	unsigned int i;
+	unsigned int pos;
+	int ret;
+
+	/* See RETA comment in mlx5_dev_infos_get(). */
+	ret = priv_rss_reta_index_resize(priv, priv->ind_table_max_size);
+	if (ret)
+		return ret;
+
+	for (idx = 0, i = 0; (i != reta_size); ++i) {
+		idx = i / RTE_RETA_GROUP_SIZE;
+		pos = i % RTE_RETA_GROUP_SIZE;
+		if (((reta_conf[idx].mask >> pos) & 0x1) == 0)
+			continue;
+		assert(reta_conf[idx].reta[pos] < priv->rxqs_n);
+		(*priv->reta_idx)[i] = reta_conf[idx].reta[pos];
+	}
+	return 0;
+}
+
+/**
+ * DPDK callback to get the RETA indirection table.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param reta_conf
+ *   Pointer to RETA configuration structure array.
+ * @param reta_size
+ *   Size of the RETA table.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_rss_reta_query(struct rte_eth_dev *dev,
+			struct rte_eth_rss_reta_entry64 *reta_conf,
+			uint16_t reta_size)
+{
+	int ret;
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	ret = priv_dev_rss_reta_query(priv, reta_conf, reta_size);
+	priv_unlock(priv);
+	return -ret;
+}
+
+/**
+ * DPDK callback to update the RETA indirection table.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param reta_conf
+ *   Pointer to RETA configuration structure array.
+ * @param reta_size
+ *   Size of the RETA table.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
+			 struct rte_eth_rss_reta_entry64 *reta_conf,
+			 uint16_t reta_size)
+{
+	int ret;
+	struct priv *priv = dev->data->dev_private;
+
+	priv_lock(priv);
+	ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
+	priv_unlock(priv);
+	return -ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 084bf41..3d7ae7e 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -259,26 +259,6 @@  hash_rxq_flow_attr(const struct hash_rxq *hash_rxq,
 }
 
 /**
- * Return nearest power of two above input value.
- *
- * @param v
- *   Input value.
- *
- * @return
- *   Nearest power of two above input value.
- */
-static unsigned int
-log2above(unsigned int v)
-{
-	unsigned int l;
-	unsigned int r;
-
-	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
-		r |= (v & 1);
-	return (l + r);
-}
-
-/**
  * Return the type corresponding to the n'th bit set.
  *
  * @param table
@@ -360,14 +340,7 @@  priv_make_ind_table_init(struct priv *priv,
 int
 priv_create_hash_rxqs(struct priv *priv)
 {
-	/* If the requested number of WQs is not a power of two, use the
-	 * maximum indirection table size for better balancing.
-	 * The result is always rounded to the next power of two. */
-	unsigned int wqs_n =
-		(1 << log2above((priv->rxqs_n & (priv->rxqs_n - 1)) ?
-				priv->ind_table_max_size :
-				priv->rxqs_n));
-	struct ibv_exp_wq *wqs[wqs_n];
+	struct ibv_exp_wq *wqs[priv->reta_idx_n];
 	struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
 	unsigned int ind_tables_n =
 		priv_make_ind_table_init(priv, &ind_table_init);
@@ -393,25 +366,15 @@  priv_create_hash_rxqs(struct priv *priv)
 		      " indirection table cannot be created");
 		return EINVAL;
 	}
-	if ((wqs_n < priv->rxqs_n) || (wqs_n > priv->ind_table_max_size)) {
-		ERROR("cannot handle this many RX queues (%u)", priv->rxqs_n);
-		err = ERANGE;
-		goto error;
-	}
-	if (wqs_n != priv->rxqs_n) {
+	if (priv->rxqs_n & (priv->rxqs_n - 1)) {
 		INFO("%u RX queues are configured, consider rounding this"
 		     " number to the next power of two for better balancing",
 		     priv->rxqs_n);
-		DEBUG("indirection table extended to assume %u WQs", wqs_n);
-	}
-	/* When the number of RX queues is not a power of two, the remaining
-	 * table entries are padded with reused WQs and hashes are not spread
-	 * uniformly. */
-	for (i = 0, j = 0; (i != wqs_n); ++i) {
-		wqs[i] = (*priv->rxqs)[j]->wq;
-		if (++j == priv->rxqs_n)
-			j = 0;
+		DEBUG("indirection table extended to assume %u WQs",
+		      priv->reta_idx_n);
 	}
+	for (i = 0; (i != priv->reta_idx_n); ++i)
+		wqs[i] = (*priv->rxqs)[(*priv->reta_idx)[i]]->wq;
 	/* Get number of hash RX queues to configure. */
 	for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
 		hash_rxqs_n += ind_table_init[i].hash_types_n;
@@ -436,8 +399,8 @@  priv_create_hash_rxqs(struct priv *priv)
 		unsigned int ind_tbl_size = ind_table_init[i].max_size;
 		struct ibv_exp_rwq_ind_table *ind_table;
 
-		if (wqs_n < ind_tbl_size)
-			ind_tbl_size = wqs_n;
+		if (priv->reta_idx_n < ind_tbl_size)
+			ind_tbl_size = priv->reta_idx_n;
 		ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
 		errno = 0;
 		ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index f1fad18..9b5e86a 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -161,4 +161,24 @@  pmd_drv_log_basename(const char *s)
 	\
 	snprintf(name, sizeof(name), __VA_ARGS__)
 
+/**
+ * Return nearest power of two above input value.
+ *
+ * @param v
+ *   Input value.
+ *
+ * @return
+ *   Nearest power of two above input value.
+ */
+static inline unsigned int
+log2above(unsigned int v)
+{
+	unsigned int l;
+	unsigned int r;
+
+	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
+		r |= (v & 1);
+	return (l + r);
+}
+
 #endif /* RTE_PMD_MLX5_UTILS_H_ */