diff mbox series

[v5,2/9] net/mlx5: support representor of sub function

Message ID 1616939297-15627-3-git-send-email-xuemingl@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers show
Series net/mlx5: support SubFunction representor | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Xueming(Steven) Li March 28, 2021, 1:48 p.m. UTC
This patch adds support for SF representor. Similar to VF representor,
switch port name of SF representor in phys_port_name sysfs key is
"pf<x>sf<y>".

Device representor argument is "representor=sf[list]", where list members
can be a mix of single instances and ranges. Example:
  representor=sf[0,2,4,8-12,-1]

To probe VF representor and SF representor, need to separate into 2
devices:
  -a <BDF>,representor=vf[list] -a <BDF>,representor=sf[list]

Signed-off-by: Xueming Li <xuemingl@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 doc/guides/nics/mlx5.rst                |  58 +++++++++--
 drivers/net/mlx5/linux/mlx5_ethdev_os.c |   2 +
 drivers/net/mlx5/linux/mlx5_os.c        | 127 +++++++++++++++++++-----
 drivers/net/mlx5/mlx5.c                 |   1 +
 drivers/net/mlx5/mlx5.h                 |   9 ++
 drivers/net/mlx5/mlx5_ethdev.c          | 100 +++++++++++++++++++
 6 files changed, 263 insertions(+), 34 deletions(-)
diff mbox series

Patch

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index a2cfc51b2a..2e2909d82d 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -931,14 +931,18 @@  Driver options
 - ``representor`` parameter [list]
 
   This parameter can be used to instantiate DPDK Ethernet devices from
-  existing port (or VF) representors configured on the device.
+  existing port (PF, VF or SF) representors configured on the device.
 
   It is a standard parameter whose format is described in
   :ref:`ethernet_device_standard_device_arguments`.
 
-  For instance, to probe port representors 0 through 2::
+  For instance, to probe VF port representors 0 through 2::
 
-    representor=[0-2]
+    representor=vf[0-2]
+
+  To probe SF port representors 0 through 2::
+
+    representor=sf[0-2]
 
 - ``max_dump_files_num`` parameter [int]
 
@@ -1351,15 +1355,15 @@  Quick Start Guide on OFED/EN
 Enable switchdev mode
 ---------------------
 
-Switchdev mode is a mode in E-Switch, that binds between representor and VF.
-Representor is a port in DPDK that is connected to a VF in such a way
-that assuming there are no offload flows, each packet that is sent from the VF
-will be received by the corresponding representor. While each packet that is
-sent to a representor will be received by the VF.
+Switchdev mode is a mode in E-Switch, that binds between representor and VF or SF.
+Representor is a port in DPDK that is connected to a VF or SF in such a way
+that assuming there are no offload flows, each packet that is sent from the VF or SF
+will be received by the corresponding representor. While each packet that is
+sent to a representor will be received by the VF or SF.
 This is very useful in case of SRIOV mode, where the first packet that is sent
-by the VF will be received by the DPDK application which will decide if this
+by the VF or SF will be received by the DPDK application which will decide if this
 flow should be offloaded to the E-Switch. After offloading the flow packet
-that the VF that are matching the flow will not be received any more by
+that the VF or SF that are matching the flow will not be received any more by
 the DPDK application.
 
 1. Enable SRIOV mode::
@@ -1386,6 +1390,40 @@  the DPDK application.
 
         echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
 
+SubFunction representor support
+-------------------------------
+SubFunction is a portion of the PCI device. A SF netdev has its own
+dedicated queues (txq, rxq). A SF netdev supports E-Switch representation
+offload similar to existing PF and VF representors. A SF shares PCI
+level resources with other SFs and/or with its parent PCI function.
+
+1. Configure SF feature::
+
+        mlxconfig -d <mst device> set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1
+
+        Value of PF_BAR2_SIZE:
+
+            0: 8 SFs
+            1: 16 SFs
+            2: 32 SFs
+            3: 64 SFs
+
+2. Reset the FW::
+
+        mlxfwreset -d <mst device> reset
+
+3. Enable switchdev mode::
+
+        echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
+
+4. Create SF::
+
+        mlnx-sf -d <PCI_BDF> -a create
+
+5. Probe SF representor::
+
+        testpmd> port attach <PCI_BDF>,representor=sf0,dv_flow_en=1
+
 Performance tuning
 ------------------
 
diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
index cb692b22f2..2127fcfbfa 100644
--- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c
+++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c
@@ -1010,6 +1010,8 @@  mlx5_sysfs_check_switch_info(bool device_dir,
 	case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
 		/* Fallthrough */
 	case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+		/* Fallthrough */
+	case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
 		/* New representors naming schema. */
 		switch_info->representor = 1;
 		break;
diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c
index 5740214950..aac923ea39 100644
--- a/drivers/net/mlx5/linux/mlx5_os.c
+++ b/drivers/net/mlx5/linux/mlx5_os.c
@@ -701,6 +701,8 @@  mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
  *   Verbs device parameters (name, port, switch_info) to spawn.
  * @param config
  *   Device configuration parameters.
+ * @param eth_da
+ *   Device arguments.
  *
  * @return
  *   A valid Ethernet device object on success, NULL otherwise and rte_errno
@@ -712,7 +714,8 @@  mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)
 static struct rte_eth_dev *
 mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	       struct mlx5_dev_spawn_data *spawn,
-	       struct mlx5_dev_config *config)
+	       struct mlx5_dev_config *config,
+	       struct rte_eth_devargs *eth_da)
 {
 	const struct mlx5_switch_info *switch_info = &spawn->info;
 	struct mlx5_dev_ctx_shared *sh = NULL;
@@ -742,34 +745,82 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 
 	/* Determine if this port representor is supposed to be spawned. */
 	if (switch_info->representor && dpdk_dev->devargs) {
-		struct rte_eth_devargs eth_da;
-
-		err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da);
-		if (err) {
-			rte_errno = -err;
-			DRV_LOG(ERR, "failed to process device arguments: %s",
-				strerror(rte_errno));
-			return NULL;
-		}
-		if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
-			/* Representor not specified. */
+		switch (eth_da->type) {
+		case RTE_ETH_REPRESENTOR_SF:
+			if (switch_info->name_type !=
+					MLX5_PHYS_PORT_NAME_TYPE_PFSF) {
+				rte_errno = EBUSY;
+				return NULL;
+			}
+			break;
+		case RTE_ETH_REPRESENTOR_VF:
+			/* Allows HPF representor index -1 as exception. */
+			if (!(spawn->info.port_name == -1 &&
+			      switch_info->name_type ==
+					MLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&
+			    switch_info->name_type !=
+					MLX5_PHYS_PORT_NAME_TYPE_PFVF) {
+				rte_errno = EBUSY;
+				return NULL;
+			}
+			break;
+		case RTE_ETH_REPRESENTOR_NONE:
 			rte_errno = EBUSY;
 			return NULL;
-		}
-		if (eth_da.type != RTE_ETH_REPRESENTOR_VF) {
+			break;
+		default:
 			rte_errno = ENOTSUP;
 			DRV_LOG(ERR, "unsupported representor type: %s",
 				dpdk_dev->devargs->args);
 			return NULL;
 		}
-		for (i = 0; i < eth_da.nb_representor_ports; ++i)
-			if (eth_da.representor_ports[i] ==
+		/* Check controller ID: */
+		for (i = 0; i < eth_da->nb_mh_controllers; ++i)
+			if (eth_da->mh_controllers[i] ==
+			    (uint16_t)switch_info->ctrl_num)
+				break;
+		if (eth_da->nb_mh_controllers &&
+		    i == eth_da->nb_mh_controllers) {
+			rte_errno = EBUSY;
+			return NULL;
+		}
+		/* Check SF/VF ID: */
+		for (i = 0; i < eth_da->nb_representor_ports; ++i)
+			if (eth_da->representor_ports[i] ==
 			    (uint16_t)switch_info->port_name)
 				break;
-		if (i == eth_da.nb_representor_ports) {
+		if (eth_da->type != RTE_ETH_REPRESENTOR_PF &&
+		    i == eth_da->nb_representor_ports) {
 			rte_errno = EBUSY;
 			return NULL;
 		}
+		/* Check PF ID. Check after repr port to avoid warning flood. */
+		if (spawn->pf_bond >= 0) {
+			for (i = 0; i < eth_da->nb_ports; ++i)
+				if (eth_da->ports[i] ==
+				    (uint16_t)switch_info->pf_num)
+					break;
+			if (eth_da->nb_ports && i == eth_da->nb_ports) {
+				/* For backward compatibility, bonding
+				 * representor syntax supported with limitation,
+				 * device iterator won't find it:
+				 *    <PF1_BDF>,representor=#
+				 */
+				if (switch_info->pf_num > 0 &&
+				    eth_da->ports[0] == 0) {
+					DRV_LOG(WARNING, "Representor on Bonding PF should use pf#vf# format: %s",
+						dpdk_dev->devargs->args);
+				} else {
+					rte_errno = EBUSY;
+					return NULL;
+				}
+			}
+		} else if (eth_da->nb_ports > 1 || eth_da->ports[0]) {
+			rte_errno = EINVAL;
+			DRV_LOG(ERR, "PF id not supported by non-bond device: %s",
+				dpdk_dev->devargs->args);
+			return NULL;
+		}
 	}
 	/* Build device name. */
 	if (spawn->pf_bond <  0) {
@@ -777,8 +828,11 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 		if (!switch_info->representor)
 			strlcpy(name, dpdk_dev->name, sizeof(name));
 		else
-			snprintf(name, sizeof(name), "%s_representor_%u",
-				 dpdk_dev->name, switch_info->port_name);
+			snprintf(name, sizeof(name), "%s_representor_%s%u",
+				 dpdk_dev->name,
+				 switch_info->name_type ==
+				 MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
+				 switch_info->port_name);
 	} else {
 		/* Bonding device. */
 		if (!switch_info->representor)
@@ -786,9 +840,11 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 				 dpdk_dev->name,
 				 mlx5_os_get_dev_device_name(spawn->phys_dev));
 		else
-			snprintf(name, sizeof(name), "%s_%s_representor_%u",
+			snprintf(name, sizeof(name), "%s_%s_representor_%s%u",
 				 dpdk_dev->name,
 				 mlx5_os_get_dev_device_name(spawn->phys_dev),
+				 switch_info->name_type ==
+				 MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
 				 switch_info->port_name);
 	}
 	/* check if the device is already spawned */
@@ -1063,9 +1119,7 @@  mlx5_dev_spawn(struct rte_device *dpdk_dev,
 	priv->vport_id = switch_info->representor ?
 			 switch_info->port_name + 1 : -1;
 #endif
-	/* representor_id field keeps the unmodified VF index. */
-	priv->representor_id = switch_info->representor ?
-			       switch_info->port_name : -1;
+	priv->representor_id = mlx5_representor_id_encode(switch_info);
 	/*
 	 * Look for sibling devices in order to reuse their switch domain
 	 * if any, otherwise allocate one.
@@ -1839,6 +1893,7 @@  mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	struct mlx5_dev_spawn_data *list = NULL;
 	struct mlx5_dev_config dev_config;
 	unsigned int dev_config_vf;
+	struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
 	int ret;
 
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
@@ -1849,6 +1904,27 @@  mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 			strerror(rte_errno));
 		return -rte_errno;
 	}
+	if (pci_dev->device.devargs) {
+		/* Parse representor information from device argument. */
+		if (pci_dev->device.devargs->cls_str)
+			ret = rte_eth_devargs_parse
+				(pci_dev->device.devargs->cls_str, &eth_da);
+		if (ret) {
+			DRV_LOG(ERR, "failed to parse device arguments: %s",
+				pci_dev->device.devargs->cls_str);
+			return -rte_errno;
+		}
+		if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
+			/* Support legacy device argument */
+			ret = rte_eth_devargs_parse
+				(pci_dev->device.devargs->args, &eth_da);
+			if (ret) {
+				DRV_LOG(ERR, "failed to parse device arguments: %s",
+					pci_dev->device.devargs->args);
+				return -rte_errno;
+			}
+		}
+	}
 	errno = 0;
 	ibv_list = mlx5_glue->get_device_list(&ret);
 	if (!ibv_list) {
@@ -2021,6 +2097,8 @@  mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 				case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
 					/* Fallthrough */
 				case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+					/* Fallthrough */
+				case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
 					if (list[ns].info.pf_num == bd)
 						ns++;
 					break;
@@ -2198,7 +2276,8 @@  mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 		dev_config.log_hp_size = MLX5_ARG_UNSET;
 		list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
 						 &list[i],
-						 &dev_config);
+						 &dev_config,
+						 &eth_da);
 		if (!list[i].eth_dev) {
 			if (rte_errno != EBUSY && rte_errno != EEXIST)
 				break;
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index fb586317ca..22058a0ad5 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1454,6 +1454,7 @@  const struct eth_dev_ops mlx5_dev_ops = {
 	.xstats_get_names = mlx5_xstats_get_names,
 	.fw_version_get = mlx5_fw_version_get,
 	.dev_infos_get = mlx5_dev_infos_get,
+	.representor_info_get = mlx5_representor_info_get,
 	.read_clock = mlx5_txpp_read_clock,
 	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
 	.vlan_filter_set = mlx5_vlan_filter_set,
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 8e8727a6c5..33c6b39a1e 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1032,6 +1032,15 @@  int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh);
 /* mlx5_ethdev.c */
 
 int mlx5_dev_configure(struct rte_eth_dev *dev);
+int mlx5_representor_info_get(struct rte_eth_dev *dev,
+			      struct rte_eth_representor_info *info);
+#define MLX5_REPRESENTOR_ID(pf, type, repr) \
+		(((pf) << 14) + ((type) << 12) + ((repr) & 0xfff))
+#define MLX5_REPRESENTOR_REPR(repr_id) \
+		((repr_id) & 0xfff)
+#define MLX5_REPRESENTOR_TYPE(repr_id) \
+		(((repr_id) >> 12) & 3)
+uint16_t mlx5_representor_id_encode(const struct mlx5_switch_info *info);
 int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver,
 			size_t fw_size);
 int mlx5_dev_infos_get(struct rte_eth_dev *dev,
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 51b39ddde5..1ffb13cf2e 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -377,6 +377,106 @@  mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
 	return 0;
 }
 
+/**
+ * Calculate representor ID from port switch info.
+ *
+ * Uint16 representor ID bits definition (most significant bits first):
+ *   pf: 2
+ *   type: 2
+ *   vf/sf: 12
+ *
+ * @param info
+ *   Port switch info.
+ *
+ * @return
+ *   Encoded representor ID.
+ */
+uint16_t
+mlx5_representor_id_encode(const struct mlx5_switch_info *info)
+{
+	enum rte_eth_representor_type type = RTE_ETH_REPRESENTOR_VF;
+	uint16_t repr = info->port_name;
+
+	if (info->representor == 0)
+		return UINT16_MAX;
+	if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF)
+		type = RTE_ETH_REPRESENTOR_SF;
+	if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF)
+		repr = UINT16_MAX;
+	return MLX5_REPRESENTOR_ID(info->pf_num, type, repr);
+}
+
+/**
+ * DPDK callback to get information about representor.
+ *
+ * Representor ID bits definition (least significant bits first):
+ *   vf/sf: 12
+ *   type: 2
+ *   pf: 2
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param[out] info
+ *   Nullable info structure output buffer.
+ *
+ * @return
+ *   negative on error, or the number of representor ranges.
+ */
+int
+mlx5_representor_info_get(struct rte_eth_dev *dev,
+			  struct rte_eth_representor_info *info)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	int n_type = 3; /* Number of representor types, VF, HPF and SF. */
+	int n_pf = 2; /* Number of PFs. */
+	int i = 0, pf;
+
+	if (info == NULL)
+		goto out;
+	info->controller = 0;
+	info->pf = priv->pf_bond >= 0 ? priv->pf_bond : 0;
+	for (pf = 0; pf < n_pf; ++pf) {
+		/* VF range. */
+		info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
+		info->ranges[i].controller = 0;
+		info->ranges[i].pf = pf;
+		info->ranges[i].vf = 0;
+		info->ranges[i].id_base =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
+		info->ranges[i].id_end =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		snprintf(info->ranges[i].name,
+			 sizeof(info->ranges[i].name), "pf%dvf", pf);
+		i++;
+		/* HPF range. */
+		info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
+		info->ranges[i].controller = 0;
+		info->ranges[i].pf = pf;
+		info->ranges[i].vf = UINT16_MAX;
+		info->ranges[i].id_base =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		info->ranges[i].id_end =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		snprintf(info->ranges[i].name,
+			 sizeof(info->ranges[i].name), "pf%dvf", pf);
+		i++;
+		/* SF range. */
+		info->ranges[i].type = RTE_ETH_REPRESENTOR_SF;
+		info->ranges[i].controller = 0;
+		info->ranges[i].pf = pf;
+		info->ranges[i].vf = 0;
+		info->ranges[i].id_base =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0);
+		info->ranges[i].id_end =
+			MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1);
+		snprintf(info->ranges[i].name,
+			 sizeof(info->ranges[i].name), "pf%dsf", pf);
+		i++;
+	}
+out:
+	return n_type * n_pf;
+}
+
 /**
  * Get firmware version of a device.
  *