@@ -1759,6 +1759,147 @@ behavior as librte_net_mlx4::
> port config all rss all
> port start all
+
+Multiport E-Switch
+------------------
+
+In standard deployments of NVIDIA ConnectX and BlueField HCAs, where the embedded switch is enabled,
+each physical port is associated with a single switching domain.
+Only PFs, VFs and SFs related to that physical port are connected to this domain,
+and offloaded flow rules are allowed to steer traffic only between the entities in the given domain.
+
+The following diagram shows a high-level overview of this architecture:
+
+::
+
+ .---. .------. .------. .---. .------. .------.
+ |PF0| |PF0VFi| |PF0SFi| |PF1| |PF1VFi| |PF1SFi|
+ .-+-. .--+---. .--+---. .-+-. .--+---. .--+---.
+ | | | | | |
+ .---|------|--------|-------|------|--------|---------.
+ | | | | | | | HCA|
+ | .-+------+--------+---. .-+------+--------+---. |
+ | | | | | |
+ | | E-Switch | | E-Switch | |
+ | | PF0 | | PF1 | |
+ | | | | | |
+ | .---------+-----------. .--------+------------. |
+ | | | |
+ .--------+--+---+---------------+--+---+--------------.
+ | | | |
+ | PHY0 | | PHY1 |
+ | | | |
+ .------. .------.
+
+Multiport E-Switch is a deployment scenario where:
+
+- All physical ports, PFs, VFs and SFs share the same switching domain.
+- Each physical port gets a separate representor port.
+- Traffic can be matched or forwarded explicitly between any of the entities
+ connected to the domain.
+
+The following diagram shows a high-level overview of this architecture:
+
+::
+
+
+ .---. .------. .------. .---. .------. .------.
+ |PF0| |PF0VFi| |PF0SFi| |PF1| |PF1VFi| |PF1SFi|
+ .-+-. .--+---. .--+---. .-+-. .--+---. .--+---.
+ | | | | | |
+ .---|------|--------|-------|------|--------|---------.
+ | | | | | | | HCA|
+ | .-+------+--------+-------+------+--------+---. |
+ | | | |
+ | | Shared | |
+ | | E-Switch | |
+ | | | |
+ | .---------+----------------------+------------. |
+ | | | |
+ .--------+--+---+---------------+--+---+--------------.
+ | | | |
+ | PHY0 | | PHY1 |
+ | | | |
+ .------. .------.
+
+
+In this deployment, a single application can control the switching and forwarding behavior for all
+entities on the HCA.
+
+With this configuration, the mlx5 PMD supports:
+
+- matching traffic coming from a physical port, PF, VF or SF using REPRESENTED_PORT items;
+- forwarding traffic to a physical port, PF, VF or SF using REPRESENTED_PORT actions.
+
+
+Requirements
+~~~~~~~~~~~~
+
+Supported HCAs:
+
+- ConnectX family: ConnectX-6 Dx and above.
+- BlueField family: BlueField-2 and above.
+- FW version: at least ``XX.37.1014``.
+
+Supported mlx5 kernel module versions:
+
+- Upstream Linux - from version 6.3.
+- Modules packaged in MLNX_OFED - from version v23.04-0.5.3.3.
+
+
+Configuration
+~~~~~~~~~~~~~
+
+#. Apply required FW configuration::
+
+ sudo mlxconfig -d /dev/mst/mt4125_pciconf0 set LAG_RESOURCE_ALLOCATION=1
+
+#. Reset FW or cold reboot the host.
+#. Switch E-Switch mode on all of the PFs to ``switchdev`` mode::
+
+ sudo devlink dev eswitch set pci/0000:08:00.0 mode switchdev
+ sudo devlink dev eswitch set pci/0000:08:00.1 mode switchdev
+
+#. Enable Multiport E-Switch on all of the PFs::
+
+ sudo devlink dev param set pci/0000:08:00.0 name esw_multiport value true cmode runtime
+ sudo devlink dev param set pci/0000:08:00.1 name esw_multiport value true cmode runtime
+
+#. Configure required number of VFs/SFs::
+
+ echo 4 | sudo tee /sys/class/net/eth2/device/sriov_numvfs
+ echo 4 | sudo tee /sys/class/net/eth3/device/sriov_numvfs
+
+#. Start testpmd and verify that all ports are visible::
+
+ $ sudo dpdk-testpmd -a 08:00.0,dv_flow_en=2,representor=pf0-1vf0-3 -- -i
+ testpmd> show port summary all
+ Number of available ports: 10
+ Port MAC Address Name Driver Status Link
+ 0 E8:EB:D5:18:22:BC 08:00.0_p0 mlx5_pci up 200 Gbps
+ 1 E8:EB:D5:18:22:BD 08:00.0_p1 mlx5_pci up 200 Gbps
+ 2 D2:F6:43:0B:9E:19 08:00.0_representor_c0pf0vf0 mlx5_pci up 200 Gbps
+ 3 E6:42:27:B7:68:BD 08:00.0_representor_c0pf0vf1 mlx5_pci up 200 Gbps
+ 4 A6:5B:7F:8B:B8:47 08:00.0_representor_c0pf0vf2 mlx5_pci up 200 Gbps
+ 5 12:93:50:45:89:02 08:00.0_representor_c0pf0vf3 mlx5_pci up 200 Gbps
+ 6 06:D3:B2:79:FE:AC 08:00.0_representor_c0pf1vf0 mlx5_pci up 200 Gbps
+ 7 12:FC:08:E4:C2:CA 08:00.0_representor_c0pf1vf1 mlx5_pci up 200 Gbps
+ 8 8E:A9:9A:D0:35:4C 08:00.0_representor_c0pf1vf2 mlx5_pci up 200 Gbps
+ 9 E6:35:83:1F:B0:A9 08:00.0_representor_c0pf1vf3 mlx5_pci up 200 Gbps
+
+
+Limitations
+~~~~~~~~~~~
+
+- Multiport E-Switch is not supported on Windows.
+- Multiport E-Switch is supported only with HW Steering flow engine (``dv_flow_en=2``).
+- Matching traffic coming from a physical port and forwarding it to a physical port
+ (either the same or another one) is not supported.
+
+ To achieve such functionality, an application has to set up hairpin queues between
+ physical port representors and forward the traffic using hairpin queues.
+
+
Usage example
-------------
@@ -157,6 +157,7 @@ New Features
* Added support for ``RTE_FLOW_ACTION_TYPE_INDIRECT_LIST`` flow action.
* Added support for ``RTE_FLOW_ITEM_TYPE_PTYPE`` flow item.
* Added support for ``RTE_FLOW_ACTION_TYPE_PORT_REPRESENTOR`` flow action and mirror.
+ * Added support for Multiport E-Switch.
* **Updated Solarflare net driver.**
@@ -169,6 +169,7 @@ struct mlx5_switch_info {
int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */
int32_t pf_num; /**< PF number (valid for pfxvfx format only). */
int32_t port_name; /**< Representor port name. */
+ int32_t mpesw_owner; /**< MPESW owner port number. */
uint64_t switch_id; /**< Switch identifier. */
};
@@ -959,7 +959,30 @@ mlx5_representor_match(struct mlx5_dev_spawn_data *spawn,
uint16_t repr_id = mlx5_representor_id_encode(switch_info,
eth_da->type);
+ /*
+ * Assuming Multiport E-Switch device was detected,
+ * if spawned port is an uplink, check if the port
+ * was requested through representor devarg.
+ */
+ if (mlx5_is_probed_port_on_mpesw_device(spawn) &&
+ switch_info->name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
+ for (p = 0; p < eth_da->nb_ports; ++p)
+ if (switch_info->port_name == eth_da->ports[p])
+ return true;
+ rte_errno = EBUSY;
+ return false;
+ }
switch (eth_da->type) {
+ case RTE_ETH_REPRESENTOR_PF:
+ /*
+ * PF representors provided in devargs translate to uplink ports, but
+ * if and only if the device is a part of MPESW device.
+ */
+ if (!mlx5_is_probed_port_on_mpesw_device(spawn)) {
+ rte_errno = EBUSY;
+ return false;
+ }
+ break;
case RTE_ETH_REPRESENTOR_SF:
if (!(spawn->info.port_name == -1 &&
switch_info->name_type ==
@@ -989,7 +1012,7 @@ mlx5_representor_match(struct mlx5_dev_spawn_data *spawn,
}
/* Check representor ID: */
for (p = 0; p < eth_da->nb_ports; ++p) {
- if (spawn->pf_bond < 0) {
+ if (!mlx5_is_probed_port_on_mpesw_device(spawn) && spawn->pf_bond < 0) {
/* For non-LAG mode, allow and ignore pf. */
switch_info->pf_num = eth_da->ports[p];
repr_id = mlx5_representor_id_encode(switch_info,
@@ -1051,17 +1074,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
!mlx5_representor_match(spawn, eth_da))
return NULL;
/* Build device name. */
- if (spawn->pf_bond < 0) {
- /* Single device. */
- if (!switch_info->representor)
- strlcpy(name, dpdk_dev->name, sizeof(name));
- else
- err = snprintf(name, sizeof(name), "%s_representor_%s%u",
- dpdk_dev->name,
- switch_info->name_type ==
- MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
- switch_info->port_name);
- } else {
+ if (spawn->pf_bond >= 0) {
/* Bonding device. */
if (!switch_info->representor) {
err = snprintf(name, sizeof(name), "%s_%s",
@@ -1075,6 +1088,30 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
switch_info->port_name);
}
+ } else if (mlx5_is_probed_port_on_mpesw_device(spawn)) {
+ /* MPESW device. */
+ if (switch_info->name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {
+ err = snprintf(name, sizeof(name), "%s_p%d",
+ dpdk_dev->name, spawn->mpesw_port);
+ } else {
+ err = snprintf(name, sizeof(name), "%s_representor_c%dpf%d%s%u",
+ dpdk_dev->name,
+ switch_info->ctrl_num,
+ switch_info->pf_num,
+ switch_info->name_type ==
+ MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
+ switch_info->port_name);
+ }
+ } else {
+ /* Single device. */
+ if (!switch_info->representor)
+ strlcpy(name, dpdk_dev->name, sizeof(name));
+ else
+ err = snprintf(name, sizeof(name), "%s_representor_%s%u",
+ dpdk_dev->name,
+ switch_info->name_type ==
+ MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf",
+ switch_info->port_name);
}
if (err >= (int)sizeof(name))
DRV_LOG(WARNING, "device name overflow %s", name);
@@ -1202,13 +1239,25 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
priv->vport_meta_tag = 0;
priv->vport_meta_mask = 0;
priv->pf_bond = spawn->pf_bond;
+ priv->mpesw_port = spawn->mpesw_port;
+ priv->mpesw_uplink = false;
+ priv->mpesw_owner = spawn->info.mpesw_owner;
+ if (mlx5_is_port_on_mpesw_device(priv))
+ priv->mpesw_uplink = (spawn->info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK);
DRV_LOG(DEBUG,
- "dev_port=%u bus=%s pci=%s master=%d representor=%d pf_bond=%d\n",
+ "dev_port=%u bus=%s pci=%s master=%d representor=%d pf_bond=%d "
+ "mpesw_port=%d mpesw_uplink=%d",
priv->dev_port, dpdk_dev->bus->name,
priv->pci_dev ? priv->pci_dev->name : "NONE",
- priv->master, priv->representor, priv->pf_bond);
+ priv->master, priv->representor, priv->pf_bond,
+ priv->mpesw_port, priv->mpesw_uplink);
+ if (mlx5_is_port_on_mpesw_device(priv) && priv->sh->config.dv_flow_en != 2) {
+ DRV_LOG(ERR, "MPESW device is supported only with HWS");
+ err = ENOTSUP;
+ goto error;
+ }
/*
* If we have E-Switch we should determine the vport attributes.
* E-Switch may use either source vport field or reg_c[0] metadata
@@ -2029,7 +2078,7 @@ mlx5_sysfs_esw_multiport_get(struct ibv_device *ibv, struct rte_pci_addr *pci_ad
return ret;
}
-static __rte_unused int
+static int
mlx5_is_mpesw_enabled(struct ibv_device *ibv, struct rte_pci_addr *ibv_pci_addr, int *enabled)
{
/*
@@ -2049,6 +2098,84 @@ mlx5_is_mpesw_enabled(struct ibv_device *ibv, struct rte_pci_addr *ibv_pci_addr,
return -rte_errno;
}
+/**
+ * Check whether an IB device is a Multiport E-Switch (MPESW) device
+ * bound to the probed PCI device and find its uplink port.
+ *
+ * The IB device matches when all of the following hold:
+ * - its PCI address is equal to @p owner_pci,
+ * - mlx5_is_mpesw_enabled() reports MPESW as enabled for it,
+ * - one of its IB ports has a netdev whose phys_port_name translates to
+ *   an uplink port and which is bound to the same PCI address.
+ *
+ * @param[in] ibv
+ *   IB device to check.
+ * @param[in] owner_pci
+ *   PCI address of the probed device.
+ * @param[in] nl_rdma
+ *   Handle passed to mlx5_nl_portnum() and mlx5_nl_ifindex().
+ *   A negative value makes the lookup fail.
+ *
+ * @return
+ *   Port number (>= 0) parsed from the name of the first matching uplink
+ *   port, a negative value otherwise. rte_errno is not guaranteed to be
+ *   set on failure.
+ */
+static int
+mlx5_device_mpesw_pci_match(struct ibv_device *ibv,
+			    const struct rte_pci_addr *owner_pci,
+			    int nl_rdma)
+{
+	struct rte_pci_addr ibdev_pci_addr = { 0 };
+	unsigned int np;
+	unsigned int i;
+	int enabled = 0;
+
+	/* Check if IB device's PCI address matches the probed PCI address. */
+	if (mlx5_get_pci_addr(ibv->ibdev_path, &ibdev_pci_addr)) {
+		DRV_LOG(DEBUG, "Skipping MPESW check for IB device %s since "
+			"there is no underlying PCI device", ibv->name);
+		rte_errno = ENOENT;
+		return -rte_errno;
+	}
+	if (rte_pci_addr_cmp(&ibdev_pci_addr, owner_pci) != 0)
+		return -1;
+	/* Check if IB device has MPESW enabled. */
+	if (mlx5_is_mpesw_enabled(ibv, &ibdev_pci_addr, &enabled))
+		return -1;
+	if (!enabled)
+		return -1;
+	/* Iterate through IB ports to find MPESW master uplink port. */
+	if (nl_rdma < 0)
+		return -1;
+	np = mlx5_nl_portnum(nl_rdma, ibv->name);
+	if (!np)
+		return -1;
+	/* The loop walks IB port indexes 1..np inclusive. */
+	for (i = 1; i <= np; ++i) {
+		char ifname[IF_NAMESIZE + 1] = { 0 };
+		char port_name[IF_NAMESIZE + 1];
+		struct rte_pci_addr pci_addr;
+		struct mlx5_switch_info info;
+		unsigned int ifindex;
+		FILE *file;
+		int ret;
+
+		/* Check whether IB port has a corresponding netdev. */
+		ifindex = mlx5_nl_ifindex(nl_rdma, ibv->name, i);
+		if (!ifindex)
+			continue;
+		if (!if_indextoname(ifindex, ifname))
+			continue;
+		/* Read port name and determine its type. */
+		MKSTR(ifphysportname, "/sys/class/net/%s/phys_port_name", ifname);
+		file = fopen(ifphysportname, "rb");
+		if (!file)
+			continue;
+		ret = fscanf(file, "%16s", port_name);
+		fclose(file);
+		if (ret != 1)
+			continue;
+		memset(&info, 0, sizeof(info));
+		mlx5_translate_port_name(port_name, &info);
+		if (info.name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK)
+			continue;
+		/* Fetch PCI address of the device to which the netdev is bound. */
+		MKSTR(ifpath, "/sys/class/net/%s", ifname);
+		if (mlx5_get_pci_addr(ifpath, &pci_addr))
+			continue;
+		/* Only an uplink bound to the IB device's own PCI function counts. */
+		if (rte_pci_addr_cmp(&pci_addr, &ibdev_pci_addr) == 0) {
+			MLX5_ASSERT(info.port_name >= 0);
+			return info.port_name;
+		}
+	}
+	/* No matching MPESW uplink port was found. */
+	return -1;
+}
+
+
/**
* Register a PCI device within bonding.
*
@@ -2097,6 +2224,12 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
* >= 0 - bonding device (value is slave PF index)
*/
int bd = -1;
+ /*
+ * Multiport E-Switch (MPESW) device:
+ * < 0 - no MPESW device or could not determine if it is MPESW device,
+ * >= 0 - MPESW device. Value is the port index of the MPESW owner.
+ */
+ int mpesw = MLX5_MPESW_PORT_INVALID;
struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(cdev->dev);
struct mlx5_dev_spawn_data *list = NULL;
struct rte_eth_devargs eth_da = *req_eth_da;
@@ -2150,17 +2283,38 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
bd, ibv_list[ret]->name);
ibv_match[nd++] = ibv_list[ret];
break;
- } else {
- /* Bonding device not found. */
- if (mlx5_get_pci_addr(ibv_list[ret]->ibdev_path,
- &pci_addr))
- continue;
- if (rte_pci_addr_cmp(&owner_pci, &pci_addr) != 0)
- continue;
- DRV_LOG(INFO, "PCI information matches for device \"%s\"",
+ }
+ mpesw = mlx5_device_mpesw_pci_match(ibv_list[ret], &owner_pci, nl_rdma);
+ if (mpesw >= 0) {
+ /*
+ * MPESW device detected. Only one matching IB device is allowed,
+ * so if any matches were found previously, fail gracefully.
+ */
+ if (nd) {
+ DRV_LOG(ERR,
+ "PCI information matches MPESW device \"%s\", "
+ "but multiple matching PCI devices were found. "
+ "Probing failed.",
+ ibv_list[ret]->name);
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ goto exit;
+ }
+ DRV_LOG(INFO,
+ "PCI information matches MPESW device \"%s\"",
ibv_list[ret]->name);
ibv_match[nd++] = ibv_list[ret];
+ break;
}
+ /* Bonding or MPESW device was not found. */
+ if (mlx5_get_pci_addr(ibv_list[ret]->ibdev_path,
+ &pci_addr))
+ continue;
+ if (rte_pci_addr_cmp(&owner_pci, &pci_addr) != 0)
+ continue;
+ DRV_LOG(INFO, "PCI information matches for device \"%s\"",
+ ibv_list[ret]->name);
+ ibv_match[nd++] = ibv_list[ret];
}
ibv_match[nd] = NULL;
if (!nd) {
@@ -2192,6 +2346,12 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
ret = -rte_errno;
goto exit;
}
+ if (mpesw >= 0 && !np) {
+ DRV_LOG(ERR, "Cannot get ports for MPESW device.");
+ rte_errno = ENOENT;
+ ret = -rte_errno;
+ goto exit;
+ }
}
/* Now we can determine the maximal amount of devices to be spawned. */
list = mlx5_malloc(MLX5_MEM_ZERO,
@@ -2203,7 +2363,7 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
ret = -rte_errno;
goto exit;
}
- if (bd >= 0 || np > 1) {
+ if (bd >= 0 || mpesw >= 0 || np > 1) {
/*
* Single IB device with multiple ports found,
* it may be E-Switch master device and representors.
@@ -2222,6 +2382,7 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
list[ns].pci_dev = pci_dev;
list[ns].cdev = cdev;
list[ns].pf_bond = bd;
+ list[ns].mpesw_port = MLX5_MPESW_PORT_INVALID;
list[ns].ifindex = mlx5_nl_ifindex(nl_rdma,
ibv_match[0]->name,
i);
@@ -2278,6 +2439,46 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
}
continue;
}
+ if (!ret && mpesw >= 0) {
+ switch (list[ns].info.name_type) {
+ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+ /* Owner port is treated as master port. */
+ if (list[ns].info.port_name == mpesw) {
+ list[ns].info.master = 1;
+ list[ns].info.representor = 0;
+ } else {
+ list[ns].info.master = 0;
+ list[ns].info.representor = 1;
+ }
+ /*
+ * Ports of this type have uplink port index
+ * encoded in the name. This index is also a PF index.
+ */
+ list[ns].info.pf_num = list[ns].info.port_name;
+ list[ns].mpesw_port = list[ns].info.port_name;
+ list[ns].info.mpesw_owner = mpesw;
+ ns++;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
+ case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+ case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
+ /* Only spawn representors related to the probed PF. */
+ if (list[ns].info.pf_num == owner_id) {
+ /*
+ * Ports of this type have PF index encoded in name,
+ * which translate to the related uplink port index.
+ */
+ list[ns].mpesw_port = list[ns].info.pf_num;
+ /* MPESW owner is also saved but not used now. */
+ list[ns].info.mpesw_owner = mpesw;
+ ns++;
+ }
+ break;
+ default:
+ break;
+ }
+ continue;
+ }
if (!ret && (list[ns].info.representor ^
list[ns].info.master))
ns++;
@@ -2317,6 +2518,7 @@ mlx5_os_pci_probe_pf(struct mlx5_common_device *cdev,
list[ns].pci_dev = pci_dev;
list[ns].cdev = cdev;
list[ns].pf_bond = -1;
+ list[ns].mpesw_port = MLX5_MPESW_PORT_INVALID;
list[ns].ifindex = 0;
if (nl_rdma >= 0)
list[ns].ifindex = mlx5_nl_ifindex
@@ -2597,7 +2799,10 @@ mlx5_os_auxiliary_probe(struct mlx5_common_device *cdev,
struct mlx5_kvargs_ctrl *mkvlist)
{
struct rte_eth_devargs eth_da = { .nb_ports = 0 };
- struct mlx5_dev_spawn_data spawn = { .pf_bond = -1 };
+ struct mlx5_dev_spawn_data spawn = {
+ .pf_bond = -1,
+ .mpesw_port = MLX5_MPESW_PORT_INVALID,
+ };
struct rte_device *dev = cdev->dev;
struct rte_auxiliary_device *adev = RTE_DEV_TO_AUXILIARY(dev);
struct rte_eth_dev *eth_dev;
@@ -186,12 +186,15 @@ struct mlx5_dev_cap {
char fw_ver[64]; /* Firmware version of this device. */
};
+#define MLX5_MPESW_PORT_INVALID (-1)
+
/** Data associated with devices to spawn. */
struct mlx5_dev_spawn_data {
uint32_t ifindex; /**< Network interface index. */
uint32_t max_port; /**< Device maximal port index. */
uint32_t phys_port; /**< Device physical port index. */
int pf_bond; /**< bonding device PF index. < 0 - no bonding */
+ int mpesw_port; /**< MPESW uplink index. < 0 - not an MPESW device. */
struct mlx5_switch_info info; /**< Switch information. */
const char *phys_dev_name; /**< Name of physical device. */
struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */
@@ -200,6 +203,23 @@ struct mlx5_dev_spawn_data {
struct mlx5_bond_info *bond_info;
};
+/**
+ * Check if the port requested to be probed belongs to an MPESW device,
+ * either as a physical (uplink) port or as a representor port.
+ *
+ * @param spawn
+ *   Parameters of the probed port. Only read, never modified.
+ *
+ * @return
+ *   True if the MPESW port index stored in @p spawn is valid (>= 0).
+ *   False otherwise, including the case when MPESW was not configured.
+ */
+static inline bool
+mlx5_is_probed_port_on_mpesw_device(const struct mlx5_dev_spawn_data *spawn)
+{
+	return spawn->mpesw_port >= 0;
+}
+
+
/** Data associated with socket messages. */
struct mlx5_flow_dump_req {
uint32_t port_id; /**< There are plans in DPDK to extend port_id. */
@@ -1768,6 +1788,9 @@ struct mlx5_priv {
uint32_t vport_meta_mask; /* Used for vport index field match mask. */
uint16_t representor_id; /* UINT16_MAX if not a representor. */
int32_t pf_bond; /* >=0, representor owner PF index in bonding. */
+ int32_t mpesw_owner; /* >=0, representor owner PF index in MPESW. */
+ int32_t mpesw_port; /* Related port index of MPESW device. < 0 - no MPESW. */
+ bool mpesw_uplink; /* If true, port is an uplink port. */
unsigned int if_index; /* Associated kernel network device index. */
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
@@ -1933,6 +1956,22 @@ mlx5_devx_obj_ops_en(struct mlx5_dev_ctx_shared *sh)
sh->dev_cap.dest_tir);
}
+/**
+ * Check if the port belongs to an MPESW device, either as a physical
+ * (uplink) port or as a representor port.
+ *
+ * @param priv
+ *   Pointer to port's private data. Only read, never modified.
+ *
+ * @return
+ *   True if the MPESW port index stored in @p priv is valid (>= 0).
+ *   False otherwise, including the case when MPESW was not configured.
+ */
+static inline bool
+mlx5_is_port_on_mpesw_device(const struct mlx5_priv *priv)
+{
+	return priv->mpesw_port >= 0;
+}
+
+
/* mlx5.c */
int mlx5_getenv_int(const char *);
@@ -395,18 +395,30 @@ uint16_t
mlx5_representor_id_encode(const struct mlx5_switch_info *info,
enum rte_eth_representor_type hpf_type)
{
- enum rte_eth_representor_type type = RTE_ETH_REPRESENTOR_VF;
+ enum rte_eth_representor_type type;
uint16_t repr = info->port_name;
-
- if (info->representor == 0)
- return UINT16_MAX;
- if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF)
+ int32_t pf = info->pf_num;
+
+ /*
+ * Ports which are not representors never get an encoded ID,
+ * whatever their port name type is.
+ */
+ if (!info->representor)
+ return UINT16_MAX;
+ switch (info->name_type) {
+ case MLX5_PHYS_PORT_NAME_TYPE_UPLINK:
+ /* Uplink representor: encoded as PF type under the MPESW owner. */
+ type = RTE_ETH_REPRESENTOR_PF;
+ pf = info->mpesw_owner;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFSF:
type = RTE_ETH_REPRESENTOR_SF;
- if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF) {
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFHPF:
type = hpf_type;
repr = UINT16_MAX;
+ break;
+ case MLX5_PHYS_PORT_NAME_TYPE_PFVF:
+ default:
+ /* Any other name type is encoded as a VF representor. */
+ type = RTE_ETH_REPRESENTOR_VF;
+ break;
}
- return MLX5_REPRESENTOR_ID(info->pf_num, type, repr);
+ return MLX5_REPRESENTOR_ID(pf, type, repr);
}
/**
@@ -430,7 +442,7 @@ mlx5_representor_info_get(struct rte_eth_dev *dev,
struct rte_eth_representor_info *info)
{
struct mlx5_priv *priv = dev->data->dev_private;
- int n_type = 4; /* Representor types, VF, HPF@VF, SF and HPF@SF. */
+ int n_type = 5; /* Representor types: PF, VF, HPF@VF, SF and HPF@SF. */
int n_pf = 2; /* Number of PFs. */
int i = 0, pf;
int n_entries;
@@ -443,7 +455,30 @@ mlx5_representor_info_get(struct rte_eth_dev *dev,
n_entries = info->nb_ranges_alloc;
info->controller = 0;
- info->pf = priv->pf_bond >= 0 ? priv->pf_bond : 0;
+ info->pf = 0;
+ if (mlx5_is_port_on_mpesw_device(priv)) {
+ info->pf = priv->mpesw_port;
+ /* PF range, both ports will show the same information. */
+ info->ranges[i].type = RTE_ETH_REPRESENTOR_PF;
+ info->ranges[i].controller = 0;
+ info->ranges[i].pf = priv->mpesw_owner + 1;
+ info->ranges[i].vf = 0;
+ /*
+ * The representor indexes should be the values set of "priv->mpesw_port".
+ * In the real case now, only 1 PF/UPLINK representor is supported.
+ * The port index will always be the value of "owner + 1".
+ */
+ info->ranges[i].id_base =
+ MLX5_REPRESENTOR_ID(priv->mpesw_owner, info->ranges[i].type,
+ info->ranges[i].pf);
+ info->ranges[i].id_end =
+ MLX5_REPRESENTOR_ID(priv->mpesw_owner, info->ranges[i].type,
+ info->ranges[i].pf);
+ snprintf(info->ranges[i].name, sizeof(info->ranges[i].name),
+ "pf%d", info->ranges[i].pf);
+ i++;
+ } else if (priv->pf_bond >= 0)
+ info->pf = priv->pf_bond;
for (pf = 0; pf < n_pf; ++pf) {
/* VF range. */
info->ranges[i].type = RTE_ETH_REPRESENTOR_VF;
@@ -1331,7 +1331,7 @@ flow_hw_represented_port_compile(struct rte_eth_dev *dev,
if (!priv->master)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "represented_port acton must"
+ "represented_port action must"
" be used on proxy port");
if (m && !!m->port_id) {
struct mlx5_priv *port_priv;
@@ -9188,7 +9188,7 @@ flow_hw_set_port_info(struct rte_eth_dev *dev)
info = &mlx5_flow_hw_port_infos[port_id];
info->regc_mask = priv->vport_meta_mask;
info->regc_value = priv->vport_meta_tag;
- info->is_wire = priv->master;
+ info->is_wire = mlx5_is_port_on_mpesw_device(priv) ? priv->mpesw_uplink : priv->master;
}
/* Clears vport tag and mask used for HWS rules. */
@@ -157,9 +157,13 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
/*
* Configuring the VF instead of its representor,
- * need to skip the special case of HPF on BlueField.
+ * need to skip the special cases:
+ * - HPF on BlueField,
+ * - SF representors,
+ * - uplink ports when running in MPESW mode.
*/
- if (priv->representor && !mlx5_is_hpf(dev) && !mlx5_is_sf_repr(dev)) {
+ if (priv->representor && !mlx5_is_hpf(dev) && !mlx5_is_sf_repr(dev) &&
+ !priv->mpesw_uplink) {
DRV_LOG(DEBUG, "VF represented by port %u setting primary MAC address",
dev->data->port_id);
if (priv->pf_bond >= 0) {