From patchwork Sun Mar 28 13:48:07 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89967 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0A317A034F; Sun, 28 Mar 2021 15:48:58 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id F22D2140DCF; Sun, 28 Mar 2021 15:48:56 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id 7D870140DC9 for ; Sun, 28 Mar 2021 15:48:55 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:48:55 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHj018703; Sun, 28 Mar 2021 16:48:54 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:07 +0000 Message-Id: <1616939297-15627-2-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 1/9] common/mlx5: sub-function representor port name parsing X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch supports representor name parsing for SF. 
In sysfs, representor name stored under "phys_port_name" sysfs key, similar to VF representor, switch port name of SF representor is "pfsf". For netlink message, net SF type is supported. Examples: pf0sf1 pf0sf[0-3] Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/common/mlx5/linux/mlx5_common_os.c | 32 +++++++++++++++------- drivers/common/mlx5/linux/mlx5_nl.c | 3 ++ drivers/common/mlx5/mlx5_common.h | 2 ++ drivers/net/mlx5/linux/mlx5_ethdev_os.c | 3 ++ 4 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/common/mlx5/linux/mlx5_common_os.c b/drivers/common/mlx5/linux/mlx5_common_os.c index 0edd78ea6d..5cf9576921 100644 --- a/drivers/common/mlx5/linux/mlx5_common_os.c +++ b/drivers/common/mlx5/linux/mlx5_common_os.c @@ -97,22 +97,34 @@ void mlx5_translate_port_name(const char *port_name_in, struct mlx5_switch_info *port_info_out) { - char pf_c1, pf_c2, vf_c1, vf_c2, eol; + char ctrl = 0, pf_c1, pf_c2, vf_c1, vf_c2, eol; char *end; int sc_items; - /* - * Check for port-name as a string of the form pf0vf0 - * (support kernel ver >= 5.0 or OFED ver >= 4.6). 
- */ + sc_items = sscanf(port_name_in, "%c%d", + &ctrl, &port_info_out->ctrl_num); + if (sc_items == 2 && ctrl == 'c') { + port_name_in++; /* 'c' */ + port_name_in += snprintf(NULL, 0, "%d", + port_info_out->ctrl_num); + } + /* Check for port-name as a string of the form pf0vf0 or pf0sf0 */ sc_items = sscanf(port_name_in, "%c%c%d%c%c%d%c", &pf_c1, &pf_c2, &port_info_out->pf_num, &vf_c1, &vf_c2, &port_info_out->port_name, &eol); - if (sc_items == 6 && - pf_c1 == 'p' && pf_c2 == 'f' && - vf_c1 == 'v' && vf_c2 == 'f') { - port_info_out->name_type = MLX5_PHYS_PORT_NAME_TYPE_PFVF; - return; + if (sc_items == 6 && pf_c1 == 'p' && pf_c2 == 'f') { + if (vf_c1 == 'v' && vf_c2 == 'f') { + /* Kernel ver >= 5.0 or OFED ver >= 4.6 */ + port_info_out->name_type = + MLX5_PHYS_PORT_NAME_TYPE_PFVF; + return; + } + if (vf_c1 == 's' && vf_c2 == 'f') { + /* Kernel ver >= 5.11 or OFED ver >= 5.1 */ + port_info_out->name_type = + MLX5_PHYS_PORT_NAME_TYPE_PFSF; + return; + } } /* * Check for port-name as a string of the form p0 diff --git a/drivers/common/mlx5/linux/mlx5_nl.c b/drivers/common/mlx5/linux/mlx5_nl.c index ef7a521379..752c57b33d 100644 --- a/drivers/common/mlx5/linux/mlx5_nl.c +++ b/drivers/common/mlx5/linux/mlx5_nl.c @@ -746,6 +746,7 @@ mlx5_nl_mac_addr_sync(int nlsk_fd, unsigned int iface_idx, int i; int ret; + memset(macs, 0, n * sizeof(macs[0])); ret = mlx5_nl_mac_addr_list(nlsk_fd, iface_idx, &macs, &macs_n); if (ret) return; @@ -1158,6 +1159,8 @@ mlx5_nl_check_switch_info(bool num_vf_set, case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: /* Fallthrough */ case MLX5_PHYS_PORT_NAME_TYPE_PFVF: + /* Fallthrough */ + case MLX5_PHYS_PORT_NAME_TYPE_PFSF: /* New representors naming schema. 
*/ switch_info->representor = 1; break; diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h index 5028a05b49..8eda6749b4 100644 --- a/drivers/common/mlx5/mlx5_common.h +++ b/drivers/common/mlx5/mlx5_common.h @@ -151,6 +151,7 @@ enum mlx5_nl_phys_port_name_type { MLX5_PHYS_PORT_NAME_TYPE_UPLINK, /* p0, kernel ver >= 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_PFVF, /* pf0vf0, kernel ver >= 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_PFHPF, /* pf0, kernel ver >= 5.7, HPF rep */ + MLX5_PHYS_PORT_NAME_TYPE_PFSF, /* pf0sf0, kernel ver >= 5.0 */ MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN, /* Unrecognized. */ }; @@ -159,6 +160,7 @@ struct mlx5_switch_info { uint32_t master:1; /**< Master device. */ uint32_t representor:1; /**< Representor device. */ enum mlx5_nl_phys_port_name_type name_type; /** < Port name type. */ + int32_t ctrl_num; /**< Controller number (valid for c#pf#vf# format). */ int32_t pf_num; /**< PF number (valid for pfxvfx format only). */ int32_t port_name; /**< Representor port name. */ uint64_t switch_id; /**< Switch identifier. */ diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index 0e8de9439e..cb692b22f2 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -1013,6 +1013,9 @@ mlx5_sysfs_check_switch_info(bool device_dir, /* New representors naming schema. 
*/ switch_info->representor = 1; break; + default: + switch_info->master = device_dir; + break; } } From patchwork Sun Mar 28 13:48:08 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89968 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 9221EA034F; Sun, 28 Mar 2021 15:49:04 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 4CA8A140DCE; Sun, 28 Mar 2021 15:49:03 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id 9F61A140DBB for ; Sun, 28 Mar 2021 15:49:01 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:01 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHk018703; Sun, 28 Mar 2021 16:49:01 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:08 +0000 Message-Id: <1616939297-15627-3-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 2/9] net/mlx5: support representor of sub function X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
dev-bounces@dpdk.org Sender: "dev" This patch adds support for SF representor. Similar to VF representor, switch port name of SF representor in phys_port_name sysfs key is "pfsf". Device representor argument is "representor=sf[list]", list members can be a mix of single instances and ranges. Example: representor=sf[0,2,4,8-12,-1] To probe VF representor and SF representor, need to separate into 2 devices: -a ,representor=vf[list] -a ,representor=sf[list] Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/mlx5.rst | 58 +++++++++-- drivers/net/mlx5/linux/mlx5_ethdev_os.c | 2 + drivers/net/mlx5/linux/mlx5_os.c | 127 +++++++++++++++++++----- drivers/net/mlx5/mlx5.c | 1 + drivers/net/mlx5/mlx5.h | 9 ++ drivers/net/mlx5/mlx5_ethdev.c | 100 +++++++++++++++++++ 6 files changed, 263 insertions(+), 34 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index a2cfc51b2a..2e2909d82d 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -931,14 +931,18 @@ Driver options - ``representor`` parameter [list] This parameter can be used to instantiate DPDK Ethernet devices from - existing port (or VF) representors configured on the device. + existing port (PF, VF or SF) representors configured on the device. It is a standard parameter whose format is described in :ref:`ethernet_device_standard_device_arguments`. - For instance, to probe port representors 0 through 2:: + For instance, to probe VF port representors 0 through 2:: - representor=[0-2] + representor=vf[0-2] + + To probe SF port representors 0 through 2:: + + representor=sf[0-2] - ``max_dump_files_num`` parameter [int] @@ -1351,15 +1355,15 @@ Quick Start Guide on OFED/EN Enable switchdev mode --------------------- -Switchdev mode is a mode in E-Switch, that binds between representor and VF. 
-Representor is a port in DPDK that is connected to a VF in such a way -that assuming there are no offload flows, each packet that is sent from the VF -will be received by the corresponding representor. While each packet that is -sent to a representor will be received by the VF. +Switchdev mode is a mode in E-Switch, that binds between representor and VF or SF. +Representor is a port in DPDK that is connected to a VF or SF in such a way +that assuming there are no offload flows, each packet that is sent from the VF or SF +will be received by the corresponding representor. While each packet that is +sent to a representor will be received by the VF or SF. This is very useful in case of SRIOV mode, where the first packet that is sent -by the VF will be received by the DPDK application which will decide if this +by the VF or SF will be received by the DPDK application which will decide if this flow should be offloaded to the E-Switch. After offloading the flow packet -that the VF that are matching the flow will not be received any more by +that the VF or SF that are matching the flow will not be received any more by the DPDK application. 1. Enable SRIOV mode:: @@ -1386,6 +1390,40 @@ the DPDK application. echo switchdev > /sys/class/net//compat/devlink/mode +SubFunction representor support +------------------------------- +SubFunction is a portion of the PCI device, a SF netdev has its own +dedicated queues(txq, rxq). A SF netdev supports E-Switch representation +offload similar to existing PF and VF representors. A SF shares PCI +level resources with other SFs and/or with its parent PCI function. + +1. Configure SF feature:: + + mlxconfig -d set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1 + + Value of PF_BAR2_SIZE: + + 0: 8 SFs + 1: 16 SFs + 2: 32 SFs + 3: 64 SFs + +2. Reset the FW:: + + mlxfwreset -d reset + +3. Enable switchdev mode:: + + echo switchdev > /sys/class/net//compat/devlink/mode + +4. Create SF:: + + mlnx-sf -d -a create + +5. 
Probe SF representor:: + + testpmd> port attach ,representor=sf0,dv_flow_en=1 + Performance tuning ------------------ diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index cb692b22f2..2127fcfbfa 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -1010,6 +1010,8 @@ mlx5_sysfs_check_switch_info(bool device_dir, case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: /* Fallthrough */ case MLX5_PHYS_PORT_NAME_TYPE_PFVF: + /* Fallthrough */ + case MLX5_PHYS_PORT_NAME_TYPE_PFSF: /* New representors naming schema. */ switch_info->representor = 1; break; diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 5740214950..aac923ea39 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -701,6 +701,8 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev) * Verbs device parameters (name, port, switch_info) to spawn. * @param config * Device configuration parameters. + * @param eth_da + * Device arguments. * * @return * A valid Ethernet device object on success, NULL otherwise and rte_errno @@ -712,7 +714,8 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev) static struct rte_eth_dev * mlx5_dev_spawn(struct rte_device *dpdk_dev, struct mlx5_dev_spawn_data *spawn, - struct mlx5_dev_config *config) + struct mlx5_dev_config *config, + struct rte_eth_devargs *eth_da) { const struct mlx5_switch_info *switch_info = &spawn->info; struct mlx5_dev_ctx_shared *sh = NULL; @@ -742,34 +745,82 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, /* Determine if this port representor is supposed to be spawned. 
*/ if (switch_info->representor && dpdk_dev->devargs) { - struct rte_eth_devargs eth_da; - - err = rte_eth_devargs_parse(dpdk_dev->devargs->args, &eth_da); - if (err) { - rte_errno = -err; - DRV_LOG(ERR, "failed to process device arguments: %s", - strerror(rte_errno)); - return NULL; - } - if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) { - /* Representor not specified. */ + switch (eth_da->type) { + case RTE_ETH_REPRESENTOR_SF: + if (switch_info->name_type != + MLX5_PHYS_PORT_NAME_TYPE_PFSF) { + rte_errno = EBUSY; + return NULL; + } + break; + case RTE_ETH_REPRESENTOR_VF: + /* Allows HPF representor index -1 as exception. */ + if (!(spawn->info.port_name == -1 && + switch_info->name_type == + MLX5_PHYS_PORT_NAME_TYPE_PFHPF) && + switch_info->name_type != + MLX5_PHYS_PORT_NAME_TYPE_PFVF) { + rte_errno = EBUSY; + return NULL; + } + break; + case RTE_ETH_REPRESENTOR_NONE: rte_errno = EBUSY; return NULL; - } - if (eth_da.type != RTE_ETH_REPRESENTOR_VF) { + break; + default: rte_errno = ENOTSUP; DRV_LOG(ERR, "unsupported representor type: %s", dpdk_dev->devargs->args); return NULL; } - for (i = 0; i < eth_da.nb_representor_ports; ++i) - if (eth_da.representor_ports[i] == + /* Check controller ID: */ + for (i = 0; i < eth_da->nb_mh_controllers; ++i) + if (eth_da->mh_controllers[i] == + (uint16_t)switch_info->ctrl_num) + break; + if (eth_da->nb_mh_controllers && + i == eth_da->nb_mh_controllers) { + rte_errno = EBUSY; + return NULL; + } + /* Check SF/VF ID: */ + for (i = 0; i < eth_da->nb_representor_ports; ++i) + if (eth_da->representor_ports[i] == (uint16_t)switch_info->port_name) break; - if (i == eth_da.nb_representor_ports) { + if (eth_da->type != RTE_ETH_REPRESENTOR_PF && + i == eth_da->nb_representor_ports) { rte_errno = EBUSY; return NULL; } + /* Check PF ID. Check after repr port to avoid warning flood. 
*/ + if (spawn->pf_bond >= 0) { + for (i = 0; i < eth_da->nb_ports; ++i) + if (eth_da->ports[i] == + (uint16_t)switch_info->pf_num) + break; + if (eth_da->nb_ports && i == eth_da->nb_ports) { + /* For backward compatibility, bonding + * representor syntax supported with limitation, + * device iterator won't find it: + * ,representor=# + */ + if (switch_info->pf_num > 0 && + eth_da->ports[0] == 0) { + DRV_LOG(WARNING, "Representor on Bonding PF should use pf#vf# format: %s", + dpdk_dev->devargs->args); + } else { + rte_errno = EBUSY; + return NULL; + } + } + } else if (eth_da->nb_ports > 1 || eth_da->ports[0]) { + rte_errno = EINVAL; + DRV_LOG(ERR, "PF id not supported by non-bond device: %s", + dpdk_dev->devargs->args); + return NULL; + } } /* Build device name. */ if (spawn->pf_bond < 0) { @@ -777,8 +828,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, if (!switch_info->representor) strlcpy(name, dpdk_dev->name, sizeof(name)); else - snprintf(name, sizeof(name), "%s_representor_%u", - dpdk_dev->name, switch_info->port_name); + snprintf(name, sizeof(name), "%s_representor_%s%u", + dpdk_dev->name, + switch_info->name_type == + MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf", + switch_info->port_name); } else { /* Bonding device. */ if (!switch_info->representor) @@ -786,9 +840,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, dpdk_dev->name, mlx5_os_get_dev_device_name(spawn->phys_dev)); else - snprintf(name, sizeof(name), "%s_%s_representor_%u", + snprintf(name, sizeof(name), "%s_%s_representor_%s%u", dpdk_dev->name, mlx5_os_get_dev_device_name(spawn->phys_dev), + switch_info->name_type == + MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf", switch_info->port_name); } /* check if the device is already spawned */ @@ -1063,9 +1119,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, priv->vport_id = switch_info->representor ? switch_info->port_name + 1 : -1; #endif - /* representor_id field keeps the unmodified VF index. */ - priv->representor_id = switch_info->representor ? 
- switch_info->port_name : -1; + priv->representor_id = mlx5_representor_id_encode(switch_info); /* * Look for sibling devices in order to reuse their switch domain * if any, otherwise allocate one. @@ -1839,6 +1893,7 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct mlx5_dev_spawn_data *list = NULL; struct mlx5_dev_config dev_config; unsigned int dev_config_vf; + struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE }; int ret; if (rte_eal_process_type() == RTE_PROC_PRIMARY) @@ -1849,6 +1904,27 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, strerror(rte_errno)); return -rte_errno; } + if (pci_dev->device.devargs) { + /* Parse representor information from device argument. */ + if (pci_dev->device.devargs->cls_str) + ret = rte_eth_devargs_parse + (pci_dev->device.devargs->cls_str, &eth_da); + if (ret) { + DRV_LOG(ERR, "failed to parse device arguments: %s", + pci_dev->device.devargs->cls_str); + return -rte_errno; + } + if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) { + /* Support legacy device argument */ + ret = rte_eth_devargs_parse + (pci_dev->device.devargs->args, &eth_da); + if (ret) { + DRV_LOG(ERR, "failed to parse device arguments: %s", + pci_dev->device.devargs->args); + return -rte_errno; + } + } + } errno = 0; ibv_list = mlx5_glue->get_device_list(&ret); if (!ibv_list) { @@ -2021,6 +2097,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, case MLX5_PHYS_PORT_NAME_TYPE_PFHPF: /* Fallthrough */ case MLX5_PHYS_PORT_NAME_TYPE_PFVF: + /* Fallthrough */ + case MLX5_PHYS_PORT_NAME_TYPE_PFSF: if (list[ns].info.pf_num == bd) ns++; break; @@ -2198,7 +2276,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, dev_config.log_hp_size = MLX5_ARG_UNSET; list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device, &list[i], - &dev_config); + &dev_config, + &eth_da); if (!list[i].eth_dev) { if (rte_errno != EBUSY && rte_errno != EEXIST) break; diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c 
index fb586317ca..22058a0ad5 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1454,6 +1454,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .xstats_get_names = mlx5_xstats_get_names, .fw_version_get = mlx5_fw_version_get, .dev_infos_get = mlx5_dev_infos_get, + .representor_info_get = mlx5_representor_info_get, .read_clock = mlx5_txpp_read_clock, .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, .vlan_filter_set = mlx5_vlan_filter_set, diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 8e8727a6c5..33c6b39a1e 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1032,6 +1032,15 @@ int mlx5_flow_aso_age_mng_init(struct mlx5_dev_ctx_shared *sh); /* mlx5_ethdev.c */ int mlx5_dev_configure(struct rte_eth_dev *dev); +int mlx5_representor_info_get(struct rte_eth_dev *dev, + struct rte_eth_representor_info *info); +#define MLX5_REPRESENTOR_ID(pf, type, repr) \ + (((pf) << 14) + ((type) << 12) + ((repr) & 0xfff)) +#define MLX5_REPRESENTOR_REPR(repr_id) \ + ((repr_id) & 0xfff) +#define MLX5_REPRESENTOR_TYPE(repr_id) \ + (((repr_id) >> 12) & 3) +uint16_t mlx5_representor_id_encode(const struct mlx5_switch_info *info); int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size); int mlx5_dev_infos_get(struct rte_eth_dev *dev, diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 51b39ddde5..1ffb13cf2e 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -377,6 +377,106 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) return 0; } +/** + * Calculate representor ID from port switch info. + * + * Uint16 representor ID bits definition: + * pf: 2 + * type: 2 + * vf/sf: 12 + * + * @param info + * Port switch info. + * + * @return + * Encoded representor ID. 
+ */ +uint16_t +mlx5_representor_id_encode(const struct mlx5_switch_info *info) +{ + enum rte_eth_representor_type type = RTE_ETH_REPRESENTOR_VF; + uint16_t repr = info->port_name; + + if (info->representor == 0) + return UINT16_MAX; + if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF) + type = RTE_ETH_REPRESENTOR_SF; + if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF) + repr = UINT16_MAX; + return MLX5_REPRESENTOR_ID(info->pf_num, type, repr); +} + +/** + * DPDK callback to get information about representor. + * + * Representor ID bits definition: + * vf/sf: 12 + * type: 2 + * pf: 2 + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] info + * Nullable info structure output buffer. + * + * @return + * negative on error, or the number of representor ranges. + */ +int +mlx5_representor_info_get(struct rte_eth_dev *dev, + struct rte_eth_representor_info *info) +{ + struct mlx5_priv *priv = dev->data->dev_private; + int n_type = 3; /* Number of representor types, VF, HPF and SF. */ + int n_pf = 2; /* Number of PFs. */ + int i = 0, pf; + + if (info == NULL) + goto out; + info->controller = 0; + info->pf = priv->pf_bond >= 0 ? priv->pf_bond : 0; + for (pf = 0; pf < n_pf; ++pf) { + /* VF range. */ + info->ranges[i].type = RTE_ETH_REPRESENTOR_VF; + info->ranges[i].controller = 0; + info->ranges[i].pf = pf; + info->ranges[i].vf = 0; + info->ranges[i].id_base = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0); + info->ranges[i].id_end = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1); + snprintf(info->ranges[i].name, + sizeof(info->ranges[i].name), "pf%dvf", pf); + i++; + /* HPF range. 
*/ + info->ranges[i].type = RTE_ETH_REPRESENTOR_VF; + info->ranges[i].controller = 0; + info->ranges[i].pf = pf; + info->ranges[i].vf = UINT16_MAX; + info->ranges[i].id_base = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1); + info->ranges[i].id_end = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1); + snprintf(info->ranges[i].name, + sizeof(info->ranges[i].name), "pf%dvf", pf); + i++; + /* SF range. */ + info->ranges[i].type = RTE_ETH_REPRESENTOR_SF; + info->ranges[i].controller = 0; + info->ranges[i].pf = pf; + info->ranges[i].vf = 0; + info->ranges[i].id_base = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, 0); + info->ranges[i].id_end = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1); + snprintf(info->ranges[i].name, + sizeof(info->ranges[i].name), "pf%dsf", pf); + i++; + } +out: + return n_type * n_pf; +} + /** * Get firmware version of a device. * From patchwork Sun Mar 28 13:48:09 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89969 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id C9DE2A034F; Sun, 28 Mar 2021 15:49:11 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 71A8E140DE3; Sun, 28 Mar 2021 15:49:08 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id ADA44140DE3 for ; Sun, 28 Mar 2021 15:49:06 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:04 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHl018703; Sun, 28 Mar 2021 16:49:03 +0300 From: Xueming Li To: Viacheslav 
Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:09 +0000 Message-Id: <1616939297-15627-4-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 3/9] net/mlx5: revert setting bonding representor to first PF X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" With kernel bonding, representors on second PF are being probed by devargs: ,representor=pf1vf No need to save primary PF port ID and lookup when probing sibling ports, revert patch [1] [1]: commit e6818853c022 ("net/mlx5: set representor to first PF in bonding mode") Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/linux/mlx5_os.c | 20 ++------------------ drivers/net/mlx5/mlx5.c | 1 - drivers/net/mlx5/mlx5.h | 1 - 3 files changed, 2 insertions(+), 20 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index aac923ea39..0c56cae489 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -862,13 +862,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, rte_errno = ENOMEM; return NULL; } - priv = eth_dev->data->dev_private; - if (priv->sh->bond_dev != UINT16_MAX) - /* For bonding port, use primary PCI device. 
*/ - eth_dev->device = - rte_eth_devices[priv->sh->bond_dev].device; - else - eth_dev->device = dpdk_dev; + eth_dev->device = dpdk_dev; eth_dev->dev_ops = &mlx5_dev_sec_ops; eth_dev->rx_descriptor_status = mlx5_rx_descriptor_status; eth_dev->tx_descriptor_status = mlx5_tx_descriptor_status; @@ -1485,17 +1479,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, eth_dev->data->dev_private = priv; priv->dev_data = eth_dev->data; eth_dev->data->mac_addrs = priv->mac; - if (spawn->pf_bond < 0) { - eth_dev->device = dpdk_dev; - } else { - /* Use primary bond PCI as device. */ - if (sh->bond_dev == UINT16_MAX) { - sh->bond_dev = eth_dev->data->port_id; - eth_dev->device = dpdk_dev; - } else { - eth_dev->device = rte_eth_devices[sh->bond_dev].device; - } - } + eth_dev->device = dpdk_dev; eth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS; /* Configure the first MAC address by default. */ if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 22058a0ad5..aa8b50c642 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -917,7 +917,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, goto error; } sh->refcnt = 1; - sh->bond_dev = UINT16_MAX; sh->max_port = spawn->max_port; strncpy(sh->ibdev_name, mlx5_os_get_ctx_device_name(sh->ctx), sizeof(sh->ibdev_name) - 1); diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 33c6b39a1e..bee0696518 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -667,7 +667,6 @@ struct mlx5_flex_parser_profiles { struct mlx5_dev_ctx_shared { LIST_ENTRY(mlx5_dev_ctx_shared) next; uint32_t refcnt; - uint16_t bond_dev; /* Bond primary device id. */ uint32_t devx:1; /* Opened with DV. */ uint32_t flow_hit_aso_en:1; /* Flow Hit ASO is supported. */ uint32_t rq_ts_format:2; /* RQ timestamp formats supported. 
*/ From patchwork Sun Mar 28 13:48:10 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89971 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id BE5F8A034F; Sun, 28 Mar 2021 15:49:26 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 4DA3D140DF9; Sun, 28 Mar 2021 15:49:14 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id BA5F2140DED for ; Sun, 28 Mar 2021 15:49:11 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:06 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHm018703; Sun, 28 Mar 2021 16:49:06 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:10 +0000 Message-Id: <1616939297-15627-5-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 4/9] net/mlx5: refactor bonding representor probe X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" To probe representor on 2nd PF of kernel bonding device, had to 
specify PF1 BDF in devarg: ,representor=0 When closing bonding device, all representors had to be closed together and this implies all representors have to use primary PF of bonding device. So after probing representor port on 2nd PF, when locating new probed device using device argument, the filter used 2nd PF as PCI address and failed to locate new device. Conflict happened by using current representor devargs: - Use PCI BDF to specify representor owner PF - Use PCI BDF to locate probed representor device. - PMD uses primary PCI BDF as PCI device. To resolve such conflicts, new representor syntax is introduced here: ,representor=pfXvfY All representors must use primary PF as owner PCI device, PMD internally locate owner PCI address by checking representor "pfX" part. To EAL, all representors are registered to primary PCI device, the 2nd PF is hidden to EAL, thus all search should be consistent. Same to VF representor, HPF (host PF on BlueField) uses same syntax to probe, example: representor=pf1vf[0-3,-1] This patch also adds pf index into kernel bonding representor port name: __representor_pfvf Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/mlx5.rst | 4 +- drivers/net/mlx5/linux/mlx5_os.c | 249 +++++++++++++++++-------------- drivers/net/mlx5/mlx5.c | 20 +++ drivers/net/mlx5/mlx5.h | 3 +- drivers/net/mlx5/mlx5_defs.h | 4 - drivers/net/mlx5/mlx5_ethdev.c | 27 ---- drivers/net/mlx5/mlx5_mac.c | 6 +- 7 files changed, 163 insertions(+), 150 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 2e2909d82d..92fe7c11e4 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -938,11 +938,11 @@ Driver options For instance, to probe VF port representors 0 through 2:: - representor=vf[0-2] + ,representor=vf[0-2] To probe SF port representors 0 through 2:: - representor=sf[0-2] + ,representor=sf[0-2] - ``max_dump_files_num`` parameter [int] diff --git a/drivers/net/mlx5/linux/mlx5_os.c 
b/drivers/net/mlx5/linux/mlx5_os.c index 0c56cae489..bb4a8719f7 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -692,6 +692,71 @@ mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev) "available.", dev->data->port_id); } +/** + * Check if representor spawn info match devargs. + * + * @param spawn + * Verbs device parameters (name, port, switch_info) to spawn. + * @param eth_da + * Device devargs to probe. + * + * @return + * Match result. + */ +static bool +mlx5_representor_match(struct mlx5_dev_spawn_data *spawn, + struct rte_eth_devargs *eth_da) +{ + struct mlx5_switch_info *switch_info = &spawn->info; + unsigned int p, f; + uint16_t id; + uint16_t repr_id = mlx5_representor_id_encode(switch_info); + + switch (eth_da->type) { + case RTE_ETH_REPRESENTOR_SF: + if (switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFSF) { + rte_errno = EBUSY; + return false; + } + break; + case RTE_ETH_REPRESENTOR_VF: + /* Allows HPF representor index -1 as exception. */ + if (!(spawn->info.port_name == -1 && + switch_info->name_type == + MLX5_PHYS_PORT_NAME_TYPE_PFHPF) && + switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFVF) { + rte_errno = EBUSY; + return false; + } + break; + case RTE_ETH_REPRESENTOR_NONE: + rte_errno = EBUSY; + return false; + default: + rte_errno = ENOTSUP; + DRV_LOG(ERR, "unsupported representor type"); + return false; + } + /* Check representor ID: */ + for (p = 0; p < eth_da->nb_ports; ++p) { + if (spawn->pf_bond < 0) { + /* For non-LAG mode, allow and ignore pf. */ + switch_info->pf_num = eth_da->ports[p]; + repr_id = mlx5_representor_id_encode(switch_info); + } + for (f = 0; f < eth_da->nb_representor_ports; ++f) { + id = MLX5_REPRESENTOR_ID + (eth_da->ports[p], eth_da->type, + eth_da->representor_ports[f]); + if (repr_id == id) + return true; + } + } + rte_errno = EBUSY; + return false; +} + + /** * Spawn an Ethernet device from Verbs information. 
* @@ -738,115 +803,44 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, char name[RTE_ETH_NAME_MAX_LEN]; int own_domain_id = 0; uint16_t port_id; - unsigned int i; #ifdef HAVE_MLX5DV_DR_DEVX_PORT struct mlx5dv_devx_port devx_port = { .comp_mask = 0 }; #endif /* Determine if this port representor is supposed to be spawned. */ - if (switch_info->representor && dpdk_dev->devargs) { - switch (eth_da->type) { - case RTE_ETH_REPRESENTOR_SF: - if (switch_info->name_type != - MLX5_PHYS_PORT_NAME_TYPE_PFSF) { - rte_errno = EBUSY; - return NULL; - } - break; - case RTE_ETH_REPRESENTOR_VF: - /* Allows HPF representor index -1 as exception. */ - if (!(spawn->info.port_name == -1 && - switch_info->name_type == - MLX5_PHYS_PORT_NAME_TYPE_PFHPF) && - switch_info->name_type != - MLX5_PHYS_PORT_NAME_TYPE_PFVF) { - rte_errno = EBUSY; - return NULL; - } - break; - case RTE_ETH_REPRESENTOR_NONE: - rte_errno = EBUSY; - return NULL; - break; - default: - rte_errno = ENOTSUP; - DRV_LOG(ERR, "unsupported representor type: %s", - dpdk_dev->devargs->args); - return NULL; - } - /* Check controller ID: */ - for (i = 0; i < eth_da->nb_mh_controllers; ++i) - if (eth_da->mh_controllers[i] == - (uint16_t)switch_info->ctrl_num) - break; - if (eth_da->nb_mh_controllers && - i == eth_da->nb_mh_controllers) { - rte_errno = EBUSY; - return NULL; - } - /* Check SF/VF ID: */ - for (i = 0; i < eth_da->nb_representor_ports; ++i) - if (eth_da->representor_ports[i] == - (uint16_t)switch_info->port_name) - break; - if (eth_da->type != RTE_ETH_REPRESENTOR_PF && - i == eth_da->nb_representor_ports) { - rte_errno = EBUSY; - return NULL; - } - /* Check PF ID. Check after repr port to avoid warning flood. 
*/ - if (spawn->pf_bond >= 0) { - for (i = 0; i < eth_da->nb_ports; ++i) - if (eth_da->ports[i] == - (uint16_t)switch_info->pf_num) - break; - if (eth_da->nb_ports && i == eth_da->nb_ports) { - /* For backward compatibility, bonding - * representor syntax supported with limitation, - * device iterator won't find it: - * ,representor=# - */ - if (switch_info->pf_num > 0 && - eth_da->ports[0] == 0) { - DRV_LOG(WARNING, "Representor on Bonding PF should use pf#vf# format: %s", - dpdk_dev->devargs->args); - } else { - rte_errno = EBUSY; - return NULL; - } - } - } else if (eth_da->nb_ports > 1 || eth_da->ports[0]) { - rte_errno = EINVAL; - DRV_LOG(ERR, "PF id not supported by non-bond device: %s", - dpdk_dev->devargs->args); - return NULL; - } - } + if (switch_info->representor && dpdk_dev->devargs && + !mlx5_representor_match(spawn, eth_da)) + return NULL; /* Build device name. */ - if (spawn->pf_bond < 0) { + if (spawn->pf_bond < 0) { /* Single device. */ if (!switch_info->representor) strlcpy(name, dpdk_dev->name, sizeof(name)); else - snprintf(name, sizeof(name), "%s_representor_%s%u", + err = snprintf(name, sizeof(name), "%s_representor_%s%u", dpdk_dev->name, switch_info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf", switch_info->port_name); } else { /* Bonding device. */ - if (!switch_info->representor) - snprintf(name, sizeof(name), "%s_%s", + if (!switch_info->representor) { + err = snprintf(name, sizeof(name), "%s_%s", dpdk_dev->name, mlx5_os_get_dev_device_name(spawn->phys_dev)); - else - snprintf(name, sizeof(name), "%s_%s_representor_%s%u", - dpdk_dev->name, - mlx5_os_get_dev_device_name(spawn->phys_dev), - switch_info->name_type == - MLX5_PHYS_PORT_NAME_TYPE_PFSF ? 
"sf" : "vf", - switch_info->port_name); + } else { + err = snprintf(name, sizeof(name), "%s_%s_representor_c%dpf%d%s%u", + dpdk_dev->name, + mlx5_os_get_dev_device_name(spawn->phys_dev), + switch_info->ctrl_num, + switch_info->pf_num, + switch_info->name_type == + MLX5_PHYS_PORT_NAME_TYPE_PFSF ? "sf" : "vf", + switch_info->port_name); + } } + if (err >= (int)sizeof(name)) + DRV_LOG(WARNING, "device name overflow %s", name); /* check if the device is already spawned */ if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) { rte_errno = EEXIST; @@ -1739,9 +1733,11 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) * @param[in] ibv_dev * Pointer to Infiniband device structure. * @param[in] pci_dev - * Pointer to PCI device structure to match PCI address. + * Pointer to primary PCI address structure to match. * @param[in] nl_rdma * Netlink RDMA group socket handle. + * @param[in] owner + * Rerepsentor owner PF index. * * @return * negative value if no bonding device found, otherwise @@ -1749,8 +1745,8 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) */ static int mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, - const struct rte_pci_device *pci_dev, - int nl_rdma) + const struct rte_pci_addr *pci_dev, + int nl_rdma, uint16_t owner) { char ifname[IF_NAMESIZE + 1]; unsigned int ifindex; @@ -1807,10 +1803,10 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, " for netdev \"%s\"", ifname); continue; } - if (pci_dev->addr.domain != pci_addr.domain || - pci_dev->addr.bus != pci_addr.bus || - pci_dev->addr.devid != pci_addr.devid || - pci_dev->addr.function != pci_addr.function) + if (pci_dev->domain != pci_addr.domain || + pci_dev->bus != pci_addr.bus || + pci_dev->devid != pci_addr.devid || + pci_dev->function + owner != pci_addr.function) continue; /* Slave interface PCI address match found. 
*/ fclose(file); @@ -1878,7 +1874,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct mlx5_dev_config dev_config; unsigned int dev_config_vf; struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE }; - int ret; + struct rte_pci_addr owner_pci = pci_dev->addr; /* Owner PF. */ + int ret = -1; if (rte_eal_process_type() == RTE_PROC_PRIMARY) mlx5_pmd_socket_init(); @@ -1930,7 +1927,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name); bd = mlx5_device_bond_pci_match - (ibv_list[ret], pci_dev, nl_rdma); + (ibv_list[ret], &owner_pci, nl_rdma, + eth_da.ports[0]); if (bd >= 0) { /* * Bonding device detected. Only one match is allowed, @@ -1947,23 +1945,28 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, ret = -rte_errno; goto exit; } + /* Amend owner pci address if owner PF ID specified. */ + if (eth_da.nb_representor_ports) + owner_pci.function += eth_da.ports[0]; DRV_LOG(INFO, "PCI information matches for" " slave %d bonding device \"%s\"", bd, ibv_list[ret]->name); ibv_match[nd++] = ibv_list[ret]; break; + } else { + /* Bonding device not found. 
*/ + if (mlx5_dev_to_pci_addr + (ibv_list[ret]->ibdev_path, &pci_addr)) + continue; + if (owner_pci.domain != pci_addr.domain || + owner_pci.bus != pci_addr.bus || + owner_pci.devid != pci_addr.devid || + owner_pci.function != pci_addr.function) + continue; + DRV_LOG(INFO, "PCI information matches for device \"%s\"", + ibv_list[ret]->name); + ibv_match[nd++] = ibv_list[ret]; } - if (mlx5_dev_to_pci_addr - (ibv_list[ret]->ibdev_path, &pci_addr)) - continue; - if (pci_dev->addr.domain != pci_addr.domain || - pci_dev->addr.bus != pci_addr.bus || - pci_dev->addr.devid != pci_addr.devid || - pci_dev->addr.function != pci_addr.function) - continue; - DRV_LOG(INFO, "PCI information matches for device \"%s\"", - ibv_list[ret]->name); - ibv_match[nd++] = ibv_list[ret]; } ibv_match[nd] = NULL; if (!nd) { @@ -1971,8 +1974,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, DRV_LOG(WARNING, "no Verbs device matches PCI device " PCI_PRI_FMT "," " are kernel drivers loaded?", - pci_dev->addr.domain, pci_dev->addr.bus, - pci_dev->addr.devid, pci_dev->addr.function); + owner_pci.domain, owner_pci.bus, + owner_pci.devid, owner_pci.function); rte_errno = ENOENT; ret = -rte_errno; goto exit; @@ -2237,6 +2240,24 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, dev_config_vf = 0; break; } + if (eth_da.type != RTE_ETH_REPRESENTOR_NONE) { + /* Set devargs default values. 
*/ + if (eth_da.nb_mh_controllers == 0) { + eth_da.nb_mh_controllers = 1; + eth_da.mh_controllers[0] = 0; + } + if (eth_da.nb_ports == 0 && ns > 0) { + if (list[0].pf_bond >= 0 && list[0].info.representor) + DRV_LOG(WARNING, "Representor on Bonding device should use pf#vf# syntax: %s", + pci_dev->device.devargs->args); + eth_da.nb_ports = 1; + eth_da.ports[0] = list[0].info.pf_num; + } + if (eth_da.nb_representor_ports == 0) { + eth_da.nb_representor_ports = 1; + eth_da.representor_ports[0] = 0; + } + } for (i = 0; i != ns; ++i) { uint32_t restore; @@ -2278,8 +2299,8 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, DRV_LOG(ERR, "probe of PCI device " PCI_PRI_FMT " aborted after" " encountering an error: %s", - pci_dev->addr.domain, pci_dev->addr.bus, - pci_dev->addr.devid, pci_dev->addr.function, + owner_pci.domain, owner_pci.bus, + owner_pci.devid, owner_pci.function, strerror(rte_errno)); ret = -rte_errno; /* Roll back. */ diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index aa8b50c642..1a3043a4e7 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -355,6 +355,26 @@ static const struct mlx5_indexed_pool_config mlx5_ipool_cfg[] = { #define MLX5_FLOW_TABLE_HLIST_ARRAY_SIZE 4096 +/** + * Decide whether representor ID is a HPF(host PF) port on BF2. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * Non-zero if HPF, otherwise 0. + */ +bool +mlx5_is_hpf(struct rte_eth_dev *dev) +{ + struct mlx5_priv *priv = dev->data->dev_private; + uint16_t repr = MLX5_REPRESENTOR_REPR(priv->representor_id); + int type = MLX5_REPRESENTOR_TYPE(priv->representor_id); + + return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_VF && + MLX5_REPRESENTOR_REPR(-1) == repr; +} + /** * Initialize the ASO aging management structure. 
* diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index bee0696518..34f4bd5dfc 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -923,7 +923,7 @@ struct mlx5_priv { uint16_t vport_id; /* Associated VF vport index (if any). */ uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ uint32_t vport_meta_mask; /* Used for vport index field match mask. */ - int32_t representor_id; /* Port representor identifier. */ + int32_t representor_id; /* -1 if not a representor. */ int32_t pf_bond; /* >=0 means PF index in bonding configuration. */ unsigned int if_index; /* Associated kernel network device index. */ uint32_t bond_ifindex; /**< Bond interface index. */ @@ -999,6 +999,7 @@ int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_pci_device *pci_dev); int mlx5_dev_close(struct rte_eth_dev *dev); +bool mlx5_is_hpf(struct rte_eth_dev *dev); void mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh); /* Macro to iterate over all valid ports for mlx5 driver. */ diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index af29d93901..8f2807dcd9 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -48,10 +48,6 @@ #define MLX5_PMD_SOFT_COUNTERS 1 #endif -/* Switch port ID parameters for bonding configurations. */ -#define MLX5_PORT_ID_BONDING_PF_MASK 0xf -#define MLX5_PORT_ID_BONDING_PF_SHIFT 12 - /* Alarm timeout. */ #define MLX5_ALARM_TIMEOUT_US 100000 diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 1ffb13cf2e..130980d4d6 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -330,33 +330,6 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) if (priv->representor) { uint16_t port_id; - if (priv->pf_bond >= 0) { - /* - * Switch port ID is opaque value with driver defined - * format. 
Push the PF index in bonding configurations - * in upper four bits of port ID. If we get too many - * representors (more than 4K) or PFs (more than 15) - * this approach must be reconsidered. - */ - /* Switch port ID for VF representors: 0 - 0xFFE */ - if ((info->switch_info.port_id != 0xffff && - info->switch_info.port_id >= - ((1 << MLX5_PORT_ID_BONDING_PF_SHIFT) - 1)) || - priv->pf_bond > MLX5_PORT_ID_BONDING_PF_MASK) { - DRV_LOG(ERR, "can't update switch port ID" - " for bonding device"); - MLX5_ASSERT(false); - return -ENODEV; - } - /* - * Switch port ID for Host PF representor - * (representor_id is -1) , set to 0xFFF - */ - if (info->switch_info.port_id == 0xffff) - info->switch_info.port_id = 0xfff; - info->switch_info.port_id |= - priv->pf_bond << MLX5_PORT_ID_BONDING_PF_SHIFT; - } MLX5_ETH_FOREACH_DEV(port_id, priv->pci_dev) { struct mlx5_priv *opriv = rte_eth_devices[port_id].data->dev_private; diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c index 6ffcfcd97a..7b2be04889 100644 --- a/drivers/net/mlx5/mlx5_mac.c +++ b/drivers/net/mlx5/mlx5_mac.c @@ -159,7 +159,7 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) * Configuring the VF instead of its representor, * need to skip the special case of HPF on Bluefield. 
*/ - if (priv->representor && priv->representor_id >= 0) { + if (priv->representor && !mlx5_is_hpf(dev)) { DRV_LOG(DEBUG, "VF represented by port %u setting primary MAC address", dev->data->port_id); RTE_ETH_FOREACH_DEV_SIBLING(port_id, dev->data->port_id) { @@ -169,7 +169,9 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) return mlx5_os_vf_mac_addr_modify (priv, mlx5_ifindex(&rte_eth_devices[port_id]), - mac_addr, priv->representor_id); + mac_addr, + MLX5_REPRESENTOR_REPR + (priv->representor_id)); } } rte_errno = -ENOTSUP; From patchwork Sun Mar 28 13:48:11 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89970 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id F263FA034F; Sun, 28 Mar 2021 15:49:18 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 14790140DEC; Sun, 28 Mar 2021 15:49:13 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id B4DAA140DEC for ; Sun, 28 Mar 2021 15:49:11 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:11 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHn018703; Sun, 28 Mar 2021 16:49:10 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:11 +0000 Message-Id: <1616939297-15627-6-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: 
<1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 5/9] net/mlx5: support list value of representor PF X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" To probe representors from different kernel bonding PFs, had to specify 2 separate devargs like this: -a 03:00.0,representor=pf0vf[0-3] -a 03:00.0,representor=pf1vf[0-3] This patch supports a range or list in the PF section of devargs, so the equivalent short form of the above is: -a 03:00.0,representor=pf[0-1]vf[0-3] Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- doc/guides/nics/mlx5.rst | 4 ++ drivers/net/mlx5/linux/mlx5_os.c | 100 +++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 32 deletions(-) diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst index 92fe7c11e4..b39bc475ad 100644 --- a/doc/guides/nics/mlx5.rst +++ b/doc/guides/nics/mlx5.rst @@ -944,6 +944,10 @@ Driver options ,representor=sf[0-2] + To probe VF port representors 0 through 2 on both PFs of bonding device:: + + <Primary_PCI_BDF>,representor=pf[0,1]vf[0-2] + - ``max_dump_files_num`` parameter [int] The maximum number of files per PMD entity that may be created for debug information. diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index bb4a8719f7..2c702cf614 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1829,21 +1829,25 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, } /** - * DPDK callback to register a PCI device. + * Register a PCI device within bonding. * - * This function spawns Ethernet devices out of a given PCI device.
+ * This function spawns Ethernet devices out of a given PCI device and + * bonding owner PF index. * - * @param[in] pci_drv - * PCI driver structure (mlx5_driver). * @param[in] pci_dev * PCI device information. + * @param[in] req_eth_da + * Requested ethdev device argument. + * @param[in] owner_id + * Requested owner PF port ID within bonding device, default to 0. * * @return * 0 on success, a negative errno value otherwise and rte_errno is set. */ -int -mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, - struct rte_pci_device *pci_dev) +static int +mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, + struct rte_eth_devargs *req_eth_da, + uint16_t owner_id) { struct ibv_device **ibv_list; /* @@ -1873,7 +1877,7 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, struct mlx5_dev_spawn_data *list = NULL; struct mlx5_dev_config dev_config; unsigned int dev_config_vf; - struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE }; + struct rte_eth_devargs eth_da = *req_eth_da; struct rte_pci_addr owner_pci = pci_dev->addr; /* Owner PF. */ int ret = -1; @@ -1885,27 +1889,6 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, strerror(rte_errno)); return -rte_errno; } - if (pci_dev->device.devargs) { - /* Parse representor information from device argument. 
*/ - if (pci_dev->device.devargs->cls_str) - ret = rte_eth_devargs_parse - (pci_dev->device.devargs->cls_str, &eth_da); - if (ret) { - DRV_LOG(ERR, "failed to parse device arguments: %s", - pci_dev->device.devargs->cls_str); - return -rte_errno; - } - if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) { - /* Support legacy device argument */ - ret = rte_eth_devargs_parse - (pci_dev->device.devargs->args, &eth_da); - if (ret) { - DRV_LOG(ERR, "failed to parse device arguments: %s", - pci_dev->device.devargs->args); - return -rte_errno; - } - } - } errno = 0; ibv_list = mlx5_glue->get_device_list(&ret); if (!ibv_list) { @@ -1927,8 +1910,7 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name); bd = mlx5_device_bond_pci_match - (ibv_list[ret], &owner_pci, nl_rdma, - eth_da.ports[0]); + (ibv_list[ret], &owner_pci, nl_rdma, owner_id); if (bd >= 0) { /* * Bonding device detected. Only one match is allowed, @@ -1947,7 +1929,7 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, } /* Amend owner pci address if owner PF ID specified. */ if (eth_da.nb_representor_ports) - owner_pci.function += eth_da.ports[0]; + owner_pci.function += owner_id; DRV_LOG(INFO, "PCI information matches for" " slave %d bonding device \"%s\"", bd, ibv_list[ret]->name); @@ -2335,6 +2317,60 @@ mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, return ret; } +/** + * DPDK callback to register a PCI device. + * + * This function spawns Ethernet devices out of a given PCI device. + * + * @param[in] pci_drv + * PCI driver structure (mlx5_driver). + * @param[in] pci_dev + * PCI device information. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */ +int +mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE }; + int ret = 0; + uint16_t p; + + if (pci_dev->device.devargs) { + /* Parse representor information from device argument. */ + if (pci_dev->device.devargs->cls_str) + ret = rte_eth_devargs_parse + (pci_dev->device.devargs->cls_str, &eth_da); + if (ret) { + DRV_LOG(ERR, "failed to parse device arguments: %s", + pci_dev->device.devargs->cls_str); + return -rte_errno; + } + if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) { + /* Support legacy device argument */ + ret = rte_eth_devargs_parse + (pci_dev->device.devargs->args, &eth_da); + if (ret) { + DRV_LOG(ERR, "failed to parse device arguments: %s", + pci_dev->device.devargs->args); + return -rte_errno; + } + } + } + + if (eth_da.nb_ports > 0) { + /* Iterate all port if devargs pf is range: "pf[0-1]vf[...]". */ + for (p = 0; p < eth_da.nb_ports; p++) + ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da, + eth_da.ports[p]); + } else { + ret = mlx5_os_pci_probe_pf(pci_dev, &eth_da, 0); + } + return ret; +} + static int mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config) { From patchwork Sun Mar 28 13:48:12 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89972 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 80A0FA034F; Sun, 28 Mar 2021 15:49:37 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id EF224140E10; Sun, 28 Mar 2021 15:49:18 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id AB477140DFC for ; Sun, 28 Mar 2021
15:49:16 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:13 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHo018703; Sun, 28 Mar 2021 16:49:13 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:12 +0000 Message-Id: <1616939297-15627-7-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 6/9] net/mlx5: save bonding member ports information X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Since kernel bonding netdev doesn't provide a statistics counter that reflects all member ports, PMD has to manually sum the counters from each member port. As a preparation, this patch collects bonding member port information and saves it to shared context data.
Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/linux/mlx5_ethdev_os.c | 4 +- drivers/net/mlx5/linux/mlx5_os.c | 91 ++++++++++++++++--------- drivers/net/mlx5/mlx5.c | 2 + drivers/net/mlx5/mlx5.h | 21 +++++- drivers/net/mlx5/mlx5_ethdev.c | 5 +- 5 files changed, 86 insertions(+), 37 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index 2127fcfbfa..e7ec07e364 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -150,8 +150,8 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) MLX5_ASSERT(priv); MLX5_ASSERT(priv->sh); - if (priv->bond_ifindex > 0) { - memcpy(ifname, priv->bond_name, MLX5_NAMESIZE); + if (priv->master && priv->sh->bond.ifindex > 0) { + memcpy(ifname, priv->sh->bond.ifname, MLX5_NAMESIZE); return 0; } ifindex = mlx5_ifindex(dev); diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 2c702cf614..5bdc8caee5 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -1457,19 +1457,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, */ MLX5_ASSERT(spawn->ifindex); priv->if_index = spawn->ifindex; - if (priv->pf_bond >= 0 && priv->master) { - /* Get bond interface info */ - err = mlx5_sysfs_bond_info(priv->if_index, - &priv->bond_ifindex, - priv->bond_name); - if (err) - DRV_LOG(ERR, "unable to get bond info: %s", - strerror(rte_errno)); - else - DRV_LOG(INFO, "PF device %u, bond device %u(%s)", - priv->if_index, priv->bond_ifindex, - priv->bond_name); - } eth_dev->data->dev_private = priv; priv->dev_data = eth_dev->data; eth_dev->data->mac_addrs = priv->mac; @@ -1738,6 +1725,8 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) * Netlink RDMA group socket handle. * @param[in] owner * Rerepsentor owner PF index. + * @param[out] bond_info + * Pointer to bonding information. 
* * @return * negative value if no bonding device found, otherwise @@ -1746,19 +1735,22 @@ mlx5_dev_spawn_data_cmp(const void *a, const void *b) static int mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, const struct rte_pci_addr *pci_dev, - int nl_rdma, uint16_t owner) + int nl_rdma, uint16_t owner, + struct mlx5_bond_info *bond_info) { char ifname[IF_NAMESIZE + 1]; unsigned int ifindex; unsigned int np, i; - FILE *file = NULL; + FILE *bond_file = NULL, *file; int pf = -1; + int ret; /* * Try to get master device name. If something goes * wrong suppose the lack of kernel support and no * bonding devices. */ + memset(bond_info, 0, sizeof(*bond_info)); if (nl_rdma < 0) return -1; if (!strstr(ibv_dev->name, "bond")) @@ -1782,15 +1774,15 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, /* Try to read bonding slave names from sysfs. */ MKSTR(slaves, "/sys/class/net/%s/master/bonding/slaves", ifname); - file = fopen(slaves, "r"); - if (file) + bond_file = fopen(slaves, "r"); + if (bond_file) break; } - if (!file) + if (!bond_file) return -1; /* Use safe format to check maximal buffer length. */ MLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE); - while (fscanf(file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { + while (fscanf(bond_file, "%" RTE_STR(IF_NAMESIZE) "s", ifname) == 1) { char tmp_str[IF_NAMESIZE + 32]; struct rte_pci_addr pci_addr; struct mlx5_switch_info info; @@ -1803,13 +1795,7 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, " for netdev \"%s\"", ifname); continue; } - if (pci_dev->domain != pci_addr.domain || - pci_dev->bus != pci_addr.bus || - pci_dev->devid != pci_addr.devid || - pci_dev->function + owner != pci_addr.function) - continue; /* Slave interface PCI address match found. 
*/ - fclose(file); snprintf(tmp_str, sizeof(tmp_str), "/sys/class/net/%s/phys_port_name", ifname); file = fopen(tmp_str, "rb"); @@ -1818,13 +1804,52 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev, info.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET; if (fscanf(file, "%32s", tmp_str) == 1) mlx5_translate_port_name(tmp_str, &info); - if (info.name_type == MLX5_PHYS_PORT_NAME_TYPE_LEGACY || - info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) + fclose(file); + /* Only process PF ports. */ + if (info.name_type != MLX5_PHYS_PORT_NAME_TYPE_LEGACY && + info.name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK) + continue; + /* Check max bonding member. */ + if (info.port_name >= MLX5_BOND_MAX_PORTS) { + DRV_LOG(WARNING, "bonding index out of range, " + "please increase MLX5_BOND_MAX_PORTS: %s", + tmp_str); + break; + } + /* Match PCI address. */ + if (pci_dev->domain == pci_addr.domain && + pci_dev->bus == pci_addr.bus && + pci_dev->devid == pci_addr.devid && + pci_dev->function + owner == pci_addr.function) pf = info.port_name; - break; - } - if (file) + /* Get ifindex. */ + snprintf(tmp_str, sizeof(tmp_str), + "/sys/class/net/%s/ifindex", ifname); + file = fopen(tmp_str, "rb"); + if (!file) + break; + ret = fscanf(file, "%u", &ifindex); fclose(file); + if (ret != 1) + break; + /* Save bonding info. 
*/ + strncpy(bond_info->ports[info.port_name].ifname, ifname, + sizeof(bond_info->ports[0].ifname)); + bond_info->ports[info.port_name].pci_addr = pci_addr; + bond_info->ports[info.port_name].ifindex = ifindex; + bond_info->n_port++; + } + if (pf >= 0) { + /* Get bond interface info */ + ret = mlx5_sysfs_bond_info(ifindex, &bond_info->ifindex, + bond_info->ifname); + if (ret) + DRV_LOG(ERR, "unable to get bond info: %s", + strerror(rte_errno)); + else + DRV_LOG(INFO, "PF device %u, bond device %u(%s)", + ifindex, bond_info->ifindex, bond_info->ifname); + } return pf; } @@ -1879,6 +1904,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, unsigned int dev_config_vf; struct rte_eth_devargs eth_da = *req_eth_da; struct rte_pci_addr owner_pci = pci_dev->addr; /* Owner PF. */ + struct mlx5_bond_info bond_info; int ret = -1; if (rte_eal_process_type() == RTE_PROC_PRIMARY) @@ -1910,7 +1936,8 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, DRV_LOG(DEBUG, "checking device \"%s\"", ibv_list[ret]->name); bd = mlx5_device_bond_pci_match - (ibv_list[ret], &owner_pci, nl_rdma, owner_id); + (ibv_list[ret], &owner_pci, nl_rdma, owner_id, + &bond_info); if (bd >= 0) { /* * Bonding device detected. 
Only one match is allowed, @@ -2019,6 +2046,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, MLX5_ASSERT(nd == 1); MLX5_ASSERT(np); for (i = 1; i <= np; ++i) { + list[ns].bond_info = &bond_info; list[ns].max_port = np; list[ns].phys_port = i; list[ns].phys_dev = ibv_match[0]; @@ -2109,6 +2137,7 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev, */ for (i = 0; i != nd; ++i) { memset(&list[ns].info, 0, sizeof(list[ns].info)); + list[ns].bond_info = NULL; list[ns].max_port = 1; list[ns].phys_port = 1; list[ns].phys_dev = ibv_match[i]; diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 1a3043a4e7..303c25203a 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -928,6 +928,8 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn, rte_errno = ENOMEM; goto exit; } + if (spawn->bond_info) + sh->bond = *spawn->bond_info; err = mlx5_os_open_device(spawn, config, sh); if (!sh->ctx) goto error; diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index 34f4bd5dfc..a1d2798373 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -115,6 +115,7 @@ struct mlx5_dev_spawn_data { void *phys_dev; /**< Associated physical device. */ struct rte_eth_dev *eth_dev; /**< Associated Ethernet device. */ struct rte_pci_device *pci_dev; /**< Backend PCI device. */ + struct mlx5_bond_info *bond_info; }; /** Key string for IPC. */ @@ -660,6 +661,21 @@ struct mlx5_flex_parser_profiles { void *obj; /* Flex parser node object. */ }; +/* Max member ports per bonding device. */ +#define MLX5_BOND_MAX_PORTS 2 + +/* Bonding device information. */ +struct mlx5_bond_info { + int n_port; /* Number of bond member ports. 
*/ + uint32_t ifindex; + char ifname[MLX5_NAMESIZE + 1]; + struct { + char ifname[MLX5_NAMESIZE + 1]; + uint32_t ifindex; + struct rte_pci_addr pci_addr; + } ports[MLX5_BOND_MAX_PORTS]; +}; + /* * Shared Infiniband device context for Master/Representors * which belong to same IB device with multiple IB ports. @@ -673,6 +689,7 @@ struct mlx5_dev_ctx_shared { uint32_t sq_ts_format:2; /* SQ timestamp formats supported. */ uint32_t qp_ts_format:2; /* QP timestamp formats supported. */ uint32_t max_port; /* Maximal IB device port index. */ + struct mlx5_bond_info bond; /* Bonding information. */ void *ctx; /* Verbs/DV/DevX context. */ void *pd; /* Protection Domain. */ uint32_t pdn; /* Protection Domain number. */ @@ -924,10 +941,8 @@ struct mlx5_priv { uint32_t vport_meta_tag; /* Used for vport index match ove VF LAG. */ uint32_t vport_meta_mask; /* Used for vport index field match mask. */ int32_t representor_id; /* -1 if not a representor. */ - int32_t pf_bond; /* >=0 means PF index in bonding configuration. */ + int32_t pf_bond; /* >=0, representor owner PF index in bonding. */ unsigned int if_index; /* Associated kernel network device index. */ - uint32_t bond_ifindex; /**< Bond interface index. */ - char bond_name[MLX5_NAMESIZE]; /**< Bond interface name. */ /* RX/TX queues. */ unsigned int rxqs_n; /* RX queues array size. */ unsigned int txqs_n; /* TX queues array size. */ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 130980d4d6..4f97a69a20 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -42,7 +42,10 @@ mlx5_ifindex(const struct rte_eth_dev *dev) MLX5_ASSERT(priv); MLX5_ASSERT(priv->if_index); - ifindex = priv->bond_ifindex > 0 ? 
priv->bond_ifindex : priv->if_index; + if (priv->master && priv->sh->bond.ifindex > 0) + ifindex = priv->sh->bond.ifindex; + else + ifindex = priv->if_index; if (!ifindex) rte_errno = ENXIO; return ifindex; From patchwork Sun Mar 28 13:48:13 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89973 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 791B2A034F; Sun, 28 Mar 2021 15:49:46 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 9D0AE140E14; Sun, 28 Mar 2021 15:49:27 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id B34C8140E18 for ; Sun, 28 Mar 2021 15:49:26 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:21 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHp018703; Sun, 28 Mar 2021 16:49:21 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:13 +0000 Message-Id: <1616939297-15627-8-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 7/9] net/mlx5: fix setting VF default MAC through representor X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: 
DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" With kernel bonding, there was an error when setting the VF MAC address through a representor. The Netlink API requires the ifindex of the owner PF, not the bonding device ifindex. Use the owner PF ifindex to modify the VF default MAC in case of a bonding device. Fixes: c21e5facf7d2 ("net/mlx5: use bond index for netdev operations") Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/mlx5_mac.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c index 7b2be04889..a7946f7756 100644 --- a/drivers/net/mlx5/mlx5_mac.c +++ b/drivers/net/mlx5/mlx5_mac.c @@ -154,6 +154,7 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) { uint16_t port_id; struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_priv *pf_priv; /* * Configuring the VF instead of its representor, @@ -162,17 +163,21 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr) if (priv->representor && !mlx5_is_hpf(dev)) { DRV_LOG(DEBUG, "VF represented by port %u setting primary MAC address", dev->data->port_id); + if (priv->pf_bond >= 0) { + /* Bonding, get owner PF ifindex from shared data. 
*/ + return mlx5_os_vf_mac_addr_modify + (priv, + priv->sh->bond.ports[priv->pf_bond].ifindex, + mac_addr, + MLX5_REPRESENTOR_REPR(priv->representor_id)); + } RTE_ETH_FOREACH_DEV_SIBLING(port_id, dev->data->port_id) { - priv = rte_eth_devices[port_id].data->dev_private; - if (priv->master == 1) { - priv = dev->data->dev_private; + pf_priv = rte_eth_devices[port_id].data->dev_private; + if (pf_priv->master == 1) return mlx5_os_vf_mac_addr_modify - (priv, - mlx5_ifindex(&rte_eth_devices[port_id]), - mac_addr, + (priv, pf_priv->if_index, mac_addr, MLX5_REPRESENTOR_REPR (priv->representor_id)); - } } rte_errno = -ENOTSUP; return rte_errno; From patchwork Sun Mar 28 13:48:14 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89974 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 9AE0FA034F; Sun, 28 Mar 2021 15:49:52 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id D1455140E25; Sun, 28 Mar 2021 15:49:28 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id B7F66140E1E for ; Sun, 28 Mar 2021 15:49:26 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:24 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHq018703; Sun, 28 Mar 2021 16:49:23 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:14 +0000 Message-Id: <1616939297-15627-9-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 
In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 8/9] net/mlx5: improve xstats of bonding port X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" With a kernel bonding device, counters were read only from the first bonding PF member. This patch reads the counters of all member PFs and sums them to get the bond xstats. Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/linux/mlx5_ethdev_os.c | 127 +++++++++++++++++++----- 1 file changed, 102 insertions(+), 25 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_ethdev_os.c b/drivers/net/mlx5/linux/mlx5_ethdev_os.c index e7ec07e364..e8aaa0d36a 100644 --- a/drivers/net/mlx5/linux/mlx5_ethdev_os.c +++ b/drivers/net/mlx5/linux/mlx5_ethdev_os.c @@ -169,10 +169,10 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) } /** - * Perform ifreq ioctl() on associated Ethernet device. + * Perform ifreq ioctl() on associated netdev ifname. * - * @param[in] dev - * Pointer to Ethernet device. + * @param[in] ifname + * Pointer to netdev name. * @param req * Request number to pass to ioctl(). * @param[out] ifr @@ -182,7 +182,7 @@ mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE]) * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int -mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) +mlx5_ifreq_by_ifname(const char *ifname, int req, struct ifreq *ifr) { int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); int ret = 0; @@ -191,9 +191,7 @@ mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) rte_errno = errno; return -rte_errno; } - ret = mlx5_get_ifname(dev, &ifr->ifr_name); - if (ret) - goto error; + rte_strscpy(ifr->ifr_name, ifname, sizeof(ifr->ifr_name)); ret = ioctl(sock, req, ifr); if (ret == -1) { rte_errno = errno; @@ -206,6 +204,31 @@ mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) return -rte_errno; } +/** + * Perform ifreq ioctl() on associated Ethernet device. + * + * @param[in] dev + * Pointer to Ethernet device. + * @param req + * Request number to pass to ioctl(). + * @param[out] ifr + * Interface request structure output buffer. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) +{ + char ifname[sizeof(ifr->ifr_name)]; + int ret; + + ret = mlx5_get_ifname(dev, &ifname); + if (ret) + return -rte_errno; + return mlx5_ifreq_by_ifname(ifname, req, ifr); +} + /** * Get device MTU. * @@ -1243,6 +1266,8 @@ int mlx5_get_module_eeprom(struct rte_eth_dev *dev, * * @param dev * Pointer to Ethernet device. + * @param[in] pf + * PF index in case of bonding device, -1 otherwise * @param[out] stats * Counters table output buffer. * @@ -1250,8 +1275,8 @@ int mlx5_get_module_eeprom(struct rte_eth_dev *dev, * 0 on success and stats is filled, negative errno value otherwise and * rte_errno is set. 
*/ -int -mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) +static int +_mlx5_os_read_dev_counters(struct rte_eth_dev *dev, int pf, uint64_t *stats) { struct mlx5_priv *priv = dev->data->dev_private; struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; @@ -1265,7 +1290,11 @@ mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) et_stats->cmd = ETHTOOL_GSTATS; et_stats->n_stats = xstats_ctrl->stats_n; ifr.ifr_data = (caddr_t)et_stats; - ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (pf >= 0) + ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[pf].ifname, + SIOCETHTOOL, &ifr); + else + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); if (ret) { DRV_LOG(WARNING, "port %u unable to read statistic values from device", @@ -1273,23 +1302,60 @@ mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) return ret; } for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) { - if (xstats_ctrl->info[i].dev) { - ret = mlx5_os_read_dev_stat(priv, - xstats_ctrl->info[i].ctr_name, - &stats[i]); - /* return last xstats counter if fail to read. */ - if (ret == 0) - xstats_ctrl->xstats[i] = stats[i]; - else - stats[i] = xstats_ctrl->xstats[i]; - } else { - stats[i] = (uint64_t) - et_stats->data[xstats_ctrl->dev_table_idx[i]]; - } + if (xstats_ctrl->info[i].dev) + continue; + stats[i] += (uint64_t) + et_stats->data[xstats_ctrl->dev_table_idx[i]]; } return 0; } +/** + * Read device counters. + * + * @param dev + * Pointer to Ethernet device. + * @param[out] stats + * Counters table output buffer. + * + * @return + * 0 on success and stats is filled, negative errno value otherwise and + * rte_errno is set. + */ +int +mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) +{ + struct mlx5_priv *priv = dev->data->dev_private; + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + int ret = 0, i; + + memset(stats, 0, sizeof(*stats) * xstats_ctrl->mlx5_stats_n); + /* Read ifreq counters. 
*/ + if (priv->master && priv->pf_bond >= 0) { + /* Sum xstats from bonding device member ports. */ + for (i = 0; i < priv->sh->bond.n_port; i++) { + ret = _mlx5_os_read_dev_counters(dev, i, stats); + if (ret) + return ret; + } + } else { + ret = _mlx5_os_read_dev_counters(dev, -1, stats); + } + /* Read IB counters. */ + for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) { + if (!xstats_ctrl->info[i].dev) + continue; + ret = mlx5_os_read_dev_stat(priv, xstats_ctrl->info[i].ctr_name, + &stats[i]); + /* Cache the value on success, return the last cached one on failure. */ + if (ret == 0) + xstats_ctrl->xstats[i] = stats[i]; + else + stats[i] = xstats_ctrl->xstats[i]; + } + return ret; +} + /** * Query the number of statistics provided by ETHTOOL. * @@ -1303,13 +1369,19 @@ mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) int mlx5_os_get_stats_n(struct rte_eth_dev *dev) { + struct mlx5_priv *priv = dev->data->dev_private; struct ethtool_drvinfo drvinfo; struct ifreq ifr; int ret; drvinfo.cmd = ETHTOOL_GDRVINFO; ifr.ifr_data = (caddr_t)&drvinfo; - ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (priv->master && priv->pf_bond >= 0) + /* Bonding PF. */ + ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname, + SIOCETHTOOL, &ifr); + else + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); if (ret) { DRV_LOG(WARNING, "port %u unable to query number of statistics", dev->data->port_id); @@ -1480,7 +1552,12 @@ mlx5_os_stats_init(struct rte_eth_dev *dev) strings->string_set = ETH_SS_STATS; strings->len = dev_stats_n; ifr.ifr_data = (caddr_t)strings; - ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (priv->master && priv->pf_bond >= 0) + /* Bonding master. 
*/ + ret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname, + SIOCETHTOOL, &ifr); + else + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); if (ret) { DRV_LOG(WARNING, "port %u unable to get statistic names", dev->data->port_id); From patchwork Sun Mar 28 13:48:15 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xueming Li X-Patchwork-Id: 89975 X-Patchwork-Delegate: rasland@nvidia.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 018EAA034F; Sun, 28 Mar 2021 15:49:59 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 1C7DF140E24; Sun, 28 Mar 2021 15:49:33 +0200 (CEST) Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by mails.dpdk.org (Postfix) with ESMTP id B55FC40042 for ; Sun, 28 Mar 2021 15:49:31 +0200 (CEST) Received: from Internal Mail-Server by MTLPINE1 (envelope-from xuemingl@nvidia.com) with SMTP; 28 Mar 2021 16:49:26 +0300 Received: from nvidia.com (pegasus05.mtr.labs.mlnx [10.210.16.100]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12SDmlHr018703; Sun, 28 Mar 2021 16:49:26 +0300 From: Xueming Li To: Viacheslav Ovsiienko Cc: dev@dpdk.org, xuemingl@nvidia.com, Asaf Penso , Matan Azrad , Shahaf Shuler Date: Sun, 28 Mar 2021 13:48:15 +0000 Message-Id: <1616939297-15627-10-git-send-email-xuemingl@nvidia.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> References: <1616939297-15627-1-git-send-email-xuemingl@nvidia.com> In-Reply-To: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> References: <1608304614-13908-2-git-send-email-xuemingl@nvidia.com> Subject: [dpdk-dev] [PATCH v5 9/9] net/mlx5: probe host PF representor with SubFunction X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 
Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" To simplify probing of the BlueField HPF representor (vf[-1]), this patch allows probing it with the "sf" syntax: "sf[-1]". Signed-off-by: Xueming Li Acked-by: Viacheslav Ovsiienko --- drivers/net/mlx5/linux/mlx5_os.c | 14 ++++++++++---- drivers/net/mlx5/mlx5.h | 3 ++- drivers/net/mlx5/mlx5_ethdev.c | 25 +++++++++++++++++++++---- 3 files changed, 33 insertions(+), 9 deletions(-) diff --git a/drivers/net/mlx5/linux/mlx5_os.c b/drivers/net/mlx5/linux/mlx5_os.c index 5bdc8caee5..74f72188ff 100644 --- a/drivers/net/mlx5/linux/mlx5_os.c +++ b/drivers/net/mlx5/linux/mlx5_os.c @@ -710,11 +710,15 @@ mlx5_representor_match(struct mlx5_dev_spawn_data *spawn, struct mlx5_switch_info *switch_info = &spawn->info; unsigned int p, f; uint16_t id; - uint16_t repr_id = mlx5_representor_id_encode(switch_info); + uint16_t repr_id = mlx5_representor_id_encode(switch_info, + eth_da->type); switch (eth_da->type) { case RTE_ETH_REPRESENTOR_SF: - if (switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFSF) { + if (!(spawn->info.port_name == -1 && + switch_info->name_type == + MLX5_PHYS_PORT_NAME_TYPE_PFHPF) && + switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFSF) { rte_errno = EBUSY; return false; } @@ -742,7 +746,8 @@ mlx5_representor_match(struct mlx5_dev_spawn_data *spawn, if (spawn->pf_bond < 0) { /* For non-LAG mode, allow and ignore pf. */ switch_info->pf_num = eth_da->ports[p]; - repr_id = mlx5_representor_id_encode(switch_info); + repr_id = mlx5_representor_id_encode(switch_info, + eth_da->type); } for (f = 0; f < eth_da->nb_representor_ports; ++f) { id = MLX5_REPRESENTOR_ID @@ -1107,7 +1112,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, priv->vport_id = switch_info->representor ? 
switch_info->port_name + 1 : -1; #endif - priv->representor_id = mlx5_representor_id_encode(switch_info); + priv->representor_id = mlx5_representor_id_encode(switch_info, + eth_da->type); /* * Look for sibling devices in order to reuse their switch domain * if any, otherwise allocate one. diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index a1d2798373..fa9e68ded9 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1055,7 +1055,8 @@ int mlx5_representor_info_get(struct rte_eth_dev *dev, ((repr_id) & 0xfff) #define MLX5_REPRESENTOR_TYPE(repr_id) \ (((repr_id) >> 12) & 3) -uint16_t mlx5_representor_id_encode(const struct mlx5_switch_info *info); +uint16_t mlx5_representor_id_encode(const struct mlx5_switch_info *info, + enum rte_eth_representor_type hpf_type); int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size); int mlx5_dev_infos_get(struct rte_eth_dev *dev, diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index 4f97a69a20..564d7132e0 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -363,12 +363,15 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) * * @param info * Port switch info. + * @param hpf_type + * Use this type if port is HPF. * * @return * Encoded representor ID. 
*/ uint16_t -mlx5_representor_id_encode(const struct mlx5_switch_info *info) +mlx5_representor_id_encode(const struct mlx5_switch_info *info, + enum rte_eth_representor_type hpf_type) { enum rte_eth_representor_type type = RTE_ETH_REPRESENTOR_VF; uint16_t repr = info->port_name; @@ -377,8 +380,10 @@ mlx5_representor_id_encode(const struct mlx5_switch_info *info) return UINT16_MAX; if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFSF) type = RTE_ETH_REPRESENTOR_SF; - if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF) + if (info->name_type == MLX5_PHYS_PORT_NAME_TYPE_PFHPF) { + type = hpf_type; repr = UINT16_MAX; + } return MLX5_REPRESENTOR_ID(info->pf_num, type, repr); } @@ -403,7 +408,7 @@ mlx5_representor_info_get(struct rte_eth_dev *dev, struct rte_eth_representor_info *info) { struct mlx5_priv *priv = dev->data->dev_private; - int n_type = 3; /* Number of representor types, VF, HPF and SF. */ + int n_type = 4; /* Representor types, VF, HPF@VF, SF and HPF@SF. */ int n_pf = 2; /* Number of PFs. */ int i = 0, pf; @@ -424,7 +429,7 @@ mlx5_representor_info_get(struct rte_eth_dev *dev, snprintf(info->ranges[i].name, sizeof(info->ranges[i].name), "pf%dvf", pf); i++; - /* HPF range. */ + /* HPF range of VF type. */ info->ranges[i].type = RTE_ETH_REPRESENTOR_VF; info->ranges[i].controller = 0; info->ranges[i].pf = pf; @@ -448,6 +453,18 @@ mlx5_representor_info_get(struct rte_eth_dev *dev, snprintf(info->ranges[i].name, sizeof(info->ranges[i].name), "pf%dsf", pf); i++; + /* HPF range of SF type. */ + info->ranges[i].type = RTE_ETH_REPRESENTOR_SF; + info->ranges[i].controller = 0; + info->ranges[i].pf = pf; + info->ranges[i].vf = UINT16_MAX; + info->ranges[i].id_base = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1); + info->ranges[i].id_end = + MLX5_REPRESENTOR_ID(pf, info->ranges[i].type, -1); + snprintf(info->ranges[i].name, + sizeof(info->ranges[i].name), "pf%dsf", pf); + i++; } out: return n_type * n_pf;