get:
Show a patch.

patch:
Partially update a patch (only the fields supplied are changed).

put:
Fully update a patch (the complete resource representation is replaced).

GET /api/patches/99799/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 99799,
    "url": "http://patches.dpdk.org/api/patches/99799/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20210927133450.10653-3-srikanth.k@oneconvergence.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210927133450.10653-3-srikanth.k@oneconvergence.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210927133450.10653-3-srikanth.k@oneconvergence.com",
    "date": "2021-09-27T13:34:33",
    "name": "[02/19] net/mlx5: stub for FreeBSD",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "7d2130ff30b1852a969a2242c5f159e90e8420ef",
    "submitter": {
        "id": 2368,
        "url": "http://patches.dpdk.org/api/people/2368/?format=api",
        "name": "Srikanth Kaka",
        "email": "srikanth.k@oneconvergence.com"
    },
    "delegate": {
        "id": 3268,
        "url": "http://patches.dpdk.org/api/users/3268/?format=api",
        "username": "rasland",
        "first_name": "Raslan",
        "last_name": "Darawsheh",
        "email": "rasland@nvidia.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20210927133450.10653-3-srikanth.k@oneconvergence.com/mbox/",
    "series": [
        {
            "id": 19193,
            "url": "http://patches.dpdk.org/api/series/19193/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=19193",
            "date": "2021-09-27T13:34:31",
            "name": "MLX5 FreeBSD support",
            "version": 1,
            "mbox": "http://patches.dpdk.org/series/19193/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/99799/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/99799/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id EBD06A0C46;\n\tMon, 27 Sep 2021 16:58:21 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id B02C7410F5;\n\tMon, 27 Sep 2021 16:58:10 +0200 (CEST)",
            "from mail-pg1-f175.google.com (mail-pg1-f175.google.com\n [209.85.215.175])\n by mails.dpdk.org (Postfix) with ESMTP id 7B57040E3C\n for <dev@dpdk.org>; Mon, 27 Sep 2021 15:35:52 +0200 (CEST)",
            "by mail-pg1-f175.google.com with SMTP id m21so17776752pgu.13\n for <dev@dpdk.org>; Mon, 27 Sep 2021 06:35:52 -0700 (PDT)",
            "from srikanth-ThinkPad-T450.domain.name ([223.178.22.200])\n by smtp.gmail.com with ESMTPSA id t6sm17342274pfh.63.2021.09.27.06.35.45\n (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256);\n Mon, 27 Sep 2021 06:35:48 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=oneconvergence.com; s=google;\n h=from:to:cc:subject:date:message-id:in-reply-to:references\n :mime-version:content-transfer-encoding;\n bh=DPEYmGRqv6dhNGvr7n/p2GJk6FEXPAmLLasJPYIKvCg=;\n b=fDWDcExSdgcGDhTgp2FUooCHHLtO8mfqmdDX32Oj395HnsA5ckm5lOMEfKOGADL0Fs\n QsRjMK7xpjedmbEuv7FIhxfNtG5tw98Q5GvUw/kDd0tV0X6U2BQ18n/mON/mefGcX1EU\n S5Kq98hcamcLOxaaG+R5YIMO4Ue0Xjcu1Cwes=",
        "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n d=1e100.net; s=20210112;\n h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n :references:mime-version:content-transfer-encoding;\n bh=DPEYmGRqv6dhNGvr7n/p2GJk6FEXPAmLLasJPYIKvCg=;\n b=OfvVbtLfOXtFpBTOYrOylbIV2LzWkbYI0AZGUoNnIrarMnW1B9FjZfpkJXQp3Y/IYp\n /c7AUKws68o5wNxUWhPdQtFxi0QAbgGQMqXxOVqpfDoqxzIrj/+rOFzNxa8A0/Nleqrj\n BmXG8H+ymdC9QOoi+eaABvoYTR4gv5IISK30BRyA3Iqz0vxM66mtoM+s1dtoV4YsMLd0\n Wk8G19SDoYkJDmBQdnBbc/JcOzgNFfAYjckL9w0xArEpdqLCLxesPuYMuao9AJ0JmCjR\n lcxPU4u1qWZezp5DiqG6j/SqCFII7ZHkJjE57Wj9NaDdtRMmL4WQkBhl10rtNNNgKw5A\n /KXg==",
        "X-Gm-Message-State": "AOAM533Qo43Yl2bI0MXJIv367kVg27VFQAcbxRoEjR/75n3dvo3kaDJu\n bMHyo1HhUQ0yUwAlhY93K2YKDQ==",
        "X-Google-Smtp-Source": "\n ABdhPJw30ikcbsHNfgBwyTCdo9Y86lY8K7KDAZH2FqDPaTOzELZeYG021AnRrg8l8KIjg5/A6+aucQ==",
        "X-Received": "by 2002:a63:b707:: with SMTP id t7mr16462406pgf.55.1632749748961;\n Mon, 27 Sep 2021 06:35:48 -0700 (PDT)",
        "From": "Srikanth Kaka <srikanth.k@oneconvergence.com>",
        "To": "Matan Azrad <matan@nvidia.com>,\n Viacheslav Ovsiienko <viacheslavo@nvidia.com>",
        "Cc": "dev@dpdk.org, Vag Singh <vag.singh@oneconvergence.com>,\n Anand Thulasiram <avelu@juniper.net>,\n Srikanth Kaka <srikanth.k@oneconvergence.com>",
        "Date": "Mon, 27 Sep 2021 19:04:33 +0530",
        "Message-Id": "<20210927133450.10653-3-srikanth.k@oneconvergence.com>",
        "X-Mailer": "git-send-email 2.30.2",
        "In-Reply-To": "<20210927133450.10653-1-srikanth.k@oneconvergence.com>",
        "References": "<20210927133450.10653-1-srikanth.k@oneconvergence.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-Mailman-Approved-At": "Mon, 27 Sep 2021 16:58:07 +0200",
        "Subject": "[dpdk-dev] [PATCH 02/19] net/mlx5: stub for FreeBSD",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "These files are a copy of their Linux equivalents.\nThey will be ported to FreeBSD.\n\nSigned-off-by: Srikanth Kaka <srikanth.k@oneconvergence.com>\nSigned-off-by: Vag Singh <vag.singh@oneconvergence.com>\nSigned-off-by: Anand Thulasiram <avelu@juniper.net>\n---\n drivers/net/mlx5/freebsd/mlx5_ethdev_os.c | 1632 +++++++++++\n drivers/net/mlx5/freebsd/mlx5_flow_os.c   |   38 +\n drivers/net/mlx5/freebsd/mlx5_flow_os.h   |  484 ++++\n drivers/net/mlx5/freebsd/mlx5_mp_os.c     |  305 ++\n drivers/net/mlx5/freebsd/mlx5_os.c        | 3208 +++++++++++++++++++++\n drivers/net/mlx5/freebsd/mlx5_os.h        |   24 +\n drivers/net/mlx5/freebsd/mlx5_socket.c    |  249 ++\n drivers/net/mlx5/freebsd/mlx5_verbs.c     | 1208 ++++++++\n drivers/net/mlx5/freebsd/mlx5_verbs.h     |   18 +\n 9 files changed, 7166 insertions(+)\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_ethdev_os.c\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_flow_os.c\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_flow_os.h\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_mp_os.c\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_os.c\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_os.h\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_socket.c\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_verbs.c\n create mode 100644 drivers/net/mlx5/freebsd/mlx5_verbs.h",
    "diff": "diff --git a/drivers/net/mlx5/freebsd/mlx5_ethdev_os.c b/drivers/net/mlx5/freebsd/mlx5_ethdev_os.c\nnew file mode 100644\nindex 0000000000..f34133e2c6\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_ethdev_os.c\n@@ -0,0 +1,1632 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2015 6WIND S.A.\n+ * Copyright 2015 Mellanox Technologies, Ltd\n+ */\n+\n+#include <stddef.h>\n+#include <inttypes.h>\n+#include <unistd.h>\n+#include <stdbool.h>\n+#include <stdint.h>\n+#include <stdio.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <errno.h>\n+#include <dirent.h>\n+#include <net/if.h>\n+#include <sys/ioctl.h>\n+#include <sys/socket.h>\n+#include <netinet/in.h>\n+#include <linux/ethtool.h>\n+#include <linux/sockios.h>\n+#include <fcntl.h>\n+#include <stdalign.h>\n+#include <sys/un.h>\n+#include <time.h>\n+\n+#include <ethdev_driver.h>\n+#include <rte_bus_pci.h>\n+#include <rte_mbuf.h>\n+#include <rte_common.h>\n+#include <rte_interrupts.h>\n+#include <rte_malloc.h>\n+#include <rte_string_fns.h>\n+#include <rte_rwlock.h>\n+#include <rte_cycles.h>\n+\n+#include <mlx5_glue.h>\n+#include <mlx5_devx_cmds.h>\n+#include <mlx5_common.h>\n+#include <mlx5_malloc.h>\n+\n+#include \"mlx5.h\"\n+#include \"mlx5_rxtx.h\"\n+#include \"mlx5_utils.h\"\n+\n+/* Supported speed values found in /usr/include/linux/ethtool.h */\n+#ifndef HAVE_SUPPORTED_40000baseKR4_Full\n+#define SUPPORTED_40000baseKR4_Full (1 << 23)\n+#endif\n+#ifndef HAVE_SUPPORTED_40000baseCR4_Full\n+#define SUPPORTED_40000baseCR4_Full (1 << 24)\n+#endif\n+#ifndef HAVE_SUPPORTED_40000baseSR4_Full\n+#define SUPPORTED_40000baseSR4_Full (1 << 25)\n+#endif\n+#ifndef HAVE_SUPPORTED_40000baseLR4_Full\n+#define SUPPORTED_40000baseLR4_Full (1 << 26)\n+#endif\n+#ifndef HAVE_SUPPORTED_56000baseKR4_Full\n+#define SUPPORTED_56000baseKR4_Full (1 << 27)\n+#endif\n+#ifndef HAVE_SUPPORTED_56000baseCR4_Full\n+#define SUPPORTED_56000baseCR4_Full (1 << 28)\n+#endif\n+#ifndef 
HAVE_SUPPORTED_56000baseSR4_Full\n+#define SUPPORTED_56000baseSR4_Full (1 << 29)\n+#endif\n+#ifndef HAVE_SUPPORTED_56000baseLR4_Full\n+#define SUPPORTED_56000baseLR4_Full (1 << 30)\n+#endif\n+\n+/* Add defines in case the running kernel is not the same as user headers. */\n+#ifndef ETHTOOL_GLINKSETTINGS\n+struct ethtool_link_settings {\n+\tuint32_t cmd;\n+\tuint32_t speed;\n+\tuint8_t duplex;\n+\tuint8_t port;\n+\tuint8_t phy_address;\n+\tuint8_t autoneg;\n+\tuint8_t mdio_support;\n+\tuint8_t eth_to_mdix;\n+\tuint8_t eth_tp_mdix_ctrl;\n+\tint8_t link_mode_masks_nwords;\n+\tuint32_t reserved[8];\n+\tuint32_t link_mode_masks[];\n+};\n+\n+/* The kernel values can be found in /include/uapi/linux/ethtool.h */\n+#define ETHTOOL_GLINKSETTINGS 0x0000004c\n+#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5\n+#define ETHTOOL_LINK_MODE_Autoneg_BIT 6\n+#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17\n+#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18\n+#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19\n+#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20\n+#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21\n+#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22\n+#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23\n+#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24\n+#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25\n+#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26\n+#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27\n+#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28\n+#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29\n+#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30\n+#endif\n+#ifndef HAVE_ETHTOOL_LINK_MODE_25G\n+#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31\n+#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32\n+#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33\n+#endif\n+#ifndef HAVE_ETHTOOL_LINK_MODE_50G\n+#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34\n+#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35\n+#endif\n+#ifndef HAVE_ETHTOOL_LINK_MODE_100G\n+#define 
ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36\n+#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37\n+#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38\n+#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39\n+#endif\n+#ifndef HAVE_ETHTOOL_LINK_MODE_200G\n+#define ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT 62\n+#define ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT 63\n+#define ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT 0 /* 64 - 64 */\n+#define ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT 1 /* 65 - 64 */\n+#define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */\n+#endif\n+\n+/* Get interface index from SubFunction device name. */\n+int\n+mlx5_auxiliary_get_ifindex(const char *sf_name)\n+{\n+\tchar if_name[IF_NAMESIZE] = { 0 };\n+\n+\tif (mlx5_auxiliary_get_child_name(sf_name, \"/net\",\n+\t\t\t\t\t  if_name, sizeof(if_name)) != 0)\n+\t\treturn -rte_errno;\n+\treturn if_nametoindex(if_name);\n+}\n+\n+/**\n+ * Get interface name from private structure.\n+ *\n+ * This is a port representor-aware version of mlx5_get_ifname_sysfs().\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet device.\n+ * @param[out] ifname\n+ *   Interface name output buffer.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[MLX5_NAMESIZE])\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tunsigned int ifindex;\n+\n+\tMLX5_ASSERT(priv);\n+\tMLX5_ASSERT(priv->sh);\n+\tif (priv->master && priv->sh->bond.ifindex > 0) {\n+\t\tmemcpy(ifname, priv->sh->bond.ifname, MLX5_NAMESIZE);\n+\t\treturn 0;\n+\t}\n+\tifindex = mlx5_ifindex(dev);\n+\tif (!ifindex) {\n+\t\tif (!priv->representor)\n+\t\t\treturn mlx5_get_ifname_sysfs(priv->sh->ibdev_path,\n+\t\t\t\t\t\t     *ifname);\n+\t\trte_errno = ENXIO;\n+\t\treturn -rte_errno;\n+\t}\n+\tif (if_indextoname(ifindex, &(*ifname)[0]))\n+\t\treturn 0;\n+\trte_errno = errno;\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Perform ifreq 
ioctl() on associated netdev ifname.\n+ *\n+ * @param[in] ifname\n+ *   Pointer to netdev name.\n+ * @param req\n+ *   Request number to pass to ioctl().\n+ * @param[out] ifr\n+ *   Interface request structure output buffer.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ifreq_by_ifname(const char *ifname, int req, struct ifreq *ifr)\n+{\n+\tint sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);\n+\tint ret = 0;\n+\n+\tif (sock == -1) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\trte_strscpy(ifr->ifr_name, ifname, sizeof(ifr->ifr_name));\n+\tret = ioctl(sock, req, ifr);\n+\tif (ret == -1) {\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tclose(sock);\n+\treturn 0;\n+error:\n+\tclose(sock);\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Perform ifreq ioctl() on associated Ethernet device.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet device.\n+ * @param req\n+ *   Request number to pass to ioctl().\n+ * @param[out] ifr\n+ *   Interface request structure output buffer.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)\n+{\n+\tchar ifname[sizeof(ifr->ifr_name)];\n+\tint ret;\n+\n+\tret = mlx5_get_ifname(dev, &ifname);\n+\tif (ret)\n+\t\treturn -rte_errno;\n+\treturn mlx5_ifreq_by_ifname(ifname, req, ifr);\n+}\n+\n+/**\n+ * Get device MTU.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param[out] mtu\n+ *   MTU value output buffer.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)\n+{\n+\tstruct ifreq request;\n+\tint ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);\n+\n+\tif (ret)\n+\t\treturn ret;\n+\t*mtu = request.ifr_mtu;\n+\treturn 0;\n+}\n+\n+/**\n+ * Set device MTU.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet 
device.\n+ * @param mtu\n+ *   MTU value to set.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)\n+{\n+\tstruct ifreq request = { .ifr_mtu = mtu, };\n+\n+\treturn mlx5_ifreq(dev, SIOCSIFMTU, &request);\n+}\n+\n+/**\n+ * Set device flags.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param keep\n+ *   Bitmask for flags that must remain untouched.\n+ * @param flags\n+ *   Bitmask for flags to modify.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)\n+{\n+\tstruct ifreq request;\n+\tint ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);\n+\n+\tif (ret)\n+\t\treturn ret;\n+\trequest.ifr_flags &= keep;\n+\trequest.ifr_flags |= flags & ~keep;\n+\treturn mlx5_ifreq(dev, SIOCSIFFLAGS, &request);\n+}\n+\n+/**\n+ * Get device current raw clock counter\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[out] time\n+ *   Current raw clock counter of the device.\n+ *\n+ * @return\n+ *   0 if the clock has correctly been read\n+ *   The value of errno in case of error\n+ */\n+int\n+mlx5_read_clock(struct rte_eth_dev *dev, uint64_t *clock)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct ibv_context *ctx = priv->sh->ctx;\n+\tstruct ibv_values_ex values;\n+\tint err = 0;\n+\n+\tvalues.comp_mask = IBV_VALUES_MASK_RAW_CLOCK;\n+\terr = mlx5_glue->query_rt_values_ex(ctx, &values);\n+\tif (err != 0) {\n+\t\tDRV_LOG(WARNING, \"Could not query the clock !\");\n+\t\treturn err;\n+\t}\n+\t*clock = values.raw_clock.tv_nsec;\n+\treturn 0;\n+}\n+\n+/**\n+ * Retrieve the master device for representor in the same switch domain.\n+ *\n+ * @param dev\n+ *   Pointer to representor Ethernet device structure.\n+ *\n+ * @return\n+ *   Master device structure  on success, NULL otherwise.\n+ 
*/\n+static struct rte_eth_dev *\n+mlx5_find_master_dev(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv;\n+\tuint16_t port_id;\n+\tuint16_t domain_id;\n+\n+\tpriv = dev->data->dev_private;\n+\tdomain_id = priv->domain_id;\n+\tMLX5_ASSERT(priv->representor);\n+\tMLX5_ETH_FOREACH_DEV(port_id, dev->device) {\n+\t\tstruct mlx5_priv *opriv =\n+\t\t\trte_eth_devices[port_id].data->dev_private;\n+\t\tif (opriv &&\n+\t\t    opriv->master &&\n+\t\t    opriv->domain_id == domain_id &&\n+\t\t    opriv->sh == priv->sh)\n+\t\t\treturn &rte_eth_devices[port_id];\n+\t}\n+\treturn NULL;\n+}\n+\n+/**\n+ * DPDK callback to retrieve physical link information.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[out] link\n+ *   Storage for current link status.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,\n+\t\t\t       struct rte_eth_link *link)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct ethtool_cmd edata = {\n+\t\t.cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */\n+\t};\n+\tstruct ifreq ifr;\n+\tstruct rte_eth_link dev_link;\n+\tint link_speed = 0;\n+\tint ret;\n+\n+\tret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING, \"port %u ioctl(SIOCGIFFLAGS) failed: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\treturn ret;\n+\t}\n+\tdev_link = (struct rte_eth_link) {\n+\t\t.link_status = ((ifr.ifr_flags & IFF_UP) &&\n+\t\t\t\t(ifr.ifr_flags & IFF_RUNNING)),\n+\t};\n+\tifr = (struct ifreq) {\n+\t\t.ifr_data = (void *)&edata,\n+\t};\n+\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tif (ret == -ENOTSUP && priv->representor) {\n+\t\t\tstruct rte_eth_dev *master;\n+\n+\t\t\t/*\n+\t\t\t * For representors we can try to inherit link\n+\t\t\t * settings from the master device. 
Actually\n+\t\t\t * link settings do not make a lot of sense\n+\t\t\t * for representors due to missing physical\n+\t\t\t * link. The old kernel drivers supported\n+\t\t\t * emulated settings query for representors,\n+\t\t\t * the new ones do not, so we have to add\n+\t\t\t * this code for compatibility issues.\n+\t\t\t */\n+\t\t\tmaster = mlx5_find_master_dev(dev);\n+\t\t\tif (master) {\n+\t\t\t\tifr = (struct ifreq) {\n+\t\t\t\t\t.ifr_data = (void *)&edata,\n+\t\t\t\t};\n+\t\t\t\tret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);\n+\t\t\t}\n+\t\t}\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(WARNING,\n+\t\t\t\t\"port %u ioctl(SIOCETHTOOL,\"\n+\t\t\t\t\" ETHTOOL_GSET) failed: %s\",\n+\t\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\t\treturn ret;\n+\t\t}\n+\t}\n+\tlink_speed = ethtool_cmd_speed(&edata);\n+\tif (link_speed == -1)\n+\t\tdev_link.link_speed = ETH_SPEED_NUM_UNKNOWN;\n+\telse\n+\t\tdev_link.link_speed = link_speed;\n+\tpriv->link_speed_capa = 0;\n+\tif (edata.supported & (SUPPORTED_1000baseT_Full |\n+\t\t\t       SUPPORTED_1000baseKX_Full))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_1G;\n+\tif (edata.supported & SUPPORTED_10000baseKR_Full)\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_10G;\n+\tif (edata.supported & (SUPPORTED_40000baseKR4_Full |\n+\t\t\t       SUPPORTED_40000baseCR4_Full |\n+\t\t\t       SUPPORTED_40000baseSR4_Full |\n+\t\t\t       SUPPORTED_40000baseLR4_Full))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_40G;\n+\tdev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ?\n+\t\t\t\tETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);\n+\tdev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &\n+\t\t\tETH_LINK_SPEED_FIXED);\n+\t*link = dev_link;\n+\treturn 0;\n+}\n+\n+/**\n+ * Retrieve physical link information (unlocked version using new ioctl).\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[out] link\n+ *   Storage for current link status.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and 
rte_errno is set.\n+ */\n+static int\n+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,\n+\t\t\t     struct rte_eth_link *link)\n+\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };\n+\tstruct ifreq ifr;\n+\tstruct rte_eth_link dev_link;\n+\tstruct rte_eth_dev *master = NULL;\n+\tuint64_t sc;\n+\tint ret;\n+\n+\tret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING, \"port %u ioctl(SIOCGIFFLAGS) failed: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\treturn ret;\n+\t}\n+\tdev_link = (struct rte_eth_link) {\n+\t\t.link_status = ((ifr.ifr_flags & IFF_UP) &&\n+\t\t\t\t(ifr.ifr_flags & IFF_RUNNING)),\n+\t};\n+\tifr = (struct ifreq) {\n+\t\t.ifr_data = (void *)&gcmd,\n+\t};\n+\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tif (ret == -ENOTSUP && priv->representor) {\n+\t\t\t/*\n+\t\t\t * For representors we can try to inherit link\n+\t\t\t * settings from the master device. Actually\n+\t\t\t * link settings do not make a lot of sense\n+\t\t\t * for representors due to missing physical\n+\t\t\t * link. 
The old kernel drivers supported\n+\t\t\t * emulated settings query for representors,\n+\t\t\t * the new ones do not, so we have to add\n+\t\t\t * this code for compatibility issues.\n+\t\t\t */\n+\t\t\tmaster = mlx5_find_master_dev(dev);\n+\t\t\tif (master) {\n+\t\t\t\tifr = (struct ifreq) {\n+\t\t\t\t\t.ifr_data = (void *)&gcmd,\n+\t\t\t\t};\n+\t\t\t\tret = mlx5_ifreq(master, SIOCETHTOOL, &ifr);\n+\t\t\t}\n+\t\t}\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(DEBUG,\n+\t\t\t\t\"port %u ioctl(SIOCETHTOOL,\"\n+\t\t\t\t\" ETHTOOL_GLINKSETTINGS) failed: %s\",\n+\t\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\t\treturn ret;\n+\t\t}\n+\t}\n+\tgcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;\n+\n+\talignas(struct ethtool_link_settings)\n+\tuint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +\n+\t\t     sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];\n+\tstruct ethtool_link_settings *ecmd = (void *)data;\n+\n+\t*ecmd = gcmd;\n+\tifr.ifr_data = (void *)ecmd;\n+\tret = mlx5_ifreq(master ? 
master : dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(DEBUG,\n+\t\t\t\"port %u ioctl(SIOCETHTOOL,\"\n+\t\t\t\"ETHTOOL_GLINKSETTINGS) failed: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\treturn ret;\n+\t}\n+\tdev_link.link_speed = (ecmd->speed == UINT32_MAX) ?\n+\t\t\t\tETH_SPEED_NUM_UNKNOWN : ecmd->speed;\n+\tsc = ecmd->link_mode_masks[0] |\n+\t\t((uint64_t)ecmd->link_mode_masks[1] << 32);\n+\tpriv->link_speed_capa = 0;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseT_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_1G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseKR_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_10G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_20G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_40G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_56G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseCR_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseKR_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_25G;\n+\tif (sc & 
(MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_50G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_100G;\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseKR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseSR4_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_200G;\n+\n+\tsc = ecmd->link_mode_masks[2] |\n+\t\t((uint64_t)ecmd->link_mode_masks[3] << 32);\n+\tif (sc & (MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT\n+\t\t       (ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT) |\n+\t\t  MLX5_BITSHIFT(ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT)))\n+\t\tpriv->link_speed_capa |= ETH_LINK_SPEED_200G;\n+\tdev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?\n+\t\t\t\tETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);\n+\tdev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &\n+\t\t\t\t  ETH_LINK_SPEED_FIXED);\n+\t*link = dev_link;\n+\treturn 0;\n+}\n+\n+/**\n+ * DPDK callback to retrieve physical link information.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param wait_to_complete\n+ *   Wait for request completion.\n+ *\n+ * @return\n+ *   0 if link status was not updated, positive if it was, a negative errno\n+ *   value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete)\n+{\n+\tint ret;\n+\tstruct rte_eth_link dev_link;\n+\ttime_t start_time = time(NULL);\n+\tint retry = MLX5_GET_LINK_STATUS_RETRY_COUNT;\n+\n+\tdo {\n+\t\tret = mlx5_link_update_unlocked_gs(dev, &dev_link);\n+\t\tif (ret == -ENOTSUP)\n+\t\t\tret = 
mlx5_link_update_unlocked_gset(dev, &dev_link);\n+\t\tif (ret == 0)\n+\t\t\tbreak;\n+\t\t/* Handle wait to complete situation. */\n+\t\tif ((wait_to_complete || retry) && ret == -EAGAIN) {\n+\t\t\tif (abs((int)difftime(time(NULL), start_time)) <\n+\t\t\t    MLX5_LINK_STATUS_TIMEOUT) {\n+\t\t\t\tusleep(0);\n+\t\t\t\tcontinue;\n+\t\t\t} else {\n+\t\t\t\trte_errno = EBUSY;\n+\t\t\t\treturn -rte_errno;\n+\t\t\t}\n+\t\t} else if (ret < 0) {\n+\t\t\treturn ret;\n+\t\t}\n+\t} while (wait_to_complete || retry-- > 0);\n+\tret = !!memcmp(&dev->data->dev_link, &dev_link,\n+\t\t       sizeof(struct rte_eth_link));\n+\tdev->data->dev_link = dev_link;\n+\treturn ret;\n+}\n+\n+/**\n+ * DPDK callback to get flow control status.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[out] fc_conf\n+ *   Flow control output buffer.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)\n+{\n+\tstruct ifreq ifr;\n+\tstruct ethtool_pauseparam ethpause = {\n+\t\t.cmd = ETHTOOL_GPAUSEPARAM\n+\t};\n+\tint ret;\n+\n+\tifr.ifr_data = (void *)&ethpause;\n+\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:\"\n+\t\t\t\" %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\treturn ret;\n+\t}\n+\tfc_conf->autoneg = ethpause.autoneg;\n+\tif (ethpause.rx_pause && ethpause.tx_pause)\n+\t\tfc_conf->mode = RTE_FC_FULL;\n+\telse if (ethpause.rx_pause)\n+\t\tfc_conf->mode = RTE_FC_RX_PAUSE;\n+\telse if (ethpause.tx_pause)\n+\t\tfc_conf->mode = RTE_FC_TX_PAUSE;\n+\telse\n+\t\tfc_conf->mode = RTE_FC_NONE;\n+\treturn 0;\n+}\n+\n+/**\n+ * DPDK callback to modify flow control parameters.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[in] fc_conf\n+ *   Flow control parameters.\n+ *\n+ * @return\n+ *   0 on success, a 
negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)\n+{\n+\tstruct ifreq ifr;\n+\tstruct ethtool_pauseparam ethpause = {\n+\t\t.cmd = ETHTOOL_SPAUSEPARAM\n+\t};\n+\tint ret;\n+\n+\tifr.ifr_data = (void *)&ethpause;\n+\tethpause.autoneg = fc_conf->autoneg;\n+\tif (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||\n+\t    (fc_conf->mode & RTE_FC_RX_PAUSE))\n+\t\tethpause.rx_pause = 1;\n+\telse\n+\t\tethpause.rx_pause = 0;\n+\n+\tif (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) ||\n+\t    (fc_conf->mode & RTE_FC_TX_PAUSE))\n+\t\tethpause.tx_pause = 1;\n+\telse\n+\t\tethpause.tx_pause = 0;\n+\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)\"\n+\t\t\t\" failed: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\treturn ret;\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * Handle asynchronous removal event for entire multiport device.\n+ *\n+ * @param sh\n+ *   Infiniband device shared context.\n+ */\n+static void\n+mlx5_dev_interrupt_device_fatal(struct mlx5_dev_ctx_shared *sh)\n+{\n+\tuint32_t i;\n+\n+\tfor (i = 0; i < sh->max_port; ++i) {\n+\t\tstruct rte_eth_dev *dev;\n+\n+\t\tif (sh->port[i].ih_port_id >= RTE_MAX_ETHPORTS) {\n+\t\t\t/*\n+\t\t\t * Or not existing port either no\n+\t\t\t * handler installed for this port.\n+\t\t\t */\n+\t\t\tcontinue;\n+\t\t}\n+\t\tdev = &rte_eth_devices[sh->port[i].ih_port_id];\n+\t\tMLX5_ASSERT(dev);\n+\t\tif (dev->data->dev_conf.intr_conf.rmv)\n+\t\t\trte_eth_dev_callback_process\n+\t\t\t\t(dev, RTE_ETH_EVENT_INTR_RMV, NULL);\n+\t}\n+}\n+\n+/**\n+ * Handle shared asynchronous events the NIC (removal event\n+ * and link status change). 
Supports multiport IB device.\n+ *\n+ * @param cb_arg\n+ *   Callback argument.\n+ */\n+void\n+mlx5_dev_interrupt_handler(void *cb_arg)\n+{\n+\tstruct mlx5_dev_ctx_shared *sh = cb_arg;\n+\tstruct ibv_async_event event;\n+\n+\t/* Read all message from the IB device and acknowledge them. */\n+\tfor (;;) {\n+\t\tstruct rte_eth_dev *dev;\n+\t\tuint32_t tmp;\n+\n+\t\tif (mlx5_glue->get_async_event(sh->ctx, &event))\n+\t\t\tbreak;\n+\t\t/* Retrieve and check IB port index. */\n+\t\ttmp = (uint32_t)event.element.port_num;\n+\t\tif (!tmp && event.event_type == IBV_EVENT_DEVICE_FATAL) {\n+\t\t\t/*\n+\t\t\t * The DEVICE_FATAL event is called once for\n+\t\t\t * entire device without port specifying.\n+\t\t\t * We should notify all existing ports.\n+\t\t\t */\n+\t\t\tmlx5_glue->ack_async_event(&event);\n+\t\t\tmlx5_dev_interrupt_device_fatal(sh);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tMLX5_ASSERT(tmp && (tmp <= sh->max_port));\n+\t\tif (!tmp) {\n+\t\t\t/* Unsupported device level event. */\n+\t\t\tmlx5_glue->ack_async_event(&event);\n+\t\t\tDRV_LOG(DEBUG,\n+\t\t\t\t\"unsupported common event (type %d)\",\n+\t\t\t\tevent.event_type);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tif (tmp > sh->max_port) {\n+\t\t\t/* Invalid IB port index. */\n+\t\t\tmlx5_glue->ack_async_event(&event);\n+\t\t\tDRV_LOG(DEBUG,\n+\t\t\t\t\"cannot handle an event (type %d)\"\n+\t\t\t\t\"due to invalid IB port index (%u)\",\n+\t\t\t\tevent.event_type, tmp);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tif (sh->port[tmp - 1].ih_port_id >= RTE_MAX_ETHPORTS) {\n+\t\t\t/* No handler installed. */\n+\t\t\tmlx5_glue->ack_async_event(&event);\n+\t\t\tDRV_LOG(DEBUG,\n+\t\t\t\t\"cannot handle an event (type %d)\"\n+\t\t\t\t\"due to no handler installed for port %u\",\n+\t\t\t\tevent.event_type, tmp);\n+\t\t\tcontinue;\n+\t\t}\n+\t\t/* Retrieve ethernet device descriptor. 
*/\n+\t\ttmp = sh->port[tmp - 1].ih_port_id;\n+\t\tdev = &rte_eth_devices[tmp];\n+\t\tMLX5_ASSERT(dev);\n+\t\tif ((event.event_type == IBV_EVENT_PORT_ACTIVE ||\n+\t\t     event.event_type == IBV_EVENT_PORT_ERR) &&\n+\t\t\tdev->data->dev_conf.intr_conf.lsc) {\n+\t\t\tmlx5_glue->ack_async_event(&event);\n+\t\t\tif (mlx5_link_update(dev, 0) == -EAGAIN) {\n+\t\t\t\tusleep(0);\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\trte_eth_dev_callback_process\n+\t\t\t\t(dev, RTE_ETH_EVENT_INTR_LSC, NULL);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tDRV_LOG(DEBUG,\n+\t\t\t\"port %u cannot handle an unknown event (type %d)\",\n+\t\t\tdev->data->port_id, event.event_type);\n+\t\tmlx5_glue->ack_async_event(&event);\n+\t}\n+}\n+\n+/*\n+ * Unregister callback handler safely. The handler may be active\n+ * while we are trying to unregister it, in this case code -EAGAIN\n+ * is returned by rte_intr_callback_unregister(). This routine checks\n+ * the return code and tries to unregister handler again.\n+ *\n+ * @param handle\n+ *   interrupt handle\n+ * @param cb_fn\n+ *   pointer to callback routine\n+ * @cb_arg\n+ *   opaque callback parameter\n+ */\n+void\n+mlx5_intr_callback_unregister(const struct rte_intr_handle *handle,\n+\t\t\t      rte_intr_callback_fn cb_fn, void *cb_arg)\n+{\n+\t/*\n+\t * Try to reduce timeout management overhead by not calling\n+\t * the timer related routines on the first iteration. If the\n+\t * unregistering succeeds on first call there will be no\n+\t * timer calls at all.\n+\t */\n+\tuint64_t twait = 0;\n+\tuint64_t start = 0;\n+\n+\tdo {\n+\t\tint ret;\n+\n+\t\tret = rte_intr_callback_unregister(handle, cb_fn, cb_arg);\n+\t\tif (ret >= 0)\n+\t\t\treturn;\n+\t\tif (ret != -EAGAIN) {\n+\t\t\tDRV_LOG(INFO, \"failed to unregister interrupt\"\n+\t\t\t\t      \" handler (error: %d)\", ret);\n+\t\t\tMLX5_ASSERT(false);\n+\t\t\treturn;\n+\t\t}\n+\t\tif (twait) {\n+\t\t\tstruct timespec onems;\n+\n+\t\t\t/* Wait one millisecond and try again. 
*/\n+\t\t\tonems.tv_sec = 0;\n+\t\t\tonems.tv_nsec = NS_PER_S / MS_PER_S;\n+\t\t\tnanosleep(&onems, 0);\n+\t\t\t/* Check whether one second elapsed. */\n+\t\t\tif ((rte_get_timer_cycles() - start) <= twait)\n+\t\t\t\tcontinue;\n+\t\t} else {\n+\t\t\t/*\n+\t\t\t * We get the amount of timer ticks for one second.\n+\t\t\t * If this amount elapsed it means we spent one\n+\t\t\t * second in waiting. This branch is executed once\n+\t\t\t * on first iteration.\n+\t\t\t */\n+\t\t\ttwait = rte_get_timer_hz();\n+\t\t\tMLX5_ASSERT(twait);\n+\t\t}\n+\t\t/*\n+\t\t * Timeout elapsed, show message (once a second) and retry.\n+\t\t * We have no other acceptable option here, if we ignore\n+\t\t * the unregistering return code the handler will not\n+\t\t * be unregistered, fd will be closed and we may get the\n+\t\t * crush. Hanging and messaging in the loop seems not to be\n+\t\t * the worst choice.\n+\t\t */\n+\t\tDRV_LOG(INFO, \"Retrying to unregister interrupt handler\");\n+\t\tstart = rte_get_timer_cycles();\n+\t} while (true);\n+}\n+\n+/**\n+ * Handle DEVX interrupts from the NIC.\n+ * This function is probably called from the DPDK host thread.\n+ *\n+ * @param cb_arg\n+ *   Callback argument.\n+ */\n+void\n+mlx5_dev_interrupt_handler_devx(void *cb_arg)\n+{\n+#ifndef HAVE_IBV_DEVX_ASYNC\n+\t(void)cb_arg;\n+\treturn;\n+#else\n+\tstruct mlx5_dev_ctx_shared *sh = cb_arg;\n+\tunion {\n+\t\tstruct mlx5dv_devx_async_cmd_hdr cmd_resp;\n+\t\tuint8_t buf[MLX5_ST_SZ_BYTES(query_flow_counter_out) +\n+\t\t\t    MLX5_ST_SZ_BYTES(traffic_counter) +\n+\t\t\t    sizeof(struct mlx5dv_devx_async_cmd_hdr)];\n+\t} out;\n+\tuint8_t *buf = out.buf + sizeof(out.cmd_resp);\n+\n+\twhile (!mlx5_glue->devx_get_async_cmd_comp(sh->devx_comp,\n+\t\t\t\t\t\t   &out.cmd_resp,\n+\t\t\t\t\t\t   sizeof(out.buf)))\n+\t\tmlx5_flow_async_pool_query_handle\n+\t\t\t(sh, (uint64_t)out.cmd_resp.wr_id,\n+\t\t\t mlx5_devx_get_out_command_status(buf));\n+#endif /* HAVE_IBV_DEVX_ASYNC */\n+}\n+\n+/**\n+ * DPDK callback 
to bring the link DOWN.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_set_link_down(struct rte_eth_dev *dev)\n+{\n+\treturn mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP);\n+}\n+\n+/**\n+ * DPDK callback to bring the link UP.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_set_link_up(struct rte_eth_dev *dev)\n+{\n+\treturn mlx5_set_flags(dev, ~IFF_UP, IFF_UP);\n+}\n+\n+/**\n+ * Check if mlx5 device was removed.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ *\n+ * @return\n+ *   1 when device is removed, otherwise 0.\n+ */\n+int\n+mlx5_is_removed(struct rte_eth_dev *dev)\n+{\n+\tstruct ibv_device_attr device_attr;\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\n+\tif (mlx5_glue->query_device(priv->sh->ctx, &device_attr) == EIO)\n+\t\treturn 1;\n+\treturn 0;\n+}\n+\n+/**\n+ * Analyze gathered port parameters via sysfs to recognize master\n+ * and representor devices for E-Switch configuration.\n+ *\n+ * @param[in] device_dir\n+ *   flag of presence of \"device\" directory under port device key.\n+ * @param[inout] switch_info\n+ *   Port information, including port name as a number and port name\n+ *   type if recognized\n+ *\n+ * @return\n+ *   master and representor flags are set in switch_info according to\n+ *   recognized parameters (if any).\n+ */\n+static void\n+mlx5_sysfs_check_switch_info(bool device_dir,\n+\t\t\t     struct mlx5_switch_info *switch_info)\n+{\n+\tswitch (switch_info->name_type) {\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_UNKNOWN:\n+\t\t/*\n+\t\t * Name is not recognized, assume the master,\n+\t\t * check the device directory presence.\n+\t\t */\n+\t\tswitch_info->master = device_dir;\n+\t\tbreak;\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_NOTSET:\n+\t\t/*\n+\t\t * Name is not 
set, this assumes the legacy naming\n+\t\t * schema for master, just check if there is\n+\t\t * a device directory.\n+\t\t */\n+\t\tswitch_info->master = device_dir;\n+\t\tbreak;\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_UPLINK:\n+\t\t/* New uplink naming schema recognized. */\n+\t\tswitch_info->master = 1;\n+\t\tbreak;\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_LEGACY:\n+\t\t/* Legacy representors naming schema. */\n+\t\tswitch_info->representor = !device_dir;\n+\t\tbreak;\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_PFHPF:\n+\t\t/* Fallthrough */\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_PFVF:\n+\t\t/* Fallthrough */\n+\tcase MLX5_PHYS_PORT_NAME_TYPE_PFSF:\n+\t\t/* New representors naming schema. */\n+\t\tswitch_info->representor = 1;\n+\t\tbreak;\n+\tdefault:\n+\t\tswitch_info->master = device_dir;\n+\t\tbreak;\n+\t}\n+}\n+\n+/**\n+ * Get switch information associated with network interface.\n+ *\n+ * @param ifindex\n+ *   Network interface index.\n+ * @param[out] info\n+ *   Switch information object, populated in case of success.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_sysfs_switch_info(unsigned int ifindex, struct mlx5_switch_info *info)\n+{\n+\tchar ifname[IF_NAMESIZE];\n+\tchar port_name[IF_NAMESIZE];\n+\tFILE *file;\n+\tstruct mlx5_switch_info data = {\n+\t\t.master = 0,\n+\t\t.representor = 0,\n+\t\t.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,\n+\t\t.port_name = 0,\n+\t\t.switch_id = 0,\n+\t};\n+\tDIR *dir;\n+\tbool port_switch_id_set = false;\n+\tbool device_dir = false;\n+\tchar c;\n+\tint ret;\n+\n+\tif (!if_indextoname(ifindex, ifname)) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\n+\tMKSTR(phys_port_name, \"/sys/class/net/%s/phys_port_name\",\n+\t      ifname);\n+\tMKSTR(phys_switch_id, \"/sys/class/net/%s/phys_switch_id\",\n+\t      ifname);\n+\tMKSTR(pci_device, \"/sys/class/net/%s/device\",\n+\t      ifname);\n+\n+\tfile = fopen(phys_port_name, \"rb\");\n+\tif (file != NULL) {\n+\t\tret = 
fscanf(file, \"%\" RTE_STR(IF_NAMESIZE) \"s\", port_name);\n+\t\tfclose(file);\n+\t\tif (ret == 1)\n+\t\t\tmlx5_translate_port_name(port_name, &data);\n+\t}\n+\tfile = fopen(phys_switch_id, \"rb\");\n+\tif (file == NULL) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\tport_switch_id_set =\n+\t\tfscanf(file, \"%\" SCNx64 \"%c\", &data.switch_id, &c) == 2 &&\n+\t\tc == '\\n';\n+\tfclose(file);\n+\tdir = opendir(pci_device);\n+\tif (dir != NULL) {\n+\t\tclosedir(dir);\n+\t\tdevice_dir = true;\n+\t}\n+\tif (port_switch_id_set) {\n+\t\t/* We have some E-Switch configuration. */\n+\t\tmlx5_sysfs_check_switch_info(device_dir, &data);\n+\t}\n+\t*info = data;\n+\tMLX5_ASSERT(!(data.master && data.representor));\n+\tif (data.master && data.representor) {\n+\t\tDRV_LOG(ERR, \"ifindex %u device is recognized as master\"\n+\t\t\t     \" and as representor\", ifindex);\n+\t\trte_errno = ENODEV;\n+\t\treturn -rte_errno;\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * Get bond information associated with network interface.\n+ *\n+ * @param pf_ifindex\n+ *   Network interface index of bond slave interface\n+ * @param[out] ifindex\n+ *   Pointer to bond ifindex.\n+ * @param[out] ifname\n+ *   Pointer to bond ifname.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_sysfs_bond_info(unsigned int pf_ifindex, unsigned int *ifindex,\n+\t\t     char *ifname)\n+{\n+\tchar name[IF_NAMESIZE];\n+\tFILE *file;\n+\tunsigned int index;\n+\tint ret;\n+\n+\tif (!if_indextoname(pf_ifindex, name) || !strlen(name)) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\tMKSTR(bond_if, \"/sys/class/net/%s/master/ifindex\", name);\n+\t/* read bond ifindex */\n+\tfile = fopen(bond_if, \"rb\");\n+\tif (file == NULL) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\tret = fscanf(file, \"%u\", &index);\n+\tfclose(file);\n+\tif (ret <= 0) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\tif 
(ifindex)\n+\t\t*ifindex = index;\n+\n+\t/* read bond device name from symbol link */\n+\tif (ifname) {\n+\t\tif (!if_indextoname(index, ifname)) {\n+\t\t\trte_errno = errno;\n+\t\t\treturn -rte_errno;\n+\t\t}\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * DPDK callback to retrieve plug-in module EEPROM information (type and size).\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[out] modinfo\n+ *   Storage for plug-in module EEPROM information.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_get_module_info(struct rte_eth_dev *dev,\n+\t\t     struct rte_eth_dev_module_info *modinfo)\n+{\n+\tstruct ethtool_modinfo info = {\n+\t\t.cmd = ETHTOOL_GMODULEINFO,\n+\t};\n+\tstruct ifreq ifr = (struct ifreq) {\n+\t\t.ifr_data = (void *)&info,\n+\t};\n+\tint ret = 0;\n+\n+\tif (!dev) {\n+\t\tDRV_LOG(WARNING, \"missing argument, cannot get module info\");\n+\t\trte_errno = EINVAL;\n+\t\treturn -rte_errno;\n+\t}\n+\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING, \"port %u ioctl(SIOCETHTOOL) failed: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\t\treturn ret;\n+\t}\n+\tmodinfo->type = info.type;\n+\tmodinfo->eeprom_len = info.eeprom_len;\n+\treturn ret;\n+}\n+\n+/**\n+ * DPDK callback to retrieve plug-in module EEPROM data.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param[out] info\n+ *   Storage for plug-in module EEPROM data.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int mlx5_get_module_eeprom(struct rte_eth_dev *dev,\n+\t\t\t   struct rte_dev_eeprom_info *info)\n+{\n+\tstruct ethtool_eeprom *eeprom;\n+\tstruct ifreq ifr;\n+\tint ret = 0;\n+\n+\tif (!dev) {\n+\t\tDRV_LOG(WARNING, \"missing argument, cannot get module eeprom\");\n+\t\trte_errno = EINVAL;\n+\t\treturn -rte_errno;\n+\t}\n+\teeprom = mlx5_malloc(MLX5_MEM_ZERO,\n+\t\t\t     (sizeof(struct 
ethtool_eeprom) + info->length), 0,\n+\t\t\t     SOCKET_ID_ANY);\n+\tif (!eeprom) {\n+\t\tDRV_LOG(WARNING, \"port %u cannot allocate memory for \"\n+\t\t\t\"eeprom data\", dev->data->port_id);\n+\t\trte_errno = ENOMEM;\n+\t\treturn -rte_errno;\n+\t}\n+\teeprom->cmd = ETHTOOL_GMODULEEEPROM;\n+\teeprom->offset = info->offset;\n+\teeprom->len = info->length;\n+\tifr = (struct ifreq) {\n+\t\t.ifr_data = (void *)eeprom,\n+\t};\n+\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret)\n+\t\tDRV_LOG(WARNING, \"port %u ioctl(SIOCETHTOOL) failed: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\telse\n+\t\trte_memcpy(info->data, eeprom->data, info->length);\n+\tmlx5_free(eeprom);\n+\treturn ret;\n+}\n+\n+/**\n+ * Read device counters table.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param[in] pf\n+ *   PF index in case of bonding device, -1 otherwise\n+ * @param[out] stats\n+ *   Counters table output buffer.\n+ *\n+ * @return\n+ *   0 on success and stats is filled, negative errno value otherwise and\n+ *   rte_errno is set.\n+ */\n+static int\n+_mlx5_os_read_dev_counters(struct rte_eth_dev *dev, int pf, uint64_t *stats)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;\n+\tunsigned int i;\n+\tstruct ifreq ifr;\n+\tunsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t);\n+\tunsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz];\n+\tstruct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf;\n+\tint ret;\n+\n+\tet_stats->cmd = ETHTOOL_GSTATS;\n+\tet_stats->n_stats = xstats_ctrl->stats_n;\n+\tifr.ifr_data = (caddr_t)et_stats;\n+\tif (pf >= 0)\n+\t\tret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[pf].ifname,\n+\t\t\t\t\t   SIOCETHTOOL, &ifr);\n+\telse\n+\t\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"port %u unable to read statistic values from device\",\n+\t\t\tdev->data->port_id);\n+\t\treturn 
ret;\n+\t}\n+\tfor (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {\n+\t\tif (xstats_ctrl->info[i].dev)\n+\t\t\tcontinue;\n+\t\tstats[i] += (uint64_t)\n+\t\t\t    et_stats->data[xstats_ctrl->dev_table_idx[i]];\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * Read device counters.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param[out] stats\n+ *   Counters table output buffer.\n+ *\n+ * @return\n+ *   0 on success and stats is filled, negative errno value otherwise and\n+ *   rte_errno is set.\n+ */\n+int\n+mlx5_os_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;\n+\tint ret = 0, i;\n+\n+\tmemset(stats, 0, sizeof(*stats) * xstats_ctrl->mlx5_stats_n);\n+\t/* Read ifreq counters. */\n+\tif (priv->master && priv->pf_bond >= 0) {\n+\t\t/* Sum xstats from bonding device member ports. */\n+\t\tfor (i = 0; i < priv->sh->bond.n_port; i++) {\n+\t\t\tret = _mlx5_os_read_dev_counters(dev, i, stats);\n+\t\t\tif (ret)\n+\t\t\t\treturn ret;\n+\t\t}\n+\t} else {\n+\t\tret = _mlx5_os_read_dev_counters(dev, -1, stats);\n+\t}\n+\t/* Read IB counters. */\n+\tfor (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {\n+\t\tif (!xstats_ctrl->info[i].dev)\n+\t\t\tcontinue;\n+\t\tret = mlx5_os_read_dev_stat(priv, xstats_ctrl->info[i].ctr_name,\n+\t\t\t\t\t    &stats[i]);\n+\t\t/* return last xstats counter if fail to read. 
*/\n+\t\tif (ret != 0)\n+\t\t\txstats_ctrl->xstats[i] = stats[i];\n+\t\telse\n+\t\t\tstats[i] = xstats_ctrl->xstats[i];\n+\t}\n+\treturn ret;\n+}\n+\n+/**\n+ * Query the number of statistics provided by ETHTOOL.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ *\n+ * @return\n+ *   Number of statistics on success, negative errno value otherwise and\n+ *   rte_errno is set.\n+ */\n+int\n+mlx5_os_get_stats_n(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct ethtool_drvinfo drvinfo;\n+\tstruct ifreq ifr;\n+\tint ret;\n+\n+\tdrvinfo.cmd = ETHTOOL_GDRVINFO;\n+\tifr.ifr_data = (caddr_t)&drvinfo;\n+\tif (priv->master && priv->pf_bond >= 0)\n+\t\t/* Bonding PF. */\n+\t\tret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname,\n+\t\t\t\t\t   SIOCETHTOOL, &ifr);\n+\telse\n+\t\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING, \"port %u unable to query number of statistics\",\n+\t\t\tdev->data->port_id);\n+\t\treturn ret;\n+\t}\n+\treturn drvinfo.n_stats;\n+}\n+\n+static const struct mlx5_counter_ctrl mlx5_counters_init[] = {\n+\t{\n+\t\t.dpdk_name = \"rx_unicast_bytes\",\n+\t\t.ctr_name = \"rx_vport_unicast_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_multicast_bytes\",\n+\t\t.ctr_name = \"rx_vport_multicast_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_broadcast_bytes\",\n+\t\t.ctr_name = \"rx_vport_broadcast_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_unicast_packets\",\n+\t\t.ctr_name = \"rx_vport_unicast_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_multicast_packets\",\n+\t\t.ctr_name = \"rx_vport_multicast_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_broadcast_packets\",\n+\t\t.ctr_name = \"rx_vport_broadcast_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_unicast_bytes\",\n+\t\t.ctr_name = \"tx_vport_unicast_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_multicast_bytes\",\n+\t\t.ctr_name = \"tx_vport_multicast_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = 
\"tx_broadcast_bytes\",\n+\t\t.ctr_name = \"tx_vport_broadcast_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_unicast_packets\",\n+\t\t.ctr_name = \"tx_vport_unicast_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_multicast_packets\",\n+\t\t.ctr_name = \"tx_vport_multicast_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_broadcast_packets\",\n+\t\t.ctr_name = \"tx_vport_broadcast_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_wqe_errors\",\n+\t\t.ctr_name = \"rx_wqe_err\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_phy_crc_errors\",\n+\t\t.ctr_name = \"rx_crc_errors_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_phy_in_range_len_errors\",\n+\t\t.ctr_name = \"rx_in_range_len_errors_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_phy_symbol_errors\",\n+\t\t.ctr_name = \"rx_symbol_err_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_phy_errors\",\n+\t\t.ctr_name = \"tx_errors_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_out_of_buffer\",\n+\t\t.ctr_name = \"out_of_buffer\",\n+\t\t.dev = 1,\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_phy_packets\",\n+\t\t.ctr_name = \"tx_packets_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_phy_packets\",\n+\t\t.ctr_name = \"rx_packets_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_phy_discard_packets\",\n+\t\t.ctr_name = \"tx_discards_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_phy_discard_packets\",\n+\t\t.ctr_name = \"rx_discards_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_phy_bytes\",\n+\t\t.ctr_name = \"tx_bytes_phy\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_phy_bytes\",\n+\t\t.ctr_name = \"rx_bytes_phy\",\n+\t},\n+\t/* Representor only */\n+\t{\n+\t\t.dpdk_name = \"rx_vport_packets\",\n+\t\t.ctr_name = \"vport_rx_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"rx_vport_bytes\",\n+\t\t.ctr_name = \"vport_rx_bytes\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_vport_packets\",\n+\t\t.ctr_name = \"vport_tx_packets\",\n+\t},\n+\t{\n+\t\t.dpdk_name = \"tx_vport_bytes\",\n+\t\t.ctr_name = \"vport_tx_bytes\",\n+\t},\n+};\n+\n+static const unsigned int xstats_n = 
RTE_DIM(mlx5_counters_init);\n+\n+/**\n+ * Init the structures to read device counters.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ */\n+void\n+mlx5_os_stats_init(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;\n+\tstruct mlx5_stats_ctrl *stats_ctrl = &priv->stats_ctrl;\n+\tunsigned int i;\n+\tunsigned int j;\n+\tstruct ifreq ifr;\n+\tstruct ethtool_gstrings *strings = NULL;\n+\tunsigned int dev_stats_n;\n+\tunsigned int str_sz;\n+\tint ret;\n+\n+\t/* So that it won't aggregate for each init. */\n+\txstats_ctrl->mlx5_stats_n = 0;\n+\tret = mlx5_os_get_stats_n(dev);\n+\tif (ret < 0) {\n+\t\tDRV_LOG(WARNING, \"port %u no extended statistics available\",\n+\t\t\tdev->data->port_id);\n+\t\treturn;\n+\t}\n+\tdev_stats_n = ret;\n+\t/* Allocate memory to grab stat names and values. */\n+\tstr_sz = dev_stats_n * ETH_GSTRING_LEN;\n+\tstrings = (struct ethtool_gstrings *)\n+\t\t  mlx5_malloc(0, str_sz + sizeof(struct ethtool_gstrings), 0,\n+\t\t\t      SOCKET_ID_ANY);\n+\tif (!strings) {\n+\t\tDRV_LOG(WARNING, \"port %u unable to allocate memory for xstats\",\n+\t\t     dev->data->port_id);\n+\t\treturn;\n+\t}\n+\tstrings->cmd = ETHTOOL_GSTRINGS;\n+\tstrings->string_set = ETH_SS_STATS;\n+\tstrings->len = dev_stats_n;\n+\tifr.ifr_data = (caddr_t)strings;\n+\tif (priv->master && priv->pf_bond >= 0)\n+\t\t/* Bonding master. 
*/\n+\t\tret = mlx5_ifreq_by_ifname(priv->sh->bond.ports[0].ifname,\n+\t\t\t\t\t   SIOCETHTOOL, &ifr);\n+\telse\n+\t\tret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);\n+\tif (ret) {\n+\t\tDRV_LOG(WARNING, \"port %u unable to get statistic names\",\n+\t\t\tdev->data->port_id);\n+\t\tgoto free;\n+\t}\n+\tfor (i = 0; i != dev_stats_n; ++i) {\n+\t\tconst char *curr_string = (const char *)\n+\t\t\t&strings->data[i * ETH_GSTRING_LEN];\n+\n+\t\tfor (j = 0; j != xstats_n; ++j) {\n+\t\t\tif (!strcmp(mlx5_counters_init[j].ctr_name,\n+\t\t\t\t    curr_string)) {\n+\t\t\t\tunsigned int idx = xstats_ctrl->mlx5_stats_n++;\n+\n+\t\t\t\txstats_ctrl->dev_table_idx[idx] = i;\n+\t\t\t\txstats_ctrl->info[idx] = mlx5_counters_init[j];\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\t}\n+\t/* Add dev counters. */\n+\tfor (i = 0; i != xstats_n; ++i) {\n+\t\tif (mlx5_counters_init[i].dev) {\n+\t\t\tunsigned int idx = xstats_ctrl->mlx5_stats_n++;\n+\n+\t\t\txstats_ctrl->info[idx] = mlx5_counters_init[i];\n+\t\t\txstats_ctrl->hw_stats[idx] = 0;\n+\t\t}\n+\t}\n+\tMLX5_ASSERT(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS);\n+\txstats_ctrl->stats_n = dev_stats_n;\n+\t/* Copy to base at first time. 
*/\n+\tret = mlx5_os_read_dev_counters(dev, xstats_ctrl->base);\n+\tif (ret)\n+\t\tDRV_LOG(ERR, \"port %u cannot read device counters: %s\",\n+\t\t\tdev->data->port_id, strerror(rte_errno));\n+\tmlx5_os_read_dev_stat(priv, \"out_of_buffer\", &stats_ctrl->imissed_base);\n+\tstats_ctrl->imissed = 0;\n+free:\n+\tmlx5_free(strings);\n+}\n+\n+/**\n+ * Get MAC address by querying netdevice.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet device.\n+ * @param[out] mac\n+ *   MAC address output buffer.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN])\n+{\n+\tstruct ifreq request;\n+\tint ret;\n+\n+\tret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request);\n+\tif (ret)\n+\t\treturn ret;\n+\tmemcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);\n+\treturn 0;\n+}\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_flow_os.c b/drivers/net/mlx5/freebsd/mlx5_flow_os.c\nnew file mode 100644\nindex 0000000000..893f00b824\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_flow_os.c\n@@ -0,0 +1,38 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2020 Mellanox Technologies, Ltd\n+ */\n+\n+#include \"mlx5_flow_os.h\"\n+\n+#include <rte_thread.h>\n+\n+/* Key of thread specific flow workspace data. 
*/\n+static rte_thread_key key_workspace;\n+\n+int\n+mlx5_flow_os_init_workspace_once(void)\n+{\n+\tif (rte_thread_key_create(&key_workspace, flow_release_workspace)) {\n+\t\tDRV_LOG(ERR, \"Can't create flow workspace data thread key.\");\n+\t\treturn -ENOMEM;\n+\t}\n+\treturn 0;\n+}\n+\n+void *\n+mlx5_flow_os_get_specific_workspace(void)\n+{\n+\treturn rte_thread_value_get(key_workspace);\n+}\n+\n+int\n+mlx5_flow_os_set_specific_workspace(struct mlx5_flow_workspace *data)\n+{\n+\treturn rte_thread_value_set(key_workspace, data);\n+}\n+\n+void\n+mlx5_flow_os_release_workspace(void)\n+{\n+\trte_thread_key_delete(key_workspace);\n+}\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_flow_os.h b/drivers/net/mlx5/freebsd/mlx5_flow_os.h\nnew file mode 100644\nindex 0000000000..1926d26410\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_flow_os.h\n@@ -0,0 +1,484 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2020 Mellanox Technologies, Ltd\n+ */\n+\n+#ifndef RTE_PMD_MLX5_FLOW_OS_H_\n+#define RTE_PMD_MLX5_FLOW_OS_H_\n+\n+#include \"mlx5_flow.h\"\n+\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;\n+#endif\n+\n+/**\n+ * Get OS enforced flow type. 
MLX5_FLOW_TYPE_MAX means \"non enforced type\".\n+ *\n+ * @return\n+ *   Flow type (MLX5_FLOW_TYPE_MAX)\n+ */\n+static inline enum mlx5_flow_drv_type\n+mlx5_flow_os_get_type(void)\n+{\n+\treturn MLX5_FLOW_TYPE_MAX;\n+}\n+\n+/**\n+ * Check if item type is supported.\n+ *\n+ * @param item\n+ *   Item type to check.\n+ *\n+ * @return\n+ *   True is this item type is supported, false if not supported.\n+ */\n+static inline bool\n+mlx5_flow_os_item_supported(int item __rte_unused)\n+{\n+\treturn true;\n+}\n+\n+/**\n+ * Check if action type is supported.\n+ *\n+ * @param action\n+ *   Action type to check.\n+ *\n+ * @return\n+ *   True is this action type is supported, false if not supported.\n+ */\n+static inline bool\n+mlx5_flow_os_action_supported(int action __rte_unused)\n+{\n+\treturn true;\n+}\n+\n+/**\n+ * Create flow rule.\n+ *\n+ * @param[in] matcher\n+ *   Pointer to match mask structure.\n+ * @param[in] match_value\n+ *   Pointer to match value structure.\n+ * @param[in] num_actions\n+ *   Number of actions in flow rule.\n+ * @param[in] actions\n+ *   Pointer to array of flow rule actions.\n+ * @param[out] flow\n+ *   Pointer to a valid flow rule object on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow(void *matcher, void *match_value,\n+\t\t\t size_t num_actions, void *actions[], void **flow)\n+{\n+\t*flow = mlx5_glue->dv_create_flow(matcher, match_value,\n+\t\t\t\t\t  num_actions, actions);\n+\treturn (*flow) ? 
0 : -1;\n+}\n+\n+/**\n+ * Destroy flow rule.\n+ *\n+ * @param[in] drv_flow_ptr\n+ *   Pointer to flow rule object.\n+ *\n+ * @return\n+ *   0 on success, or the value of errno on failure.\n+ */\n+static inline int\n+mlx5_flow_os_destroy_flow(void *drv_flow_ptr)\n+{\n+\treturn mlx5_glue->dv_destroy_flow(drv_flow_ptr);\n+}\n+\n+/**\n+ * Create flow table.\n+ *\n+ * @param[in] domain\n+ *   Pointer to relevant domain.\n+ * @param[in] table_id\n+ *   Table ID.\n+ * @param[out] table\n+ *   Pointer to a valid flow table object on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_tbl(void *domain, uint32_t table_id, void **table)\n+{\n+\t*table = mlx5_glue->dr_create_flow_tbl(domain, table_id);\n+\treturn (*table) ? 0 : -1;\n+}\n+\n+/**\n+ * Destroy flow table.\n+ *\n+ * @param[in] table\n+ *   Pointer to table object to destroy.\n+ *\n+ * @return\n+ *   0 on success, or the value of errno on failure.\n+ */\n+static inline int\n+mlx5_flow_os_destroy_flow_tbl(void *table)\n+{\n+\treturn mlx5_glue->dr_destroy_flow_tbl(table);\n+}\n+\n+/**\n+ * Create flow matcher in a flow table.\n+ *\n+ * @param[in] ctx\n+ *   Pointer to relevant device context.\n+ * @param[in] attr\n+ *   Pointer to relevant attributes.\n+ * @param[in] table\n+ *   Pointer to table object.\n+ * @param[out] matcher\n+ *   Pointer to a valid flow matcher object on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_matcher(void *ctx, void *attr, void *table,\n+\t\t\t\t void **matcher)\n+{\n+\t*matcher = mlx5_glue->dv_create_flow_matcher(ctx, attr, table);\n+\treturn (*matcher) ? 
0 : -1;\n+}\n+\n+/**\n+ * Destroy flow matcher.\n+ *\n+ * @param[in] matcher\n+ *   Pointer to matcher object to destroy.\n+ *\n+ * @return\n+ *   0 on success, or the value of errno on failure.\n+ */\n+static inline int\n+mlx5_flow_os_destroy_flow_matcher(void *matcher)\n+{\n+\treturn mlx5_glue->dv_destroy_flow_matcher(matcher);\n+}\n+\n+/**\n+ * Create flow action: packet reformat.\n+ *\n+ * @param[in] ctx\n+ *   Pointer to relevant device context.\n+ * @param[in] domain\n+ *   Pointer to domain handler.\n+ * @param[in] resource\n+ *   Pointer to action data resource.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_packet_reformat(void *ctx, void *domain,\n+\t\t\t\t\t\tvoid *resource, void **action)\n+{\n+\tstruct mlx5_flow_dv_encap_decap_resource *res =\n+\t\t\t(struct mlx5_flow_dv_encap_decap_resource *)resource;\n+\n+\t*action = mlx5_glue->dv_create_flow_action_packet_reformat\n+\t\t\t\t\t(ctx, res->reformat_type, res->ft_type,\n+\t\t\t\t\t domain, res->flags, res->size,\n+\t\t\t\t\t (res->size ? res->buf : NULL));\n+\treturn (*action) ? 
0 : -1;\n+}\n+\n+/**\n+ * Create flow action: modify header.\n+ *\n+ * @param[in] ctx\n+ *   Pointer to relevant device context.\n+ * @param[in] domain\n+ *   Pointer to domain handler.\n+ * @param[in] resource\n+ *   Pointer to action data resource.\n+ * @param[in] actions_len\n+ *   Total length of actions data in resource.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_modify_header(void *ctx, void *domain,\n+\t\t\t\t\t      void *resource,\n+\t\t\t\t\t      uint32_t actions_len,\n+\t\t\t\t\t      void **action)\n+{\n+\tstruct mlx5_flow_dv_modify_hdr_resource *res =\n+\t\t\t(struct mlx5_flow_dv_modify_hdr_resource *)resource;\n+\n+\t*action = mlx5_glue->dv_create_flow_action_modify_header\n+\t\t\t\t\t(ctx, res->ft_type, domain, res->root ?\n+\t\t\t\t\t MLX5DV_DR_ACTION_FLAGS_ROOT_LEVEL : 0,\n+\t\t\t\t\t actions_len, (uint64_t *)res->actions);\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: destination flow table.\n+ *\n+ * @param[in] tbl_obj\n+ *   Pointer to destination table object.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_dest_flow_tbl(void *tbl_obj, void **action)\n+{\n+\t*action = mlx5_glue->dr_create_flow_action_dest_flow_tbl(tbl_obj);\n+\treturn (*action) ? 
0 : -1;\n+}\n+\n+/**\n+ * Create flow action: destination port.\n+ *\n+ * @param[in] domain\n+ *   Pointer to domain handler.\n+ * @param[in] port_id\n+ *   Destination port ID.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_dest_port(void *domain, uint32_t port_id,\n+\t\t\t\t\t  void **action)\n+{\n+\t/*\n+\t * Depending on rdma_core version the glue routine calls\n+\t * either mlx5dv_dr_action_create_dest_ib_port(domain, dev_port)\n+\t * or mlx5dv_dr_action_create_dest_vport(domain, vport_id).\n+\t */\n+\t*action = mlx5_glue->dr_create_flow_action_dest_port(domain, port_id);\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: push vlan.\n+ *\n+ * @param[in] domain\n+ *   Pointer to domain handler.\n+ * @param[in] vlan_tag\n+ *   VLAN tag value.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_push_vlan(void *domain, rte_be32_t vlan_tag,\n+\t\t\t\t\t  void **action)\n+{\n+\t*action = mlx5_glue->dr_create_flow_action_push_vlan(domain, vlan_tag);\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: count.\n+ *\n+ * @param[in] cnt_obj\n+ *   Pointer to DevX counter object.\n+ * @param[in] offset\n+ *   Offset of counter in array.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_count(void *cnt_obj, uint16_t offset,\n+\t\t\t\t      void **action)\n+{\n+\t*action = mlx5_glue->dv_create_flow_action_counter(cnt_obj, offset);\n+\treturn (*action) ? 
0 : -1;\n+}\n+\n+/**\n+ * Create flow action: tag.\n+ *\n+ * @param[in] tag\n+ *   Tag value.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_tag(uint32_t tag, void **action)\n+{\n+\t*action = mlx5_glue->dv_create_flow_action_tag(tag);\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: drop.\n+ *\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_drop(void **action)\n+{\n+\t*action = mlx5_glue->dr_create_flow_action_drop();\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: default miss.\n+ *\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_default_miss(void **action)\n+{\n+\t*action = mlx5_glue->dr_create_flow_action_default_miss();\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: dest_devx_tir\n+ *\n+ * @param[in] tir\n+ *   Pointer to DevX tir object\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_flow_os_create_flow_action_dest_devx_tir(struct mlx5_devx_obj *tir,\n+\t\t\t\t\t      void **action)\n+{\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\t*action = mlx5_glue->dv_create_flow_action_dest_devx_tir(tir->obj);\n+\treturn (*action) ? 
0 : -1;\n+#else\n+\t/* If no DV support - skip the operation and return success */\n+\tRTE_SET_USED(tir);\n+\t*action = 0;\n+\treturn 0;\n+#endif\n+}\n+\n+/**\n+ * Create flow action: sampler\n+ *\n+ * @param[in] attr\n+ *   Pointer to sampler attribute\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_os_flow_dr_create_flow_action_sampler\n+\t\t\t(struct mlx5dv_dr_flow_sampler_attr *attr,\n+\t\t\tvoid **action)\n+{\n+\t*action = mlx5_glue->dr_create_flow_action_sampler(attr);\n+\treturn (*action) ? 0 : -1;\n+}\n+\n+/**\n+ * Create flow action: dest_array\n+ *\n+ * @param[in] domain\n+ *   Pointer to relevant domain.\n+ * @param[in] num_dest\n+ *   Number of destinations array.\n+ * @param[in] dests\n+ *   Array of destination attributes.\n+ * @param[out] action\n+ *   Pointer to a valid action on success, NULL otherwise.\n+ *\n+ * @return\n+ *   0 on success, or -1 on failure and errno is set.\n+ */\n+static inline int\n+mlx5_os_flow_dr_create_flow_action_dest_array\n+\t\t\t(void *domain,\n+\t\t\t size_t num_dest,\n+\t\t\t struct mlx5dv_dr_action_dest_attr *dests[],\n+\t\t\t void **action)\n+{\n+\t*action = mlx5_glue->dr_create_flow_action_dest_array(\n+\t\t\t\t\t\tdomain, num_dest, dests);\n+\treturn (*action) ? 
0 : -1;\n+}\n+\n+/**\n+ * Destroy flow action.\n+ *\n+ * @param[in] action\n+ *   Pointer to action object to destroy.\n+ *\n+ * @return\n+ *   0 on success, or the value of errno on failure.\n+ */\n+static inline int\n+mlx5_flow_os_destroy_flow_action(void *action)\n+{\n+\treturn mlx5_glue->destroy_flow_action(action);\n+}\n+\n+/**\n+ * OS wrapper over Verbs API.\n+ * Adjust flow priority based on the highest layer and the request priority.\n+ *\n+ * @param[in] dev\n+ *    Pointer to the Ethernet device structure.\n+ * @param[in] priority\n+ *    The rule base priority.\n+ * @param[in] subpriority\n+ *    The priority based on the items.\n+ *\n+ * @return\n+ *    The new priority.\n+ */\n+static inline uint32_t\n+mlx5_os_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,\n+\t\t\t  uint32_t subpriority)\n+{\n+\treturn mlx5_flow_adjust_priority(dev, priority, subpriority);\n+}\n+\n+static inline int\n+mlx5_os_flow_dr_sync_domain(void *domain, uint32_t flags)\n+{\n+\treturn mlx5_glue->dr_sync_domain(domain, flags);\n+}\n+#endif /* RTE_PMD_MLX5_FLOW_OS_H_ */\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_mp_os.c b/drivers/net/mlx5/freebsd/mlx5_mp_os.c\nnew file mode 100644\nindex 0000000000..3a4aa766f8\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_mp_os.c\n@@ -0,0 +1,305 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2019 6WIND S.A.\n+ * Copyright 2019 Mellanox Technologies, Ltd\n+ */\n+\n+#include <stdio.h>\n+#include <time.h>\n+\n+#include <rte_eal.h>\n+#include <ethdev_driver.h>\n+#include <rte_string_fns.h>\n+\n+#include <mlx5_common_mp.h>\n+#include <mlx5_common_mr.h>\n+#include <mlx5_malloc.h>\n+\n+#include \"mlx5.h\"\n+#include \"mlx5_rxtx.h\"\n+#include \"mlx5_rx.h\"\n+#include \"mlx5_tx.h\"\n+#include \"mlx5_utils.h\"\n+\n+int\n+mlx5_mp_os_primary_handle(const struct rte_mp_msg *mp_msg, const void *peer)\n+{\n+\tstruct rte_mp_msg mp_res;\n+\tstruct mlx5_mp_param *res = (struct mlx5_mp_param *)mp_res.param;\n+\tconst struct 
mlx5_mp_param *param =\n+\t\t(const struct mlx5_mp_param *)mp_msg->param;\n+\tstruct rte_eth_dev *dev;\n+\tstruct mlx5_priv *priv;\n+\tstruct mr_cache_entry entry;\n+\tuint32_t lkey;\n+\tint ret;\n+\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);\n+\tif (!rte_eth_dev_is_valid_port(param->port_id)) {\n+\t\trte_errno = ENODEV;\n+\t\tDRV_LOG(ERR, \"port %u invalid port ID\", param->port_id);\n+\t\treturn -rte_errno;\n+\t}\n+\tdev = &rte_eth_devices[param->port_id];\n+\tpriv = dev->data->dev_private;\n+\tswitch (param->type) {\n+\tcase MLX5_MP_REQ_CREATE_MR:\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tlkey = mlx5_mr_create_primary(priv->sh->pd,\n+\t\t\t\t\t      &priv->sh->share_cache,\n+\t\t\t\t\t      &entry, param->args.addr,\n+\t\t\t\t\t      priv->config.mr_ext_memseg_en);\n+\t\tif (lkey == UINT32_MAX)\n+\t\t\tres->result = -rte_errno;\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_VERBS_CMD_FD:\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tmp_res.num_fds = 1;\n+\t\tmp_res.fds[0] = ((struct ibv_context *)priv->sh->ctx)->cmd_fd;\n+\t\tres->result = 0;\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_QUEUE_STATE_MODIFY:\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tres->result = mlx5_queue_state_modify_primary\n+\t\t\t\t\t(dev, &param->args.state_modify);\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_QUEUE_RX_STOP:\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tres->result = mlx5_rx_queue_stop_primary\n+\t\t\t\t\t(dev, param->args.queue_id.queue_id);\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_QUEUE_RX_START:\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tres->result = mlx5_rx_queue_start_primary\n+\t\t\t\t\t(dev, param->args.queue_id.queue_id);\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_QUEUE_TX_STOP:\n+\t\tmp_init_msg(&priv->mp_id, 
&mp_res, param->type);\n+\t\tres->result = mlx5_tx_queue_stop_primary\n+\t\t\t\t\t(dev, param->args.queue_id.queue_id);\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_QUEUE_TX_START:\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tres->result = mlx5_tx_queue_start_primary\n+\t\t\t\t\t(dev, param->args.queue_id.queue_id);\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tdefault:\n+\t\trte_errno = EINVAL;\n+\t\tDRV_LOG(ERR, \"port %u invalid mp request type\",\n+\t\t\tdev->data->port_id);\n+\t\treturn -rte_errno;\n+\t}\n+\treturn ret;\n+}\n+\n+/**\n+ * IPC message handler of a secondary process.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet structure.\n+ * @param[in] peer\n+ *   Pointer to the peer socket path.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_mp_os_secondary_handle(const struct rte_mp_msg *mp_msg, const void *peer)\n+{\n+struct rte_mp_msg mp_res;\n+\tstruct mlx5_mp_param *res = (struct mlx5_mp_param *)mp_res.param;\n+\tconst struct mlx5_mp_param *param =\n+\t\t(const struct mlx5_mp_param *)mp_msg->param;\n+\tstruct rte_eth_dev *dev;\n+\tstruct mlx5_proc_priv *ppriv;\n+\tstruct mlx5_priv *priv;\n+\tint ret;\n+\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);\n+\tif (!rte_eth_dev_is_valid_port(param->port_id)) {\n+\t\trte_errno = ENODEV;\n+\t\tDRV_LOG(ERR, \"port %u invalid port ID\", param->port_id);\n+\t\treturn -rte_errno;\n+\t}\n+\tdev = &rte_eth_devices[param->port_id];\n+\tpriv = dev->data->dev_private;\n+\tswitch (param->type) {\n+\tcase MLX5_MP_REQ_START_RXTX:\n+\t\tDRV_LOG(INFO, \"port %u starting datapath\", dev->data->port_id);\n+\t\tdev->rx_pkt_burst = mlx5_select_rx_function(dev);\n+\t\tdev->tx_pkt_burst = mlx5_select_tx_function(dev);\n+\t\tppriv = (struct mlx5_proc_priv *)dev->process_private;\n+\t\t/* If Tx queue number changes, re-initialize UAR. 
*/\n+\t\tif (ppriv->uar_table_sz != priv->txqs_n) {\n+\t\t\tmlx5_tx_uar_uninit_secondary(dev);\n+\t\t\tmlx5_proc_priv_uninit(dev);\n+\t\t\tret = mlx5_proc_priv_init(dev);\n+\t\t\tif (ret)\n+\t\t\t\treturn -rte_errno;\n+\t\t\tret = mlx5_tx_uar_init_secondary(dev, mp_msg->fds[0]);\n+\t\t\tif (ret) {\n+\t\t\t\tmlx5_proc_priv_uninit(dev);\n+\t\t\t\treturn -rte_errno;\n+\t\t\t}\n+\t\t}\n+\t\trte_mb();\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tres->result = 0;\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tcase MLX5_MP_REQ_STOP_RXTX:\n+\t\tDRV_LOG(INFO, \"port %u stopping datapath\", dev->data->port_id);\n+\t\tdev->rx_pkt_burst = removed_rx_burst;\n+\t\tdev->tx_pkt_burst = removed_tx_burst;\n+\t\trte_mb();\n+\t\tmp_init_msg(&priv->mp_id, &mp_res, param->type);\n+\t\tres->result = 0;\n+\t\tret = rte_mp_reply(&mp_res, peer);\n+\t\tbreak;\n+\tdefault:\n+\t\trte_errno = EINVAL;\n+\t\tDRV_LOG(ERR, \"port %u invalid mp request type\",\n+\t\t\tdev->data->port_id);\n+\t\treturn -rte_errno;\n+\t}\n+\treturn ret;\n+}\n+\n+/**\n+ * Broadcast request of stopping/starting data-path to secondary processes.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet structure.\n+ * @param[in] type\n+ *   Request type.\n+ */\n+static void\n+mp_req_on_rxtx(struct rte_eth_dev *dev, enum mlx5_mp_req_type type)\n+{\n+\tstruct rte_mp_msg mp_req;\n+\tstruct rte_mp_msg *mp_res;\n+\tstruct rte_mp_reply mp_rep;\n+\tstruct mlx5_mp_param *res;\n+\tstruct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tint ret;\n+\tint i;\n+\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);\n+\tif (!mlx5_shared_data->secondary_cnt)\n+\t\treturn;\n+\tif (type != MLX5_MP_REQ_START_RXTX && type != MLX5_MP_REQ_STOP_RXTX) {\n+\t\tDRV_LOG(ERR, \"port %u unknown request (req_type %d)\",\n+\t\t\tdev->data->port_id, type);\n+\t\treturn;\n+\t}\n+\tmp_init_msg(&priv->mp_id, &mp_req, type);\n+\tif (type == 
MLX5_MP_REQ_START_RXTX) {\n+\t\tmp_req.num_fds = 1;\n+\t\tmp_req.fds[0] = ((struct ibv_context *)priv->sh->ctx)->cmd_fd;\n+\t}\n+\tret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);\n+\tif (ret) {\n+\t\tif (rte_errno != ENOTSUP)\n+\t\t\tDRV_LOG(ERR, \"port %u failed to request stop/start Rx/Tx (%d)\",\n+\t\t\t\tdev->data->port_id, type);\n+\t\tgoto exit;\n+\t}\n+\tif (mp_rep.nb_sent != mp_rep.nb_received) {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"port %u not all secondaries responded (req_type %d)\",\n+\t\t\tdev->data->port_id, type);\n+\t\tgoto exit;\n+\t}\n+\tfor (i = 0; i < mp_rep.nb_received; i++) {\n+\t\tmp_res = &mp_rep.msgs[i];\n+\t\tres = (struct mlx5_mp_param *)mp_res->param;\n+\t\tif (res->result) {\n+\t\t\tDRV_LOG(ERR, \"port %u request failed on secondary #%d\",\n+\t\t\t\tdev->data->port_id, i);\n+\t\t\tgoto exit;\n+\t\t}\n+\t}\n+exit:\n+\tmlx5_free(mp_rep.msgs);\n+}\n+\n+/**\n+ * Broadcast request of starting data-path to secondary processes. The request\n+ * is synchronous.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet structure.\n+ */\n+void\n+mlx5_mp_os_req_start_rxtx(struct rte_eth_dev *dev)\n+{\n+\tmp_req_on_rxtx(dev, MLX5_MP_REQ_START_RXTX);\n+}\n+\n+/**\n+ * Broadcast request of stopping data-path to secondary processes. 
The request\n+ * is synchronous.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet structure.\n+ */\n+void\n+mlx5_mp_os_req_stop_rxtx(struct rte_eth_dev *dev)\n+{\n+\tmp_req_on_rxtx(dev, MLX5_MP_REQ_STOP_RXTX);\n+}\n+\n+/**\n+ * Request Verbs Rx/Tx queue stop or start to the primary process.\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet structure.\n+ * @param queue_id\n+ *   Queue ID to control.\n+ * @param req_type\n+ *   request type\n+ *     MLX5_MP_REQ_QUEUE_RX_START - start Rx queue\n+ *     MLX5_MP_REQ_QUEUE_TX_START - stop Tx queue\n+ *     MLX5_MP_REQ_QUEUE_RX_STOP - stop Rx queue\n+ *     MLX5_MP_REQ_QUEUE_TX_STOP - stop Tx queue\n+ * @return\n+ *   0 on success, a negative errno value otherwise and\n+ *     rte_errno is set.\n+ */\n+int\n+mlx5_mp_os_req_queue_control(struct rte_eth_dev *dev, uint16_t queue_id,\n+\t\t\t  enum mlx5_mp_req_type req_type)\n+{\n+\tstruct rte_mp_msg mp_req;\n+\tstruct rte_mp_msg *mp_res;\n+\tstruct rte_mp_reply mp_rep;\n+\tstruct mlx5_mp_param *req = (struct mlx5_mp_param *)mp_req.param;\n+\tstruct mlx5_mp_param *res;\n+\tstruct timespec ts = {.tv_sec = MLX5_MP_REQ_TIMEOUT_SEC, .tv_nsec = 0};\n+\tstruct mlx5_priv *priv;\n+\tint ret;\n+\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_SECONDARY);\n+\tpriv = dev->data->dev_private;\n+\tmp_init_msg(&priv->mp_id, &mp_req, req_type);\n+\treq->args.queue_id.queue_id = queue_id;\n+\tret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);\n+\tif (ret) {\n+\t\tDRV_LOG(ERR, \"port %u request to primary process failed\",\n+\t\t\tdev->data->port_id);\n+\t\treturn -rte_errno;\n+\t}\n+\tMLX5_ASSERT(mp_rep.nb_received == 1);\n+\tmp_res = &mp_rep.msgs[0];\n+\tres = (struct mlx5_mp_param *)mp_res->param;\n+\tret = res->result;\n+\tfree(mp_rep.msgs);\n+\treturn ret;\n+}\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_os.c b/drivers/net/mlx5/freebsd/mlx5_os.c\nnew file mode 100644\nindex 0000000000..3746057673\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_os.c\n@@ -0,0 +1,3208 @@\n+/* 
SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2015 6WIND S.A.\n+ * Copyright 2020 Mellanox Technologies, Ltd\n+ */\n+\n+#include <stddef.h>\n+#include <unistd.h>\n+#include <string.h>\n+#include <stdint.h>\n+#include <stdlib.h>\n+#include <errno.h>\n+#include <net/if.h>\n+#include <linux/rtnetlink.h>\n+#include <linux/sockios.h>\n+#include <linux/ethtool.h>\n+#include <fcntl.h>\n+\n+#include <rte_malloc.h>\n+#include <ethdev_driver.h>\n+#include <ethdev_pci.h>\n+#include <rte_pci.h>\n+#include <rte_bus_pci.h>\n+#include <rte_bus_auxiliary.h>\n+#include <rte_common.h>\n+#include <rte_kvargs.h>\n+#include <rte_rwlock.h>\n+#include <rte_spinlock.h>\n+#include <rte_string_fns.h>\n+#include <rte_alarm.h>\n+#include <rte_eal_paging.h>\n+\n+#include <mlx5_glue.h>\n+#include <mlx5_devx_cmds.h>\n+#include <mlx5_common.h>\n+#include <mlx5_common_mp.h>\n+#include <mlx5_common_mr.h>\n+#include <mlx5_malloc.h>\n+\n+#include \"mlx5_defs.h\"\n+#include \"mlx5.h\"\n+#include \"mlx5_common_os.h\"\n+#include \"mlx5_utils.h\"\n+#include \"mlx5_rxtx.h\"\n+#include \"mlx5_rx.h\"\n+#include \"mlx5_tx.h\"\n+#include \"mlx5_autoconf.h\"\n+#include \"mlx5_mr.h\"\n+#include \"mlx5_flow.h\"\n+#include \"rte_pmd_mlx5.h\"\n+#include \"mlx5_verbs.h\"\n+#include \"mlx5_nl.h\"\n+#include \"mlx5_devx.h\"\n+\n+#ifndef HAVE_IBV_MLX5_MOD_MPW\n+#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)\n+#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)\n+#endif\n+\n+#ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP\n+#define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4)\n+#endif\n+\n+static const char *MZ_MLX5_PMD_SHARED_DATA = \"mlx5_pmd_shared_data\";\n+\n+/* Spinlock for mlx5_shared_data allocation. */\n+static rte_spinlock_t mlx5_shared_data_lock = RTE_SPINLOCK_INITIALIZER;\n+\n+/* Process local data for secondary processes. */\n+static struct mlx5_local_data mlx5_local_data;\n+\n+/* rte flow indexed pool configuration. 
*/\n+static struct mlx5_indexed_pool_config icfg[] = {\n+\t{\n+\t\t.size = sizeof(struct rte_flow),\n+\t\t.trunk_size = 64,\n+\t\t.need_lock = 1,\n+\t\t.release_mem_en = 0,\n+\t\t.malloc = mlx5_malloc,\n+\t\t.free = mlx5_free,\n+\t\t.per_core_cache = 0,\n+\t\t.type = \"ctl_flow_ipool\",\n+\t},\n+\t{\n+\t\t.size = sizeof(struct rte_flow),\n+\t\t.trunk_size = 64,\n+\t\t.grow_trunk = 3,\n+\t\t.grow_shift = 2,\n+\t\t.need_lock = 1,\n+\t\t.release_mem_en = 0,\n+\t\t.malloc = mlx5_malloc,\n+\t\t.free = mlx5_free,\n+\t\t.per_core_cache = 1 << 14,\n+\t\t.type = \"rte_flow_ipool\",\n+\t},\n+\t{\n+\t\t.size = sizeof(struct rte_flow),\n+\t\t.trunk_size = 64,\n+\t\t.grow_trunk = 3,\n+\t\t.grow_shift = 2,\n+\t\t.need_lock = 1,\n+\t\t.release_mem_en = 0,\n+\t\t.malloc = mlx5_malloc,\n+\t\t.free = mlx5_free,\n+\t\t.per_core_cache = 0,\n+\t\t.type = \"mcp_flow_ipool\",\n+\t},\n+};\n+\n+/**\n+ * Set the completion channel file descriptor interrupt as non-blocking.\n+ *\n+ * @param[in] rxq_obj\n+ *   Pointer to RQ channel object, which includes the channel fd\n+ *\n+ * @param[out] fd\n+ *   The file descriptor (representing the intetrrupt) used in this channel.\n+ *\n+ * @return\n+ *   0 on successfully setting the fd to non-blocking, non-zero otherwise.\n+ */\n+int\n+mlx5_os_set_nonblock_channel_fd(int fd)\n+{\n+\tint flags;\n+\n+\tflags = fcntl(fd, F_GETFL);\n+\treturn fcntl(fd, F_SETFL, flags | O_NONBLOCK);\n+}\n+\n+/**\n+ * Get mlx5 device attributes. The glue function query_device_ex() is called\n+ * with out parameter of type 'struct ibv_device_attr_ex *'. 
Then fill in mlx5\n+ * device attributes from the glue out parameter.\n+ *\n+ * @param dev\n+ *   Pointer to ibv context.\n+ *\n+ * @param device_attr\n+ *   Pointer to mlx5 device attributes.\n+ *\n+ * @return\n+ *   0 on success, non zero error number otherwise\n+ */\n+int\n+mlx5_os_get_dev_attr(void *ctx, struct mlx5_dev_attr *device_attr)\n+{\n+\tint err;\n+\tstruct ibv_device_attr_ex attr_ex;\n+\tmemset(device_attr, 0, sizeof(*device_attr));\n+\terr = mlx5_glue->query_device_ex(ctx, NULL, &attr_ex);\n+\tif (err)\n+\t\treturn err;\n+\n+\tdevice_attr->device_cap_flags_ex = attr_ex.device_cap_flags_ex;\n+\tdevice_attr->max_qp_wr = attr_ex.orig_attr.max_qp_wr;\n+\tdevice_attr->max_sge = attr_ex.orig_attr.max_sge;\n+\tdevice_attr->max_cq = attr_ex.orig_attr.max_cq;\n+\tdevice_attr->max_cqe = attr_ex.orig_attr.max_cqe;\n+\tdevice_attr->max_mr = attr_ex.orig_attr.max_mr;\n+\tdevice_attr->max_pd = attr_ex.orig_attr.max_pd;\n+\tdevice_attr->max_qp = attr_ex.orig_attr.max_qp;\n+\tdevice_attr->max_srq = attr_ex.orig_attr.max_srq;\n+\tdevice_attr->max_srq_wr = attr_ex.orig_attr.max_srq_wr;\n+\tdevice_attr->raw_packet_caps = attr_ex.raw_packet_caps;\n+\tdevice_attr->max_rwq_indirection_table_size =\n+\t\tattr_ex.rss_caps.max_rwq_indirection_table_size;\n+\tdevice_attr->max_tso = attr_ex.tso_caps.max_tso;\n+\tdevice_attr->tso_supported_qpts = attr_ex.tso_caps.supported_qpts;\n+\n+\tstruct mlx5dv_context dv_attr = { .comp_mask = 0 };\n+\terr = mlx5_glue->dv_query_device(ctx, &dv_attr);\n+\tif (err)\n+\t\treturn err;\n+\n+\tdevice_attr->flags = dv_attr.flags;\n+\tdevice_attr->comp_mask = dv_attr.comp_mask;\n+#ifdef HAVE_IBV_MLX5_MOD_SWP\n+\tdevice_attr->sw_parsing_offloads =\n+\t\tdv_attr.sw_parsing_caps.sw_parsing_offloads;\n+#endif\n+\tdevice_attr->min_single_stride_log_num_of_bytes =\n+\t\tdv_attr.striding_rq_caps.min_single_stride_log_num_of_bytes;\n+\tdevice_attr->max_single_stride_log_num_of_bytes 
=\n+\t\tdv_attr.striding_rq_caps.max_single_stride_log_num_of_bytes;\n+\tdevice_attr->min_single_wqe_log_num_of_strides =\n+\t\tdv_attr.striding_rq_caps.min_single_wqe_log_num_of_strides;\n+\tdevice_attr->max_single_wqe_log_num_of_strides =\n+\t\tdv_attr.striding_rq_caps.max_single_wqe_log_num_of_strides;\n+\tdevice_attr->stride_supported_qpts =\n+\t\tdv_attr.striding_rq_caps.supported_qpts;\n+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT\n+\tdevice_attr->tunnel_offloads_caps = dv_attr.tunnel_offloads_caps;\n+#endif\n+\tstrlcpy(device_attr->fw_ver, attr_ex.orig_attr.fw_ver,\n+\t\tsizeof(device_attr->fw_ver));\n+\n+\treturn err;\n+}\n+\n+/**\n+ * Verbs callback to allocate a memory. This function should allocate the space\n+ * according to the size provided residing inside a huge page.\n+ * Please note that all allocation must respect the alignment from libmlx5\n+ * (i.e. currently rte_mem_page_size()).\n+ *\n+ * @param[in] size\n+ *   The size in bytes of the memory to allocate.\n+ * @param[in] data\n+ *   A pointer to the callback data.\n+ *\n+ * @return\n+ *   Allocated buffer, NULL otherwise and rte_errno is set.\n+ */\n+static void *\n+mlx5_alloc_verbs_buf(size_t size, void *data)\n+{\n+\tstruct mlx5_dev_ctx_shared *sh = data;\n+\tvoid *ret;\n+\tsize_t alignment = rte_mem_page_size();\n+\tif (alignment == (size_t)-1) {\n+\t\tDRV_LOG(ERR, \"Failed to get mem page size\");\n+\t\trte_errno = ENOMEM;\n+\t\treturn NULL;\n+\t}\n+\n+\tMLX5_ASSERT(data != NULL);\n+\tret = mlx5_malloc(0, size, alignment, sh->numa_node);\n+\tif (!ret && size)\n+\t\trte_errno = ENOMEM;\n+\treturn ret;\n+}\n+\n+/**\n+ * Detect misc5 support or not\n+ *\n+ * @param[in] priv\n+ *   Device private data pointer\n+ */\n+#ifdef HAVE_MLX5DV_DR\n+static void\n+__mlx5_discovery_misc5_cap(struct mlx5_priv *priv)\n+{\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\t/* Dummy VxLAN matcher to detect rdma-core misc5 cap\n+\t * Case: IPv4--->UDP--->VxLAN--->vni\n+\t */\n+\tvoid *tbl;\n+\tstruct mlx5_flow_dv_match_params 
matcher_mask;\n+\tvoid *match_m;\n+\tvoid *matcher;\n+\tvoid *headers_m;\n+\tvoid *misc5_m;\n+\tuint32_t *tunnel_header_m;\n+\tstruct mlx5dv_flow_matcher_attr dv_attr;\n+\n+\tmemset(&matcher_mask, 0, sizeof(matcher_mask));\n+\tmatcher_mask.size = sizeof(matcher_mask.buf);\n+\tmatch_m = matcher_mask.buf;\n+\theaders_m = MLX5_ADDR_OF(fte_match_param, match_m, outer_headers);\n+\tmisc5_m = MLX5_ADDR_OF(fte_match_param,\n+\t\t\t       match_m, misc_parameters_5);\n+\ttunnel_header_m = (uint32_t *)\n+\t\t\t\tMLX5_ADDR_OF(fte_match_set_misc5,\n+\t\t\t\tmisc5_m, tunnel_header_1);\n+\tMLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);\n+\tMLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 4);\n+\tMLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xffff);\n+\t*tunnel_header_m = 0xffffff;\n+\n+\ttbl = mlx5_glue->dr_create_flow_tbl(priv->sh->rx_domain, 1);\n+\tif (!tbl) {\n+\t\tDRV_LOG(INFO, \"No SW steering support\");\n+\t\treturn;\n+\t}\n+\tdv_attr.type = IBV_FLOW_ATTR_NORMAL,\n+\tdv_attr.match_mask = (void *)&matcher_mask,\n+\tdv_attr.match_criteria_enable =\n+\t\t\t(1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT) |\n+\t\t\t(1 << MLX5_MATCH_CRITERIA_ENABLE_MISC5_BIT);\n+\tdv_attr.priority = 3;\n+#ifdef HAVE_MLX5DV_DR_ESWITCH\n+\tvoid *misc2_m;\n+\tif (priv->config.dv_esw_en) {\n+\t\t/* FDB enabled reg_c_0 */\n+\t\tdv_attr.match_criteria_enable |=\n+\t\t\t\t(1 << MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT);\n+\t\tmisc2_m = MLX5_ADDR_OF(fte_match_param,\n+\t\t\t\t       match_m, misc_parameters_2);\n+\t\tMLX5_SET(fte_match_set_misc2, misc2_m,\n+\t\t\t metadata_reg_c_0, 0xffff);\n+\t}\n+#endif\n+\tmatcher = mlx5_glue->dv_create_flow_matcher(priv->sh->ctx,\n+\t\t\t\t\t\t    &dv_attr, tbl);\n+\tif (matcher) {\n+\t\tpriv->sh->misc5_cap = 1;\n+\t\tmlx5_glue->dv_destroy_flow_matcher(matcher);\n+\t}\n+\tmlx5_glue->dr_destroy_flow_tbl(tbl);\n+#else\n+\tRTE_SET_USED(priv);\n+#endif\n+}\n+#endif\n+\n+/**\n+ * Verbs callback to free a memory.\n+ *\n+ * @param[in] ptr\n+ *   A 
pointer to the memory to free.\n+ * @param[in] data\n+ *   A pointer to the callback data.\n+ */\n+static void\n+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)\n+{\n+\tMLX5_ASSERT(data != NULL);\n+\tmlx5_free(ptr);\n+}\n+\n+/**\n+ * Initialize DR related data within private structure.\n+ * Routine checks the reference counter and does actual\n+ * resources creation/initialization only if counter is zero.\n+ *\n+ * @param[in] priv\n+ *   Pointer to the private device data structure.\n+ *\n+ * @return\n+ *   Zero on success, positive error code otherwise.\n+ */\n+static int\n+mlx5_alloc_shared_dr(struct mlx5_priv *priv)\n+{\n+\tstruct mlx5_dev_ctx_shared *sh = priv->sh;\n+\tchar s[MLX5_NAME_SIZE] __rte_unused;\n+\tint err;\n+\n+\tMLX5_ASSERT(sh && sh->refcnt);\n+\tif (sh->refcnt > 1)\n+\t\treturn 0;\n+\terr = mlx5_alloc_table_hash_list(priv);\n+\tif (err)\n+\t\tgoto error;\n+\t/* The resources below are only valid with DV support. */\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\t/* Init port id action list. */\n+\tsnprintf(s, sizeof(s), \"%s_port_id_action_list\", sh->ibdev_name);\n+\tsh->port_id_action_list = mlx5_list_create(s, sh, true,\n+\t\t\t\t\t\t   flow_dv_port_id_create_cb,\n+\t\t\t\t\t\t   flow_dv_port_id_match_cb,\n+\t\t\t\t\t\t   flow_dv_port_id_remove_cb,\n+\t\t\t\t\t\t   flow_dv_port_id_clone_cb,\n+\t\t\t\t\t\t flow_dv_port_id_clone_free_cb);\n+\tif (!sh->port_id_action_list)\n+\t\tgoto error;\n+\t/* Init push vlan action list. */\n+\tsnprintf(s, sizeof(s), \"%s_push_vlan_action_list\", sh->ibdev_name);\n+\tsh->push_vlan_action_list = mlx5_list_create(s, sh, true,\n+\t\t\t\t\t\t    flow_dv_push_vlan_create_cb,\n+\t\t\t\t\t\t    flow_dv_push_vlan_match_cb,\n+\t\t\t\t\t\t    flow_dv_push_vlan_remove_cb,\n+\t\t\t\t\t\t    flow_dv_push_vlan_clone_cb,\n+\t\t\t\t\t       flow_dv_push_vlan_clone_free_cb);\n+\tif (!sh->push_vlan_action_list)\n+\t\tgoto error;\n+\t/* Init sample action list. 
*/\n+\tsnprintf(s, sizeof(s), \"%s_sample_action_list\", sh->ibdev_name);\n+\tsh->sample_action_list = mlx5_list_create(s, sh, true,\n+\t\t\t\t\t\t  flow_dv_sample_create_cb,\n+\t\t\t\t\t\t  flow_dv_sample_match_cb,\n+\t\t\t\t\t\t  flow_dv_sample_remove_cb,\n+\t\t\t\t\t\t  flow_dv_sample_clone_cb,\n+\t\t\t\t\t\t  flow_dv_sample_clone_free_cb);\n+\tif (!sh->sample_action_list)\n+\t\tgoto error;\n+\t/* Init dest array action list. */\n+\tsnprintf(s, sizeof(s), \"%s_dest_array_list\", sh->ibdev_name);\n+\tsh->dest_array_list = mlx5_list_create(s, sh, true,\n+\t\t\t\t\t       flow_dv_dest_array_create_cb,\n+\t\t\t\t\t       flow_dv_dest_array_match_cb,\n+\t\t\t\t\t       flow_dv_dest_array_remove_cb,\n+\t\t\t\t\t       flow_dv_dest_array_clone_cb,\n+\t\t\t\t\t      flow_dv_dest_array_clone_free_cb);\n+\tif (!sh->dest_array_list)\n+\t\tgoto error;\n+#endif\n+#ifdef HAVE_MLX5DV_DR\n+\tvoid *domain;\n+\n+\t/* Reference counter is zero, we should initialize structures. */\n+\tdomain = mlx5_glue->dr_create_domain(sh->ctx,\n+\t\t\t\t\t     MLX5DV_DR_DOMAIN_TYPE_NIC_RX);\n+\tif (!domain) {\n+\t\tDRV_LOG(ERR, \"ingress mlx5dv_dr_create_domain failed\");\n+\t\terr = errno;\n+\t\tgoto error;\n+\t}\n+\tsh->rx_domain = domain;\n+\tdomain = mlx5_glue->dr_create_domain(sh->ctx,\n+\t\t\t\t\t     MLX5DV_DR_DOMAIN_TYPE_NIC_TX);\n+\tif (!domain) {\n+\t\tDRV_LOG(ERR, \"egress mlx5dv_dr_create_domain failed\");\n+\t\terr = errno;\n+\t\tgoto error;\n+\t}\n+\tsh->tx_domain = domain;\n+#ifdef HAVE_MLX5DV_DR_ESWITCH\n+\tif (priv->config.dv_esw_en) {\n+\t\tdomain  = mlx5_glue->dr_create_domain\n+\t\t\t(sh->ctx, MLX5DV_DR_DOMAIN_TYPE_FDB);\n+\t\tif (!domain) {\n+\t\t\tDRV_LOG(ERR, \"FDB mlx5dv_dr_create_domain failed\");\n+\t\t\terr = errno;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tsh->fdb_domain = domain;\n+\t}\n+\t/*\n+\t * The drop action is just some dummy placeholder in rdma-core. 
It\n+\t * does not belong to domains and has no any attributes, and, can be\n+\t * shared by the entire device.\n+\t */\n+\tsh->dr_drop_action = mlx5_glue->dr_create_flow_action_drop();\n+\tif (!sh->dr_drop_action) {\n+\t\tDRV_LOG(ERR, \"FDB mlx5dv_dr_create_flow_action_drop\");\n+\t\terr = errno;\n+\t\tgoto error;\n+\t}\n+#endif\n+\tif (!sh->tunnel_hub && priv->config.dv_miss_info)\n+\t\terr = mlx5_alloc_tunnel_hub(sh);\n+\tif (err) {\n+\t\tDRV_LOG(ERR, \"mlx5_alloc_tunnel_hub failed err=%d\", err);\n+\t\tgoto error;\n+\t}\n+\tif (priv->config.reclaim_mode == MLX5_RCM_AGGR) {\n+\t\tmlx5_glue->dr_reclaim_domain_memory(sh->rx_domain, 1);\n+\t\tmlx5_glue->dr_reclaim_domain_memory(sh->tx_domain, 1);\n+\t\tif (sh->fdb_domain)\n+\t\t\tmlx5_glue->dr_reclaim_domain_memory(sh->fdb_domain, 1);\n+\t}\n+\tsh->pop_vlan_action = mlx5_glue->dr_create_flow_action_pop_vlan();\n+\tif (!priv->config.allow_duplicate_pattern) {\n+#ifndef HAVE_MLX5_DR_ALLOW_DUPLICATE\n+\t\tDRV_LOG(WARNING, \"Disallow duplicate pattern is not supported - maybe old rdma-core version?\");\n+#endif\n+\t\tmlx5_glue->dr_allow_duplicate_rules(sh->rx_domain, 0);\n+\t\tmlx5_glue->dr_allow_duplicate_rules(sh->tx_domain, 0);\n+\t\tif (sh->fdb_domain)\n+\t\t\tmlx5_glue->dr_allow_duplicate_rules(sh->fdb_domain, 0);\n+\t}\n+\n+\t__mlx5_discovery_misc5_cap(priv);\n+#endif /* HAVE_MLX5DV_DR */\n+\tsh->default_miss_action =\n+\t\t\tmlx5_glue->dr_create_flow_action_default_miss();\n+\tif (!sh->default_miss_action)\n+\t\tDRV_LOG(WARNING, \"Default miss action is not supported.\");\n+\treturn 0;\n+error:\n+\t/* Rollback the created objects. 
*/\n+\tif (sh->rx_domain) {\n+\t\tmlx5_glue->dr_destroy_domain(sh->rx_domain);\n+\t\tsh->rx_domain = NULL;\n+\t}\n+\tif (sh->tx_domain) {\n+\t\tmlx5_glue->dr_destroy_domain(sh->tx_domain);\n+\t\tsh->tx_domain = NULL;\n+\t}\n+\tif (sh->fdb_domain) {\n+\t\tmlx5_glue->dr_destroy_domain(sh->fdb_domain);\n+\t\tsh->fdb_domain = NULL;\n+\t}\n+\tif (sh->dr_drop_action) {\n+\t\tmlx5_glue->destroy_flow_action(sh->dr_drop_action);\n+\t\tsh->dr_drop_action = NULL;\n+\t}\n+\tif (sh->pop_vlan_action) {\n+\t\tmlx5_glue->destroy_flow_action(sh->pop_vlan_action);\n+\t\tsh->pop_vlan_action = NULL;\n+\t}\n+\tif (sh->encaps_decaps) {\n+\t\tmlx5_hlist_destroy(sh->encaps_decaps);\n+\t\tsh->encaps_decaps = NULL;\n+\t}\n+\tif (sh->modify_cmds) {\n+\t\tmlx5_hlist_destroy(sh->modify_cmds);\n+\t\tsh->modify_cmds = NULL;\n+\t}\n+\tif (sh->tag_table) {\n+\t\t/* tags should be destroyed with flow before. */\n+\t\tmlx5_hlist_destroy(sh->tag_table);\n+\t\tsh->tag_table = NULL;\n+\t}\n+\tif (sh->tunnel_hub) {\n+\t\tmlx5_release_tunnel_hub(sh, priv->dev_port);\n+\t\tsh->tunnel_hub = NULL;\n+\t}\n+\tmlx5_free_table_hash_list(priv);\n+\tif (sh->port_id_action_list) {\n+\t\tmlx5_list_destroy(sh->port_id_action_list);\n+\t\tsh->port_id_action_list = NULL;\n+\t}\n+\tif (sh->push_vlan_action_list) {\n+\t\tmlx5_list_destroy(sh->push_vlan_action_list);\n+\t\tsh->push_vlan_action_list = NULL;\n+\t}\n+\tif (sh->sample_action_list) {\n+\t\tmlx5_list_destroy(sh->sample_action_list);\n+\t\tsh->sample_action_list = NULL;\n+\t}\n+\tif (sh->dest_array_list) {\n+\t\tmlx5_list_destroy(sh->dest_array_list);\n+\t\tsh->dest_array_list = NULL;\n+\t}\n+\treturn err;\n+}\n+\n+/**\n+ * Destroy DR related data within private structure.\n+ *\n+ * @param[in] priv\n+ *   Pointer to the private device data structure.\n+ */\n+void\n+mlx5_os_free_shared_dr(struct mlx5_priv *priv)\n+{\n+\tstruct mlx5_dev_ctx_shared *sh = priv->sh;\n+\n+\tMLX5_ASSERT(sh && sh->refcnt);\n+\tif (sh->refcnt > 1)\n+\t\treturn;\n+#ifdef 
HAVE_MLX5DV_DR\n+\tif (sh->rx_domain) {\n+\t\tmlx5_glue->dr_destroy_domain(sh->rx_domain);\n+\t\tsh->rx_domain = NULL;\n+\t}\n+\tif (sh->tx_domain) {\n+\t\tmlx5_glue->dr_destroy_domain(sh->tx_domain);\n+\t\tsh->tx_domain = NULL;\n+\t}\n+#ifdef HAVE_MLX5DV_DR_ESWITCH\n+\tif (sh->fdb_domain) {\n+\t\tmlx5_glue->dr_destroy_domain(sh->fdb_domain);\n+\t\tsh->fdb_domain = NULL;\n+\t}\n+\tif (sh->dr_drop_action) {\n+\t\tmlx5_glue->destroy_flow_action(sh->dr_drop_action);\n+\t\tsh->dr_drop_action = NULL;\n+\t}\n+#endif\n+\tif (sh->pop_vlan_action) {\n+\t\tmlx5_glue->destroy_flow_action(sh->pop_vlan_action);\n+\t\tsh->pop_vlan_action = NULL;\n+\t}\n+#endif /* HAVE_MLX5DV_DR */\n+\tif (sh->default_miss_action)\n+\t\tmlx5_glue->destroy_flow_action\n+\t\t\t\t(sh->default_miss_action);\n+\tif (sh->encaps_decaps) {\n+\t\tmlx5_hlist_destroy(sh->encaps_decaps);\n+\t\tsh->encaps_decaps = NULL;\n+\t}\n+\tif (sh->modify_cmds) {\n+\t\tmlx5_hlist_destroy(sh->modify_cmds);\n+\t\tsh->modify_cmds = NULL;\n+\t}\n+\tif (sh->tag_table) {\n+\t\t/* tags should be destroyed with flow before. 
*/\n+\t\tmlx5_hlist_destroy(sh->tag_table);\n+\t\tsh->tag_table = NULL;\n+\t}\n+\tif (sh->tunnel_hub) {\n+\t\tmlx5_release_tunnel_hub(sh, priv->dev_port);\n+\t\tsh->tunnel_hub = NULL;\n+\t}\n+\tmlx5_free_table_hash_list(priv);\n+\tif (sh->port_id_action_list) {\n+\t\tmlx5_list_destroy(sh->port_id_action_list);\n+\t\tsh->port_id_action_list = NULL;\n+\t}\n+\tif (sh->push_vlan_action_list) {\n+\t\tmlx5_list_destroy(sh->push_vlan_action_list);\n+\t\tsh->push_vlan_action_list = NULL;\n+\t}\n+\tif (sh->sample_action_list) {\n+\t\tmlx5_list_destroy(sh->sample_action_list);\n+\t\tsh->sample_action_list = NULL;\n+\t}\n+\tif (sh->dest_array_list) {\n+\t\tmlx5_list_destroy(sh->dest_array_list);\n+\t\tsh->dest_array_list = NULL;\n+\t}\n+}\n+\n+/**\n+ * Initialize shared data between primary and secondary process.\n+ *\n+ * A memzone is reserved by primary process and secondary processes attach to\n+ * the memzone.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_init_shared_data(void)\n+{\n+\tconst struct rte_memzone *mz;\n+\tint ret = 0;\n+\n+\trte_spinlock_lock(&mlx5_shared_data_lock);\n+\tif (mlx5_shared_data == NULL) {\n+\t\tif (rte_eal_process_type() == RTE_PROC_PRIMARY) {\n+\t\t\t/* Allocate shared memory. */\n+\t\t\tmz = rte_memzone_reserve(MZ_MLX5_PMD_SHARED_DATA,\n+\t\t\t\t\t\t sizeof(*mlx5_shared_data),\n+\t\t\t\t\t\t SOCKET_ID_ANY, 0);\n+\t\t\tif (mz == NULL) {\n+\t\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\t\"Cannot allocate mlx5 shared data\");\n+\t\t\t\tret = -rte_errno;\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tmlx5_shared_data = mz->addr;\n+\t\t\tmemset(mlx5_shared_data, 0, sizeof(*mlx5_shared_data));\n+\t\t\trte_spinlock_init(&mlx5_shared_data->lock);\n+\t\t} else {\n+\t\t\t/* Lookup allocated shared memory. 
*/\n+\t\t\tmz = rte_memzone_lookup(MZ_MLX5_PMD_SHARED_DATA);\n+\t\t\tif (mz == NULL) {\n+\t\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\t\"Cannot attach mlx5 shared data\");\n+\t\t\t\tret = -rte_errno;\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tmlx5_shared_data = mz->addr;\n+\t\t\tmemset(&mlx5_local_data, 0, sizeof(mlx5_local_data));\n+\t\t}\n+\t}\n+error:\n+\trte_spinlock_unlock(&mlx5_shared_data_lock);\n+\treturn ret;\n+}\n+\n+/**\n+ * PMD global initialization.\n+ *\n+ * Independent from individual device, this function initializes global\n+ * per-PMD data structures distinguishing primary and secondary processes.\n+ * Hence, each initialization is called once per a process.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_init_once(void)\n+{\n+\tstruct mlx5_shared_data *sd;\n+\tstruct mlx5_local_data *ld = &mlx5_local_data;\n+\tint ret = 0;\n+\n+\tif (mlx5_init_shared_data())\n+\t\treturn -rte_errno;\n+\tsd = mlx5_shared_data;\n+\tMLX5_ASSERT(sd);\n+\trte_spinlock_lock(&sd->lock);\n+\tswitch (rte_eal_process_type()) {\n+\tcase RTE_PROC_PRIMARY:\n+\t\tif (sd->init_done)\n+\t\t\tbreak;\n+\t\tLIST_INIT(&sd->mem_event_cb_list);\n+\t\trte_rwlock_init(&sd->mem_event_rwlock);\n+\t\trte_mem_event_callback_register(\"MLX5_MEM_EVENT_CB\",\n+\t\t\t\t\t\tmlx5_mr_mem_event_cb, NULL);\n+\t\tret = mlx5_mp_init_primary(MLX5_MP_NAME,\n+\t\t\t\t\t   mlx5_mp_os_primary_handle);\n+\t\tif (ret)\n+\t\t\tgoto out;\n+\t\tsd->init_done = true;\n+\t\tbreak;\n+\tcase RTE_PROC_SECONDARY:\n+\t\tif (ld->init_done)\n+\t\t\tbreak;\n+\t\tret = mlx5_mp_init_secondary(MLX5_MP_NAME,\n+\t\t\t\t\t     mlx5_mp_os_secondary_handle);\n+\t\tif (ret)\n+\t\t\tgoto out;\n+\t\t++sd->secondary_cnt;\n+\t\tld->init_done = true;\n+\t\tbreak;\n+\tdefault:\n+\t\tbreak;\n+\t}\n+out:\n+\trte_spinlock_unlock(&sd->lock);\n+\treturn ret;\n+}\n+\n+/**\n+ * Create the Tx queue DevX/Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param 
idx\n+ *   Queue index in DPDK Tx queue array.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_os_txq_obj_new(struct rte_eth_dev *dev, uint16_t idx)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_txq_data *txq_data = (*priv->txqs)[idx];\n+\tstruct mlx5_txq_ctrl *txq_ctrl =\n+\t\t\tcontainer_of(txq_data, struct mlx5_txq_ctrl, txq);\n+\n+\tif (txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN)\n+\t\treturn mlx5_txq_devx_obj_new(dev, idx);\n+#ifdef HAVE_MLX5DV_DEVX_UAR_OFFSET\n+\tif (!priv->config.dv_esw_en)\n+\t\treturn mlx5_txq_devx_obj_new(dev, idx);\n+#endif\n+\treturn mlx5_txq_ibv_obj_new(dev, idx);\n+}\n+\n+/**\n+ * Release an Tx DevX/verbs queue object.\n+ *\n+ * @param txq_obj\n+ *   DevX/Verbs Tx queue object.\n+ */\n+static void\n+mlx5_os_txq_obj_release(struct mlx5_txq_obj *txq_obj)\n+{\n+\tif (txq_obj->txq_ctrl->type == MLX5_TXQ_TYPE_HAIRPIN) {\n+\t\tmlx5_txq_devx_obj_release(txq_obj);\n+\t\treturn;\n+\t}\n+#ifdef HAVE_MLX5DV_DEVX_UAR_OFFSET\n+\tif (!txq_obj->txq_ctrl->priv->config.dv_esw_en) {\n+\t\tmlx5_txq_devx_obj_release(txq_obj);\n+\t\treturn;\n+\t}\n+#endif\n+\tmlx5_txq_ibv_obj_release(txq_obj);\n+}\n+\n+/**\n+ * DV flow counter mode detect and config.\n+ *\n+ * @param dev\n+ *   Pointer to rte_eth_dev structure.\n+ *\n+ */\n+static void\n+mlx5_flow_counter_mode_config(struct rte_eth_dev *dev __rte_unused)\n+{\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_dev_ctx_shared *sh = priv->sh;\n+\tbool fallback;\n+\n+#ifndef HAVE_IBV_DEVX_ASYNC\n+\tfallback = true;\n+#else\n+\tfallback = false;\n+\tif (!priv->config.devx || !priv->config.dv_flow_en ||\n+\t    !priv->config.hca_attr.flow_counters_dump ||\n+\t    !(priv->config.hca_attr.flow_counter_bulk_alloc_bitmap & 0x4) ||\n+\t    (mlx5_flow_dv_discover_counter_offset_support(dev) == -ENOTSUP))\n+\t\tfallback = true;\n+#endif\n+\tif (fallback)\n+\t\tDRV_LOG(INFO, 
\"Use fall-back DV counter management. Flow \"\n+\t\t\t\"counter dump:%d, bulk_alloc_bitmap:0x%hhx.\",\n+\t\t\tpriv->config.hca_attr.flow_counters_dump,\n+\t\t\tpriv->config.hca_attr.flow_counter_bulk_alloc_bitmap);\n+\t/* Initialize fallback mode only on the port initializes sh. */\n+\tif (sh->refcnt == 1)\n+\t\tsh->cmng.counter_fallback = fallback;\n+\telse if (fallback != sh->cmng.counter_fallback)\n+\t\tDRV_LOG(WARNING, \"Port %d in sh has different fallback mode \"\n+\t\t\t\"with others:%d.\", PORT_ID(priv), fallback);\n+#endif\n+}\n+\n+/**\n+ * DR flow drop action support detect.\n+ *\n+ * @param dev\n+ *   Pointer to rte_eth_dev structure.\n+ *\n+ */\n+static void\n+mlx5_flow_drop_action_config(struct rte_eth_dev *dev __rte_unused)\n+{\n+#ifdef HAVE_MLX5DV_DR\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\n+\tif (!priv->config.dv_flow_en || !priv->sh->dr_drop_action)\n+\t\treturn;\n+\t/**\n+\t * DR supports drop action placeholder when it is supported;\n+\t * otherwise, use the queue drop action.\n+\t */\n+\tif (mlx5_flow_discover_dr_action_support(dev))\n+\t\tpriv->root_drop_action = priv->drop_queue.hrxq->action;\n+\telse\n+\t\tpriv->root_drop_action = priv->sh->dr_drop_action;\n+#endif\n+}\n+\n+static void\n+mlx5_queue_counter_id_prepare(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tvoid *ctx = priv->sh->ctx;\n+\n+\tpriv->q_counters = mlx5_devx_cmd_queue_counter_alloc(ctx);\n+\tif (!priv->q_counters) {\n+\t\tstruct ibv_cq *cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);\n+\t\tstruct ibv_wq *wq;\n+\n+\t\tDRV_LOG(DEBUG, \"Port %d queue counter object cannot be created \"\n+\t\t\t\"by DevX - fall-back to use the kernel driver global \"\n+\t\t\t\"queue counter.\", dev->data->port_id);\n+\t\t/* Create WQ by kernel and query its queue counter ID. 
*/\n+\t\tif (cq) {\n+\t\t\twq = mlx5_glue->create_wq(ctx,\n+\t\t\t\t\t\t  &(struct ibv_wq_init_attr){\n+\t\t\t\t\t\t    .wq_type = IBV_WQT_RQ,\n+\t\t\t\t\t\t    .max_wr = 1,\n+\t\t\t\t\t\t    .max_sge = 1,\n+\t\t\t\t\t\t    .pd = priv->sh->pd,\n+\t\t\t\t\t\t    .cq = cq,\n+\t\t\t\t\t\t});\n+\t\t\tif (wq) {\n+\t\t\t\t/* Counter is assigned only on RDY state. */\n+\t\t\t\tint ret = mlx5_glue->modify_wq(wq,\n+\t\t\t\t\t\t &(struct ibv_wq_attr){\n+\t\t\t\t\t\t .attr_mask = IBV_WQ_ATTR_STATE,\n+\t\t\t\t\t\t .wq_state = IBV_WQS_RDY,\n+\t\t\t\t\t\t});\n+\n+\t\t\t\tif (ret == 0)\n+\t\t\t\t\tmlx5_devx_cmd_wq_query(wq,\n+\t\t\t\t\t\t\t &priv->counter_set_id);\n+\t\t\t\tclaim_zero(mlx5_glue->destroy_wq(wq));\n+\t\t\t}\n+\t\t\tclaim_zero(mlx5_glue->destroy_cq(cq));\n+\t\t}\n+\t} else {\n+\t\tpriv->counter_set_id = priv->q_counters->id;\n+\t}\n+\tif (priv->counter_set_id == 0)\n+\t\tDRV_LOG(INFO, \"Part of the port %d statistics will not be \"\n+\t\t\t\"available.\", dev->data->port_id);\n+}\n+\n+/**\n+ * Check if representor spawn info match devargs.\n+ *\n+ * @param spawn\n+ *   Verbs device parameters (name, port, switch_info) to spawn.\n+ * @param eth_da\n+ *   Device devargs to probe.\n+ *\n+ * @return\n+ *   Match result.\n+ */\n+static bool\n+mlx5_representor_match(struct mlx5_dev_spawn_data *spawn,\n+\t\t       struct rte_eth_devargs *eth_da)\n+{\n+\tstruct mlx5_switch_info *switch_info = &spawn->info;\n+\tunsigned int p, f;\n+\tuint16_t id;\n+\tuint16_t repr_id = mlx5_representor_id_encode(switch_info,\n+\t\t\t\t\t\t      eth_da->type);\n+\n+\tswitch (eth_da->type) {\n+\tcase RTE_ETH_REPRESENTOR_SF:\n+\t\tif (!(spawn->info.port_name == -1 &&\n+\t\t      switch_info->name_type ==\n+\t\t\t\tMLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&\n+\t\t    switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFSF) {\n+\t\t\trte_errno = EBUSY;\n+\t\t\treturn false;\n+\t\t}\n+\t\tbreak;\n+\tcase RTE_ETH_REPRESENTOR_VF:\n+\t\t/* Allows HPF representor index -1 as exception. 
*/\n+\t\tif (!(spawn->info.port_name == -1 &&\n+\t\t      switch_info->name_type ==\n+\t\t\t\tMLX5_PHYS_PORT_NAME_TYPE_PFHPF) &&\n+\t\t    switch_info->name_type != MLX5_PHYS_PORT_NAME_TYPE_PFVF) {\n+\t\t\trte_errno = EBUSY;\n+\t\t\treturn false;\n+\t\t}\n+\t\tbreak;\n+\tcase RTE_ETH_REPRESENTOR_NONE:\n+\t\trte_errno = EBUSY;\n+\t\treturn false;\n+\tdefault:\n+\t\trte_errno = ENOTSUP;\n+\t\tDRV_LOG(ERR, \"unsupported representor type\");\n+\t\treturn false;\n+\t}\n+\t/* Check representor ID: */\n+\tfor (p = 0; p < eth_da->nb_ports; ++p) {\n+\t\tif (spawn->pf_bond < 0) {\n+\t\t\t/* For non-LAG mode, allow and ignore pf. */\n+\t\t\tswitch_info->pf_num = eth_da->ports[p];\n+\t\t\trepr_id = mlx5_representor_id_encode(switch_info,\n+\t\t\t\t\t\t\t     eth_da->type);\n+\t\t}\n+\t\tfor (f = 0; f < eth_da->nb_representor_ports; ++f) {\n+\t\t\tid = MLX5_REPRESENTOR_ID\n+\t\t\t\t(eth_da->ports[p], eth_da->type,\n+\t\t\t\t eth_da->representor_ports[f]);\n+\t\t\tif (repr_id == id)\n+\t\t\t\treturn true;\n+\t\t}\n+\t}\n+\trte_errno = EBUSY;\n+\treturn false;\n+}\n+\n+\n+/**\n+ * Spawn an Ethernet device from Verbs information.\n+ *\n+ * @param dpdk_dev\n+ *   Backing DPDK device.\n+ * @param spawn\n+ *   Verbs device parameters (name, port, switch_info) to spawn.\n+ * @param config\n+ *   Device configuration parameters.\n+ * @param config\n+ *   Device arguments.\n+ *\n+ * @return\n+ *   A valid Ethernet device object on success, NULL otherwise and rte_errno\n+ *   is set. 
The following errors are defined:\n+ *\n+ *   EBUSY: device is not supposed to be spawned.\n+ *   EEXIST: device is already spawned\n+ */\n+static struct rte_eth_dev *\n+mlx5_dev_spawn(struct rte_device *dpdk_dev,\n+\t       struct mlx5_dev_spawn_data *spawn,\n+\t       struct mlx5_dev_config *config,\n+\t       struct rte_eth_devargs *eth_da)\n+{\n+\tconst struct mlx5_switch_info *switch_info = &spawn->info;\n+\tstruct mlx5_dev_ctx_shared *sh = NULL;\n+\tstruct ibv_port_attr port_attr;\n+\tstruct mlx5dv_context dv_attr = { .comp_mask = 0 };\n+\tstruct rte_eth_dev *eth_dev = NULL;\n+\tstruct mlx5_priv *priv = NULL;\n+\tint err = 0;\n+\tunsigned int hw_padding = 0;\n+\tunsigned int mps;\n+\tunsigned int tunnel_en = 0;\n+\tunsigned int mpls_en = 0;\n+\tunsigned int swp = 0;\n+\tunsigned int mprq = 0;\n+\tunsigned int mprq_min_stride_size_n = 0;\n+\tunsigned int mprq_max_stride_size_n = 0;\n+\tunsigned int mprq_min_stride_num_n = 0;\n+\tunsigned int mprq_max_stride_num_n = 0;\n+\tstruct rte_ether_addr mac;\n+\tchar name[RTE_ETH_NAME_MAX_LEN];\n+\tint own_domain_id = 0;\n+\tuint16_t port_id;\n+\tstruct mlx5_port_info vport_info = { .query_flags = 0 };\n+\tint i;\n+\n+\t/* Determine if this port representor is supposed to be spawned. */\n+\tif (switch_info->representor && dpdk_dev->devargs &&\n+\t    !mlx5_representor_match(spawn, eth_da))\n+\t\treturn NULL;\n+\t/* Build device name. */\n+\tif (spawn->pf_bond < 0) {\n+\t\t/* Single device. */\n+\t\tif (!switch_info->representor)\n+\t\t\tstrlcpy(name, dpdk_dev->name, sizeof(name));\n+\t\telse\n+\t\t\terr = snprintf(name, sizeof(name), \"%s_representor_%s%u\",\n+\t\t\t\t dpdk_dev->name,\n+\t\t\t\t switch_info->name_type ==\n+\t\t\t\t MLX5_PHYS_PORT_NAME_TYPE_PFSF ? \"sf\" : \"vf\",\n+\t\t\t\t switch_info->port_name);\n+\t} else {\n+\t\t/* Bonding device. 
*/\n+\t\tif (!switch_info->representor) {\n+\t\t\terr = snprintf(name, sizeof(name), \"%s_%s\",\n+\t\t\t\t dpdk_dev->name,\n+\t\t\t\t mlx5_os_get_dev_device_name(spawn->phys_dev));\n+\t\t} else {\n+\t\t\terr = snprintf(name, sizeof(name), \"%s_%s_representor_c%dpf%d%s%u\",\n+\t\t\t\tdpdk_dev->name,\n+\t\t\t\tmlx5_os_get_dev_device_name(spawn->phys_dev),\n+\t\t\t\tswitch_info->ctrl_num,\n+\t\t\t\tswitch_info->pf_num,\n+\t\t\t\tswitch_info->name_type ==\n+\t\t\t\tMLX5_PHYS_PORT_NAME_TYPE_PFSF ? \"sf\" : \"vf\",\n+\t\t\t\tswitch_info->port_name);\n+\t\t}\n+\t}\n+\tif (err >= (int)sizeof(name))\n+\t\tDRV_LOG(WARNING, \"device name overflow %s\", name);\n+\t/* check if the device is already spawned */\n+\tif (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {\n+\t\trte_errno = EEXIST;\n+\t\treturn NULL;\n+\t}\n+\tDRV_LOG(DEBUG, \"naming Ethernet device \\\"%s\\\"\", name);\n+\tif (rte_eal_process_type() == RTE_PROC_SECONDARY) {\n+\t\tstruct mlx5_mp_id mp_id;\n+\n+\t\teth_dev = rte_eth_dev_attach_secondary(name);\n+\t\tif (eth_dev == NULL) {\n+\t\t\tDRV_LOG(ERR, \"can not attach rte ethdev\");\n+\t\t\trte_errno = ENOMEM;\n+\t\t\treturn NULL;\n+\t\t}\n+\t\teth_dev->device = dpdk_dev;\n+\t\teth_dev->dev_ops = &mlx5_dev_sec_ops;\n+\t\teth_dev->rx_descriptor_status = mlx5_rx_descriptor_status;\n+\t\teth_dev->tx_descriptor_status = mlx5_tx_descriptor_status;\n+\t\terr = mlx5_proc_priv_init(eth_dev);\n+\t\tif (err)\n+\t\t\treturn NULL;\n+\t\tmp_id.port_id = eth_dev->data->port_id;\n+\t\tstrlcpy(mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);\n+\t\t/* Receive command fd from primary process */\n+\t\terr = mlx5_mp_req_verbs_cmd_fd(&mp_id);\n+\t\tif (err < 0)\n+\t\t\tgoto err_secondary;\n+\t\t/* Remap UAR for Tx queues. 
*/\n+\t\terr = mlx5_tx_uar_init_secondary(eth_dev, err);\n+\t\tif (err)\n+\t\t\tgoto err_secondary;\n+\t\t/*\n+\t\t * Ethdev pointer is still required as input since\n+\t\t * the primary device is not accessible from the\n+\t\t * secondary process.\n+\t\t */\n+\t\teth_dev->rx_pkt_burst = mlx5_select_rx_function(eth_dev);\n+\t\teth_dev->tx_pkt_burst = mlx5_select_tx_function(eth_dev);\n+\t\treturn eth_dev;\n+err_secondary:\n+\t\tmlx5_dev_close(eth_dev);\n+\t\treturn NULL;\n+\t}\n+\t/*\n+\t * Some parameters (\"tx_db_nc\" in particularly) are needed in\n+\t * advance to create dv/verbs device context. We proceed the\n+\t * devargs here to get ones, and later proceed devargs again\n+\t * to override some hardware settings.\n+\t */\n+\terr = mlx5_args(config, dpdk_dev->devargs);\n+\tif (err) {\n+\t\terr = rte_errno;\n+\t\tDRV_LOG(ERR, \"failed to process device arguments: %s\",\n+\t\t\tstrerror(rte_errno));\n+\t\tgoto error;\n+\t}\n+\tif (config->dv_miss_info) {\n+\t\tif (switch_info->master || switch_info->representor)\n+\t\t\tconfig->dv_xmeta_en = MLX5_XMETA_MODE_META16;\n+\t}\n+\tmlx5_malloc_mem_select(config->sys_mem_en);\n+\tsh = mlx5_alloc_shared_dev_ctx(spawn, config);\n+\tif (!sh)\n+\t\treturn NULL;\n+\tconfig->devx = sh->devx;\n+#ifdef HAVE_MLX5DV_DR_ACTION_DEST_DEVX_TIR\n+\tconfig->dest_tir = 1;\n+#endif\n+#ifdef HAVE_IBV_MLX5_MOD_SWP\n+\tdv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_SWP;\n+#endif\n+\t/*\n+\t * Multi-packet send is supported by ConnectX-4 Lx PF as well\n+\t * as all ConnectX-5 devices.\n+\t */\n+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT\n+\tdv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS;\n+#endif\n+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT\n+\tdv_attr.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;\n+#endif\n+\tmlx5_glue->dv_query_device(sh->ctx, &dv_attr);\n+\tif (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {\n+\t\tif (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {\n+\t\t\tDRV_LOG(DEBUG, \"enhanced MPW is 
supported\");\n+\t\t\tmps = MLX5_MPW_ENHANCED;\n+\t\t} else {\n+\t\t\tDRV_LOG(DEBUG, \"MPW is supported\");\n+\t\t\tmps = MLX5_MPW;\n+\t\t}\n+\t} else {\n+\t\tDRV_LOG(DEBUG, \"MPW isn't supported\");\n+\t\tmps = MLX5_MPW_DISABLED;\n+\t}\n+#ifdef HAVE_IBV_MLX5_MOD_SWP\n+\tif (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP)\n+\t\tswp = dv_attr.sw_parsing_caps.sw_parsing_offloads;\n+\tDRV_LOG(DEBUG, \"SWP support: %u\", swp);\n+#endif\n+\tconfig->swp = !!swp;\n+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT\n+\tif (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) {\n+\t\tstruct mlx5dv_striding_rq_caps mprq_caps =\n+\t\t\tdv_attr.striding_rq_caps;\n+\n+\t\tDRV_LOG(DEBUG, \"\\tmin_single_stride_log_num_of_bytes: %d\",\n+\t\t\tmprq_caps.min_single_stride_log_num_of_bytes);\n+\t\tDRV_LOG(DEBUG, \"\\tmax_single_stride_log_num_of_bytes: %d\",\n+\t\t\tmprq_caps.max_single_stride_log_num_of_bytes);\n+\t\tDRV_LOG(DEBUG, \"\\tmin_single_wqe_log_num_of_strides: %d\",\n+\t\t\tmprq_caps.min_single_wqe_log_num_of_strides);\n+\t\tDRV_LOG(DEBUG, \"\\tmax_single_wqe_log_num_of_strides: %d\",\n+\t\t\tmprq_caps.max_single_wqe_log_num_of_strides);\n+\t\tDRV_LOG(DEBUG, \"\\tsupported_qpts: %d\",\n+\t\t\tmprq_caps.supported_qpts);\n+\t\tDRV_LOG(DEBUG, \"device supports Multi-Packet RQ\");\n+\t\tmprq = 1;\n+\t\tmprq_min_stride_size_n =\n+\t\t\tmprq_caps.min_single_stride_log_num_of_bytes;\n+\t\tmprq_max_stride_size_n =\n+\t\t\tmprq_caps.max_single_stride_log_num_of_bytes;\n+\t\tmprq_min_stride_num_n =\n+\t\t\tmprq_caps.min_single_wqe_log_num_of_strides;\n+\t\tmprq_max_stride_num_n =\n+\t\t\tmprq_caps.max_single_wqe_log_num_of_strides;\n+\t}\n+#endif\n+\t/* Rx CQE compression is enabled by default. 
*/\n+\tconfig->cqe_comp = 1;\n+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT\n+\tif (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {\n+\t\ttunnel_en = ((dv_attr.tunnel_offloads_caps &\n+\t\t\t      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_VXLAN) &&\n+\t\t\t     (dv_attr.tunnel_offloads_caps &\n+\t\t\t      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GRE) &&\n+\t\t\t     (dv_attr.tunnel_offloads_caps &\n+\t\t\t      MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_GENEVE));\n+\t}\n+\tDRV_LOG(DEBUG, \"tunnel offloading is %ssupported\",\n+\t\ttunnel_en ? \"\" : \"not \");\n+#else\n+\tDRV_LOG(WARNING,\n+\t\t\"tunnel offloading disabled due to old OFED/rdma-core version\");\n+#endif\n+\tconfig->tunnel_en = tunnel_en;\n+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT\n+\tmpls_en = ((dv_attr.tunnel_offloads_caps &\n+\t\t    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_GRE) &&\n+\t\t   (dv_attr.tunnel_offloads_caps &\n+\t\t    MLX5DV_RAW_PACKET_CAP_TUNNELED_OFFLOAD_CW_MPLS_OVER_UDP));\n+\tDRV_LOG(DEBUG, \"MPLS over GRE/UDP tunnel offloading is %ssupported\",\n+\t\tmpls_en ? \"\" : \"not \");\n+#else\n+\tDRV_LOG(WARNING, \"MPLS over GRE/UDP tunnel offloading disabled due to\"\n+\t\t\" old OFED/rdma-core version or firmware configuration\");\n+#endif\n+\tconfig->mpls_en = mpls_en;\n+\t/* Check port status. */\n+\terr = mlx5_glue->query_port(sh->ctx, spawn->phys_port, &port_attr);\n+\tif (err) {\n+\t\tDRV_LOG(ERR, \"port query failed: %s\", strerror(err));\n+\t\tgoto error;\n+\t}\n+\tif (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {\n+\t\tDRV_LOG(ERR, \"port is not configured in Ethernet mode\");\n+\t\terr = EINVAL;\n+\t\tgoto error;\n+\t}\n+\tif (port_attr.state != IBV_PORT_ACTIVE)\n+\t\tDRV_LOG(DEBUG, \"port is not active: \\\"%s\\\" (%d)\",\n+\t\t\tmlx5_glue->port_state_str(port_attr.state),\n+\t\t\tport_attr.state);\n+\t/* Allocate private eth device data. 
*/\n+\tpriv = mlx5_malloc(MLX5_MEM_ZERO | MLX5_MEM_RTE,\n+\t\t\t   sizeof(*priv),\n+\t\t\t   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);\n+\tif (priv == NULL) {\n+\t\tDRV_LOG(ERR, \"priv allocation failure\");\n+\t\terr = ENOMEM;\n+\t\tgoto error;\n+\t}\n+\tpriv->sh = sh;\n+\tpriv->dev_port = spawn->phys_port;\n+\tpriv->pci_dev = spawn->pci_dev;\n+\tpriv->mtu = RTE_ETHER_MTU;\n+\t/* Some internal functions rely on Netlink sockets, open them now. */\n+\tpriv->nl_socket_rdma = mlx5_nl_init(NETLINK_RDMA);\n+\tpriv->nl_socket_route =\tmlx5_nl_init(NETLINK_ROUTE);\n+\tpriv->representor = !!switch_info->representor;\n+\tpriv->master = !!switch_info->master;\n+\tpriv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;\n+\tpriv->vport_meta_tag = 0;\n+\tpriv->vport_meta_mask = 0;\n+\tpriv->pf_bond = spawn->pf_bond;\n+\n+\tDRV_LOG(DEBUG,\n+\t\t\"dev_port=%u bus=%s pci=%s master=%d representor=%d pf_bond=%d\\n\",\n+\t\tpriv->dev_port, dpdk_dev->bus->name,\n+\t\tpriv->pci_dev ? priv->pci_dev->name : \"NONE\",\n+\t\tpriv->master, priv->representor, priv->pf_bond);\n+\n+\t/*\n+\t * If we have E-Switch we should determine the vport attributes.\n+\t * E-Switch may use either source vport field or reg_c[0] metadata\n+\t * register to match on vport index. 
The engaged part of metadata\n+\t * register is defined by mask.\n+\t */\n+\tif (switch_info->representor || switch_info->master) {\n+\t\terr = mlx5_glue->devx_port_query(sh->ctx,\n+\t\t\t\t\t\t spawn->phys_port,\n+\t\t\t\t\t\t &vport_info);\n+\t\tif (err) {\n+\t\t\tDRV_LOG(WARNING,\n+\t\t\t\t\"can't query devx port %d on device %s\",\n+\t\t\t\tspawn->phys_port,\n+\t\t\t\tmlx5_os_get_dev_device_name(spawn->phys_dev));\n+\t\t\tvport_info.query_flags = 0;\n+\t\t}\n+\t}\n+\tif (vport_info.query_flags & MLX5_PORT_QUERY_REG_C0) {\n+\t\tpriv->vport_meta_tag = vport_info.vport_meta_tag;\n+\t\tpriv->vport_meta_mask = vport_info.vport_meta_mask;\n+\t\tif (!priv->vport_meta_mask) {\n+\t\t\tDRV_LOG(ERR, \"vport zero mask for port %d\"\n+\t\t\t\t     \" on bonding device %s\",\n+\t\t\t\t     spawn->phys_port,\n+\t\t\t\t     mlx5_os_get_dev_device_name\n+\t\t\t\t\t\t\t(spawn->phys_dev));\n+\t\t\terr = ENOTSUP;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tif (priv->vport_meta_tag & ~priv->vport_meta_mask) {\n+\t\t\tDRV_LOG(ERR, \"invalid vport tag for port %d\"\n+\t\t\t\t     \" on bonding device %s\",\n+\t\t\t\t     spawn->phys_port,\n+\t\t\t\t     mlx5_os_get_dev_device_name\n+\t\t\t\t\t\t\t(spawn->phys_dev));\n+\t\t\terr = ENOTSUP;\n+\t\t\tgoto error;\n+\t\t}\n+\t}\n+\tif (vport_info.query_flags & MLX5_PORT_QUERY_VPORT) {\n+\t\tpriv->vport_id = vport_info.vport_id;\n+\t} else if (spawn->pf_bond >= 0 &&\n+\t\t   (switch_info->representor || switch_info->master)) {\n+\t\tDRV_LOG(ERR, \"can't deduce vport index for port %d\"\n+\t\t\t     \" on bonding device %s\",\n+\t\t\t     spawn->phys_port,\n+\t\t\t     mlx5_os_get_dev_device_name(spawn->phys_dev));\n+\t\terr = ENOTSUP;\n+\t\tgoto error;\n+\t} else {\n+\t\t/*\n+\t\t * Suppose vport index in compatible way. 
Kernel/rdma_core\n+\t\t * support single E-Switch per PF configurations only and\n+\t\t * vport_id field contains the vport index for associated VF,\n+\t\t * which is deduced from representor port name.\n+\t\t * For example, let's have the IB device port 10, it has\n+\t\t * attached network device eth0, which has port name attribute\n+\t\t * pf0vf2, we can deduce the VF number as 2, and set vport index\n+\t\t * as 3 (2+1). This assigning schema should be changed if the\n+\t\t * multiple E-Switch instances per PF configurations or/and PCI\n+\t\t * subfunctions are added.\n+\t\t */\n+\t\tpriv->vport_id = switch_info->representor ?\n+\t\t\t\t switch_info->port_name + 1 : -1;\n+\t}\n+\tpriv->representor_id = mlx5_representor_id_encode(switch_info,\n+\t\t\t\t\t\t\t  eth_da->type);\n+\t/*\n+\t * Look for sibling devices in order to reuse their switch domain\n+\t * if any, otherwise allocate one.\n+\t */\n+\tMLX5_ETH_FOREACH_DEV(port_id, dpdk_dev) {\n+\t\tconst struct mlx5_priv *opriv =\n+\t\t\trte_eth_devices[port_id].data->dev_private;\n+\n+\t\tif (!opriv ||\n+\t\t    opriv->sh != priv->sh ||\n+\t\t\topriv->domain_id ==\n+\t\t\tRTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)\n+\t\t\tcontinue;\n+\t\tpriv->domain_id = opriv->domain_id;\n+\t\tDRV_LOG(DEBUG, \"dev_port-%u inherit domain_id=%u\\n\",\n+\t\t\tpriv->dev_port, priv->domain_id);\n+\t\tbreak;\n+\t}\n+\tif (priv->domain_id == RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) {\n+\t\terr = rte_eth_switch_domain_alloc(&priv->domain_id);\n+\t\tif (err) {\n+\t\t\terr = rte_errno;\n+\t\t\tDRV_LOG(ERR, \"unable to allocate switch domain: %s\",\n+\t\t\t\tstrerror(rte_errno));\n+\t\t\tgoto error;\n+\t\t}\n+\t\town_domain_id = 1;\n+\t\tDRV_LOG(DEBUG, \"dev_port-%u new domain_id=%u\\n\",\n+\t\t\tpriv->dev_port, priv->domain_id);\n+\t}\n+\t/* Override some values set by hardware configuration. 
*/\n+\tmlx5_args(config, dpdk_dev->devargs);\n+\terr = mlx5_dev_check_sibling_config(priv, config, dpdk_dev);\n+\tif (err)\n+\t\tgoto error;\n+\tconfig->hw_csum = !!(sh->device_attr.device_cap_flags_ex &\n+\t\t\t    IBV_DEVICE_RAW_IP_CSUM);\n+\tDRV_LOG(DEBUG, \"checksum offloading is %ssupported\",\n+\t\t(config->hw_csum ? \"\" : \"not \"));\n+#if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \\\n+\t!defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)\n+\tDRV_LOG(DEBUG, \"counters are not supported\");\n+#endif\n+#if !defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_MLX5DV_DR)\n+\tif (config->dv_flow_en) {\n+\t\tDRV_LOG(WARNING, \"DV flow is not supported\");\n+\t\tconfig->dv_flow_en = 0;\n+\t}\n+#endif\n+\tif (spawn->max_port > UINT8_MAX) {\n+\t\t/* Verbs can't support ports larger than 255 by design. */\n+\t\tDRV_LOG(ERR, \"can't support IB ports > UINT8_MAX\");\n+\t\terr = EINVAL;\n+\t\tgoto error;\n+\t}\n+\tconfig->ind_table_max_size =\n+\t\tsh->device_attr.max_rwq_indirection_table_size;\n+\t/*\n+\t * Remove this check once DPDK supports larger/variable\n+\t * indirection tables.\n+\t */\n+\tif (config->ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512)\n+\t\tconfig->ind_table_max_size = ETH_RSS_RETA_SIZE_512;\n+\tDRV_LOG(DEBUG, \"maximum Rx indirection table size is %u\",\n+\t\tconfig->ind_table_max_size);\n+\tconfig->hw_vlan_strip = !!(sh->device_attr.raw_packet_caps &\n+\t\t\t\t  IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);\n+\tDRV_LOG(DEBUG, \"VLAN stripping is %ssupported\",\n+\t\t(config->hw_vlan_strip ? 
\"\" : \"not \"));\n+\tconfig->hw_fcs_strip = !!(sh->device_attr.raw_packet_caps &\n+\t\t\t\t IBV_RAW_PACKET_CAP_SCATTER_FCS);\n+#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)\n+\thw_padding = !!sh->device_attr.rx_pad_end_addr_align;\n+#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)\n+\thw_padding = !!(sh->device_attr.device_cap_flags_ex &\n+\t\t\tIBV_DEVICE_PCI_WRITE_END_PADDING);\n+#endif\n+\tif (config->hw_padding && !hw_padding) {\n+\t\tDRV_LOG(DEBUG, \"Rx end alignment padding isn't supported\");\n+\t\tconfig->hw_padding = 0;\n+\t} else if (config->hw_padding) {\n+\t\tDRV_LOG(DEBUG, \"Rx end alignment padding is enabled\");\n+\t}\n+\tconfig->tso = (sh->device_attr.max_tso > 0 &&\n+\t\t      (sh->device_attr.tso_supported_qpts &\n+\t\t       (1 << IBV_QPT_RAW_PACKET)));\n+\tif (config->tso)\n+\t\tconfig->tso_max_payload_sz = sh->device_attr.max_tso;\n+\t/*\n+\t * MPW is disabled by default, while the Enhanced MPW is enabled\n+\t * by default.\n+\t */\n+\tif (config->mps == MLX5_ARG_UNSET)\n+\t\tconfig->mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED :\n+\t\t\t\t\t\t\t  MLX5_MPW_DISABLED;\n+\telse\n+\t\tconfig->mps = config->mps ? mps : MLX5_MPW_DISABLED;\n+\tDRV_LOG(INFO, \"%sMPS is %s\",\n+\t\tconfig->mps == MLX5_MPW_ENHANCED ? \"enhanced \" :\n+\t\tconfig->mps == MLX5_MPW ? \"legacy \" : \"\",\n+\t\tconfig->mps != MLX5_MPW_DISABLED ? \"enabled\" : \"disabled\");\n+\tif (config->devx) {\n+\t\terr = mlx5_devx_cmd_query_hca_attr(sh->ctx, &config->hca_attr);\n+\t\tif (err) {\n+\t\t\terr = -err;\n+\t\t\tgoto error;\n+\t\t}\n+\t\t/* Check relax ordering support. 
*/\n+\t\tif (!haswell_broadwell_cpu) {\n+\t\t\tsh->cmng.relaxed_ordering_write =\n+\t\t\t\tconfig->hca_attr.relaxed_ordering_write;\n+\t\t\tsh->cmng.relaxed_ordering_read =\n+\t\t\t\tconfig->hca_attr.relaxed_ordering_read;\n+\t\t} else {\n+\t\t\tsh->cmng.relaxed_ordering_read = 0;\n+\t\t\tsh->cmng.relaxed_ordering_write = 0;\n+\t\t}\n+\t\tsh->rq_ts_format = config->hca_attr.rq_ts_format;\n+\t\tsh->sq_ts_format = config->hca_attr.sq_ts_format;\n+\t\tsh->steering_format_version =\n+\t\t\tconfig->hca_attr.steering_format_version;\n+\t\tsh->qp_ts_format = config->hca_attr.qp_ts_format;\n+\t\t/* Check for LRO support. */\n+\t\tif (config->dest_tir && config->hca_attr.lro_cap &&\n+\t\t    config->dv_flow_en) {\n+\t\t\t/* TBD check tunnel lro caps. */\n+\t\t\tconfig->lro.supported = config->hca_attr.lro_cap;\n+\t\t\tDRV_LOG(DEBUG, \"Device supports LRO\");\n+\t\t\t/*\n+\t\t\t * If LRO timeout is not configured by application,\n+\t\t\t * use the minimal supported value.\n+\t\t\t */\n+\t\t\tif (!config->lro.timeout)\n+\t\t\t\tconfig->lro.timeout =\n+\t\t\t\tconfig->hca_attr.lro_timer_supported_periods[0];\n+\t\t\tDRV_LOG(DEBUG, \"LRO session timeout set to %d usec\",\n+\t\t\t\tconfig->lro.timeout);\n+\t\t\tDRV_LOG(DEBUG, \"LRO minimal size of TCP segment \"\n+\t\t\t\t\"required for coalescing is %d bytes\",\n+\t\t\t\tconfig->hca_attr.lro_min_mss_size);\n+\t\t}\n+#if defined(HAVE_MLX5DV_DR) && \\\n+\t(defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_METER) || \\\n+\t defined(HAVE_MLX5_DR_CREATE_ACTION_ASO))\n+\t\tif (config->hca_attr.qos.sup &&\n+\t\t    config->hca_attr.qos.flow_meter_old &&\n+\t\t    config->dv_flow_en) {\n+\t\t\tuint8_t reg_c_mask =\n+\t\t\t\tconfig->hca_attr.qos.flow_meter_reg_c_ids;\n+\t\t\t/*\n+\t\t\t * Meter needs two REG_C's for color match and pre-sfx\n+\t\t\t * flow match. 
Here get the REG_C for color match.\n+\t\t\t * REG_C_0 and REG_C_1 is reserved for metadata feature.\n+\t\t\t */\n+\t\t\treg_c_mask &= 0xfc;\n+\t\t\tif (__builtin_popcount(reg_c_mask) < 1) {\n+\t\t\t\tpriv->mtr_en = 0;\n+\t\t\t\tDRV_LOG(WARNING, \"No available register for\"\n+\t\t\t\t\t\" meter.\");\n+\t\t\t} else {\n+\t\t\t\t/*\n+\t\t\t\t * The meter color register is used by the\n+\t\t\t\t * flow-hit feature as well.\n+\t\t\t\t * The flow-hit feature must use REG_C_3\n+\t\t\t\t * Prefer REG_C_3 if it is available.\n+\t\t\t\t */\n+\t\t\t\tif (reg_c_mask & (1 << (REG_C_3 - REG_C_0)))\n+\t\t\t\t\tpriv->mtr_color_reg = REG_C_3;\n+\t\t\t\telse\n+\t\t\t\t\tpriv->mtr_color_reg = ffs(reg_c_mask)\n+\t\t\t\t\t\t\t      - 1 + REG_C_0;\n+\t\t\t\tpriv->mtr_en = 1;\n+\t\t\t\tpriv->mtr_reg_share =\n+\t\t\t\t      config->hca_attr.qos.flow_meter;\n+\t\t\t\tDRV_LOG(DEBUG, \"The REG_C meter uses is %d\",\n+\t\t\t\t\tpriv->mtr_color_reg);\n+\t\t\t}\n+\t\t}\n+\t\tif (config->hca_attr.qos.sup &&\n+\t\t\tconfig->hca_attr.qos.flow_meter_aso_sup) {\n+\t\t\tuint32_t log_obj_size =\n+\t\t\t\trte_log2_u32(MLX5_ASO_MTRS_PER_POOL >> 1);\n+\t\t\tif (log_obj_size >=\n+\t\t\tconfig->hca_attr.qos.log_meter_aso_granularity &&\n+\t\t\tlog_obj_size <=\n+\t\t\tconfig->hca_attr.qos.log_meter_aso_max_alloc)\n+\t\t\t\tsh->meter_aso_en = 1;\n+\t\t}\n+\t\tif (priv->mtr_en) {\n+\t\t\terr = mlx5_aso_flow_mtrs_mng_init(priv->sh);\n+\t\t\tif (err) {\n+\t\t\t\terr = -err;\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t}\n+\t\tif (config->hca_attr.flow.tunnel_header_0_1)\n+\t\t\tsh->tunnel_header_0_1 = 1;\n+#endif\n+#ifdef HAVE_MLX5_DR_CREATE_ACTION_ASO\n+\t\tif (config->hca_attr.flow_hit_aso &&\n+\t\t    priv->mtr_color_reg == REG_C_3) {\n+\t\t\tsh->flow_hit_aso_en = 1;\n+\t\t\terr = mlx5_flow_aso_age_mng_init(sh);\n+\t\t\tif (err) {\n+\t\t\t\terr = -err;\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tDRV_LOG(DEBUG, \"Flow Hit ASO is supported.\");\n+\t\t}\n+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO */\n+#if 
defined(HAVE_MLX5_DR_CREATE_ACTION_ASO) && \\\n+\tdefined(HAVE_MLX5_DR_ACTION_ASO_CT)\n+\t\tif (config->hca_attr.ct_offload &&\n+\t\t    priv->mtr_color_reg == REG_C_3) {\n+\t\t\terr = mlx5_flow_aso_ct_mng_init(sh);\n+\t\t\tif (err) {\n+\t\t\t\terr = -err;\n+\t\t\t\tgoto error;\n+\t\t\t}\n+\t\t\tDRV_LOG(DEBUG, \"CT ASO is supported.\");\n+\t\t\tsh->ct_aso_en = 1;\n+\t\t}\n+#endif /* HAVE_MLX5_DR_CREATE_ACTION_ASO && HAVE_MLX5_DR_ACTION_ASO_CT */\n+#if defined(HAVE_MLX5DV_DR) && defined(HAVE_MLX5_DR_CREATE_ACTION_FLOW_SAMPLE)\n+\t\tif (config->hca_attr.log_max_ft_sampler_num > 0  &&\n+\t\t    config->dv_flow_en) {\n+\t\t\tpriv->sampler_en = 1;\n+\t\t\tDRV_LOG(DEBUG, \"Sampler enabled!\");\n+\t\t} else {\n+\t\t\tpriv->sampler_en = 0;\n+\t\t\tif (!config->hca_attr.log_max_ft_sampler_num)\n+\t\t\t\tDRV_LOG(WARNING,\n+\t\t\t\t\t\"No available register for sampler.\");\n+\t\t\telse\n+\t\t\t\tDRV_LOG(DEBUG, \"DV flow is not supported!\");\n+\t\t}\n+#endif\n+\t}\n+\tif (config->cqe_comp && RTE_CACHE_LINE_SIZE == 128 &&\n+\t    !(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP)) {\n+\t\tDRV_LOG(WARNING, \"Rx CQE 128B compression is not supported\");\n+\t\tconfig->cqe_comp = 0;\n+\t}\n+\tif (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX &&\n+\t    (!config->devx || !config->hca_attr.mini_cqe_resp_flow_tag)) {\n+\t\tDRV_LOG(WARNING, \"Flow Tag CQE compression\"\n+\t\t\t\t \" format isn't supported.\");\n+\t\tconfig->cqe_comp = 0;\n+\t}\n+\tif (config->cqe_comp_fmt == MLX5_CQE_RESP_FORMAT_L34H_STRIDX &&\n+\t    (!config->devx || !config->hca_attr.mini_cqe_resp_l3_l4_tag)) {\n+\t\tDRV_LOG(WARNING, \"L3/L4 Header CQE compression\"\n+\t\t\t\t \" format isn't supported.\");\n+\t\tconfig->cqe_comp = 0;\n+\t}\n+\tDRV_LOG(DEBUG, \"Rx CQE compression is %ssupported\",\n+\t\t\tconfig->cqe_comp ? 
\"\" : \"not \");\n+\tif (config->tx_pp) {\n+\t\tDRV_LOG(DEBUG, \"Timestamp counter frequency %u kHz\",\n+\t\t\tconfig->hca_attr.dev_freq_khz);\n+\t\tDRV_LOG(DEBUG, \"Packet pacing is %ssupported\",\n+\t\t\tconfig->hca_attr.qos.packet_pacing ? \"\" : \"not \");\n+\t\tDRV_LOG(DEBUG, \"Cross channel ops are %ssupported\",\n+\t\t\tconfig->hca_attr.cross_channel ? \"\" : \"not \");\n+\t\tDRV_LOG(DEBUG, \"WQE index ignore is %ssupported\",\n+\t\t\tconfig->hca_attr.wqe_index_ignore ? \"\" : \"not \");\n+\t\tDRV_LOG(DEBUG, \"Non-wire SQ feature is %ssupported\",\n+\t\t\tconfig->hca_attr.non_wire_sq ? \"\" : \"not \");\n+\t\tDRV_LOG(DEBUG, \"Static WQE SQ feature is %ssupported (%d)\",\n+\t\t\tconfig->hca_attr.log_max_static_sq_wq ? \"\" : \"not \",\n+\t\t\tconfig->hca_attr.log_max_static_sq_wq);\n+\t\tDRV_LOG(DEBUG, \"WQE rate PP mode is %ssupported\",\n+\t\t\tconfig->hca_attr.qos.wqe_rate_pp ? \"\" : \"not \");\n+\t\tif (!config->devx) {\n+\t\t\tDRV_LOG(ERR, \"DevX is required for packet pacing\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tif (!config->hca_attr.qos.packet_pacing) {\n+\t\t\tDRV_LOG(ERR, \"Packet pacing is not supported\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tif (!config->hca_attr.cross_channel) {\n+\t\t\tDRV_LOG(ERR, \"Cross channel operations are\"\n+\t\t\t\t     \" required for packet pacing\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tif (!config->hca_attr.wqe_index_ignore) {\n+\t\t\tDRV_LOG(ERR, \"WQE index ignore feature is\"\n+\t\t\t\t     \" required for packet pacing\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tif (!config->hca_attr.non_wire_sq) {\n+\t\t\tDRV_LOG(ERR, \"Non-wire SQ feature is\"\n+\t\t\t\t     \" required for packet pacing\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto error;\n+\t\t}\n+\t\tif (!config->hca_attr.log_max_static_sq_wq) {\n+\t\t\tDRV_LOG(ERR, \"Static WQE SQ feature is\"\n+\t\t\t\t     \" required for packet pacing\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto 
error;\n+\t\t}\n+\t\tif (!config->hca_attr.qos.wqe_rate_pp) {\n+\t\t\tDRV_LOG(ERR, \"WQE rate mode is required\"\n+\t\t\t\t     \" for packet pacing\");\n+\t\t\terr = ENODEV;\n+\t\t\tgoto error;\n+\t\t}\n+#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET\n+\t\tDRV_LOG(ERR, \"DevX does not provide UAR offset,\"\n+\t\t\t     \" can't create queues for packet pacing\");\n+\t\terr = ENODEV;\n+\t\tgoto error;\n+#endif\n+\t}\n+\tif (config->devx) {\n+\t\tuint32_t reg[MLX5_ST_SZ_DW(register_mtutc)];\n+\n+\t\terr = config->hca_attr.access_register_user ?\n+\t\t\tmlx5_devx_cmd_register_read\n+\t\t\t\t(sh->ctx, MLX5_REGISTER_ID_MTUTC, 0,\n+\t\t\t\treg, MLX5_ST_SZ_DW(register_mtutc)) : ENOTSUP;\n+\t\tif (!err) {\n+\t\t\tuint32_t ts_mode;\n+\n+\t\t\t/* MTUTC register is read successfully. */\n+\t\t\tts_mode = MLX5_GET(register_mtutc, reg,\n+\t\t\t\t\t   time_stamp_mode);\n+\t\t\tif (ts_mode == MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME)\n+\t\t\t\tconfig->rt_timestamp = 1;\n+\t\t} else {\n+\t\t\t/* Kernel does not support register reading. */\n+\t\t\tif (config->hca_attr.dev_freq_khz ==\n+\t\t\t\t\t\t (NS_PER_S / MS_PER_S))\n+\t\t\t\tconfig->rt_timestamp = 1;\n+\t\t}\n+\t}\n+\t/*\n+\t * If HW has bug working with tunnel packet decapsulation and\n+\t * scatter FCS, and decapsulation is needed, clear the hw_fcs_strip\n+\t * bit. Then DEV_RX_OFFLOAD_KEEP_CRC bit will not be set anymore.\n+\t */\n+\tif (config->hca_attr.scatter_fcs_w_decap_disable && config->decap_en)\n+\t\tconfig->hw_fcs_strip = 0;\n+\tDRV_LOG(DEBUG, \"FCS stripping configuration is %ssupported\",\n+\t\t(config->hw_fcs_strip ? 
\"\" : \"not \"));\n+\tif (config->mprq.enabled && mprq) {\n+\t\tif (config->mprq.stride_num_n &&\n+\t\t    (config->mprq.stride_num_n > mprq_max_stride_num_n ||\n+\t\t     config->mprq.stride_num_n < mprq_min_stride_num_n)) {\n+\t\t\tconfig->mprq.stride_num_n =\n+\t\t\t\tRTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_NUM_N,\n+\t\t\t\t\t\tmprq_min_stride_num_n),\n+\t\t\t\t\tmprq_max_stride_num_n);\n+\t\t\tDRV_LOG(WARNING,\n+\t\t\t\t\"the number of strides\"\n+\t\t\t\t\" for Multi-Packet RQ is out of range,\"\n+\t\t\t\t\" setting default value (%u)\",\n+\t\t\t\t1 << config->mprq.stride_num_n);\n+\t\t}\n+\t\tif (config->mprq.stride_size_n &&\n+\t\t    (config->mprq.stride_size_n > mprq_max_stride_size_n ||\n+\t\t     config->mprq.stride_size_n < mprq_min_stride_size_n)) {\n+\t\t\tconfig->mprq.stride_size_n =\n+\t\t\t\tRTE_MIN(RTE_MAX(MLX5_MPRQ_STRIDE_SIZE_N,\n+\t\t\t\t\t\tmprq_min_stride_size_n),\n+\t\t\t\t\tmprq_max_stride_size_n);\n+\t\t\tDRV_LOG(WARNING,\n+\t\t\t\t\"the size of a stride\"\n+\t\t\t\t\" for Multi-Packet RQ is out of range,\"\n+\t\t\t\t\" setting default value (%u)\",\n+\t\t\t\t1 << config->mprq.stride_size_n);\n+\t\t}\n+\t\tconfig->mprq.min_stride_size_n = mprq_min_stride_size_n;\n+\t\tconfig->mprq.max_stride_size_n = mprq_max_stride_size_n;\n+\t} else if (config->mprq.enabled && !mprq) {\n+\t\tDRV_LOG(WARNING, \"Multi-Packet RQ isn't supported\");\n+\t\tconfig->mprq.enabled = 0;\n+\t}\n+\tif (config->max_dump_files_num == 0)\n+\t\tconfig->max_dump_files_num = 128;\n+\teth_dev = rte_eth_dev_allocate(name);\n+\tif (eth_dev == NULL) {\n+\t\tDRV_LOG(ERR, \"can not allocate rte ethdev\");\n+\t\terr = ENOMEM;\n+\t\tgoto error;\n+\t}\n+\tif (priv->representor) {\n+\t\teth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;\n+\t\teth_dev->data->representor_id = priv->representor_id;\n+\t}\n+\tpriv->mp_id.port_id = eth_dev->data->port_id;\n+\tstrlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);\n+\t/*\n+\t * Store associated network device interface index. 
This index\n+\t * is permanent throughout the lifetime of device. So, we may store\n+\t * the ifindex here and use the cached value further.\n+\t */\n+\tMLX5_ASSERT(spawn->ifindex);\n+\tpriv->if_index = spawn->ifindex;\n+\teth_dev->data->dev_private = priv;\n+\tpriv->dev_data = eth_dev->data;\n+\teth_dev->data->mac_addrs = priv->mac;\n+\teth_dev->device = dpdk_dev;\n+\teth_dev->data->dev_flags |= RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS;\n+\t/* Configure the first MAC address by default. */\n+\tif (mlx5_get_mac(eth_dev, &mac.addr_bytes)) {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"port %u cannot get MAC address, is mlx5_en\"\n+\t\t\t\" loaded? (errno: %s)\",\n+\t\t\teth_dev->data->port_id, strerror(rte_errno));\n+\t\terr = ENODEV;\n+\t\tgoto error;\n+\t}\n+\tDRV_LOG(INFO,\n+\t\t\"port %u MAC address is \" RTE_ETHER_ADDR_PRT_FMT,\n+\t\teth_dev->data->port_id, RTE_ETHER_ADDR_BYTES(&mac));\n+#ifdef RTE_LIBRTE_MLX5_DEBUG\n+\t{\n+\t\tchar ifname[MLX5_NAMESIZE];\n+\n+\t\tif (mlx5_get_ifname(eth_dev, &ifname) == 0)\n+\t\t\tDRV_LOG(DEBUG, \"port %u ifname is \\\"%s\\\"\",\n+\t\t\t\teth_dev->data->port_id, ifname);\n+\t\telse\n+\t\t\tDRV_LOG(DEBUG, \"port %u ifname is unknown\",\n+\t\t\t\teth_dev->data->port_id);\n+\t}\n+#endif\n+\t/* Get actual MTU if possible. */\n+\terr = mlx5_get_mtu(eth_dev, &priv->mtu);\n+\tif (err) {\n+\t\terr = rte_errno;\n+\t\tgoto error;\n+\t}\n+\tDRV_LOG(DEBUG, \"port %u MTU is %u\", eth_dev->data->port_id,\n+\t\tpriv->mtu);\n+\t/* Initialize burst functions to prevent crashes before link-up. */\n+\teth_dev->rx_pkt_burst = removed_rx_burst;\n+\teth_dev->tx_pkt_burst = removed_tx_burst;\n+\teth_dev->dev_ops = &mlx5_dev_ops;\n+\teth_dev->rx_descriptor_status = mlx5_rx_descriptor_status;\n+\teth_dev->tx_descriptor_status = mlx5_tx_descriptor_status;\n+\teth_dev->rx_queue_count = mlx5_rx_queue_count;\n+\t/* Register MAC address. 
*/\n+\tclaim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));\n+\tif (config->vf && config->vf_nl_en)\n+\t\tmlx5_nl_mac_addr_sync(priv->nl_socket_route,\n+\t\t\t\t      mlx5_ifindex(eth_dev),\n+\t\t\t\t      eth_dev->data->mac_addrs,\n+\t\t\t\t      MLX5_MAX_MAC_ADDRESSES);\n+\tpriv->ctrl_flows = 0;\n+\trte_spinlock_init(&priv->flow_list_lock);\n+\tTAILQ_INIT(&priv->flow_meters);\n+\tpriv->mtr_profile_tbl = mlx5_l3t_create(MLX5_L3T_TYPE_PTR);\n+\tif (!priv->mtr_profile_tbl)\n+\t\tgoto error;\n+\t/* Hint libmlx5 to use PMD allocator for data plane resources */\n+\tmlx5_glue->dv_set_context_attr(sh->ctx,\n+\t\t\tMLX5DV_CTX_ATTR_BUF_ALLOCATORS,\n+\t\t\t(void *)((uintptr_t)&(struct mlx5dv_ctx_allocators){\n+\t\t\t\t.alloc = &mlx5_alloc_verbs_buf,\n+\t\t\t\t.free = &mlx5_free_verbs_buf,\n+\t\t\t\t.data = sh,\n+\t\t\t}));\n+\t/* Bring Ethernet device up. */\n+\tDRV_LOG(DEBUG, \"port %u forcing Ethernet interface up\",\n+\t\teth_dev->data->port_id);\n+\tmlx5_set_link_up(eth_dev);\n+\t/*\n+\t * Even though the interrupt handler is not installed yet,\n+\t * interrupts will still trigger on the async_fd from\n+\t * Verbs context returned by ibv_open_device().\n+\t */\n+\tmlx5_link_update(eth_dev, 0);\n+#ifdef HAVE_MLX5DV_DR_ESWITCH\n+\tif (!(config->hca_attr.eswitch_manager && config->dv_flow_en &&\n+\t      (switch_info->representor || switch_info->master)))\n+\t\tconfig->dv_esw_en = 0;\n+#else\n+\tconfig->dv_esw_en = 0;\n+#endif\n+\t/* Detect minimal data bytes to inline. */\n+\tmlx5_set_min_inline(spawn, config);\n+\t/* Store device configuration on private structure. */\n+\tpriv->config = *config;\n+\tfor (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {\n+\t\ticfg[i].release_mem_en = !!config->reclaim_mode;\n+\t\tif (config->reclaim_mode)\n+\t\t\ticfg[i].per_core_cache = 0;\n+\t\tpriv->flows[i] = mlx5_ipool_create(&icfg[i]);\n+\t\tif (!priv->flows[i])\n+\t\t\tgoto error;\n+\t}\n+\t/* Create context for virtual machine VLAN workaround. 
*/\n+\tpriv->vmwa_context = mlx5_vlan_vmwa_init(eth_dev, spawn->ifindex);\n+\tif (config->dv_flow_en) {\n+\t\terr = mlx5_alloc_shared_dr(priv);\n+\t\tif (err)\n+\t\t\tgoto error;\n+\t}\n+\tif (config->devx && config->dv_flow_en && config->dest_tir) {\n+\t\tpriv->obj_ops = devx_obj_ops;\n+\t\tpriv->obj_ops.drop_action_create =\n+\t\t\t\t\t\tibv_obj_ops.drop_action_create;\n+\t\tpriv->obj_ops.drop_action_destroy =\n+\t\t\t\t\t\tibv_obj_ops.drop_action_destroy;\n+#ifndef HAVE_MLX5DV_DEVX_UAR_OFFSET\n+\t\tpriv->obj_ops.txq_obj_modify = ibv_obj_ops.txq_obj_modify;\n+#else\n+\t\tif (config->dv_esw_en)\n+\t\t\tpriv->obj_ops.txq_obj_modify =\n+\t\t\t\t\t\tibv_obj_ops.txq_obj_modify;\n+#endif\n+\t\t/* Use specific wrappers for Tx object. */\n+\t\tpriv->obj_ops.txq_obj_new = mlx5_os_txq_obj_new;\n+\t\tpriv->obj_ops.txq_obj_release = mlx5_os_txq_obj_release;\n+\t\tmlx5_queue_counter_id_prepare(eth_dev);\n+\t\tpriv->obj_ops.lb_dummy_queue_create =\n+\t\t\t\t\tmlx5_rxq_ibv_obj_dummy_lb_create;\n+\t\tpriv->obj_ops.lb_dummy_queue_release =\n+\t\t\t\t\tmlx5_rxq_ibv_obj_dummy_lb_release;\n+\t} else {\n+\t\tpriv->obj_ops = ibv_obj_ops;\n+\t}\n+\tif (config->tx_pp &&\n+\t    (priv->config.dv_esw_en ||\n+\t     priv->obj_ops.txq_obj_new != mlx5_os_txq_obj_new)) {\n+\t\t/*\n+\t\t * HAVE_MLX5DV_DEVX_UAR_OFFSET is required to support\n+\t\t * packet pacing and already checked above.\n+\t\t * Hence, we should only make sure the SQs will be created\n+\t\t * with DevX, not with Verbs.\n+\t\t * Verbs allocates the SQ UAR on its own and it can't be shared\n+\t\t * with Clock Queue UAR as required for Tx scheduling.\n+\t\t */\n+\t\tDRV_LOG(ERR, \"Verbs SQs, UAR can't be shared as required for packet pacing\");\n+\t\terr = ENODEV;\n+\t\tgoto error;\n+\t}\n+\tpriv->drop_queue.hrxq = mlx5_drop_action_create(eth_dev);\n+\tif (!priv->drop_queue.hrxq)\n+\t\tgoto error;\n+\t/* Supported Verbs flow priority number detection. 
*/\n+\terr = mlx5_flow_discover_priorities(eth_dev);\n+\tif (err < 0) {\n+\t\terr = -err;\n+\t\tgoto error;\n+\t}\n+\tpriv->config.flow_prio = err;\n+\tif (!priv->config.dv_esw_en &&\n+\t    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {\n+\t\tDRV_LOG(WARNING, \"metadata mode %u is not supported \"\n+\t\t\t\t \"(no E-Switch)\", priv->config.dv_xmeta_en);\n+\t\tpriv->config.dv_xmeta_en = MLX5_XMETA_MODE_LEGACY;\n+\t}\n+\tmlx5_set_metadata_mask(eth_dev);\n+\tif (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&\n+\t    !priv->sh->dv_regc0_mask) {\n+\t\tDRV_LOG(ERR, \"metadata mode %u is not supported \"\n+\t\t\t     \"(no metadata reg_c[0] is available)\",\n+\t\t\t     priv->config.dv_xmeta_en);\n+\t\t\terr = ENOTSUP;\n+\t\t\tgoto error;\n+\t}\n+\tpriv->hrxqs = mlx5_list_create(\"hrxq\", eth_dev, true,\n+\t\t\t\t       mlx5_hrxq_create_cb,\n+\t\t\t\t       mlx5_hrxq_match_cb,\n+\t\t\t\t       mlx5_hrxq_remove_cb,\n+\t\t\t\t       mlx5_hrxq_clone_cb,\n+\t\t\t\t       mlx5_hrxq_clone_free_cb);\n+\tif (!priv->hrxqs)\n+\t\tgoto error;\n+\trte_rwlock_init(&priv->ind_tbls_lock);\n+\t/* Query availability of metadata reg_c's. 
*/\n+\terr = mlx5_flow_discover_mreg_c(eth_dev);\n+\tif (err < 0) {\n+\t\terr = -err;\n+\t\tgoto error;\n+\t}\n+\tif (!mlx5_flow_ext_mreg_supported(eth_dev)) {\n+\t\tDRV_LOG(DEBUG,\n+\t\t\t\"port %u extensive metadata register is not supported\",\n+\t\t\teth_dev->data->port_id);\n+\t\tif (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY) {\n+\t\t\tDRV_LOG(ERR, \"metadata mode %u is not supported \"\n+\t\t\t\t     \"(no metadata registers available)\",\n+\t\t\t\t     priv->config.dv_xmeta_en);\n+\t\t\terr = ENOTSUP;\n+\t\t\tgoto error;\n+\t\t}\n+\t}\n+\tif (priv->config.dv_flow_en &&\n+\t    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&\n+\t    mlx5_flow_ext_mreg_supported(eth_dev) &&\n+\t    priv->sh->dv_regc0_mask) {\n+\t\tpriv->mreg_cp_tbl = mlx5_hlist_create(MLX5_FLOW_MREG_HNAME,\n+\t\t\t\t\t\t      MLX5_FLOW_MREG_HTABLE_SZ,\n+\t\t\t\t\t\t      false, true, eth_dev,\n+\t\t\t\t\t\t      flow_dv_mreg_create_cb,\n+\t\t\t\t\t\t      flow_dv_mreg_match_cb,\n+\t\t\t\t\t\t      flow_dv_mreg_remove_cb,\n+\t\t\t\t\t\t      flow_dv_mreg_clone_cb,\n+\t\t\t\t\t\t    flow_dv_mreg_clone_free_cb);\n+\t\tif (!priv->mreg_cp_tbl) {\n+\t\t\terr = ENOMEM;\n+\t\t\tgoto error;\n+\t\t}\n+\t}\n+\trte_spinlock_init(&priv->shared_act_sl);\n+\tmlx5_flow_counter_mode_config(eth_dev);\n+\tmlx5_flow_drop_action_config(eth_dev);\n+\tif (priv->config.dv_flow_en)\n+\t\teth_dev->data->dev_flags |= RTE_ETH_DEV_FLOW_OPS_THREAD_SAFE;\n+\treturn eth_dev;\n+error:\n+\tif (priv) {\n+\t\tif (priv->mreg_cp_tbl)\n+\t\t\tmlx5_hlist_destroy(priv->mreg_cp_tbl);\n+\t\tif (priv->sh)\n+\t\t\tmlx5_os_free_shared_dr(priv);\n+\t\tif (priv->nl_socket_route >= 0)\n+\t\t\tclose(priv->nl_socket_route);\n+\t\tif (priv->nl_socket_rdma >= 0)\n+\t\t\tclose(priv->nl_socket_rdma);\n+\t\tif (priv->vmwa_context)\n+\t\t\tmlx5_vlan_vmwa_exit(priv->vmwa_context);\n+\t\tif (eth_dev && priv->drop_queue.hrxq)\n+\t\t\tmlx5_drop_action_destroy(eth_dev);\n+\t\tif 
(priv->mtr_profile_tbl)\n+\t\t\tmlx5_l3t_destroy(priv->mtr_profile_tbl);\n+\t\tif (own_domain_id)\n+\t\t\tclaim_zero(rte_eth_switch_domain_free(priv->domain_id));\n+\t\tif (priv->hrxqs)\n+\t\t\tmlx5_list_destroy(priv->hrxqs);\n+\t\tmlx5_free(priv);\n+\t\tif (eth_dev != NULL)\n+\t\t\teth_dev->data->dev_private = NULL;\n+\t}\n+\tif (eth_dev != NULL) {\n+\t\t/* mac_addrs must not be freed alone because part of\n+\t\t * dev_private\n+\t\t **/\n+\t\teth_dev->data->mac_addrs = NULL;\n+\t\trte_eth_dev_release_port(eth_dev);\n+\t}\n+\tif (sh)\n+\t\tmlx5_free_shared_dev_ctx(sh);\n+\tMLX5_ASSERT(err > 0);\n+\trte_errno = err;\n+\treturn NULL;\n+}\n+\n+/**\n+ * Comparison callback to sort device data.\n+ *\n+ * This is meant to be used with qsort().\n+ *\n+ * @param a[in]\n+ *   Pointer to pointer to first data object.\n+ * @param b[in]\n+ *   Pointer to pointer to second data object.\n+ *\n+ * @return\n+ *   0 if both objects are equal, less than 0 if the first argument is less\n+ *   than the second, greater than 0 otherwise.\n+ */\n+static int\n+mlx5_dev_spawn_data_cmp(const void *a, const void *b)\n+{\n+\tconst struct mlx5_switch_info *si_a =\n+\t\t&((const struct mlx5_dev_spawn_data *)a)->info;\n+\tconst struct mlx5_switch_info *si_b =\n+\t\t&((const struct mlx5_dev_spawn_data *)b)->info;\n+\tint ret;\n+\n+\t/* Master device first. */\n+\tret = si_b->master - si_a->master;\n+\tif (ret)\n+\t\treturn ret;\n+\t/* Then representor devices. */\n+\tret = si_b->representor - si_a->representor;\n+\tif (ret)\n+\t\treturn ret;\n+\t/* Unidentified devices come last in no specific order. */\n+\tif (!si_a->representor)\n+\t\treturn 0;\n+\t/* Order representors by name. 
*/\n+\treturn si_a->port_name - si_b->port_name;\n+}\n+\n+/**\n+ * Match PCI information for possible slaves of bonding device.\n+ *\n+ * @param[in] ibv_dev\n+ *   Pointer to Infiniband device structure.\n+ * @param[in] pci_dev\n+ *   Pointer to primary PCI address structure to match.\n+ * @param[in] nl_rdma\n+ *   Netlink RDMA group socket handle.\n+ * @param[in] owner\n+ *   Rerepsentor owner PF index.\n+ * @param[out] bond_info\n+ *   Pointer to bonding information.\n+ *\n+ * @return\n+ *   negative value if no bonding device found, otherwise\n+ *   positive index of slave PF in bonding.\n+ */\n+static int\n+mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,\n+\t\t\t   const struct rte_pci_addr *pci_dev,\n+\t\t\t   int nl_rdma, uint16_t owner,\n+\t\t\t   struct mlx5_bond_info *bond_info)\n+{\n+\tchar ifname[IF_NAMESIZE + 1];\n+\tunsigned int ifindex;\n+\tunsigned int np, i;\n+\tFILE *bond_file = NULL, *file;\n+\tint pf = -1;\n+\tint ret;\n+\n+\t/*\n+\t * Try to get master device name. If something goes\n+\t * wrong suppose the lack of kernel support and no\n+\t * bonding devices.\n+\t */\n+\tmemset(bond_info, 0, sizeof(*bond_info));\n+\tif (nl_rdma < 0)\n+\t\treturn -1;\n+\tif (!strstr(ibv_dev->name, \"bond\"))\n+\t\treturn -1;\n+\tnp = mlx5_nl_portnum(nl_rdma, ibv_dev->name);\n+\tif (!np)\n+\t\treturn -1;\n+\t/*\n+\t * The Master device might not be on the predefined\n+\t * port (not on port index 1, it is not garanted),\n+\t * we have to scan all Infiniband device port and\n+\t * find master.\n+\t */\n+\tfor (i = 1; i <= np; ++i) {\n+\t\t/* Check whether Infiniband port is populated. */\n+\t\tifindex = mlx5_nl_ifindex(nl_rdma, ibv_dev->name, i);\n+\t\tif (!ifindex)\n+\t\t\tcontinue;\n+\t\tif (!if_indextoname(ifindex, ifname))\n+\t\t\tcontinue;\n+\t\t/* Try to read bonding slave names from sysfs. 
*/\n+\t\tMKSTR(slaves,\n+\t\t      \"/sys/class/net/%s/master/bonding/slaves\", ifname);\n+\t\tbond_file = fopen(slaves, \"r\");\n+\t\tif (bond_file)\n+\t\t\tbreak;\n+\t}\n+\tif (!bond_file)\n+\t\treturn -1;\n+\t/* Use safe format to check maximal buffer length. */\n+\tMLX5_ASSERT(atol(RTE_STR(IF_NAMESIZE)) == IF_NAMESIZE);\n+\twhile (fscanf(bond_file, \"%\" RTE_STR(IF_NAMESIZE) \"s\", ifname) == 1) {\n+\t\tchar tmp_str[IF_NAMESIZE + 32];\n+\t\tstruct rte_pci_addr pci_addr;\n+\t\tstruct mlx5_switch_info\tinfo;\n+\n+\t\t/* Process slave interface names in the loop. */\n+\t\tsnprintf(tmp_str, sizeof(tmp_str),\n+\t\t\t \"/sys/class/net/%s\", ifname);\n+\t\tif (mlx5_get_pci_addr(tmp_str, &pci_addr)) {\n+\t\t\tDRV_LOG(WARNING, \"can not get PCI address\"\n+\t\t\t\t\t \" for netdev \\\"%s\\\"\", ifname);\n+\t\t\tcontinue;\n+\t\t}\n+\t\t/* Slave interface PCI address match found. */\n+\t\tsnprintf(tmp_str, sizeof(tmp_str),\n+\t\t\t \"/sys/class/net/%s/phys_port_name\", ifname);\n+\t\tfile = fopen(tmp_str, \"rb\");\n+\t\tif (!file)\n+\t\t\tbreak;\n+\t\tinfo.name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET;\n+\t\tif (fscanf(file, \"%32s\", tmp_str) == 1)\n+\t\t\tmlx5_translate_port_name(tmp_str, &info);\n+\t\tfclose(file);\n+\t\t/* Only process PF ports. */\n+\t\tif (info.name_type != MLX5_PHYS_PORT_NAME_TYPE_LEGACY &&\n+\t\t    info.name_type != MLX5_PHYS_PORT_NAME_TYPE_UPLINK)\n+\t\t\tcontinue;\n+\t\t/* Check max bonding member. */\n+\t\tif (info.port_name >= MLX5_BOND_MAX_PORTS) {\n+\t\t\tDRV_LOG(WARNING, \"bonding index out of range, \"\n+\t\t\t\t\"please increase MLX5_BOND_MAX_PORTS: %s\",\n+\t\t\t\ttmp_str);\n+\t\t\tbreak;\n+\t\t}\n+\t\t/* Match PCI address, allows BDF0+pfx or BDFx+pfx. 
*/\n+\t\tif (pci_dev->domain == pci_addr.domain &&\n+\t\t    pci_dev->bus == pci_addr.bus &&\n+\t\t    pci_dev->devid == pci_addr.devid &&\n+\t\t    ((pci_dev->function == 0 &&\n+\t\t      pci_dev->function + owner == pci_addr.function) ||\n+\t\t     (pci_dev->function == owner &&\n+\t\t      pci_addr.function == owner)))\n+\t\t\tpf = info.port_name;\n+\t\t/* Get ifindex. */\n+\t\tsnprintf(tmp_str, sizeof(tmp_str),\n+\t\t\t \"/sys/class/net/%s/ifindex\", ifname);\n+\t\tfile = fopen(tmp_str, \"rb\");\n+\t\tif (!file)\n+\t\t\tbreak;\n+\t\tret = fscanf(file, \"%u\", &ifindex);\n+\t\tfclose(file);\n+\t\tif (ret != 1)\n+\t\t\tbreak;\n+\t\t/* Save bonding info. */\n+\t\tstrncpy(bond_info->ports[info.port_name].ifname, ifname,\n+\t\t\tsizeof(bond_info->ports[0].ifname));\n+\t\tbond_info->ports[info.port_name].pci_addr = pci_addr;\n+\t\tbond_info->ports[info.port_name].ifindex = ifindex;\n+\t\tbond_info->n_port++;\n+\t}\n+\tif (pf >= 0) {\n+\t\t/* Get bond interface info */\n+\t\tret = mlx5_sysfs_bond_info(ifindex, &bond_info->ifindex,\n+\t\t\t\t\t   bond_info->ifname);\n+\t\tif (ret)\n+\t\t\tDRV_LOG(ERR, \"unable to get bond info: %s\",\n+\t\t\t\tstrerror(rte_errno));\n+\t\telse\n+\t\t\tDRV_LOG(INFO, \"PF device %u, bond device %u(%s)\",\n+\t\t\t\tifindex, bond_info->ifindex, bond_info->ifname);\n+\t}\n+\treturn pf;\n+}\n+\n+static void\n+mlx5_os_config_default(struct mlx5_dev_config *config)\n+{\n+\tmemset(config, 0, sizeof(*config));\n+\tconfig->mps = MLX5_ARG_UNSET;\n+\tconfig->dbnc = MLX5_ARG_UNSET;\n+\tconfig->rx_vec_en = 1;\n+\tconfig->txq_inline_max = MLX5_ARG_UNSET;\n+\tconfig->txq_inline_min = MLX5_ARG_UNSET;\n+\tconfig->txq_inline_mpw = MLX5_ARG_UNSET;\n+\tconfig->txqs_inline = MLX5_ARG_UNSET;\n+\tconfig->vf_nl_en = 1;\n+\tconfig->mr_ext_memseg_en = 1;\n+\tconfig->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;\n+\tconfig->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;\n+\tconfig->dv_esw_en = 1;\n+\tconfig->dv_flow_en = 1;\n+\tconfig->decap_en = 
1;\n+\tconfig->log_hp_size = MLX5_ARG_UNSET;\n+\tconfig->allow_duplicate_pattern = 1;\n+}\n+\n+/**\n+ * Register a PCI device within bonding.\n+ *\n+ * This function spawns Ethernet devices out of a given PCI device and\n+ * bonding owner PF index.\n+ *\n+ * @param[in] pci_dev\n+ *   PCI device information.\n+ * @param[in] req_eth_da\n+ *   Requested ethdev device argument.\n+ * @param[in] owner_id\n+ *   Requested owner PF port ID within bonding device, default to 0.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,\n+\t\t     struct rte_eth_devargs *req_eth_da,\n+\t\t     uint16_t owner_id)\n+{\n+\tstruct ibv_device **ibv_list;\n+\t/*\n+\t * Number of found IB Devices matching with requested PCI BDF.\n+\t * nd != 1 means there are multiple IB devices over the same\n+\t * PCI device and we have representors and master.\n+\t */\n+\tunsigned int nd = 0;\n+\t/*\n+\t * Number of found IB device Ports. nd = 1 and np = 1..n means\n+\t * we have the single multiport IB device, and there may be\n+\t * representors attached to some of found ports.\n+\t */\n+\tunsigned int np = 0;\n+\t/*\n+\t * Number of DPDK ethernet devices to Spawn - either over\n+\t * multiple IB devices or multiple ports of single IB device.\n+\t * Actually this is the number of iterations to spawn.\n+\t */\n+\tunsigned int ns = 0;\n+\t/*\n+\t * Bonding device\n+\t *   < 0 - no bonding device (single one)\n+\t *  >= 0 - bonding device (value is slave PF index)\n+\t */\n+\tint bd = -1;\n+\tstruct mlx5_dev_spawn_data *list = NULL;\n+\tstruct mlx5_dev_config dev_config;\n+\tunsigned int dev_config_vf;\n+\tstruct rte_eth_devargs eth_da = *req_eth_da;\n+\tstruct rte_pci_addr owner_pci = pci_dev->addr; /* Owner PF. */\n+\tstruct mlx5_bond_info bond_info;\n+\tint ret = -1;\n+\n+\terrno = 0;\n+\tibv_list = mlx5_glue->get_device_list(&ret);\n+\tif (!ibv_list) {\n+\t\trte_errno = errno ? 
errno : ENOSYS;\n+\t\tDRV_LOG(ERR, \"cannot list devices, is ib_uverbs loaded?\");\n+\t\treturn -rte_errno;\n+\t}\n+\t/*\n+\t * First scan the list of all Infiniband devices to find\n+\t * matching ones, gathering into the list.\n+\t */\n+\tstruct ibv_device *ibv_match[ret + 1];\n+\tint nl_route = mlx5_nl_init(NETLINK_ROUTE);\n+\tint nl_rdma = mlx5_nl_init(NETLINK_RDMA);\n+\tunsigned int i;\n+\n+\twhile (ret-- > 0) {\n+\t\tstruct rte_pci_addr pci_addr;\n+\n+\t\tDRV_LOG(DEBUG, \"checking device \\\"%s\\\"\", ibv_list[ret]->name);\n+\t\tbd = mlx5_device_bond_pci_match\n+\t\t\t\t(ibv_list[ret], &owner_pci, nl_rdma, owner_id,\n+\t\t\t\t &bond_info);\n+\t\tif (bd >= 0) {\n+\t\t\t/*\n+\t\t\t * Bonding device detected. Only one match is allowed,\n+\t\t\t * the bonding is supported over multi-port IB device,\n+\t\t\t * there should be no matches on representor PCI\n+\t\t\t * functions or non VF LAG bonding devices with\n+\t\t\t * specified address.\n+\t\t\t */\n+\t\t\tif (nd) {\n+\t\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\t\"multiple PCI match on bonding device\"\n+\t\t\t\t\t\"\\\"%s\\\" found\", ibv_list[ret]->name);\n+\t\t\t\trte_errno = ENOENT;\n+\t\t\t\tret = -rte_errno;\n+\t\t\t\tgoto exit;\n+\t\t\t}\n+\t\t\t/* Amend owner pci address if owner PF ID specified. */\n+\t\t\tif (eth_da.nb_representor_ports)\n+\t\t\t\towner_pci.function += owner_id;\n+\t\t\tDRV_LOG(INFO, \"PCI information matches for\"\n+\t\t\t\t      \" slave %d bonding device \\\"%s\\\"\",\n+\t\t\t\t      bd, ibv_list[ret]->name);\n+\t\t\tibv_match[nd++] = ibv_list[ret];\n+\t\t\tbreak;\n+\t\t} else {\n+\t\t\t/* Bonding device not found. 
*/\n+\t\t\tif (mlx5_get_pci_addr(ibv_list[ret]->ibdev_path,\n+\t\t\t\t\t      &pci_addr))\n+\t\t\t\tcontinue;\n+\t\t\tif (owner_pci.domain != pci_addr.domain ||\n+\t\t\t    owner_pci.bus != pci_addr.bus ||\n+\t\t\t    owner_pci.devid != pci_addr.devid ||\n+\t\t\t    owner_pci.function != pci_addr.function)\n+\t\t\t\tcontinue;\n+\t\t\tDRV_LOG(INFO, \"PCI information matches for device \\\"%s\\\"\",\n+\t\t\t\tibv_list[ret]->name);\n+\t\t\tibv_match[nd++] = ibv_list[ret];\n+\t\t}\n+\t}\n+\tibv_match[nd] = NULL;\n+\tif (!nd) {\n+\t\t/* No device matches, just complain and bail out. */\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"no Verbs device matches PCI device \" PCI_PRI_FMT \",\"\n+\t\t\t\" are kernel drivers loaded?\",\n+\t\t\towner_pci.domain, owner_pci.bus,\n+\t\t\towner_pci.devid, owner_pci.function);\n+\t\trte_errno = ENOENT;\n+\t\tret = -rte_errno;\n+\t\tgoto exit;\n+\t}\n+\tif (nd == 1) {\n+\t\t/*\n+\t\t * Found single matching device may have multiple ports.\n+\t\t * Each port may be representor, we have to check the port\n+\t\t * number and check the representors existence.\n+\t\t */\n+\t\tif (nl_rdma >= 0)\n+\t\t\tnp = mlx5_nl_portnum(nl_rdma, ibv_match[0]->name);\n+\t\tif (!np)\n+\t\t\tDRV_LOG(WARNING, \"can not get IB device \\\"%s\\\"\"\n+\t\t\t\t\t \" ports number\", ibv_match[0]->name);\n+\t\tif (bd >= 0 && !np) {\n+\t\t\tDRV_LOG(ERR, \"can not get ports\"\n+\t\t\t\t     \" for bonding device\");\n+\t\t\trte_errno = ENOENT;\n+\t\t\tret = -rte_errno;\n+\t\t\tgoto exit;\n+\t\t}\n+\t}\n+\t/*\n+\t * Now we can determine the maximal\n+\t * amount of devices to be spawned.\n+\t */\n+\tlist = mlx5_malloc(MLX5_MEM_ZERO,\n+\t\t\t   sizeof(struct mlx5_dev_spawn_data) *\n+\t\t\t   (np ? 
np : nd),\n+\t\t\t   RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);\n+\tif (!list) {\n+\t\tDRV_LOG(ERR, \"spawn data array allocation failure\");\n+\t\trte_errno = ENOMEM;\n+\t\tret = -rte_errno;\n+\t\tgoto exit;\n+\t}\n+\tif (bd >= 0 || np > 1) {\n+\t\t/*\n+\t\t * Single IB device with multiple ports found,\n+\t\t * it may be E-Switch master device and representors.\n+\t\t * We have to perform identification through the ports.\n+\t\t */\n+\t\tMLX5_ASSERT(nl_rdma >= 0);\n+\t\tMLX5_ASSERT(ns == 0);\n+\t\tMLX5_ASSERT(nd == 1);\n+\t\tMLX5_ASSERT(np);\n+\t\tfor (i = 1; i <= np; ++i) {\n+\t\t\tlist[ns].bond_info = &bond_info;\n+\t\t\tlist[ns].max_port = np;\n+\t\t\tlist[ns].phys_port = i;\n+\t\t\tlist[ns].phys_dev = ibv_match[0];\n+\t\t\tlist[ns].eth_dev = NULL;\n+\t\t\tlist[ns].pci_dev = pci_dev;\n+\t\t\tlist[ns].pf_bond = bd;\n+\t\t\tlist[ns].ifindex = mlx5_nl_ifindex\n+\t\t\t\t(nl_rdma,\n+\t\t\t\tmlx5_os_get_dev_device_name\n+\t\t\t\t\t\t(list[ns].phys_dev), i);\n+\t\t\tif (!list[ns].ifindex) {\n+\t\t\t\t/*\n+\t\t\t\t * No network interface index found for the\n+\t\t\t\t * specified port, it means there is no\n+\t\t\t\t * representor on this port. 
It's OK,\n+\t\t\t\t * there can be disabled ports, for example\n+\t\t\t\t * if sriov_numvfs < sriov_totalvfs.\n+\t\t\t\t */\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\tret = -1;\n+\t\t\tif (nl_route >= 0)\n+\t\t\t\tret = mlx5_nl_switch_info\n+\t\t\t\t\t       (nl_route,\n+\t\t\t\t\t\tlist[ns].ifindex,\n+\t\t\t\t\t\t&list[ns].info);\n+\t\t\tif (ret || (!list[ns].info.representor &&\n+\t\t\t\t    !list[ns].info.master)) {\n+\t\t\t\t/*\n+\t\t\t\t * We failed to recognize representors with\n+\t\t\t\t * Netlink, let's try to perform the task\n+\t\t\t\t * with sysfs.\n+\t\t\t\t */\n+\t\t\t\tret =  mlx5_sysfs_switch_info\n+\t\t\t\t\t\t(list[ns].ifindex,\n+\t\t\t\t\t\t &list[ns].info);\n+\t\t\t}\n+\t\t\tif (!ret && bd >= 0) {\n+\t\t\t\tswitch (list[ns].info.name_type) {\n+\t\t\t\tcase MLX5_PHYS_PORT_NAME_TYPE_UPLINK:\n+\t\t\t\t\tif (np == 1) {\n+\t\t\t\t\t\t/*\n+\t\t\t\t\t\t * Force standalone bonding\n+\t\t\t\t\t\t * device for ROCE LAG\n+\t\t\t\t\t\t * confgiurations.\n+\t\t\t\t\t\t */\n+\t\t\t\t\t\tlist[ns].info.master = 0;\n+\t\t\t\t\t\tlist[ns].info.representor = 0;\n+\t\t\t\t\t}\n+\t\t\t\t\tif (list[ns].info.port_name == bd)\n+\t\t\t\t\t\tns++;\n+\t\t\t\t\tbreak;\n+\t\t\t\tcase MLX5_PHYS_PORT_NAME_TYPE_PFHPF:\n+\t\t\t\t\t/* Fallthrough */\n+\t\t\t\tcase MLX5_PHYS_PORT_NAME_TYPE_PFVF:\n+\t\t\t\t\t/* Fallthrough */\n+\t\t\t\tcase MLX5_PHYS_PORT_NAME_TYPE_PFSF:\n+\t\t\t\t\tif (list[ns].info.pf_num == bd)\n+\t\t\t\t\t\tns++;\n+\t\t\t\t\tbreak;\n+\t\t\t\tdefault:\n+\t\t\t\t\tbreak;\n+\t\t\t\t}\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\tif (!ret && (list[ns].info.representor ^\n+\t\t\t\t     list[ns].info.master))\n+\t\t\t\tns++;\n+\t\t}\n+\t\tif (!ns) {\n+\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\"unable to recognize master/representors\"\n+\t\t\t\t\" on the IB device with multiple ports\");\n+\t\t\trte_errno = ENOENT;\n+\t\t\tret = -rte_errno;\n+\t\t\tgoto exit;\n+\t\t}\n+\t} else {\n+\t\t/*\n+\t\t * The existence of several matching entries (nd > 1) means\n+\t\t * port representors have been 
instantiated. No existing Verbs\n+\t\t * call nor sysfs entries can tell them apart, this can only\n+\t\t * be done through Netlink calls assuming kernel drivers are\n+\t\t * recent enough to support them.\n+\t\t *\n+\t\t * In the event of identification failure through Netlink,\n+\t\t * try again through sysfs, then:\n+\t\t *\n+\t\t * 1. A single IB device matches (nd == 1) with single\n+\t\t *    port (np=0/1) and is not a representor, assume\n+\t\t *    no switch support.\n+\t\t *\n+\t\t * 2. Otherwise no safe assumptions can be made;\n+\t\t *    complain louder and bail out.\n+\t\t */\n+\t\tfor (i = 0; i != nd; ++i) {\n+\t\t\tmemset(&list[ns].info, 0, sizeof(list[ns].info));\n+\t\t\tlist[ns].bond_info = NULL;\n+\t\t\tlist[ns].max_port = 1;\n+\t\t\tlist[ns].phys_port = 1;\n+\t\t\tlist[ns].phys_dev = ibv_match[i];\n+\t\t\tlist[ns].eth_dev = NULL;\n+\t\t\tlist[ns].pci_dev = pci_dev;\n+\t\t\tlist[ns].pf_bond = -1;\n+\t\t\tlist[ns].ifindex = 0;\n+\t\t\tif (nl_rdma >= 0)\n+\t\t\t\tlist[ns].ifindex = mlx5_nl_ifindex\n+\t\t\t\t(nl_rdma,\n+\t\t\t\tmlx5_os_get_dev_device_name\n+\t\t\t\t\t\t(list[ns].phys_dev), 1);\n+\t\t\tif (!list[ns].ifindex) {\n+\t\t\t\tchar ifname[IF_NAMESIZE];\n+\n+\t\t\t\t/*\n+\t\t\t\t * Netlink failed, it may happen with old\n+\t\t\t\t * ib_core kernel driver (before 4.16).\n+\t\t\t\t * We can assume there is old driver because\n+\t\t\t\t * here we are processing single ports IB\n+\t\t\t\t * devices. Let's try sysfs to retrieve\n+\t\t\t\t * the ifindex. 
The method works for\n+\t\t\t\t * master device only.\n+\t\t\t\t */\n+\t\t\t\tif (nd > 1) {\n+\t\t\t\t\t/*\n+\t\t\t\t\t * Multiple devices found, assume\n+\t\t\t\t\t * representors, can not distinguish\n+\t\t\t\t\t * master/representor and retrieve\n+\t\t\t\t\t * ifindex via sysfs.\n+\t\t\t\t\t */\n+\t\t\t\t\tcontinue;\n+\t\t\t\t}\n+\t\t\t\tret = mlx5_get_ifname_sysfs\n+\t\t\t\t\t(ibv_match[i]->ibdev_path, ifname);\n+\t\t\t\tif (!ret)\n+\t\t\t\t\tlist[ns].ifindex =\n+\t\t\t\t\t\tif_nametoindex(ifname);\n+\t\t\t\tif (!list[ns].ifindex) {\n+\t\t\t\t\t/*\n+\t\t\t\t\t * No network interface index found\n+\t\t\t\t\t * for the specified device, it means\n+\t\t\t\t\t * there it is neither representor\n+\t\t\t\t\t * nor master.\n+\t\t\t\t\t */\n+\t\t\t\t\tcontinue;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tret = -1;\n+\t\t\tif (nl_route >= 0)\n+\t\t\t\tret = mlx5_nl_switch_info\n+\t\t\t\t\t       (nl_route,\n+\t\t\t\t\t\tlist[ns].ifindex,\n+\t\t\t\t\t\t&list[ns].info);\n+\t\t\tif (ret || (!list[ns].info.representor &&\n+\t\t\t\t    !list[ns].info.master)) {\n+\t\t\t\t/*\n+\t\t\t\t * We failed to recognize representors with\n+\t\t\t\t * Netlink, let's try to perform the task\n+\t\t\t\t * with sysfs.\n+\t\t\t\t */\n+\t\t\t\tret =  mlx5_sysfs_switch_info\n+\t\t\t\t\t\t(list[ns].ifindex,\n+\t\t\t\t\t\t &list[ns].info);\n+\t\t\t}\n+\t\t\tif (!ret && (list[ns].info.representor ^\n+\t\t\t\t     list[ns].info.master)) {\n+\t\t\t\tns++;\n+\t\t\t} else if ((nd == 1) &&\n+\t\t\t\t   !list[ns].info.representor &&\n+\t\t\t\t   !list[ns].info.master) {\n+\t\t\t\t/*\n+\t\t\t\t * Single IB device with\n+\t\t\t\t * one physical port and\n+\t\t\t\t * attached network device.\n+\t\t\t\t * May be SRIOV is not enabled\n+\t\t\t\t * or there is no representors.\n+\t\t\t\t */\n+\t\t\t\tDRV_LOG(INFO, \"no E-Switch support detected\");\n+\t\t\t\tns++;\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t}\n+\t\tif (!ns) {\n+\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\"unable to recognize master/representors\"\n+\t\t\t\t\" on the multiple IB 
devices\");\n+\t\t\trte_errno = ENOENT;\n+\t\t\tret = -rte_errno;\n+\t\t\tgoto exit;\n+\t\t}\n+\t\t/*\n+\t\t * New kernels may add the switch_id attribute for the case\n+\t\t * there is no E-Switch and we wrongly recognized the\n+\t\t * only device as master. Override this if there is the\n+\t\t * single device with single port and new device name\n+\t\t * format present.\n+\t\t */\n+\t\tif (nd == 1 &&\n+\t\t    list[0].info.name_type == MLX5_PHYS_PORT_NAME_TYPE_UPLINK) {\n+\t\t\tlist[0].info.master = 0;\n+\t\t\tlist[0].info.representor = 0;\n+\t\t}\n+\t}\n+\tMLX5_ASSERT(ns);\n+\t/*\n+\t * Sort list to probe devices in natural order for users convenience\n+\t * (i.e. master first, then representors from lowest to highest ID).\n+\t */\n+\tqsort(list, ns, sizeof(*list), mlx5_dev_spawn_data_cmp);\n+\t/* Device specific configuration. */\n+\tswitch (pci_dev->id.device_id) {\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF:\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX6VF:\n+\tcase PCI_DEVICE_ID_MELLANOX_CONNECTXVF:\n+\t\tdev_config_vf = 1;\n+\t\tbreak;\n+\tdefault:\n+\t\tdev_config_vf = 0;\n+\t\tbreak;\n+\t}\n+\tif (eth_da.type != RTE_ETH_REPRESENTOR_NONE) {\n+\t\t/* Set devargs default values. 
*/\n+\t\tif (eth_da.nb_mh_controllers == 0) {\n+\t\t\teth_da.nb_mh_controllers = 1;\n+\t\t\teth_da.mh_controllers[0] = 0;\n+\t\t}\n+\t\tif (eth_da.nb_ports == 0 && ns > 0) {\n+\t\t\tif (list[0].pf_bond >= 0 && list[0].info.representor)\n+\t\t\t\tDRV_LOG(WARNING, \"Representor on Bonding device should use pf#vf# syntax: %s\",\n+\t\t\t\t\tpci_dev->device.devargs->args);\n+\t\t\teth_da.nb_ports = 1;\n+\t\t\teth_da.ports[0] = list[0].info.pf_num;\n+\t\t}\n+\t\tif (eth_da.nb_representor_ports == 0) {\n+\t\t\teth_da.nb_representor_ports = 1;\n+\t\t\teth_da.representor_ports[0] = 0;\n+\t\t}\n+\t}\n+\tfor (i = 0; i != ns; ++i) {\n+\t\tuint32_t restore;\n+\n+\t\t/* Default configuration. */\n+\t\tmlx5_os_config_default(&dev_config);\n+\t\tdev_config.vf = dev_config_vf;\n+\t\tlist[i].numa_node = pci_dev->device.numa_node;\n+\t\tlist[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,\n+\t\t\t\t\t\t &list[i],\n+\t\t\t\t\t\t &dev_config,\n+\t\t\t\t\t\t &eth_da);\n+\t\tif (!list[i].eth_dev) {\n+\t\t\tif (rte_errno != EBUSY && rte_errno != EEXIST)\n+\t\t\t\tbreak;\n+\t\t\t/* Device is disabled or already spawned. Ignore it. 
*/\n+\t\t\tcontinue;\n+\t\t}\n+\t\trestore = list[i].eth_dev->data->dev_flags;\n+\t\trte_eth_copy_pci_info(list[i].eth_dev, pci_dev);\n+\t\t/**\n+\t\t * Each representor has a dedicated interrupts vector.\n+\t\t * rte_eth_copy_pci_info() assigns PF interrupts handle to\n+\t\t * representor eth_dev object because representor and PF\n+\t\t * share the same PCI address.\n+\t\t * Override representor device with a dedicated\n+\t\t * interrupts handle here.\n+\t\t * Representor interrupts handle is released in mlx5_dev_stop().\n+\t\t */\n+\t\tif (list[i].info.representor) {\n+\t\t\tstruct rte_intr_handle *intr_handle;\n+\t\t\tintr_handle = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,\n+\t\t\t\t\t\t  sizeof(*intr_handle), 0,\n+\t\t\t\t\t\t  SOCKET_ID_ANY);\n+\t\t\tif (!intr_handle) {\n+\t\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\t\"port %u failed to allocate memory for interrupt handler \"\n+\t\t\t\t\t\"Rx interrupts will not be supported\",\n+\t\t\t\t\ti);\n+\t\t\t\trte_errno = ENOMEM;\n+\t\t\t\tret = -rte_errno;\n+\t\t\t\tgoto exit;\n+\t\t\t}\n+\t\t\tlist[i].eth_dev->intr_handle = intr_handle;\n+\t\t}\n+\t\t/* Restore non-PCI flags cleared by the above call. */\n+\t\tlist[i].eth_dev->data->dev_flags |= restore;\n+\t\trte_eth_dev_probing_finish(list[i].eth_dev);\n+\t}\n+\tif (i != ns) {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"probe of PCI device \" PCI_PRI_FMT \" aborted after\"\n+\t\t\t\" encountering an error: %s\",\n+\t\t\towner_pci.domain, owner_pci.bus,\n+\t\t\towner_pci.devid, owner_pci.function,\n+\t\t\tstrerror(rte_errno));\n+\t\tret = -rte_errno;\n+\t\t/* Roll back. */\n+\t\twhile (i--) {\n+\t\t\tif (!list[i].eth_dev)\n+\t\t\t\tcontinue;\n+\t\t\tmlx5_dev_close(list[i].eth_dev);\n+\t\t\t/* mac_addrs must not be freed because in dev_private */\n+\t\t\tlist[i].eth_dev->data->mac_addrs = NULL;\n+\t\t\tclaim_zero(rte_eth_dev_release_port(list[i].eth_dev));\n+\t\t}\n+\t\t/* Restore original error. 
*/\n+\t\trte_errno = -ret;\n+\t} else {\n+\t\tret = 0;\n+\t}\n+exit:\n+\t/*\n+\t * Do the routine cleanup:\n+\t * - close opened Netlink sockets\n+\t * - free allocated spawn data array\n+\t * - free the Infiniband device list\n+\t */\n+\tif (nl_rdma >= 0)\n+\t\tclose(nl_rdma);\n+\tif (nl_route >= 0)\n+\t\tclose(nl_route);\n+\tif (list)\n+\t\tmlx5_free(list);\n+\tMLX5_ASSERT(ibv_list);\n+\tmlx5_glue->free_device_list(ibv_list);\n+\treturn ret;\n+}\n+\n+static int\n+mlx5_os_parse_eth_devargs(struct rte_device *dev,\n+\t\t\t  struct rte_eth_devargs *eth_da)\n+{\n+\tint ret = 0;\n+\n+\tif (dev->devargs == NULL)\n+\t\treturn 0;\n+\tmemset(eth_da, 0, sizeof(*eth_da));\n+\t/* Parse representor information first from class argument. */\n+\tif (dev->devargs->cls_str)\n+\t\tret = rte_eth_devargs_parse(dev->devargs->cls_str, eth_da);\n+\tif (ret != 0) {\n+\t\tDRV_LOG(ERR, \"failed to parse device arguments: %s\",\n+\t\t\tdev->devargs->cls_str);\n+\t\treturn -rte_errno;\n+\t}\n+\tif (eth_da->type == RTE_ETH_REPRESENTOR_NONE) {\n+\t\t/* Parse legacy device argument */\n+\t\tret = rte_eth_devargs_parse(dev->devargs->args, eth_da);\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(ERR, \"failed to parse device arguments: %s\",\n+\t\t\t\tdev->devargs->args);\n+\t\t\treturn -rte_errno;\n+\t\t}\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * Callback to register a PCI device.\n+ *\n+ * This function spawns Ethernet devices out of a given PCI device.\n+ *\n+ * @param[in] pci_dev\n+ *   PCI device information.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_os_pci_probe(struct rte_pci_device *pci_dev)\n+{\n+\tstruct rte_eth_devargs eth_da = { .nb_ports = 0 };\n+\tint ret = 0;\n+\tuint16_t p;\n+\n+\tret = mlx5_os_parse_eth_devargs(&pci_dev->device, &eth_da);\n+\tif (ret != 0)\n+\t\treturn ret;\n+\n+\tif (eth_da.nb_ports > 0) {\n+\t\t/* Iterate all port if devargs pf is range: \"pf[0-1]vf[...]\". 
*/\n+\t\tfor (p = 0; p < eth_da.nb_ports; p++) {\n+\t\t\tret = mlx5_os_pci_probe_pf(pci_dev, &eth_da,\n+\t\t\t\t\t\t   eth_da.ports[p]);\n+\t\t\tif (ret)\n+\t\t\t\tbreak;\n+\t\t}\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(ERR, \"Probe of PCI device \" PCI_PRI_FMT \" \"\n+\t\t\t\t\"aborted due to proding failure of PF %u\",\n+\t\t\t\tpci_dev->addr.domain, pci_dev->addr.bus,\n+\t\t\t\tpci_dev->addr.devid, pci_dev->addr.function,\n+\t\t\t\teth_da.ports[p]);\n+\t\t\tmlx5_net_remove(&pci_dev->device);\n+\t\t}\n+\t} else {\n+\t\tret = mlx5_os_pci_probe_pf(pci_dev, &eth_da, 0);\n+\t}\n+\treturn ret;\n+}\n+\n+/* Probe a single SF device on auxiliary bus, no representor support. */\n+static int\n+mlx5_os_auxiliary_probe(struct rte_device *dev)\n+{\n+\tstruct rte_eth_devargs eth_da = { .nb_ports = 0 };\n+\tstruct mlx5_dev_config config;\n+\tstruct mlx5_dev_spawn_data spawn = { .pf_bond = -1 };\n+\tstruct rte_auxiliary_device *adev = RTE_DEV_TO_AUXILIARY(dev);\n+\tstruct rte_eth_dev *eth_dev;\n+\tint ret = 0;\n+\n+\t/* Parse ethdev devargs. */\n+\tret = mlx5_os_parse_eth_devargs(dev, &eth_da);\n+\tif (ret != 0)\n+\t\treturn ret;\n+\t/* Set default config data. */\n+\tmlx5_os_config_default(&config);\n+\tconfig.sf = 1;\n+\t/* Init spawn data. */\n+\tspawn.max_port = 1;\n+\tspawn.phys_port = 1;\n+\tspawn.phys_dev = mlx5_os_get_ibv_dev(dev);\n+\tif (spawn.phys_dev == NULL)\n+\t\treturn -rte_errno;\n+\tret = mlx5_auxiliary_get_ifindex(dev->name);\n+\tif (ret < 0) {\n+\t\tDRV_LOG(ERR, \"failed to get ethdev ifindex: %s\", dev->name);\n+\t\treturn ret;\n+\t}\n+\tspawn.ifindex = ret;\n+\tspawn.numa_node = dev->numa_node;\n+\t/* Spawn device. */\n+\teth_dev = mlx5_dev_spawn(dev, &spawn, &config, &eth_da);\n+\tif (eth_dev == NULL)\n+\t\treturn -rte_errno;\n+\t/* Post create. 
*/\n+\teth_dev->intr_handle = &adev->intr_handle;\n+\tif (rte_eal_process_type() == RTE_PROC_PRIMARY) {\n+\t\teth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;\n+\t\teth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;\n+\t\teth_dev->data->numa_node = dev->numa_node;\n+\t}\n+\trte_eth_dev_probing_finish(eth_dev);\n+\treturn 0;\n+}\n+\n+/**\n+ * Net class driver callback to probe a device.\n+ *\n+ * This function probe PCI bus device(s) or a single SF on auxiliary bus.\n+ *\n+ * @param[in] dev\n+ *   Pointer to the generic device.\n+ *\n+ * @return\n+ *   0 on success, the function cannot fail.\n+ */\n+int\n+mlx5_os_net_probe(struct rte_device *dev)\n+{\n+\tint ret;\n+\n+\tif (rte_eal_process_type() == RTE_PROC_PRIMARY)\n+\t\tmlx5_pmd_socket_init();\n+\tret = mlx5_init_once();\n+\tif (ret) {\n+\t\tDRV_LOG(ERR, \"unable to init PMD global data: %s\",\n+\t\t\tstrerror(rte_errno));\n+\t\treturn -rte_errno;\n+\t}\n+\tif (mlx5_dev_is_pci(dev))\n+\t\treturn mlx5_os_pci_probe(RTE_DEV_TO_PCI(dev));\n+\telse\n+\t\treturn mlx5_os_auxiliary_probe(dev);\n+}\n+\n+static int\n+mlx5_config_doorbell_mapping_env(const struct mlx5_dev_config *config)\n+{\n+\tchar *env;\n+\tint value;\n+\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);\n+\t/* Get environment variable to store. */\n+\tenv = getenv(MLX5_SHUT_UP_BF);\n+\tvalue = env ? !!strcmp(env, \"0\") : MLX5_ARG_UNSET;\n+\tif (config->dbnc == MLX5_ARG_UNSET)\n+\t\tsetenv(MLX5_SHUT_UP_BF, MLX5_SHUT_UP_BF_DEFAULT, 1);\n+\telse\n+\t\tsetenv(MLX5_SHUT_UP_BF,\n+\t\t       config->dbnc == MLX5_TXDB_NCACHED ? \"1\" : \"0\", 1);\n+\treturn value;\n+}\n+\n+static void\n+mlx5_restore_doorbell_mapping_env(int value)\n+{\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);\n+\t/* Restore the original environment variable state. */\n+\tif (value == MLX5_ARG_UNSET)\n+\t\tunsetenv(MLX5_SHUT_UP_BF);\n+\telse\n+\t\tsetenv(MLX5_SHUT_UP_BF, value ? 
\"1\" : \"0\", 1);\n+}\n+\n+/**\n+ * Extract pdn of PD object using DV API.\n+ *\n+ * @param[in] pd\n+ *   Pointer to the verbs PD object.\n+ * @param[out] pdn\n+ *   Pointer to the PD object number variable.\n+ *\n+ * @return\n+ *   0 on success, error value otherwise.\n+ */\n+int\n+mlx5_os_get_pdn(void *pd, uint32_t *pdn)\n+{\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\tstruct mlx5dv_obj obj;\n+\tstruct mlx5dv_pd pd_info;\n+\tint ret = 0;\n+\n+\tobj.pd.in = pd;\n+\tobj.pd.out = &pd_info;\n+\tret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);\n+\tif (ret) {\n+\t\tDRV_LOG(DEBUG, \"Fail to get PD object info\");\n+\t\treturn ret;\n+\t}\n+\t*pdn = pd_info.pdn;\n+\treturn 0;\n+#else\n+\t(void)pd;\n+\t(void)pdn;\n+\treturn -ENOTSUP;\n+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */\n+}\n+\n+/**\n+ * Function API to open IB device.\n+ *\n+ * This function calls the Linux glue APIs to open a device.\n+ *\n+ * @param[in] spawn\n+ *   Pointer to the IB device attributes (name, port, etc).\n+ * @param[out] config\n+ *   Pointer to device configuration structure.\n+ * @param[out] sh\n+ *   Pointer to shared context structure.\n+ *\n+ * @return\n+ *   0 on success, a positive error value otherwise.\n+ */\n+int\n+mlx5_os_open_device(const struct mlx5_dev_spawn_data *spawn,\n+\t\t     const struct mlx5_dev_config *config,\n+\t\t     struct mlx5_dev_ctx_shared *sh)\n+{\n+\tint dbmap_env;\n+\tint err = 0;\n+\n+\tpthread_mutex_init(&sh->txpp.mutex, NULL);\n+\t/*\n+\t * Configure environment variable \"MLX5_BF_SHUT_UP\"\n+\t * before the device creation. The rdma_core library\n+\t * checks the variable at device creation and\n+\t * stores the result internally.\n+\t */\n+\tdbmap_env = mlx5_config_doorbell_mapping_env(config);\n+\t/* Try to open IB device with DV first, then usual Verbs. 
*/\n+\terrno = 0;\n+\tsh->ctx = mlx5_glue->dv_open_device(spawn->phys_dev);\n+\tif (sh->ctx) {\n+\t\tsh->devx = 1;\n+\t\tDRV_LOG(DEBUG, \"DevX is supported\");\n+\t\t/* The device is created, no need for environment. */\n+\t\tmlx5_restore_doorbell_mapping_env(dbmap_env);\n+\t} else {\n+\t\t/* The environment variable is still configured. */\n+\t\tsh->ctx = mlx5_glue->open_device(spawn->phys_dev);\n+\t\terr = errno ? errno : ENODEV;\n+\t\t/*\n+\t\t * The environment variable is not needed anymore,\n+\t\t * all device creation attempts are completed.\n+\t\t */\n+\t\tmlx5_restore_doorbell_mapping_env(dbmap_env);\n+\t\tif (!sh->ctx)\n+\t\t\treturn err;\n+\t\tDRV_LOG(DEBUG, \"DevX is NOT supported\");\n+\t\terr = 0;\n+\t}\n+\tif (!err && sh->ctx) {\n+\t\t/* Hint libmlx5 to use PMD allocator for data plane resources */\n+\t\tmlx5_glue->dv_set_context_attr(sh->ctx,\n+\t\t\tMLX5DV_CTX_ATTR_BUF_ALLOCATORS,\n+\t\t\t(void *)((uintptr_t)&(struct mlx5dv_ctx_allocators){\n+\t\t\t\t.alloc = &mlx5_alloc_verbs_buf,\n+\t\t\t\t.free = &mlx5_free_verbs_buf,\n+\t\t\t\t.data = sh,\n+\t\t\t}));\n+\t}\n+\treturn err;\n+}\n+\n+/**\n+ * Install shared asynchronous device events handler.\n+ * This function is implemented to support event sharing\n+ * between multiple ports of single IB device.\n+ *\n+ * @param sh\n+ *   Pointer to mlx5_dev_ctx_shared object.\n+ */\n+void\n+mlx5_os_dev_shared_handler_install(struct mlx5_dev_ctx_shared *sh)\n+{\n+\tint ret;\n+\tint flags;\n+\n+\tsh->intr_handle.fd = -1;\n+\tflags = fcntl(((struct ibv_context *)sh->ctx)->async_fd, F_GETFL);\n+\tret = fcntl(((struct ibv_context *)sh->ctx)->async_fd,\n+\t\t    F_SETFL, flags | O_NONBLOCK);\n+\tif (ret) {\n+\t\tDRV_LOG(INFO, \"failed to change file descriptor async event\"\n+\t\t\t\" queue\");\n+\t} else {\n+\t\tsh->intr_handle.fd = ((struct ibv_context *)sh->ctx)->async_fd;\n+\t\tsh->intr_handle.type = RTE_INTR_HANDLE_EXT;\n+\t\tif 
(rte_intr_callback_register(&sh->intr_handle,\n+\t\t\t\t\tmlx5_dev_interrupt_handler, sh)) {\n+\t\t\tDRV_LOG(INFO, \"Fail to install the shared interrupt.\");\n+\t\t\tsh->intr_handle.fd = -1;\n+\t\t}\n+\t}\n+\tif (sh->devx) {\n+#ifdef HAVE_IBV_DEVX_ASYNC\n+\t\tsh->intr_handle_devx.fd = -1;\n+\t\tsh->devx_comp =\n+\t\t\t(void *)mlx5_glue->devx_create_cmd_comp(sh->ctx);\n+\t\tstruct mlx5dv_devx_cmd_comp *devx_comp = sh->devx_comp;\n+\t\tif (!devx_comp) {\n+\t\t\tDRV_LOG(INFO, \"failed to allocate devx_comp.\");\n+\t\t\treturn;\n+\t\t}\n+\t\tflags = fcntl(devx_comp->fd, F_GETFL);\n+\t\tret = fcntl(devx_comp->fd, F_SETFL, flags | O_NONBLOCK);\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(INFO, \"failed to change file descriptor\"\n+\t\t\t\t\" devx comp\");\n+\t\t\treturn;\n+\t\t}\n+\t\tsh->intr_handle_devx.fd = devx_comp->fd;\n+\t\tsh->intr_handle_devx.type = RTE_INTR_HANDLE_EXT;\n+\t\tif (rte_intr_callback_register(&sh->intr_handle_devx,\n+\t\t\t\t\tmlx5_dev_interrupt_handler_devx, sh)) {\n+\t\t\tDRV_LOG(INFO, \"Fail to install the devx shared\"\n+\t\t\t\t\" interrupt.\");\n+\t\t\tsh->intr_handle_devx.fd = -1;\n+\t\t}\n+#endif /* HAVE_IBV_DEVX_ASYNC */\n+\t}\n+}\n+\n+/**\n+ * Uninstall shared asynchronous device events handler.\n+ * This function is implemented to support event sharing\n+ * between multiple ports of single IB device.\n+ *\n+ * @param dev\n+ *   Pointer to mlx5_dev_ctx_shared object.\n+ */\n+void\n+mlx5_os_dev_shared_handler_uninstall(struct mlx5_dev_ctx_shared *sh)\n+{\n+\tif (sh->intr_handle.fd >= 0)\n+\t\tmlx5_intr_callback_unregister(&sh->intr_handle,\n+\t\t\t\t\t      mlx5_dev_interrupt_handler, sh);\n+#ifdef HAVE_IBV_DEVX_ASYNC\n+\tif (sh->intr_handle_devx.fd >= 0)\n+\t\trte_intr_callback_unregister(&sh->intr_handle_devx,\n+\t\t\t\t  mlx5_dev_interrupt_handler_devx, sh);\n+\tif (sh->devx_comp)\n+\t\tmlx5_glue->devx_destroy_cmd_comp(sh->devx_comp);\n+#endif\n+}\n+\n+/**\n+ * Read statistics by a named counter.\n+ *\n+ * @param[in] priv\n+ *   Pointer to the 
private device data structure.\n+ * @param[in] ctr_name\n+ *   Pointer to the name of the statistic counter to read\n+ * @param[out] stat\n+ *   Pointer to read statistic value.\n+ * @return\n+ *   0 on success and stat is valud, 1 if failed to read the value\n+ *   rte_errno is set.\n+ *\n+ */\n+int\n+mlx5_os_read_dev_stat(struct mlx5_priv *priv, const char *ctr_name,\n+\t\t      uint64_t *stat)\n+{\n+\tint fd;\n+\n+\tif (priv->sh) {\n+\t\tif (priv->q_counters != NULL &&\n+\t\t    strcmp(ctr_name, \"out_of_buffer\") == 0)\n+\t\t\treturn mlx5_devx_cmd_queue_counter_query\n+\t\t\t\t\t(priv->q_counters, 0, (uint32_t *)stat);\n+\t\tMKSTR(path, \"%s/ports/%d/hw_counters/%s\",\n+\t\t      priv->sh->ibdev_path,\n+\t\t      priv->dev_port,\n+\t\t      ctr_name);\n+\t\tfd = open(path, O_RDONLY);\n+\t\t/*\n+\t\t * in switchdev the file location is not per port\n+\t\t * but rather in <ibdev_path>/hw_counters/<file_name>.\n+\t\t */\n+\t\tif (fd == -1) {\n+\t\t\tMKSTR(path1, \"%s/hw_counters/%s\",\n+\t\t\t      priv->sh->ibdev_path,\n+\t\t\t      ctr_name);\n+\t\t\tfd = open(path1, O_RDONLY);\n+\t\t}\n+\t\tif (fd != -1) {\n+\t\t\tchar buf[21] = {'\\0'};\n+\t\t\tssize_t n = read(fd, buf, sizeof(buf));\n+\n+\t\t\tclose(fd);\n+\t\t\tif (n != -1) {\n+\t\t\t\t*stat = strtoull(buf, NULL, 10);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t}\n+\t}\n+\t*stat = 0;\n+\treturn 1;\n+}\n+\n+/**\n+ * Set the reg_mr and dereg_mr call backs\n+ *\n+ * @param reg_mr_cb[out]\n+ *   Pointer to reg_mr func\n+ * @param dereg_mr_cb[out]\n+ *   Pointer to dereg_mr func\n+ *\n+ */\n+void\n+mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb,\n+\t\t      mlx5_dereg_mr_t *dereg_mr_cb)\n+{\n+\t*reg_mr_cb = mlx5_mr_verbs_ops.reg_mr;\n+\t*dereg_mr_cb = mlx5_mr_verbs_ops.dereg_mr;\n+}\n+\n+/**\n+ * Remove a MAC address from device\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param index\n+ *   MAC address index.\n+ */\n+void\n+mlx5_os_mac_addr_remove(struct rte_eth_dev *dev, uint32_t 
index)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tconst int vf = priv->config.vf;\n+\n+\tif (vf)\n+\t\tmlx5_nl_mac_addr_remove(priv->nl_socket_route,\n+\t\t\t\t\tmlx5_ifindex(dev), priv->mac_own,\n+\t\t\t\t\t&dev->data->mac_addrs[index], index);\n+}\n+\n+/**\n+ * Adds a MAC address to the device\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param mac_addr\n+ *   MAC address to register.\n+ * @param index\n+ *   MAC address index.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise\n+ */\n+int\n+mlx5_os_mac_addr_add(struct rte_eth_dev *dev, struct rte_ether_addr *mac,\n+\t\t     uint32_t index)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tconst int vf = priv->config.vf;\n+\tint ret = 0;\n+\n+\tif (vf)\n+\t\tret = mlx5_nl_mac_addr_add(priv->nl_socket_route,\n+\t\t\t\t\t   mlx5_ifindex(dev), priv->mac_own,\n+\t\t\t\t\t   mac, index);\n+\treturn ret;\n+}\n+\n+/**\n+ * Modify a VF MAC address\n+ *\n+ * @param priv\n+ *   Pointer to device private data.\n+ * @param mac_addr\n+ *   MAC address to modify into.\n+ * @param iface_idx\n+ *   Net device interface index\n+ * @param vf_index\n+ *   VF index\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise\n+ */\n+int\n+mlx5_os_vf_mac_addr_modify(struct mlx5_priv *priv,\n+\t\t\t   unsigned int iface_idx,\n+\t\t\t   struct rte_ether_addr *mac_addr,\n+\t\t\t   int vf_index)\n+{\n+\treturn mlx5_nl_vf_mac_addr_modify\n+\t\t(priv->nl_socket_route, iface_idx, mac_addr, vf_index);\n+}\n+\n+/**\n+ * Set device promiscuous mode\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param enable\n+ *   0 - promiscuous is disabled, otherwise - enabled\n+ *\n+ * @return\n+ *   0 on success, a negative error value otherwise\n+ */\n+int\n+mlx5_os_set_promisc(struct rte_eth_dev *dev, int enable)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\n+\treturn mlx5_nl_promisc(priv->nl_socket_route,\n+\t\t\t       
mlx5_ifindex(dev), !!enable);\n+}\n+\n+/**\n+ * Set device promiscuous mode\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param enable\n+ *   0 - all multicase is disabled, otherwise - enabled\n+ *\n+ * @return\n+ *   0 on success, a negative error value otherwise\n+ */\n+int\n+mlx5_os_set_allmulti(struct rte_eth_dev *dev, int enable)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\n+\treturn mlx5_nl_allmulti(priv->nl_socket_route,\n+\t\t\t\tmlx5_ifindex(dev), !!enable);\n+}\n+\n+/**\n+ * Flush device MAC addresses\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ *\n+ */\n+void\n+mlx5_os_mac_addr_flush(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\n+\tmlx5_nl_mac_addr_flush(priv->nl_socket_route, mlx5_ifindex(dev),\n+\t\t\t       dev->data->mac_addrs,\n+\t\t\t       MLX5_MAX_MAC_ADDRESSES, priv->mac_own);\n+}\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_os.h b/drivers/net/mlx5/freebsd/mlx5_os.h\nnew file mode 100644\nindex 0000000000..2991d37df2\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_os.h\n@@ -0,0 +1,24 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2015 6WIND S.A.\n+ * Copyright 2020 Mellanox Technologies, Ltd\n+ */\n+\n+#ifndef RTE_PMD_MLX5_OS_H_\n+#define RTE_PMD_MLX5_OS_H_\n+\n+#include <net/if.h>\n+\n+/* verb enumerations translations to local enums. */\n+enum {\n+\tMLX5_FS_NAME_MAX = IBV_SYSFS_NAME_MAX + 1,\n+\tMLX5_FS_PATH_MAX = IBV_SYSFS_PATH_MAX + 1\n+};\n+\n+/* Maximal data of sendmsg message(in bytes). 
*/\n+#define MLX5_SENDMSG_MAX 64\n+\n+#define MLX5_NAMESIZE IF_NAMESIZE\n+\n+int mlx5_auxiliary_get_ifindex(const char *sf_name);\n+\n+#endif /* RTE_PMD_MLX5_OS_H_ */\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_socket.c b/drivers/net/mlx5/freebsd/mlx5_socket.c\nnew file mode 100644\nindex 0000000000..6356b66dc4\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_socket.c\n@@ -0,0 +1,249 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2019 Mellanox Technologies, Ltd\n+ */\n+\n+#ifndef _GNU_SOURCE\n+#define _GNU_SOURCE\n+#endif\n+\n+#include <sys/types.h>\n+#include <sys/socket.h>\n+#include <sys/un.h>\n+#include <fcntl.h>\n+#include <stdio.h>\n+#include <unistd.h>\n+#include <sys/stat.h>\n+\n+#include \"rte_eal.h\"\n+#include \"mlx5_utils.h\"\n+#include \"mlx5.h\"\n+\n+/* PMD socket service for tools. */\n+\n+#define MLX5_SOCKET_PATH \"/var/tmp/dpdk_net_mlx5_%d\"\n+\n+int server_socket; /* Unix socket for primary process. */\n+struct rte_intr_handle server_intr_handle; /* Interrupt handler. */\n+\n+/**\n+ * Handle server pmd socket interrupts.\n+ */\n+static void\n+mlx5_pmd_socket_handle(void *cb __rte_unused)\n+{\n+\tint conn_sock;\n+\tint ret;\n+\tstruct cmsghdr *cmsg = NULL;\n+\tuint32_t data[MLX5_SENDMSG_MAX / sizeof(uint32_t)];\n+\tuint64_t flow_ptr = 0;\n+\tuint8_t  buf[CMSG_SPACE(sizeof(int))] = { 0 };\n+\tstruct iovec io = {\n+\t\t.iov_base = data,\n+\t\t.iov_len = sizeof(data),\n+\t};\n+\tstruct msghdr msg = {\n+\t\t.msg_iov = &io,\n+\t\t.msg_iovlen = 1,\n+\t\t.msg_control = buf,\n+\t\t.msg_controllen = sizeof(buf),\n+\t};\n+\n+\tuint32_t port_id;\n+\tint fd;\n+\tFILE *file = NULL;\n+\tstruct rte_eth_dev *dev;\n+\tstruct rte_flow_error err;\n+\tstruct mlx5_flow_dump_req  *dump_req;\n+\tstruct mlx5_flow_dump_ack  *dump_ack;\n+\n+\tmemset(data, 0, sizeof(data));\n+\t/* Accept the connection from the client. 
*/\n+\tconn_sock = accept(server_socket, NULL, NULL);\n+\tif (conn_sock < 0) {\n+\t\tDRV_LOG(WARNING, \"connection failed: %s\", strerror(errno));\n+\t\treturn;\n+\t}\n+\tret = recvmsg(conn_sock, &msg, MSG_WAITALL);\n+\tif (ret != sizeof(struct mlx5_flow_dump_req)) {\n+\t\tDRV_LOG(WARNING, \"wrong message received: %s\",\n+\t\t\tstrerror(errno));\n+\t\tgoto error;\n+\t}\n+\n+\t/* Receive file descriptor. */\n+\tcmsg = CMSG_FIRSTHDR(&msg);\n+\tif (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS ||\n+\t    cmsg->cmsg_len < sizeof(int)) {\n+\t\tDRV_LOG(WARNING, \"invalid file descriptor message\");\n+\t\tgoto error;\n+\t}\n+\tmemcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));\n+\tfile = fdopen(fd, \"w\");\n+\tif (!file) {\n+\t\tDRV_LOG(WARNING, \"Failed to open file\");\n+\t\tgoto error;\n+\t}\n+\t/* Receive port number. */\n+\tif (msg.msg_iovlen != 1 || msg.msg_iov->iov_len < sizeof(uint16_t)) {\n+\t\tDRV_LOG(WARNING, \"wrong port number message\");\n+\t\tgoto error;\n+\t}\n+\n+\tdump_req = (struct mlx5_flow_dump_req *)msg.msg_iov->iov_base;\n+\tif (dump_req) {\n+\t\tport_id = dump_req->port_id;\n+\t\tflow_ptr = dump_req->flow_id;\n+\t} else {\n+\t\tDRV_LOG(WARNING, \"Invalid message\");\n+\t\tgoto error;\n+\t}\n+\n+\tif (!rte_eth_dev_is_valid_port(port_id)) {\n+\t\tDRV_LOG(WARNING, \"Invalid port %u\", port_id);\n+\t\tgoto error;\n+\t}\n+\n+\t/* Dump flow. */\n+\tdev = &rte_eth_devices[port_id];\n+\tif (flow_ptr == 0)\n+\t\tret = mlx5_flow_dev_dump(dev, NULL, file, NULL);\n+\telse\n+\t\tret = mlx5_flow_dev_dump(dev,\n+\t\t\t(struct rte_flow *)((uintptr_t)flow_ptr), file, &err);\n+\n+\t/* Set-up the ancillary data and reply. 
*/\n+\tmsg.msg_controllen = 0;\n+\tmsg.msg_control = NULL;\n+\tmsg.msg_iovlen = 1;\n+\tmsg.msg_iov = &io;\n+\tdump_ack = (struct mlx5_flow_dump_ack *)data;\n+\tdump_ack->rc = -ret;\n+\tio.iov_len = sizeof(struct mlx5_flow_dump_ack);\n+\tio.iov_base = dump_ack;\n+\tdo {\n+\t\tret = sendmsg(conn_sock, &msg, 0);\n+\t} while (ret < 0 && errno == EINTR);\n+\tif (ret < 0)\n+\t\tDRV_LOG(WARNING, \"failed to send response %s\",\n+\t\t\tstrerror(errno));\n+error:\n+\tif (conn_sock >= 0)\n+\t\tclose(conn_sock);\n+\tif (file)\n+\t\tfclose(file);\n+}\n+\n+/**\n+ * Install interrupt handler.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @return\n+ *   0 on success, a negative errno value otherwise.\n+ */\n+static int\n+mlx5_pmd_interrupt_handler_install(void)\n+{\n+\tMLX5_ASSERT(server_socket);\n+\tserver_intr_handle.fd = server_socket;\n+\tserver_intr_handle.type = RTE_INTR_HANDLE_EXT;\n+\treturn rte_intr_callback_register(&server_intr_handle,\n+\t\t\t\t\t  mlx5_pmd_socket_handle, NULL);\n+}\n+\n+/**\n+ * Uninstall interrupt handler.\n+ */\n+static void\n+mlx5_pmd_interrupt_handler_uninstall(void)\n+{\n+\tif (server_socket) {\n+\t\tmlx5_intr_callback_unregister(&server_intr_handle,\n+\t\t\t\t\t      mlx5_pmd_socket_handle,\n+\t\t\t\t\t      NULL);\n+\t}\n+\tserver_intr_handle.fd = 0;\n+\tserver_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;\n+}\n+\n+/**\n+ * Initialise the socket to communicate with the secondary process\n+ *\n+ * @param[in] dev\n+ *   Pointer to Ethernet device.\n+ *\n+ * @return\n+ *   0 on success, a negative value otherwise.\n+ */\n+int\n+mlx5_pmd_socket_init(void)\n+{\n+\tstruct sockaddr_un sun = {\n+\t\t.sun_family = AF_UNIX,\n+\t};\n+\tint ret;\n+\tint flags;\n+\n+\tMLX5_ASSERT(rte_eal_process_type() == RTE_PROC_PRIMARY);\n+\tif (server_socket)\n+\t\treturn 0;\n+\t/*\n+\t * Initialize the socket to communicate with the secondary\n+\t * process.\n+\t */\n+\tret = socket(AF_UNIX, SOCK_STREAM, 0);\n+\tif (ret < 0) {\n+\t\tDRV_LOG(WARNING, 
\"Failed to open mlx5 socket: %s\",\n+\t\t\tstrerror(errno));\n+\t\tgoto error;\n+\t}\n+\tserver_socket = ret;\n+\tflags = fcntl(server_socket, F_GETFL, 0);\n+\tif (flags == -1)\n+\t\tgoto error;\n+\tret = fcntl(server_socket, F_SETFL, flags | O_NONBLOCK);\n+\tif (ret < 0)\n+\t\tgoto error;\n+\tsnprintf(sun.sun_path, sizeof(sun.sun_path), MLX5_SOCKET_PATH,\n+\t\t getpid());\n+\tremove(sun.sun_path);\n+\tret = bind(server_socket, (const struct sockaddr *)&sun, sizeof(sun));\n+\tif (ret < 0) {\n+\t\tDRV_LOG(WARNING,\n+\t\t\t\"cannot bind mlx5 socket: %s\", strerror(errno));\n+\t\tgoto close;\n+\t}\n+\tret = listen(server_socket, 0);\n+\tif (ret < 0) {\n+\t\tDRV_LOG(WARNING, \"cannot listen on mlx5 socket: %s\",\n+\t\t\tstrerror(errno));\n+\t\tgoto close;\n+\t}\n+\tif (mlx5_pmd_interrupt_handler_install()) {\n+\t\tDRV_LOG(WARNING, \"cannot register interrupt handler for mlx5 socket: %s\",\n+\t\t\tstrerror(errno));\n+\t\tgoto close;\n+\t}\n+\treturn 0;\n+close:\n+\tremove(sun.sun_path);\n+error:\n+\tclaim_zero(close(server_socket));\n+\tserver_socket = 0;\n+\tDRV_LOG(ERR, \"Cannot initialize socket: %s\", strerror(errno));\n+\treturn -errno;\n+}\n+\n+/**\n+ * Un-Initialize the pmd socket\n+ */\n+RTE_FINI(mlx5_pmd_socket_uninit)\n+{\n+\tif (!server_socket)\n+\t\treturn;\n+\tmlx5_pmd_interrupt_handler_uninstall();\n+\tclaim_zero(close(server_socket));\n+\tserver_socket = 0;\n+\tMKSTR(path, MLX5_SOCKET_PATH, getpid());\n+\tclaim_zero(remove(path));\n+}\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_verbs.c b/drivers/net/mlx5/freebsd/mlx5_verbs.c\nnew file mode 100644\nindex 0000000000..d4fa202ac4\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_verbs.c\n@@ -0,0 +1,1208 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2020 Mellanox Technologies, Ltd\n+ */\n+\n+#include <stddef.h>\n+#include <errno.h>\n+#include <string.h>\n+#include <stdint.h>\n+#include <unistd.h>\n+#include <inttypes.h>\n+#include <sys/queue.h>\n+\n+#include \"mlx5_autoconf.h\"\n+\n+#include 
<rte_mbuf.h>\n+#include <rte_malloc.h>\n+#include <ethdev_driver.h>\n+#include <rte_common.h>\n+\n+#include <mlx5_glue.h>\n+#include <mlx5_common.h>\n+#include <mlx5_common_mr.h>\n+#include <mlx5_verbs.h>\n+#include <mlx5_rx.h>\n+#include <mlx5_tx.h>\n+#include <mlx5_utils.h>\n+#include <mlx5_malloc.h>\n+\n+/**\n+ * Register mr. Given protection domain pointer, pointer to addr and length\n+ * register the memory region.\n+ *\n+ * @param[in] pd\n+ *   Pointer to protection domain context.\n+ * @param[in] addr\n+ *   Pointer to memory start address.\n+ * @param[in] length\n+ *   Length of the memory to register.\n+ * @param[out] pmd_mr\n+ *   pmd_mr struct set with lkey, address, length and pointer to mr object\n+ *\n+ * @return\n+ *   0 on successful registration, -1 otherwise\n+ */\n+static int\n+mlx5_reg_mr(void *pd, void *addr, size_t length,\n+\t\t struct mlx5_pmd_mr *pmd_mr)\n+{\n+\treturn mlx5_common_verbs_reg_mr(pd, addr, length, pmd_mr);\n+}\n+\n+/**\n+ * Deregister mr. Given the mlx5 pmd MR - deregister the MR\n+ *\n+ * @param[in] pmd_mr\n+ *   pmd_mr struct set with lkey, address, length and pointer to mr object\n+ *\n+ */\n+static void\n+mlx5_dereg_mr(struct mlx5_pmd_mr *pmd_mr)\n+{\n+\tmlx5_common_verbs_dereg_mr(pmd_mr);\n+}\n+\n+/* verbs operations. */\n+const struct mlx5_mr_ops mlx5_mr_verbs_ops = {\n+\t.reg_mr = mlx5_reg_mr,\n+\t.dereg_mr = mlx5_dereg_mr,\n+};\n+\n+/**\n+ * Modify Rx WQ vlan stripping offload\n+ *\n+ * @param rxq_obj\n+ *   Rx queue object.\n+ *\n+ * @return 0 on success, non-0 otherwise\n+ */\n+static int\n+mlx5_rxq_obj_modify_wq_vlan_strip(struct mlx5_rxq_obj *rxq_obj, int on)\n+{\n+\tuint16_t vlan_offloads =\n+\t\t(on ? 
IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |\n+\t\t0;\n+\tstruct ibv_wq_attr mod;\n+\tmod = (struct ibv_wq_attr){\n+\t\t.attr_mask = IBV_WQ_ATTR_FLAGS,\n+\t\t.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,\n+\t\t.flags = vlan_offloads,\n+\t};\n+\n+\treturn mlx5_glue->modify_wq(rxq_obj->wq, &mod);\n+}\n+\n+/**\n+ * Modifies the attributes for the specified WQ.\n+ *\n+ * @param rxq_obj\n+ *   Verbs Rx queue object.\n+ * @param type\n+ *   Type of change queue state.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ibv_modify_wq(struct mlx5_rxq_obj *rxq_obj, uint8_t type)\n+{\n+\tstruct ibv_wq_attr mod = {\n+\t\t.attr_mask = IBV_WQ_ATTR_STATE,\n+\t\t.wq_state = (enum ibv_wq_state)type,\n+\t};\n+\n+\treturn mlx5_glue->modify_wq(rxq_obj->wq, &mod);\n+}\n+\n+/**\n+ * Modify QP using Verbs API.\n+ *\n+ * @param txq_obj\n+ *   Verbs Tx queue object.\n+ * @param type\n+ *   Type of change queue state.\n+ * @param dev_port\n+ *   IB device port number.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ibv_modify_qp(struct mlx5_txq_obj *obj, enum mlx5_txq_modify_type type,\n+\t\t   uint8_t dev_port)\n+{\n+\tstruct ibv_qp_attr mod = {\n+\t\t.qp_state = IBV_QPS_RESET,\n+\t\t.port_num = dev_port,\n+\t};\n+\tint attr_mask = (IBV_QP_STATE | IBV_QP_PORT);\n+\tint ret;\n+\n+\tif (type != MLX5_TXQ_MOD_RST2RDY) {\n+\t\tret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(ERR, \"Cannot change Tx QP state to RESET %s\",\n+\t\t\t\tstrerror(errno));\n+\t\t\trte_errno = errno;\n+\t\t\treturn ret;\n+\t\t}\n+\t\tif (type == MLX5_TXQ_MOD_RDY2RST)\n+\t\t\treturn 0;\n+\t}\n+\tif (type == MLX5_TXQ_MOD_ERR2RDY)\n+\t\tattr_mask = IBV_QP_STATE;\n+\tmod.qp_state = IBV_QPS_INIT;\n+\tret = mlx5_glue->modify_qp(obj->qp, &mod, attr_mask);\n+\tif (ret) {\n+\t\tDRV_LOG(ERR, \"Cannot change Tx QP state to INIT 
%s\",\n+\t\t\tstrerror(errno));\n+\t\trte_errno = errno;\n+\t\treturn ret;\n+\t}\n+\tmod.qp_state = IBV_QPS_RTR;\n+\tret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);\n+\tif (ret) {\n+\t\tDRV_LOG(ERR, \"Cannot change Tx QP state to RTR %s\",\n+\t\t\tstrerror(errno));\n+\t\trte_errno = errno;\n+\t\treturn ret;\n+\t}\n+\tmod.qp_state = IBV_QPS_RTS;\n+\tret = mlx5_glue->modify_qp(obj->qp, &mod, IBV_QP_STATE);\n+\tif (ret) {\n+\t\tDRV_LOG(ERR, \"Cannot change Tx QP state to RTS %s\",\n+\t\t\tstrerror(errno));\n+\t\trte_errno = errno;\n+\t\treturn ret;\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * Create a CQ Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param idx\n+ *   Queue index in DPDK Rx queue array.\n+ *\n+ * @return\n+ *   The Verbs CQ object initialized, NULL otherwise and rte_errno is set.\n+ */\n+static struct ibv_cq *\n+mlx5_rxq_ibv_cq_create(struct rte_eth_dev *dev, uint16_t idx)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];\n+\tstruct mlx5_rxq_ctrl *rxq_ctrl =\n+\t\tcontainer_of(rxq_data, struct mlx5_rxq_ctrl, rxq);\n+\tstruct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;\n+\tunsigned int cqe_n = mlx5_rxq_cqe_num(rxq_data);\n+\tstruct {\n+\t\tstruct ibv_cq_init_attr_ex ibv;\n+\t\tstruct mlx5dv_cq_init_attr mlx5;\n+\t} cq_attr;\n+\n+\tcq_attr.ibv = (struct ibv_cq_init_attr_ex){\n+\t\t.cqe = cqe_n,\n+\t\t.channel = rxq_obj->ibv_channel,\n+\t\t.comp_mask = 0,\n+\t};\n+\tcq_attr.mlx5 = (struct mlx5dv_cq_init_attr){\n+\t\t.comp_mask = 0,\n+\t};\n+\tif (priv->config.cqe_comp && !rxq_data->hw_timestamp) {\n+\t\tcq_attr.mlx5.comp_mask |=\n+\t\t\t\tMLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;\n+\t\trxq_data->byte_mask = UINT32_MAX;\n+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT\n+\t\tif (mlx5_rxq_mprq_enabled(rxq_data)) {\n+\t\t\tcq_attr.mlx5.cqe_comp_res_format =\n+\t\t\t\t\tMLX5DV_CQE_RES_FORMAT_CSUM_STRIDX;\n+\t\t\trxq_data->mcqe_format 
=\n+\t\t\t\t\tMLX5_CQE_RESP_FORMAT_CSUM_STRIDX;\n+\t\t} else {\n+\t\t\tcq_attr.mlx5.cqe_comp_res_format =\n+\t\t\t\t\tMLX5DV_CQE_RES_FORMAT_HASH;\n+\t\t\trxq_data->mcqe_format =\n+\t\t\t\t\tMLX5_CQE_RESP_FORMAT_HASH;\n+\t\t}\n+#else\n+\t\tcq_attr.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;\n+\t\trxq_data->mcqe_format = MLX5_CQE_RESP_FORMAT_HASH;\n+#endif\n+\t\t/*\n+\t\t * For vectorized Rx, it must not be doubled in order to\n+\t\t * make cq_ci and rq_ci aligned.\n+\t\t */\n+\t\tif (mlx5_rxq_check_vec_support(rxq_data) < 0)\n+\t\t\tcq_attr.ibv.cqe *= 2;\n+\t} else if (priv->config.cqe_comp && rxq_data->hw_timestamp) {\n+\t\tDRV_LOG(DEBUG,\n+\t\t\t\"Port %u Rx CQE compression is disabled for HW\"\n+\t\t\t\" timestamp.\",\n+\t\t\tdev->data->port_id);\n+\t}\n+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD\n+\tif (RTE_CACHE_LINE_SIZE == 128) {\n+\t\tcq_attr.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;\n+\t\tcq_attr.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;\n+\t}\n+#endif\n+\treturn mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(priv->sh->ctx,\n+\t\t\t\t\t\t\t      &cq_attr.ibv,\n+\t\t\t\t\t\t\t      &cq_attr.mlx5));\n+}\n+\n+/**\n+ * Create a WQ Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param idx\n+ *   Queue index in DPDK Rx queue array.\n+ *\n+ * @return\n+ *   The Verbs WQ object initialized, NULL otherwise and rte_errno is set.\n+ */\n+static struct ibv_wq *\n+mlx5_rxq_ibv_wq_create(struct rte_eth_dev *dev, uint16_t idx)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];\n+\tstruct mlx5_rxq_ctrl *rxq_ctrl =\n+\t\tcontainer_of(rxq_data, struct mlx5_rxq_ctrl, rxq);\n+\tstruct mlx5_rxq_obj *rxq_obj = rxq_ctrl->obj;\n+\tunsigned int wqe_n = 1 << rxq_data->elts_n;\n+\tstruct {\n+\t\tstruct ibv_wq_init_attr ibv;\n+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT\n+\t\tstruct mlx5dv_wq_init_attr mlx5;\n+#endif\n+\t} wq_attr;\n+\n+\twq_attr.ibv = (struct 
ibv_wq_init_attr){\n+\t\t.wq_context = NULL, /* Could be useful in the future. */\n+\t\t.wq_type = IBV_WQT_RQ,\n+\t\t/* Max number of outstanding WRs. */\n+\t\t.max_wr = wqe_n >> rxq_data->sges_n,\n+\t\t/* Max number of scatter/gather elements in a WR. */\n+\t\t.max_sge = 1 << rxq_data->sges_n,\n+\t\t.pd = priv->sh->pd,\n+\t\t.cq = rxq_obj->ibv_cq,\n+\t\t.comp_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING | 0,\n+\t\t.create_flags = (rxq_data->vlan_strip ?\n+\t\t\t\t IBV_WQ_FLAGS_CVLAN_STRIPPING : 0),\n+\t};\n+\t/* By default, FCS (CRC) is stripped by hardware. */\n+\tif (rxq_data->crc_present) {\n+\t\twq_attr.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;\n+\t\twq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;\n+\t}\n+\tif (priv->config.hw_padding) {\n+#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING)\n+\t\twq_attr.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;\n+\t\twq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;\n+#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING)\n+\t\twq_attr.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING;\n+\t\twq_attr.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;\n+#endif\n+\t}\n+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT\n+\twq_attr.mlx5 = (struct mlx5dv_wq_init_attr){\n+\t\t.comp_mask = 0,\n+\t};\n+\tif (mlx5_rxq_mprq_enabled(rxq_data)) {\n+\t\tstruct mlx5dv_striding_rq_init_attr *mprq_attr =\n+\t\t\t\t\t\t&wq_attr.mlx5.striding_rq_attrs;\n+\n+\t\twq_attr.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;\n+\t\t*mprq_attr = (struct mlx5dv_striding_rq_init_attr){\n+\t\t\t.single_stride_log_num_of_bytes = rxq_data->strd_sz_n,\n+\t\t\t.single_wqe_log_num_of_strides = rxq_data->strd_num_n,\n+\t\t\t.two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,\n+\t\t};\n+\t}\n+\trxq_obj->wq = mlx5_glue->dv_create_wq(priv->sh->ctx, &wq_attr.ibv,\n+\t\t\t\t\t      &wq_attr.mlx5);\n+#else\n+\trxq_obj->wq = mlx5_glue->create_wq(priv->sh->ctx, &wq_attr.ibv);\n+#endif\n+\tif (rxq_obj->wq) {\n+\t\t/*\n+\t\t * Make sure number of WRs*SGEs match expectations since a 
queue\n+\t\t * cannot allocate more than \"desc\" buffers.\n+\t\t */\n+\t\tif (wq_attr.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||\n+\t\t    wq_attr.ibv.max_sge != (1u << rxq_data->sges_n)) {\n+\t\t\tDRV_LOG(ERR,\n+\t\t\t\t\"Port %u Rx queue %u requested %u*%u but got\"\n+\t\t\t\t\" %u*%u WRs*SGEs.\",\n+\t\t\t\tdev->data->port_id, idx,\n+\t\t\t\twqe_n >> rxq_data->sges_n,\n+\t\t\t\t(1 << rxq_data->sges_n),\n+\t\t\t\twq_attr.ibv.max_wr, wq_attr.ibv.max_sge);\n+\t\t\tclaim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));\n+\t\t\trxq_obj->wq = NULL;\n+\t\t\trte_errno = EINVAL;\n+\t\t}\n+\t}\n+\treturn rxq_obj->wq;\n+}\n+\n+/**\n+ * Create the Rx queue Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param idx\n+ *   Queue index in DPDK Rx queue array.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_rxq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];\n+\tstruct mlx5_rxq_ctrl *rxq_ctrl =\n+\t\tcontainer_of(rxq_data, struct mlx5_rxq_ctrl, rxq);\n+\tstruct mlx5_rxq_obj *tmpl = rxq_ctrl->obj;\n+\tstruct mlx5dv_cq cq_info;\n+\tstruct mlx5dv_rwq rwq;\n+\tint ret = 0;\n+\tstruct mlx5dv_obj obj;\n+\n+\tMLX5_ASSERT(rxq_data);\n+\tMLX5_ASSERT(tmpl);\n+\ttmpl->rxq_ctrl = rxq_ctrl;\n+\tif (rxq_ctrl->irq) {\n+\t\ttmpl->ibv_channel =\n+\t\t\t\tmlx5_glue->create_comp_channel(priv->sh->ctx);\n+\t\tif (!tmpl->ibv_channel) {\n+\t\t\tDRV_LOG(ERR, \"Port %u: comp channel creation failure.\",\n+\t\t\t\tdev->data->port_id);\n+\t\t\trte_errno = ENOMEM;\n+\t\t\tgoto error;\n+\t\t}\n+\t\ttmpl->fd = ((struct ibv_comp_channel *)(tmpl->ibv_channel))->fd;\n+\t}\n+\t/* Create CQ using Verbs API. 
*/\n+\ttmpl->ibv_cq = mlx5_rxq_ibv_cq_create(dev, idx);\n+\tif (!tmpl->ibv_cq) {\n+\t\tDRV_LOG(ERR, \"Port %u Rx queue %u CQ creation failure.\",\n+\t\t\tdev->data->port_id, idx);\n+\t\trte_errno = ENOMEM;\n+\t\tgoto error;\n+\t}\n+\tobj.cq.in = tmpl->ibv_cq;\n+\tobj.cq.out = &cq_info;\n+\tret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ);\n+\tif (ret) {\n+\t\trte_errno = ret;\n+\t\tgoto error;\n+\t}\n+\tif (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"Port %u wrong MLX5_CQE_SIZE environment \"\n+\t\t\t\"variable value: it should be set to %u.\",\n+\t\t\tdev->data->port_id, RTE_CACHE_LINE_SIZE);\n+\t\trte_errno = EINVAL;\n+\t\tgoto error;\n+\t}\n+\t/* Fill the rings. */\n+\trxq_data->cqe_n = log2above(cq_info.cqe_cnt);\n+\trxq_data->cq_db = cq_info.dbrec;\n+\trxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;\n+\trxq_data->cq_uar = cq_info.cq_uar;\n+\trxq_data->cqn = cq_info.cqn;\n+\t/* Create WQ (RQ) using Verbs API. */\n+\ttmpl->wq = mlx5_rxq_ibv_wq_create(dev, idx);\n+\tif (!tmpl->wq) {\n+\t\tDRV_LOG(ERR, \"Port %u Rx queue %u WQ creation failure.\",\n+\t\t\tdev->data->port_id, idx);\n+\t\trte_errno = ENOMEM;\n+\t\tgoto error;\n+\t}\n+\t/* Change queue state to ready. */\n+\tret = mlx5_ibv_modify_wq(tmpl, IBV_WQS_RDY);\n+\tif (ret) {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"Port %u Rx queue %u WQ state to IBV_WQS_RDY failed.\",\n+\t\t\tdev->data->port_id, idx);\n+\t\trte_errno = ret;\n+\t\tgoto error;\n+\t}\n+\tobj.rwq.in = tmpl->wq;\n+\tobj.rwq.out = &rwq;\n+\tret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_RWQ);\n+\tif (ret) {\n+\t\trte_errno = ret;\n+\t\tgoto error;\n+\t}\n+\trxq_data->wqes = rwq.buf;\n+\trxq_data->rq_db = rwq.dbrec;\n+\trxq_data->cq_arm_sn = 0;\n+\tmlx5_rxq_initialize(rxq_data);\n+\trxq_data->cq_ci = 0;\n+\tdev->data->rx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;\n+\trxq_ctrl->wqn = ((struct ibv_wq *)(tmpl->wq))->wq_num;\n+\treturn 0;\n+error:\n+\tret = rte_errno; /* Save rte_errno before cleanup. 
*/\n+\tif (tmpl->wq)\n+\t\tclaim_zero(mlx5_glue->destroy_wq(tmpl->wq));\n+\tif (tmpl->ibv_cq)\n+\t\tclaim_zero(mlx5_glue->destroy_cq(tmpl->ibv_cq));\n+\tif (tmpl->ibv_channel)\n+\t\tclaim_zero(mlx5_glue->destroy_comp_channel(tmpl->ibv_channel));\n+\trte_errno = ret; /* Restore rte_errno. */\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Release an Rx verbs queue object.\n+ *\n+ * @param rxq_obj\n+ *   Verbs Rx queue object.\n+ */\n+static void\n+mlx5_rxq_ibv_obj_release(struct mlx5_rxq_obj *rxq_obj)\n+{\n+\tMLX5_ASSERT(rxq_obj);\n+\tMLX5_ASSERT(rxq_obj->wq);\n+\tMLX5_ASSERT(rxq_obj->ibv_cq);\n+\tclaim_zero(mlx5_glue->destroy_wq(rxq_obj->wq));\n+\tclaim_zero(mlx5_glue->destroy_cq(rxq_obj->ibv_cq));\n+\tif (rxq_obj->ibv_channel)\n+\t\tclaim_zero(mlx5_glue->destroy_comp_channel\n+\t\t\t\t\t\t\t(rxq_obj->ibv_channel));\n+}\n+\n+/**\n+ * Get event for an Rx verbs queue object.\n+ *\n+ * @param rxq_obj\n+ *   Verbs Rx queue object.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_rx_ibv_get_event(struct mlx5_rxq_obj *rxq_obj)\n+{\n+\tstruct ibv_cq *ev_cq;\n+\tvoid *ev_ctx;\n+\tint ret = mlx5_glue->get_cq_event(rxq_obj->ibv_channel,\n+\t\t\t\t\t  &ev_cq, &ev_ctx);\n+\n+\tif (ret < 0 || ev_cq != rxq_obj->ibv_cq)\n+\t\tgoto exit;\n+\tmlx5_glue->ack_cq_events(rxq_obj->ibv_cq, 1);\n+\treturn 0;\n+exit:\n+\tif (ret < 0)\n+\t\trte_errno = errno;\n+\telse\n+\t\trte_errno = EINVAL;\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Creates a receive work queue as a filed of indirection table.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param log_n\n+ *   Log of number of queues in the array.\n+ * @param ind_tbl\n+ *   Verbs indirection table object.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ibv_ind_table_new(struct rte_eth_dev *dev, const unsigned int log_n,\n+\t\t       struct mlx5_ind_table_obj *ind_tbl)\n+{\n+\tstruct mlx5_priv 
*priv = dev->data->dev_private;\n+\tstruct ibv_wq *wq[1 << log_n];\n+\tunsigned int i, j;\n+\n+\tMLX5_ASSERT(ind_tbl);\n+\tfor (i = 0; i != ind_tbl->queues_n; ++i) {\n+\t\tstruct mlx5_rxq_data *rxq = (*priv->rxqs)[ind_tbl->queues[i]];\n+\t\tstruct mlx5_rxq_ctrl *rxq_ctrl =\n+\t\t\t\tcontainer_of(rxq, struct mlx5_rxq_ctrl, rxq);\n+\n+\t\twq[i] = rxq_ctrl->obj->wq;\n+\t}\n+\tMLX5_ASSERT(i > 0);\n+\t/* Finalise indirection table. */\n+\tfor (j = 0; i != (unsigned int)(1 << log_n); ++j, ++i)\n+\t\twq[i] = wq[j];\n+\tind_tbl->ind_table = mlx5_glue->create_rwq_ind_table(priv->sh->ctx,\n+\t\t\t\t\t&(struct ibv_rwq_ind_table_init_attr){\n+\t\t\t\t\t\t.log_ind_tbl_size = log_n,\n+\t\t\t\t\t\t.ind_tbl = wq,\n+\t\t\t\t\t\t.comp_mask = 0,\n+\t\t\t\t\t});\n+\tif (!ind_tbl->ind_table) {\n+\t\trte_errno = errno;\n+\t\treturn -rte_errno;\n+\t}\n+\treturn 0;\n+}\n+\n+/**\n+ * Destroys the specified Indirection Table.\n+ *\n+ * @param ind_table\n+ *   Indirection table to release.\n+ */\n+static void\n+mlx5_ibv_ind_table_destroy(struct mlx5_ind_table_obj *ind_tbl)\n+{\n+\tclaim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl->ind_table));\n+}\n+\n+/**\n+ * Create an Rx Hash queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param hrxq\n+ *   Pointer to Rx Hash queue.\n+ * @param tunnel\n+ *   Tunnel type.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ibv_hrxq_new(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq,\n+\t\t  int tunnel __rte_unused)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct ibv_qp *qp = NULL;\n+\tstruct mlx5_ind_table_obj *ind_tbl = hrxq->ind_table;\n+\tconst uint8_t *rss_key = hrxq->rss_key;\n+\tuint64_t hash_fields = hrxq->hash_fields;\n+\tint err;\n+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT\n+\tstruct mlx5dv_qp_init_attr qp_init_attr;\n+\n+\tmemset(&qp_init_attr, 0, sizeof(qp_init_attr));\n+\tif (tunnel) {\n+\t\tqp_init_attr.comp_mask =\n+\t\t\t\t       
MLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;\n+\t\tqp_init_attr.create_flags = MLX5DV_QP_CREATE_TUNNEL_OFFLOADS;\n+\t}\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\tif (dev->data->dev_conf.lpbk_mode) {\n+\t\t/* Allow packet sent from NIC loop back w/o source MAC check. */\n+\t\tqp_init_attr.comp_mask |=\n+\t\t\t\tMLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;\n+\t\tqp_init_attr.create_flags |=\n+\t\t\t\tMLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;\n+\t}\n+#endif\n+\tqp = mlx5_glue->dv_create_qp\n+\t\t\t(priv->sh->ctx,\n+\t\t\t &(struct ibv_qp_init_attr_ex){\n+\t\t\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t\t\t.comp_mask =\n+\t\t\t\t\tIBV_QP_INIT_ATTR_PD |\n+\t\t\t\t\tIBV_QP_INIT_ATTR_IND_TABLE |\n+\t\t\t\t\tIBV_QP_INIT_ATTR_RX_HASH,\n+\t\t\t\t.rx_hash_conf = (struct ibv_rx_hash_conf){\n+\t\t\t\t\t.rx_hash_function =\n+\t\t\t\t\t\tIBV_RX_HASH_FUNC_TOEPLITZ,\n+\t\t\t\t\t.rx_hash_key_len = hrxq->rss_key_len,\n+\t\t\t\t\t.rx_hash_key =\n+\t\t\t\t\t\t(void *)(uintptr_t)rss_key,\n+\t\t\t\t\t.rx_hash_fields_mask = hash_fields,\n+\t\t\t\t},\n+\t\t\t\t.rwq_ind_tbl = ind_tbl->ind_table,\n+\t\t\t\t.pd = priv->sh->pd,\n+\t\t\t  },\n+\t\t\t  &qp_init_attr);\n+#else\n+\tqp = mlx5_glue->create_qp_ex\n+\t\t\t(priv->sh->ctx,\n+\t\t\t &(struct ibv_qp_init_attr_ex){\n+\t\t\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t\t\t.comp_mask =\n+\t\t\t\t\tIBV_QP_INIT_ATTR_PD |\n+\t\t\t\t\tIBV_QP_INIT_ATTR_IND_TABLE |\n+\t\t\t\t\tIBV_QP_INIT_ATTR_RX_HASH,\n+\t\t\t\t.rx_hash_conf = (struct ibv_rx_hash_conf){\n+\t\t\t\t\t.rx_hash_function =\n+\t\t\t\t\t\tIBV_RX_HASH_FUNC_TOEPLITZ,\n+\t\t\t\t\t.rx_hash_key_len = hrxq->rss_key_len,\n+\t\t\t\t\t.rx_hash_key =\n+\t\t\t\t\t\t(void *)(uintptr_t)rss_key,\n+\t\t\t\t\t.rx_hash_fields_mask = hash_fields,\n+\t\t\t\t},\n+\t\t\t\t.rwq_ind_tbl = ind_tbl->ind_table,\n+\t\t\t\t.pd = priv->sh->pd,\n+\t\t\t });\n+#endif\n+\tif (!qp) {\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\thrxq->qp = qp;\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\thrxq->action = 
mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);\n+\tif (!hrxq->action) {\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+#endif\n+\treturn 0;\n+error:\n+\terr = rte_errno; /* Save rte_errno before cleanup. */\n+\tif (qp)\n+\t\tclaim_zero(mlx5_glue->destroy_qp(qp));\n+\trte_errno = err; /* Restore rte_errno. */\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Destroy a Verbs queue pair.\n+ *\n+ * @param hrxq\n+ *   Hash Rx queue to release its qp.\n+ */\n+static void\n+mlx5_ibv_qp_destroy(struct mlx5_hrxq *hrxq)\n+{\n+\tclaim_zero(mlx5_glue->destroy_qp(hrxq->qp));\n+}\n+\n+/**\n+ * Release a drop Rx queue Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ */\n+static void\n+mlx5_rxq_ibv_obj_drop_release(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;\n+\n+\tif (rxq->wq)\n+\t\tclaim_zero(mlx5_glue->destroy_wq(rxq->wq));\n+\tif (rxq->ibv_cq)\n+\t\tclaim_zero(mlx5_glue->destroy_cq(rxq->ibv_cq));\n+\tmlx5_free(rxq);\n+\tpriv->drop_queue.rxq = NULL;\n+}\n+\n+/**\n+ * Create a drop Rx queue Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_rxq_ibv_obj_drop_create(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct ibv_context *ctx = priv->sh->ctx;\n+\tstruct mlx5_rxq_obj *rxq = priv->drop_queue.rxq;\n+\n+\tif (rxq)\n+\t\treturn 0;\n+\trxq = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*rxq), 0, SOCKET_ID_ANY);\n+\tif (!rxq) {\n+\t\tDRV_LOG(DEBUG, \"Port %u cannot allocate drop Rx queue memory.\",\n+\t\t      dev->data->port_id);\n+\t\trte_errno = ENOMEM;\n+\t\treturn -rte_errno;\n+\t}\n+\tpriv->drop_queue.rxq = rxq;\n+\trxq->ibv_cq = mlx5_glue->create_cq(ctx, 1, NULL, NULL, 0);\n+\tif (!rxq->ibv_cq) {\n+\t\tDRV_LOG(DEBUG, \"Port %u cannot allocate CQ for drop queue.\",\n+\t\t      
dev->data->port_id);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\trxq->wq = mlx5_glue->create_wq(ctx, &(struct ibv_wq_init_attr){\n+\t\t\t\t\t\t    .wq_type = IBV_WQT_RQ,\n+\t\t\t\t\t\t    .max_wr = 1,\n+\t\t\t\t\t\t    .max_sge = 1,\n+\t\t\t\t\t\t    .pd = priv->sh->pd,\n+\t\t\t\t\t\t    .cq = rxq->ibv_cq,\n+\t\t\t\t\t      });\n+\tif (!rxq->wq) {\n+\t\tDRV_LOG(DEBUG, \"Port %u cannot allocate WQ for drop queue.\",\n+\t\t      dev->data->port_id);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tpriv->drop_queue.rxq = rxq;\n+\treturn 0;\n+error:\n+\tmlx5_rxq_ibv_obj_drop_release(dev);\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Create a Verbs drop action for Rx Hash queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+static int\n+mlx5_ibv_drop_action_create(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;\n+\tstruct ibv_rwq_ind_table *ind_tbl = NULL;\n+\tstruct mlx5_rxq_obj *rxq;\n+\tint ret;\n+\n+\tMLX5_ASSERT(hrxq && hrxq->ind_table);\n+\tret = mlx5_rxq_ibv_obj_drop_create(dev);\n+\tif (ret < 0)\n+\t\tgoto error;\n+\trxq = priv->drop_queue.rxq;\n+\tind_tbl = mlx5_glue->create_rwq_ind_table\n+\t\t\t\t(priv->sh->ctx,\n+\t\t\t\t &(struct ibv_rwq_ind_table_init_attr){\n+\t\t\t\t\t.log_ind_tbl_size = 0,\n+\t\t\t\t\t.ind_tbl = (struct ibv_wq **)&rxq->wq,\n+\t\t\t\t\t.comp_mask = 0,\n+\t\t\t\t });\n+\tif (!ind_tbl) {\n+\t\tDRV_LOG(DEBUG, \"Port %u\"\n+\t\t\t\" cannot allocate indirection table for drop queue.\",\n+\t\t\tdev->data->port_id);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\thrxq->qp = mlx5_glue->create_qp_ex(priv->sh->ctx,\n+\t\t &(struct ibv_qp_init_attr_ex){\n+\t\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t\t.comp_mask = IBV_QP_INIT_ATTR_PD |\n+\t\t\t\t     IBV_QP_INIT_ATTR_IND_TABLE |\n+\t\t\t\t     IBV_QP_INIT_ATTR_RX_HASH,\n+\t\t\t.rx_hash_conf = 
(struct ibv_rx_hash_conf){\n+\t\t\t\t.rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,\n+\t\t\t\t.rx_hash_key_len = MLX5_RSS_HASH_KEY_LEN,\n+\t\t\t\t.rx_hash_key = rss_hash_default_key,\n+\t\t\t\t.rx_hash_fields_mask = 0,\n+\t\t\t\t},\n+\t\t\t.rwq_ind_tbl = ind_tbl,\n+\t\t\t.pd = priv->sh->pd\n+\t\t });\n+\tif (!hrxq->qp) {\n+\t\tDRV_LOG(DEBUG, \"Port %u cannot allocate QP for drop queue.\",\n+\t\t      dev->data->port_id);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\thrxq->action = mlx5_glue->dv_create_flow_action_dest_ibv_qp(hrxq->qp);\n+\tif (!hrxq->action) {\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+#endif\n+\thrxq->ind_table->ind_table = ind_tbl;\n+\treturn 0;\n+error:\n+\tif (hrxq->qp)\n+\t\tclaim_zero(mlx5_glue->destroy_qp(hrxq->qp));\n+\tif (ind_tbl)\n+\t\tclaim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));\n+\tif (priv->drop_queue.rxq)\n+\t\tmlx5_rxq_ibv_obj_drop_release(dev);\n+\treturn -rte_errno;\n+}\n+\n+/**\n+ * Release a drop hash Rx queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ */\n+static void\n+mlx5_ibv_drop_action_destroy(struct rte_eth_dev *dev)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_hrxq *hrxq = priv->drop_queue.hrxq;\n+\tstruct ibv_rwq_ind_table *ind_tbl = hrxq->ind_table->ind_table;\n+\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\tclaim_zero(mlx5_glue->destroy_flow_action(hrxq->action));\n+#endif\n+\tclaim_zero(mlx5_glue->destroy_qp(hrxq->qp));\n+\tclaim_zero(mlx5_glue->destroy_rwq_ind_table(ind_tbl));\n+\tmlx5_rxq_ibv_obj_drop_release(dev);\n+}\n+\n+/**\n+ * Create a QP Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param idx\n+ *   Queue index in DPDK Tx queue array.\n+ *\n+ * @return\n+ *   The QP Verbs object, NULL otherwise and rte_errno is set.\n+ */\n+static struct ibv_qp *\n+mlx5_txq_ibv_qp_create(struct rte_eth_dev *dev, uint16_t idx)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct 
mlx5_txq_data *txq_data = (*priv->txqs)[idx];\n+\tstruct mlx5_txq_ctrl *txq_ctrl =\n+\t\t\tcontainer_of(txq_data, struct mlx5_txq_ctrl, txq);\n+\tstruct ibv_qp *qp_obj = NULL;\n+\tstruct ibv_qp_init_attr_ex qp_attr = { 0 };\n+\tconst int desc = 1 << txq_data->elts_n;\n+\n+\tMLX5_ASSERT(txq_ctrl->obj->cq);\n+\t/* CQ to be associated with the send queue. */\n+\tqp_attr.send_cq = txq_ctrl->obj->cq;\n+\t/* CQ to be associated with the receive queue. */\n+\tqp_attr.recv_cq = txq_ctrl->obj->cq;\n+\t/* Max number of outstanding WRs. */\n+\tqp_attr.cap.max_send_wr = ((priv->sh->device_attr.max_qp_wr < desc) ?\n+\t\t\t\t   priv->sh->device_attr.max_qp_wr : desc);\n+\t/*\n+\t * Max number of scatter/gather elements in a WR, must be 1 to prevent\n+\t * libmlx5 from trying to affect must be 1 to prevent libmlx5 from\n+\t * trying to affect too much memory. TX gather is not impacted by the\n+\t * device_attr.max_sge limit and will still work properly.\n+\t */\n+\tqp_attr.cap.max_send_sge = 1;\n+\tqp_attr.qp_type = IBV_QPT_RAW_PACKET,\n+\t/* Do *NOT* enable this, completions events are managed per Tx burst. 
*/\n+\tqp_attr.sq_sig_all = 0;\n+\tqp_attr.pd = priv->sh->pd;\n+\tqp_attr.comp_mask = IBV_QP_INIT_ATTR_PD;\n+\tif (txq_data->inlen_send)\n+\t\tqp_attr.cap.max_inline_data = txq_ctrl->max_inline_data;\n+\tif (txq_data->tso_en) {\n+\t\tqp_attr.max_tso_header = txq_ctrl->max_tso_header;\n+\t\tqp_attr.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;\n+\t}\n+\tqp_obj = mlx5_glue->create_qp_ex(priv->sh->ctx, &qp_attr);\n+\tif (qp_obj == NULL) {\n+\t\tDRV_LOG(ERR, \"Port %u Tx queue %u QP creation failure.\",\n+\t\t\tdev->data->port_id, idx);\n+\t\trte_errno = errno;\n+\t}\n+\treturn qp_obj;\n+}\n+\n+/**\n+ * Create the Tx queue Verbs object.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ * @param idx\n+ *   Queue index in DPDK Tx queue array.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx)\n+{\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_txq_data *txq_data = (*priv->txqs)[idx];\n+\tstruct mlx5_txq_ctrl *txq_ctrl =\n+\t\tcontainer_of(txq_data, struct mlx5_txq_ctrl, txq);\n+\tstruct mlx5_txq_obj *txq_obj = txq_ctrl->obj;\n+\tunsigned int cqe_n;\n+\tstruct mlx5dv_qp qp;\n+\tstruct mlx5dv_cq cq_info;\n+\tstruct mlx5dv_obj obj;\n+\tconst int desc = 1 << txq_data->elts_n;\n+\tint ret = 0;\n+\n+\tMLX5_ASSERT(txq_data);\n+\tMLX5_ASSERT(txq_obj);\n+\ttxq_obj->txq_ctrl = txq_ctrl;\n+\tif (mlx5_getenv_int(\"MLX5_ENABLE_CQE_COMPRESSION\")) {\n+\t\tDRV_LOG(ERR, \"Port %u MLX5_ENABLE_CQE_COMPRESSION \"\n+\t\t\t\"must never be set.\", dev->data->port_id);\n+\t\trte_errno = EINVAL;\n+\t\treturn -rte_errno;\n+\t}\n+\tcqe_n = desc / MLX5_TX_COMP_THRESH +\n+\t\t1 + MLX5_TX_COMP_THRESH_INLINE_DIV;\n+\ttxq_obj->cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);\n+\tif (txq_obj->cq == NULL) {\n+\t\tDRV_LOG(ERR, \"Port %u Tx queue %u CQ creation failure.\",\n+\t\t\tdev->data->port_id, idx);\n+\t\trte_errno = errno;\n+\t\tgoto 
error;\n+\t}\n+\ttxq_obj->qp = mlx5_txq_ibv_qp_create(dev, idx);\n+\tif (txq_obj->qp == NULL) {\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tret = mlx5_ibv_modify_qp(txq_obj, MLX5_TXQ_MOD_RST2RDY,\n+\t\t\t\t (uint8_t)priv->dev_port);\n+\tif (ret) {\n+\t\tDRV_LOG(ERR, \"Port %u Tx queue %u QP state modifying failed.\",\n+\t\t\tdev->data->port_id, idx);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tqp.comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET;\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\t/* If using DevX, need additional mask to read tisn value. */\n+\tif (priv->sh->devx && !priv->sh->tdn)\n+\t\tqp.comp_mask |= MLX5DV_QP_MASK_RAW_QP_HANDLES;\n+#endif\n+\tobj.cq.in = txq_obj->cq;\n+\tobj.cq.out = &cq_info;\n+\tobj.qp.in = txq_obj->qp;\n+\tobj.qp.out = &qp;\n+\tret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);\n+\tif (ret != 0) {\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tif (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"Port %u wrong MLX5_CQE_SIZE environment variable\"\n+\t\t\t\" value: it should be set to %u.\",\n+\t\t\tdev->data->port_id, RTE_CACHE_LINE_SIZE);\n+\t\trte_errno = EINVAL;\n+\t\tgoto error;\n+\t}\n+\ttxq_data->cqe_n = log2above(cq_info.cqe_cnt);\n+\ttxq_data->cqe_s = 1 << txq_data->cqe_n;\n+\ttxq_data->cqe_m = txq_data->cqe_s - 1;\n+\ttxq_data->qp_num_8s = ((struct ibv_qp *)txq_obj->qp)->qp_num << 8;\n+\ttxq_data->wqes = qp.sq.buf;\n+\ttxq_data->wqe_n = log2above(qp.sq.wqe_cnt);\n+\ttxq_data->wqe_s = 1 << txq_data->wqe_n;\n+\ttxq_data->wqe_m = txq_data->wqe_s - 1;\n+\ttxq_data->wqes_end = txq_data->wqes + txq_data->wqe_s;\n+\ttxq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];\n+\ttxq_data->cq_db = cq_info.dbrec;\n+\ttxq_data->cqes = (volatile struct mlx5_cqe *)cq_info.buf;\n+\ttxq_data->cq_ci = 0;\n+\ttxq_data->cq_pi = 0;\n+\ttxq_data->wqe_ci = 0;\n+\ttxq_data->wqe_pi = 0;\n+\ttxq_data->wqe_comp = 0;\n+\ttxq_data->wqe_thres = txq_data->wqe_s / MLX5_TX_COMP_THRESH_INLINE_DIV;\n+#ifdef 
HAVE_IBV_FLOW_DV_SUPPORT\n+\t/*\n+\t * If using DevX need to query and store TIS transport domain value.\n+\t * This is done once per port.\n+\t * Will use this value on Rx, when creating matching TIR.\n+\t */\n+\tif (priv->sh->devx && !priv->sh->tdn) {\n+\t\tret = mlx5_devx_cmd_qp_query_tis_td(txq_obj->qp, qp.tisn,\n+\t\t\t\t\t\t    &priv->sh->tdn);\n+\t\tif (ret) {\n+\t\t\tDRV_LOG(ERR, \"Fail to query port %u Tx queue %u QP TIS \"\n+\t\t\t\t\"transport domain.\", dev->data->port_id, idx);\n+\t\t\trte_errno = EINVAL;\n+\t\t\tgoto error;\n+\t\t} else {\n+\t\t\tDRV_LOG(DEBUG, \"Port %u Tx queue %u TIS number %d \"\n+\t\t\t\t\"transport domain %d.\", dev->data->port_id,\n+\t\t\t\tidx, qp.tisn, priv->sh->tdn);\n+\t\t}\n+\t}\n+#endif\n+\ttxq_ctrl->bf_reg = qp.bf.reg;\n+\tif (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {\n+\t\ttxq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;\n+\t\tDRV_LOG(DEBUG, \"Port %u: uar_mmap_offset 0x%\" PRIx64 \".\",\n+\t\t\tdev->data->port_id, txq_ctrl->uar_mmap_offset);\n+\t} else {\n+\t\tDRV_LOG(ERR,\n+\t\t\t\"Port %u failed to retrieve UAR info, invalid\"\n+\t\t\t\" libmlx5.so\",\n+\t\t\tdev->data->port_id);\n+\t\trte_errno = EINVAL;\n+\t\tgoto error;\n+\t}\n+\ttxq_uar_init(txq_ctrl);\n+\tdev->data->tx_queue_state[idx] = RTE_ETH_QUEUE_STATE_STARTED;\n+\treturn 0;\n+error:\n+\tret = rte_errno; /* Save rte_errno before cleanup. */\n+\tif (txq_obj->cq)\n+\t\tclaim_zero(mlx5_glue->destroy_cq(txq_obj->cq));\n+\tif (txq_obj->qp)\n+\t\tclaim_zero(mlx5_glue->destroy_qp(txq_obj->qp));\n+\trte_errno = ret; /* Restore rte_errno. 
*/\n+\treturn -rte_errno;\n+}\n+\n+/*\n+ * Create the dummy QP with minimal resources for loopback.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ *\n+ * @return\n+ *   0 on success, a negative errno value otherwise and rte_errno is set.\n+ */\n+int\n+mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev)\n+{\n+#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_dev_ctx_shared *sh = priv->sh;\n+\tstruct ibv_context *ctx = sh->ctx;\n+\tstruct mlx5dv_qp_init_attr qp_init_attr = {0};\n+\tstruct {\n+\t\tstruct ibv_cq_init_attr_ex ibv;\n+\t\tstruct mlx5dv_cq_init_attr mlx5;\n+\t} cq_attr = {{0}};\n+\n+\tif (dev->data->dev_conf.lpbk_mode) {\n+\t\t/* Allow packet sent from NIC loop back w/o source MAC check. */\n+\t\tqp_init_attr.comp_mask |=\n+\t\t\t\tMLX5DV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS;\n+\t\tqp_init_attr.create_flags |=\n+\t\t\t\tMLX5DV_QP_CREATE_TIR_ALLOW_SELF_LOOPBACK_UC;\n+\t} else {\n+\t\treturn 0;\n+\t}\n+\t/* Only need to check refcnt, 0 after \"sh\" is allocated. */\n+\tif (!!(__atomic_fetch_add(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {\n+\t\tMLX5_ASSERT(sh->self_lb.ibv_cq && sh->self_lb.qp);\n+\t\tpriv->lb_used = 1;\n+\t\treturn 0;\n+\t}\n+\tcq_attr.ibv = (struct ibv_cq_init_attr_ex){\n+\t\t.cqe = 1,\n+\t\t.channel = NULL,\n+\t\t.comp_mask = 0,\n+\t};\n+\tcq_attr.mlx5 = (struct mlx5dv_cq_init_attr){\n+\t\t.comp_mask = 0,\n+\t};\n+\t/* Only CQ is needed, no WQ(RQ) is required in this case. 
*/\n+\tsh->self_lb.ibv_cq = mlx5_glue->cq_ex_to_cq(mlx5_glue->dv_create_cq(ctx,\n+\t\t\t\t\t\t\t&cq_attr.ibv,\n+\t\t\t\t\t\t\t&cq_attr.mlx5));\n+\tif (!sh->self_lb.ibv_cq) {\n+\t\tDRV_LOG(ERR, \"Port %u cannot allocate CQ for loopback.\",\n+\t\t\tdev->data->port_id);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tsh->self_lb.qp = mlx5_glue->dv_create_qp(ctx,\n+\t\t\t\t&(struct ibv_qp_init_attr_ex){\n+\t\t\t\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t\t\t\t.comp_mask = IBV_QP_INIT_ATTR_PD,\n+\t\t\t\t\t.pd = sh->pd,\n+\t\t\t\t\t.send_cq = sh->self_lb.ibv_cq,\n+\t\t\t\t\t.recv_cq = sh->self_lb.ibv_cq,\n+\t\t\t\t\t.cap.max_recv_wr = 1,\n+\t\t\t\t},\n+\t\t\t\t&qp_init_attr);\n+\tif (!sh->self_lb.qp) {\n+\t\tDRV_LOG(DEBUG, \"Port %u cannot allocate QP for loopback.\",\n+\t\t\tdev->data->port_id);\n+\t\trte_errno = errno;\n+\t\tgoto error;\n+\t}\n+\tpriv->lb_used = 1;\n+\treturn 0;\n+error:\n+\tif (sh->self_lb.ibv_cq) {\n+\t\tclaim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));\n+\t\tsh->self_lb.ibv_cq = NULL;\n+\t}\n+\t(void)__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED);\n+\treturn -rte_errno;\n+#else\n+\tRTE_SET_USED(dev);\n+\treturn 0;\n+#endif\n+}\n+\n+/*\n+ * Release the dummy queue resources for loopback.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device.\n+ */\n+void\n+mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev)\n+{\n+#if defined(HAVE_IBV_DEVICE_TUNNEL_SUPPORT) && defined(HAVE_IBV_FLOW_DV_SUPPORT)\n+\tstruct mlx5_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_dev_ctx_shared *sh = priv->sh;\n+\n+\tif (!priv->lb_used)\n+\t\treturn;\n+\tMLX5_ASSERT(__atomic_load_n(&sh->self_lb.refcnt, __ATOMIC_RELAXED));\n+\tif (!(__atomic_sub_fetch(&sh->self_lb.refcnt, 1, __ATOMIC_RELAXED))) {\n+\t\tif (sh->self_lb.qp) {\n+\t\t\tclaim_zero(mlx5_glue->destroy_qp(sh->self_lb.qp));\n+\t\t\tsh->self_lb.qp = NULL;\n+\t\t}\n+\t\tif (sh->self_lb.ibv_cq) {\n+\t\t\tclaim_zero(mlx5_glue->destroy_cq(sh->self_lb.ibv_cq));\n+\t\t\tsh->self_lb.ibv_cq = 
NULL;\n+\t\t}\n+\t}\n+\tpriv->lb_used = 0;\n+#else\n+\tRTE_SET_USED(dev);\n+\treturn;\n+#endif\n+}\n+\n+/**\n+ * Release an Tx verbs queue object.\n+ *\n+ * @param txq_obj\n+ *   Verbs Tx queue object..\n+ */\n+void\n+mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj)\n+{\n+\tMLX5_ASSERT(txq_obj);\n+\tclaim_zero(mlx5_glue->destroy_qp(txq_obj->qp));\n+\tclaim_zero(mlx5_glue->destroy_cq(txq_obj->cq));\n+}\n+\n+struct mlx5_obj_ops ibv_obj_ops = {\n+\t.rxq_obj_modify_vlan_strip = mlx5_rxq_obj_modify_wq_vlan_strip,\n+\t.rxq_obj_new = mlx5_rxq_ibv_obj_new,\n+\t.rxq_event_get = mlx5_rx_ibv_get_event,\n+\t.rxq_obj_modify = mlx5_ibv_modify_wq,\n+\t.rxq_obj_release = mlx5_rxq_ibv_obj_release,\n+\t.ind_table_new = mlx5_ibv_ind_table_new,\n+\t.ind_table_destroy = mlx5_ibv_ind_table_destroy,\n+\t.hrxq_new = mlx5_ibv_hrxq_new,\n+\t.hrxq_destroy = mlx5_ibv_qp_destroy,\n+\t.drop_action_create = mlx5_ibv_drop_action_create,\n+\t.drop_action_destroy = mlx5_ibv_drop_action_destroy,\n+\t.txq_obj_new = mlx5_txq_ibv_obj_new,\n+\t.txq_obj_modify = mlx5_ibv_modify_qp,\n+\t.txq_obj_release = mlx5_txq_ibv_obj_release,\n+\t.lb_dummy_queue_create = NULL,\n+\t.lb_dummy_queue_release = NULL,\n+};\ndiff --git a/drivers/net/mlx5/freebsd/mlx5_verbs.h b/drivers/net/mlx5/freebsd/mlx5_verbs.h\nnew file mode 100644\nindex 0000000000..f7e8e2fe98\n--- /dev/null\n+++ b/drivers/net/mlx5/freebsd/mlx5_verbs.h\n@@ -0,0 +1,18 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright 2020 Mellanox Technologies, Ltd\n+ */\n+\n+#ifndef RTE_PMD_MLX5_VERBS_H_\n+#define RTE_PMD_MLX5_VERBS_H_\n+\n+#include \"mlx5.h\"\n+\n+int mlx5_txq_ibv_obj_new(struct rte_eth_dev *dev, uint16_t idx);\n+void mlx5_txq_ibv_obj_release(struct mlx5_txq_obj *txq_obj);\n+int mlx5_rxq_ibv_obj_dummy_lb_create(struct rte_eth_dev *dev);\n+void mlx5_rxq_ibv_obj_dummy_lb_release(struct rte_eth_dev *dev);\n+\n+/* Verbs ops struct */\n+extern const struct mlx5_mr_ops mlx5_mr_verbs_ops;\n+extern struct mlx5_obj_ops ibv_obj_ops;\n+#endif /* 
RTE_PMD_MLX5_VERBS_H_ */\n",
    "prefixes": [
        "02/19"
    ]
}