get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/8453/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 8453,
    "url": "http://patches.dpdk.org/api/patches/8453/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/1446231162-8075-3-git-send-email-adrien.mazarguil@6wind.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1446231162-8075-3-git-send-email-adrien.mazarguil@6wind.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1446231162-8075-3-git-send-email-adrien.mazarguil@6wind.com",
    "date": "2015-10-30T18:52:31",
    "name": "[dpdk-dev,v2,02/13] mlx5: add non-scattered TX and RX support",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "6cd5e579470cb9a87b8560292cf26a240919efb9",
    "submitter": {
        "id": 165,
        "url": "http://patches.dpdk.org/api/people/165/?format=api",
        "name": "Adrien Mazarguil",
        "email": "adrien.mazarguil@6wind.com"
    },
    "delegate": null,
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/1446231162-8075-3-git-send-email-adrien.mazarguil@6wind.com/mbox/",
    "series": [],
    "comments": "http://patches.dpdk.org/api/patches/8453/comments/",
    "check": "pending",
    "checks": "http://patches.dpdk.org/api/patches/8453/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id 760A591CB;\n\tFri, 30 Oct 2015 19:53:19 +0100 (CET)",
            "from mail-wm0-f47.google.com (mail-wm0-f47.google.com\n\t[74.125.82.47]) by dpdk.org (Postfix) with ESMTP id 7AF2091EE\n\tfor <dev@dpdk.org>; Fri, 30 Oct 2015 19:53:14 +0100 (CET)",
            "by wmec75 with SMTP id c75so19121363wme.1\n\tfor <dev@dpdk.org>; Fri, 30 Oct 2015 11:53:14 -0700 (PDT)",
            "from 6wind.com (guy78-3-82-239-227-177.fbx.proxad.net.\n\t[82.239.227.177]) by smtp.gmail.com with ESMTPSA id\n\tly4sm8409079wjb.4.2015.10.30.11.53.10\n\t(version=TLSv1.2 cipher=RC4-SHA bits=128/128);\n\tFri, 30 Oct 2015 11:53:13 -0700 (PDT)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=6wind_com.20150623.gappssmtp.com; s=20150623;\n\th=from:to:cc:subject:date:message-id:in-reply-to:references;\n\tbh=BGU/BkNspy7P+wjvnJ3vOQsp9LcEasbqre/K3XKMYS8=;\n\tb=fdne96dKd4G3YcumRFlgotWyFKcttfNh5WdZ4R05SC1c5WkLAonJkZr+21Uuoj2Dml\n\tNaos03RodiZcIrE5GwpR96FG8o9545EEZ5nQ3uqS7Z4DOzF2CdA4GDdcOKAkKbcDc6co\n\tBVy4t7T2l/UMuHteibf1KrnZ2Yv5I9Uc7p99ObnZ4a58E2wS783HVeUfcQoIkRaeNdoA\n\ttdWw+nVnQj8V9tSkWVO+Ycf8FjEHkOhQjpXcc9Rnf/knQBjkyWiqUfaMR2VH2K7pgOZ4\n\tMKwR89cUXP+Y5VyxY3SMyhsdFeHfMDILY5ajckMZX9ivZI1lXN54PIANFh1+zOBSbnSe\n\tMnpw==",
        "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20130820;\n\th=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n\t:references;\n\tbh=BGU/BkNspy7P+wjvnJ3vOQsp9LcEasbqre/K3XKMYS8=;\n\tb=cyH5jWoAdKUqIAR8rc8wBQ91CYo1RkqTeq+MCrgjyhBjwjGlzyM8fqEeFhj0JOK0BQ\n\tyuN8Q2FN/oTckMzM3IBCATuz1fMliKf/D6v1d4KBHll4LAFmHJWjgeXsqc/94u84aT/T\n\tbLkdPvqgGExLRSgZoUcZc96CBezaCXmi3YpXhD23CJpl1NGpbdOWNfJVN+PF/5mNc/XV\n\tmAnofHdo2lUgAEsxNpPsDEPxWTnYL7mAt3T2XE/mc/lkfE89xwF2f7d56Aa6e3bODtLR\n\t+tYxYx2MCW8H3JUcrxX1lufM8iqQ6mZm8wp7FfiWIkuIpu31lovv79EgaFDfbdZD4omv\n\tg28w==",
        "X-Gm-Message-State": "ALoCoQmYsAzYgxuNz2irhm4kdW/QhiCtpsFZRGZb90OVeM97R2Swo+tqYtxxr5Z+th3B70QhLaED",
        "X-Received": "by 10.28.87.72 with SMTP id l69mr4964347wmb.70.1446231194233;\n\tFri, 30 Oct 2015 11:53:14 -0700 (PDT)",
        "From": "Adrien Mazarguil <adrien.mazarguil@6wind.com>",
        "To": "dev@dpdk.org",
        "Date": "Fri, 30 Oct 2015 19:52:31 +0100",
        "Message-Id": "<1446231162-8075-3-git-send-email-adrien.mazarguil@6wind.com>",
        "X-Mailer": "git-send-email 2.1.0",
        "In-Reply-To": "<1446231162-8075-1-git-send-email-adrien.mazarguil@6wind.com>",
        "References": "<1444067589-29513-1-git-send-email-adrien.mazarguil@6wind.com>\n\t<1446231162-8075-1-git-send-email-adrien.mazarguil@6wind.com>",
        "Subject": "[dpdk-dev] [PATCH v2 02/13] mlx5: add non-scattered TX and RX\n\tsupport",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "RSS implementation with parent/child QPs comes from mlx4 and is temporary.\n\nSigned-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>\nSigned-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>\n---\n config/common_bsdapp          |   3 +\n config/common_linuxapp        |   3 +\n drivers/net/mlx5/Makefile     |  15 +\n drivers/net/mlx5/mlx5.c       |  40 +++\n drivers/net/mlx5/mlx5.h       |  25 ++\n drivers/net/mlx5/mlx5_defs.h  |  24 ++\n drivers/net/mlx5/mlx5_rxq.c   | 682 ++++++++++++++++++++++++++++++++++++++++++\n drivers/net/mlx5/mlx5_rxtx.c  | 496 ++++++++++++++++++++++++++++++\n drivers/net/mlx5/mlx5_rxtx.h  | 156 ++++++++++\n drivers/net/mlx5/mlx5_txq.c   | 512 +++++++++++++++++++++++++++++++\n drivers/net/mlx5/mlx5_utils.h |  11 +\n 11 files changed, 1967 insertions(+)\n create mode 100644 drivers/net/mlx5/mlx5_rxq.c\n create mode 100644 drivers/net/mlx5/mlx5_rxtx.c\n create mode 100644 drivers/net/mlx5/mlx5_rxtx.h\n create mode 100644 drivers/net/mlx5/mlx5_txq.c",
    "diff": "diff --git a/config/common_bsdapp b/config/common_bsdapp\nindex 1e6885f..3b50ff9 100644\n--- a/config/common_bsdapp\n+++ b/config/common_bsdapp\n@@ -218,6 +218,9 @@ CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1\n #\n CONFIG_RTE_LIBRTE_MLX5_PMD=n\n CONFIG_RTE_LIBRTE_MLX5_DEBUG=n\n+CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N=4\n+CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE=0\n+CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8\n \n #\n # Compile burst-oriented Broadcom PMD driver\ndiff --git a/config/common_linuxapp b/config/common_linuxapp\nindex 7da7ba7..eed8fc0 100644\n--- a/config/common_linuxapp\n+++ b/config/common_linuxapp\n@@ -216,6 +216,9 @@ CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1\n #\n CONFIG_RTE_LIBRTE_MLX5_PMD=n\n CONFIG_RTE_LIBRTE_MLX5_DEBUG=n\n+CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N=4\n+CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE=0\n+CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8\n \n #\n # Compile burst-oriented Broadcom PMD driver\ndiff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile\nindex 6e63073..7b9c57b 100644\n--- a/drivers/net/mlx5/Makefile\n+++ b/drivers/net/mlx5/Makefile\n@@ -42,6 +42,9 @@ LIB = librte_pmd_mlx5.a\n \n # Sources.\n SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c\n+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c\n+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c\n+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c\n SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_ethdev.c\n SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mac.c\n \n@@ -79,6 +82,18 @@ else\n CFLAGS += -DNDEBUG -UPEDANTIC\n endif\n \n+ifdef CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N\n+CFLAGS += -DMLX5_PMD_SGE_WR_N=$(CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N)\n+endif\n+\n+ifdef CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE\n+CFLAGS += -DMLX5_PMD_MAX_INLINE=$(CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE)\n+endif\n+\n+ifdef CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE\n+CFLAGS += -DMLX5_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE)\n+endif\n+\n include $(RTE_SDK)/mk/rte.lib.mk\n \n # Generate and clean-up mlx5_autoconf.h.\ndiff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c\nindex 6df486b..54bd6b9 100644\n--- a/drivers/net/mlx5/mlx5.c\n+++ b/drivers/net/mlx5/mlx5.c\n@@ -63,6 +63,7 @@\n \n #include \"mlx5.h\"\n #include \"mlx5_utils.h\"\n+#include \"mlx5_rxtx.h\"\n #include \"mlx5_autoconf.h\"\n \n /**\n@@ -77,11 +78,46 @@ static void\n mlx5_dev_close(struct rte_eth_dev *dev)\n {\n \tstruct priv *priv = dev->data->dev_private;\n+\tvoid *tmp;\n+\tunsigned int i;\n \n \tpriv_lock(priv);\n \tDEBUG(\"%p: closing device \\\"%s\\\"\",\n \t      (void *)dev,\n \t      ((priv->ctx != NULL) ? priv->ctx->device->name : \"\"));\n+\t/* Prevent crashes when queues are still in use. */\n+\tdev->rx_pkt_burst = removed_rx_burst;\n+\tdev->tx_pkt_burst = removed_tx_burst;\n+\tif (priv->rxqs != NULL) {\n+\t\t/* XXX race condition if mlx5_rx_burst() is still running. */\n+\t\tusleep(1000);\n+\t\tfor (i = 0; (i != priv->rxqs_n); ++i) {\n+\t\t\ttmp = (*priv->rxqs)[i];\n+\t\t\tif (tmp == NULL)\n+\t\t\t\tcontinue;\n+\t\t\t(*priv->rxqs)[i] = NULL;\n+\t\t\trxq_cleanup(tmp);\n+\t\t\trte_free(tmp);\n+\t\t}\n+\t\tpriv->rxqs_n = 0;\n+\t\tpriv->rxqs = NULL;\n+\t}\n+\tif (priv->txqs != NULL) {\n+\t\t/* XXX race condition if mlx5_tx_burst() is still running. 
*/\n+\t\tusleep(1000);\n+\t\tfor (i = 0; (i != priv->txqs_n); ++i) {\n+\t\t\ttmp = (*priv->txqs)[i];\n+\t\t\tif (tmp == NULL)\n+\t\t\t\tcontinue;\n+\t\t\t(*priv->txqs)[i] = NULL;\n+\t\t\ttxq_cleanup(tmp);\n+\t\t\trte_free(tmp);\n+\t\t}\n+\t\tpriv->txqs_n = 0;\n+\t\tpriv->txqs = NULL;\n+\t}\n+\tif (priv->rss)\n+\t\trxq_cleanup(&priv->rxq_parent);\n \tif (priv->pd != NULL) {\n \t\tassert(priv->ctx != NULL);\n \t\tclaim_zero(ibv_dealloc_pd(priv->pd));\n@@ -94,6 +130,10 @@ mlx5_dev_close(struct rte_eth_dev *dev)\n \n static const struct eth_dev_ops mlx5_dev_ops = {\n \t.dev_close = mlx5_dev_close,\n+\t.rx_queue_setup = mlx5_rx_queue_setup,\n+\t.tx_queue_setup = mlx5_tx_queue_setup,\n+\t.rx_queue_release = mlx5_rx_queue_release,\n+\t.tx_queue_release = mlx5_tx_queue_release,\n };\n \n static struct {\ndiff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h\nindex 21db3cd..49978f5 100644\n--- a/drivers/net/mlx5/mlx5.h\n+++ b/drivers/net/mlx5/mlx5.h\n@@ -63,6 +63,7 @@\n #endif\n \n #include \"mlx5_utils.h\"\n+#include \"mlx5_rxtx.h\"\n #include \"mlx5_autoconf.h\"\n #include \"mlx5_defs.h\"\n \n@@ -101,9 +102,33 @@ struct priv {\n \tunsigned int rss:1; /* RSS is enabled. */\n \tunsigned int vf:1; /* This is a VF device. */\n \tunsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */\n+\t/* RX/TX queues. */\n+\tstruct rxq rxq_parent; /* Parent queue when RSS is enabled. */\n+\tunsigned int rxqs_n; /* RX queues array size. */\n+\tunsigned int txqs_n; /* TX queues array size. */\n+\tstruct rxq *(*rxqs)[]; /* RX queues. */\n+\tstruct txq *(*txqs)[]; /* TX queues. */\n \trte_spinlock_t lock; /* Lock for control functions. */\n };\n \n+/* Work Request ID data type (64 bit). */\n+typedef union {\n+\tstruct {\n+\t\tuint32_t id;\n+\t\tuint16_t offset;\n+\t} data;\n+\tuint64_t raw;\n+} wr_id_t;\n+\n+/* Compile-time check. */\n+static inline void wr_id_t_check(void)\n+{\n+\twr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))];\n+\n+\t(void)check;\n+\t(void)wr_id_t_check;\n+}\n+\n /**\n  * Lock private structure to protect it from concurrent access in the\n  * control path.\ndiff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h\nindex c66a74f..c85be9c 100644\n--- a/drivers/net/mlx5/mlx5_defs.h\n+++ b/drivers/net/mlx5/mlx5_defs.h\n@@ -40,4 +40,28 @@\n /* Maximum number of simultaneous MAC addresses. */\n #define MLX5_MAX_MAC_ADDRESSES 128\n \n+/* Request send completion once in every 64 sends, might be less. */\n+#define MLX5_PMD_TX_PER_COMP_REQ 64\n+\n+/* Maximum number of Scatter/Gather Elements per Work Request. */\n+#ifndef MLX5_PMD_SGE_WR_N\n+#define MLX5_PMD_SGE_WR_N 4\n+#endif\n+\n+/* Maximum size for inline data. */\n+#ifndef MLX5_PMD_MAX_INLINE\n+#define MLX5_PMD_MAX_INLINE 0\n+#endif\n+\n+/*\n+ * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP\n+ * from which buffers are to be transmitted will have to be mapped by this\n+ * driver to their own Memory Region (MR). 
This is a slow operation.\n+ *\n+ * This value is always 1 for RX queues.\n+ */\n+#ifndef MLX5_PMD_TX_MP_CACHE\n+#define MLX5_PMD_TX_MP_CACHE 8\n+#endif\n+\n #endif /* RTE_PMD_MLX5_DEFS_H_ */\ndiff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c\nnew file mode 100644\nindex 0000000..01cc649\n--- /dev/null\n+++ b/drivers/net/mlx5/mlx5_rxq.c\n@@ -0,0 +1,682 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright 2015 6WIND S.A.\n+ *   Copyright 2015 Mellanox.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of 6WIND S.A. nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <stddef.h>\n+#include <assert.h>\n+#include <errno.h>\n+#include <string.h>\n+#include <stdint.h>\n+\n+/* Verbs header. */\n+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <infiniband/verbs.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+/* DPDK headers don't like -pedantic. 
*/\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <rte_mbuf.h>\n+#include <rte_malloc.h>\n+#include <rte_ethdev.h>\n+#include <rte_common.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+#include \"mlx5.h\"\n+#include \"mlx5_rxtx.h\"\n+#include \"mlx5_utils.h\"\n+#include \"mlx5_defs.h\"\n+\n+/**\n+ * Allocate RX queue elements.\n+ *\n+ * @param rxq\n+ *   Pointer to RX queue structure.\n+ * @param elts_n\n+ *   Number of elements to allocate.\n+ * @param[in] pool\n+ *   If not NULL, fetch buffers from this array instead of allocating them\n+ *   with rte_pktmbuf_alloc().\n+ *\n+ * @return\n+ *   0 on success, errno value on failure.\n+ */\n+static int\n+rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool)\n+{\n+\tunsigned int i;\n+\tstruct rxq_elt (*elts)[elts_n] =\n+\t\trte_calloc_socket(\"RXQ elements\", 1, sizeof(*elts), 0,\n+\t\t\t\t  rxq->socket);\n+\tint ret = 0;\n+\n+\tif (elts == NULL) {\n+\t\tERROR(\"%p: can't allocate packets array\", (void *)rxq);\n+\t\tret = ENOMEM;\n+\t\tgoto error;\n+\t}\n+\t/* For each WR (packet). */\n+\tfor (i = 0; (i != elts_n); ++i) {\n+\t\tstruct rxq_elt *elt = &(*elts)[i];\n+\t\tstruct ibv_recv_wr *wr = &elt->wr;\n+\t\tstruct ibv_sge *sge = &(*elts)[i].sge;\n+\t\tstruct rte_mbuf *buf;\n+\n+\t\tif (pool != NULL) {\n+\t\t\tbuf = *(pool++);\n+\t\t\tassert(buf != NULL);\n+\t\t\trte_pktmbuf_reset(buf);\n+\t\t} else\n+\t\t\tbuf = rte_pktmbuf_alloc(rxq->mp);\n+\t\tif (buf == NULL) {\n+\t\t\tassert(pool == NULL);\n+\t\t\tERROR(\"%p: empty mbuf pool\", (void *)rxq);\n+\t\t\tret = ENOMEM;\n+\t\t\tgoto error;\n+\t\t}\n+\t\t/* Configure WR. Work request ID contains its own index in\n+\t\t * the elts array and the offset between SGE buffer header and\n+\t\t * its data. */\n+\t\tWR_ID(wr->wr_id).id = i;\n+\t\tWR_ID(wr->wr_id).offset =\n+\t\t\t(((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) -\n+\t\t\t (uintptr_t)buf);\n+\t\twr->next = &(*elts)[(i + 1)].wr;\n+\t\twr->sg_list = sge;\n+\t\twr->num_sge = 1;\n+\t\t/* Headroom is reserved by rte_pktmbuf_alloc(). */\n+\t\tassert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM);\n+\t\t/* Buffer is supposed to be empty. */\n+\t\tassert(rte_pktmbuf_data_len(buf) == 0);\n+\t\tassert(rte_pktmbuf_pkt_len(buf) == 0);\n+\t\t/* sge->addr must be able to store a pointer. */\n+\t\tassert(sizeof(sge->addr) >= sizeof(uintptr_t));\n+\t\t/* SGE keeps its headroom. */\n+\t\tsge->addr = (uintptr_t)\n+\t\t\t((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM);\n+\t\tsge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM);\n+\t\tsge->lkey = rxq->mr->lkey;\n+\t\t/* Redundant check for tailroom. */\n+\t\tassert(sge->length == rte_pktmbuf_tailroom(buf));\n+\t\t/* Make sure elts index and SGE mbuf pointer can be deduced\n+\t\t * from WR ID. */\n+\t\tif ((WR_ID(wr->wr_id).id != i) ||\n+\t\t    ((void *)((uintptr_t)sge->addr -\n+\t\t\tWR_ID(wr->wr_id).offset) != buf)) {\n+\t\t\tERROR(\"%p: cannot store index and offset in WR ID\",\n+\t\t\t      (void *)rxq);\n+\t\t\tsge->addr = 0;\n+\t\t\trte_pktmbuf_free(buf);\n+\t\t\tret = EOVERFLOW;\n+\t\t\tgoto error;\n+\t\t}\n+\t}\n+\t/* The last WR pointer must be NULL. 
*/\n+\t(*elts)[(i - 1)].wr.next = NULL;\n+\tDEBUG(\"%p: allocated and configured %u single-segment WRs\",\n+\t      (void *)rxq, elts_n);\n+\trxq->elts_n = elts_n;\n+\trxq->elts_head = 0;\n+\trxq->elts.no_sp = elts;\n+\tassert(ret == 0);\n+\treturn 0;\n+error:\n+\tif (elts != NULL) {\n+\t\tassert(pool == NULL);\n+\t\tfor (i = 0; (i != RTE_DIM(*elts)); ++i) {\n+\t\t\tstruct rxq_elt *elt = &(*elts)[i];\n+\t\t\tstruct rte_mbuf *buf;\n+\n+\t\t\tif (elt->sge.addr == 0)\n+\t\t\t\tcontinue;\n+\t\t\tassert(WR_ID(elt->wr.wr_id).id == i);\n+\t\t\tbuf = (void *)((uintptr_t)elt->sge.addr -\n+\t\t\t\tWR_ID(elt->wr.wr_id).offset);\n+\t\t\trte_pktmbuf_free_seg(buf);\n+\t\t}\n+\t\trte_free(elts);\n+\t}\n+\tDEBUG(\"%p: failed, freed everything\", (void *)rxq);\n+\tassert(ret > 0);\n+\treturn ret;\n+}\n+\n+/**\n+ * Free RX queue elements.\n+ *\n+ * @param rxq\n+ *   Pointer to RX queue structure.\n+ */\n+static void\n+rxq_free_elts(struct rxq *rxq)\n+{\n+\tunsigned int i;\n+\tunsigned int elts_n = rxq->elts_n;\n+\tstruct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp;\n+\n+\tDEBUG(\"%p: freeing WRs\", (void *)rxq);\n+\trxq->elts_n = 0;\n+\trxq->elts.no_sp = NULL;\n+\tif (elts == NULL)\n+\t\treturn;\n+\tfor (i = 0; (i != RTE_DIM(*elts)); ++i) {\n+\t\tstruct rxq_elt *elt = &(*elts)[i];\n+\t\tstruct rte_mbuf *buf;\n+\n+\t\tif (elt->sge.addr == 0)\n+\t\t\tcontinue;\n+\t\tassert(WR_ID(elt->wr.wr_id).id == i);\n+\t\tbuf = (void *)((uintptr_t)elt->sge.addr -\n+\t\t\tWR_ID(elt->wr.wr_id).offset);\n+\t\trte_pktmbuf_free_seg(buf);\n+\t}\n+\trte_free(elts);\n+}\n+\n+/**\n+ * Clean up a RX queue.\n+ *\n+ * Destroy objects, free allocated memory and reset the structure for reuse.\n+ *\n+ * @param rxq\n+ *   Pointer to RX queue structure.\n+ */\n+void\n+rxq_cleanup(struct rxq *rxq)\n+{\n+\tstruct ibv_exp_release_intf_params params;\n+\n+\tDEBUG(\"cleaning up %p\", (void *)rxq);\n+\trxq_free_elts(rxq);\n+\tif (rxq->if_qp != NULL) {\n+\t\tassert(rxq->priv != NULL);\n+\t\tassert(rxq->priv->ctx != NULL);\n+\t\tassert(rxq->qp != NULL);\n+\t\tparams = (struct ibv_exp_release_intf_params){\n+\t\t\t.comp_mask = 0,\n+\t\t};\n+\t\tclaim_zero(ibv_exp_release_intf(rxq->priv->ctx,\n+\t\t\t\t\t\trxq->if_qp,\n+\t\t\t\t\t\t&params));\n+\t}\n+\tif (rxq->if_cq != NULL) {\n+\t\tassert(rxq->priv != NULL);\n+\t\tassert(rxq->priv->ctx != NULL);\n+\t\tassert(rxq->cq != NULL);\n+\t\tparams = (struct ibv_exp_release_intf_params){\n+\t\t\t.comp_mask = 0,\n+\t\t};\n+\t\tclaim_zero(ibv_exp_release_intf(rxq->priv->ctx,\n+\t\t\t\t\t\trxq->if_cq,\n+\t\t\t\t\t\t&params));\n+\t}\n+\tif (rxq->qp != NULL) {\n+\t\tclaim_zero(ibv_destroy_qp(rxq->qp));\n+\t}\n+\tif (rxq->cq != NULL)\n+\t\tclaim_zero(ibv_destroy_cq(rxq->cq));\n+\tif (rxq->rd != NULL) {\n+\t\tstruct ibv_exp_destroy_res_domain_attr attr = {\n+\t\t\t.comp_mask = 0,\n+\t\t};\n+\n+\t\tassert(rxq->priv != NULL);\n+\t\tassert(rxq->priv->ctx != NULL);\n+\t\tclaim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx,\n+\t\t\t\t\t\t      rxq->rd,\n+\t\t\t\t\t\t      &attr));\n+\t}\n+\tif (rxq->mr != NULL)\n+\t\tclaim_zero(ibv_dereg_mr(rxq->mr));\n+\tmemset(rxq, 0, sizeof(*rxq));\n+}\n+\n+/**\n+ * Allocate a Queue Pair.\n+ * Optionally setup inline receive if supported.\n+ *\n+ * @param priv\n+ *   Pointer to private structure.\n+ * @param cq\n+ *   Completion queue to associate with QP.\n+ * @param desc\n+ *   Number of descriptors in QP (hint only).\n+ *\n+ * @return\n+ *   QP pointer or NULL in case of error.\n+ */\n+static struct ibv_qp *\n+rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc,\n+\t 
    struct ibv_exp_res_domain *rd)\n+{\n+\tstruct ibv_exp_qp_init_attr attr = {\n+\t\t/* CQ to be associated with the send queue. */\n+\t\t.send_cq = cq,\n+\t\t/* CQ to be associated with the receive queue. */\n+\t\t.recv_cq = cq,\n+\t\t.cap = {\n+\t\t\t/* Max number of outstanding WRs. */\n+\t\t\t.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?\n+\t\t\t\t\tpriv->device_attr.max_qp_wr :\n+\t\t\t\t\tdesc),\n+\t\t\t/* Max number of scatter/gather elements in a WR. */\n+\t\t\t.max_recv_sge = ((priv->device_attr.max_sge <\n+\t\t\t\t\t  MLX5_PMD_SGE_WR_N) ?\n+\t\t\t\t\t priv->device_attr.max_sge :\n+\t\t\t\t\t MLX5_PMD_SGE_WR_N),\n+\t\t},\n+\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |\n+\t\t\t      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),\n+\t\t.pd = priv->pd,\n+\t\t.res_domain = rd,\n+\t};\n+\n+\treturn ibv_exp_create_qp(priv->ctx, &attr);\n+}\n+\n+#ifdef RSS_SUPPORT\n+\n+/**\n+ * Allocate a RSS Queue Pair.\n+ * Optionally setup inline receive if supported.\n+ *\n+ * @param priv\n+ *   Pointer to private structure.\n+ * @param cq\n+ *   Completion queue to associate with QP.\n+ * @param desc\n+ *   Number of descriptors in QP (hint only).\n+ * @param parent\n+ *   If nonzero, create a parent QP, otherwise a child.\n+ *\n+ * @return\n+ *   QP pointer or NULL in case of error.\n+ */\n+static struct ibv_qp *\n+rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc,\n+\t\t int parent, struct ibv_exp_res_domain *rd)\n+{\n+\tstruct ibv_exp_qp_init_attr attr = {\n+\t\t/* CQ to be associated with the send queue. */\n+\t\t.send_cq = cq,\n+\t\t/* CQ to be associated with the receive queue. */\n+\t\t.recv_cq = cq,\n+\t\t.cap = {\n+\t\t\t/* Max number of outstanding WRs. */\n+\t\t\t.max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ?\n+\t\t\t\t\tpriv->device_attr.max_qp_wr :\n+\t\t\t\t\tdesc),\n+\t\t\t/* Max number of scatter/gather elements in a WR. */\n+\t\t\t.max_recv_sge = ((priv->device_attr.max_sge <\n+\t\t\t\t\t  MLX5_PMD_SGE_WR_N) ?\n+\t\t\t\t\t priv->device_attr.max_sge :\n+\t\t\t\t\t MLX5_PMD_SGE_WR_N),\n+\t\t},\n+\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |\n+\t\t\t      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN |\n+\t\t\t      IBV_EXP_QP_INIT_ATTR_QPG),\n+\t\t.pd = priv->pd,\n+\t\t.res_domain = rd,\n+\t};\n+\n+\tif (parent) {\n+\t\tattr.qpg.qpg_type = IBV_EXP_QPG_PARENT;\n+\t\t/* TSS isn't necessary. 
*/\n+\t\tattr.qpg.parent_attrib.tss_child_count = 0;\n+\t\tattr.qpg.parent_attrib.rss_child_count = priv->rxqs_n;\n+\t\tDEBUG(\"initializing parent RSS queue\");\n+\t} else {\n+\t\tattr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX;\n+\t\tattr.qpg.qpg_parent = priv->rxq_parent.qp;\n+\t\tDEBUG(\"initializing child RSS queue\");\n+\t}\n+\treturn ibv_exp_create_qp(priv->ctx, &attr);\n+}\n+\n+#endif /* RSS_SUPPORT */\n+\n+/**\n+ * Configure a RX queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param rxq\n+ *   Pointer to RX queue structure.\n+ * @param desc\n+ *   Number of descriptors to configure in queue.\n+ * @param socket\n+ *   NUMA socket on which memory must be allocated.\n+ * @param[in] conf\n+ *   Thresholds parameters.\n+ * @param mp\n+ *   Memory pool for buffer allocations.\n+ *\n+ * @return\n+ *   0 on success, errno value on failure.\n+ */\n+int\n+rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc,\n+\t  unsigned int socket, const struct rte_eth_rxconf *conf,\n+\t  struct rte_mempool *mp)\n+{\n+\tstruct priv *priv = dev->data->dev_private;\n+\tstruct rxq tmpl = {\n+\t\t.priv = priv,\n+\t\t.mp = mp,\n+\t\t.socket = socket\n+\t};\n+\tstruct ibv_exp_qp_attr mod;\n+\tunion {\n+\t\tstruct ibv_exp_query_intf_params params;\n+\t\tstruct ibv_exp_cq_init_attr cq;\n+\t\tstruct ibv_exp_res_domain_init_attr rd;\n+\t} attr;\n+\tenum ibv_exp_query_intf_status status;\n+\tstruct ibv_recv_wr *bad_wr;\n+\tstruct rte_mbuf *buf;\n+\tint ret = 0;\n+\tint parent = (rxq == &priv->rxq_parent);\n+\n+\t(void)conf; /* Thresholds configuration (ignored). */\n+\t/*\n+\t * If this is a parent queue, hardware must support RSS and\n+\t * RSS must be enabled.\n+\t */\n+\tassert((!parent) || ((priv->hw_rss) && (priv->rss)));\n+\tif (parent) {\n+\t\t/* Even if unused, ibv_create_cq() requires at least one\n+\t\t * descriptor. */\n+\t\tdesc = 1;\n+\t\tgoto skip_mr;\n+\t}\n+\tif ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {\n+\t\tERROR(\"%p: invalid number of RX descriptors (must be a\"\n+\t\t      \" multiple of %d)\", (void *)dev, MLX5_PMD_SGE_WR_N);\n+\t\treturn EINVAL;\n+\t}\n+\t/* Get mbuf length. */\n+\tbuf = rte_pktmbuf_alloc(mp);\n+\tif (buf == NULL) {\n+\t\tERROR(\"%p: unable to allocate mbuf\", (void *)dev);\n+\t\treturn ENOMEM;\n+\t}\n+\ttmpl.mb_len = buf->buf_len;\n+\tassert((rte_pktmbuf_headroom(buf) +\n+\t\trte_pktmbuf_tailroom(buf)) == tmpl.mb_len);\n+\tassert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM);\n+\trte_pktmbuf_free(buf);\n+\t/* Use the entire RX mempool as the memory region. 
*/\n+\ttmpl.mr = ibv_reg_mr(priv->pd,\n+\t\t\t     (void *)mp->elt_va_start,\n+\t\t\t     (mp->elt_va_end - mp->elt_va_start),\n+\t\t\t     (IBV_ACCESS_LOCAL_WRITE |\n+\t\t\t      IBV_ACCESS_REMOTE_WRITE));\n+\tif (tmpl.mr == NULL) {\n+\t\tret = EINVAL;\n+\t\tERROR(\"%p: MR creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+skip_mr:\n+\tattr.rd = (struct ibv_exp_res_domain_init_attr){\n+\t\t.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |\n+\t\t\t      IBV_EXP_RES_DOMAIN_MSG_MODEL),\n+\t\t.thread_model = IBV_EXP_THREAD_SINGLE,\n+\t\t.msg_model = IBV_EXP_MSG_HIGH_BW,\n+\t};\n+\ttmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);\n+\tif (tmpl.rd == NULL) {\n+\t\tret = ENOMEM;\n+\t\tERROR(\"%p: RD creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tattr.cq = (struct ibv_exp_cq_init_attr){\n+\t\t.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,\n+\t\t.res_domain = tmpl.rd,\n+\t};\n+\ttmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);\n+\tif (tmpl.cq == NULL) {\n+\t\tret = ENOMEM;\n+\t\tERROR(\"%p: CQ creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tDEBUG(\"priv->device_attr.max_qp_wr is %d\",\n+\t      priv->device_attr.max_qp_wr);\n+\tDEBUG(\"priv->device_attr.max_sge is %d\",\n+\t      priv->device_attr.max_sge);\n+#ifdef RSS_SUPPORT\n+\tif (priv->rss)\n+\t\ttmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent,\n+\t\t\t\t\t   tmpl.rd);\n+\telse\n+#endif /* RSS_SUPPORT */\n+\t\ttmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd);\n+\tif (tmpl.qp == NULL) {\n+\t\tret = (errno ? errno : EINVAL);\n+\t\tERROR(\"%p: QP creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tmod = (struct ibv_exp_qp_attr){\n+\t\t/* Move the QP to this state. */\n+\t\t.qp_state = IBV_QPS_INIT,\n+\t\t/* Primary port number. */\n+\t\t.port_num = priv->port\n+\t};\n+\tret = ibv_exp_modify_qp(tmpl.qp, &mod,\n+\t\t\t\t(IBV_EXP_QP_STATE |\n+#ifdef RSS_SUPPORT\n+\t\t\t\t (parent ? IBV_EXP_QP_GROUP_RSS : 0) |\n+#endif /* RSS_SUPPORT */\n+\t\t\t\t IBV_EXP_QP_PORT));\n+\tif (ret) {\n+\t\tERROR(\"%p: QP state to IBV_QPS_INIT failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\t/* Allocate descriptors for RX queues, except for the RSS parent. */\n+\tif (parent)\n+\t\tgoto skip_alloc;\n+\tret = rxq_alloc_elts(&tmpl, desc, NULL);\n+\tif (ret) {\n+\t\tERROR(\"%p: RXQ allocation failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tret = ibv_post_recv(tmpl.qp,\n+\t\t\t    &(*tmpl.elts.no_sp)[0].wr,\n+\t\t\t    &bad_wr);\n+\tif (ret) {\n+\t\tERROR(\"%p: ibv_post_recv() failed for WR %p: %s\",\n+\t\t      (void *)dev,\n+\t\t      (void *)bad_wr,\n+\t\t      strerror(ret));\n+\t\tgoto error;\n+\t}\n+skip_alloc:\n+\tmod = (struct ibv_exp_qp_attr){\n+\t\t.qp_state = IBV_QPS_RTR\n+\t};\n+\tret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE);\n+\tif (ret) {\n+\t\tERROR(\"%p: QP state to IBV_QPS_RTR failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\t/* Save port ID. 
*/\n+\ttmpl.port_id = dev->data->port_id;\n+\tDEBUG(\"%p: RTE port ID: %u\", (void *)rxq, tmpl.port_id);\n+\tattr.params = (struct ibv_exp_query_intf_params){\n+\t\t.intf_scope = IBV_EXP_INTF_GLOBAL,\n+\t\t.intf = IBV_EXP_INTF_CQ,\n+\t\t.obj = tmpl.cq,\n+\t};\n+\ttmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);\n+\tif (tmpl.if_cq == NULL) {\n+\t\tERROR(\"%p: CQ interface family query failed with status %d\",\n+\t\t      (void *)dev, status);\n+\t\tgoto error;\n+\t}\n+\tattr.params = (struct ibv_exp_query_intf_params){\n+\t\t.intf_scope = IBV_EXP_INTF_GLOBAL,\n+\t\t.intf = IBV_EXP_INTF_QP_BURST,\n+\t\t.obj = tmpl.qp,\n+\t};\n+\ttmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);\n+\tif (tmpl.if_qp == NULL) {\n+\t\tERROR(\"%p: QP interface family query failed with status %d\",\n+\t\t      (void *)dev, status);\n+\t\tgoto error;\n+\t}\n+\t/* Clean up rxq in case we're reinitializing it. */\n+\tDEBUG(\"%p: cleaning-up old rxq just in case\", (void *)rxq);\n+\trxq_cleanup(rxq);\n+\t*rxq = tmpl;\n+\tDEBUG(\"%p: rxq updated with %p\", (void *)rxq, (void *)&tmpl);\n+\tassert(ret == 0);\n+\treturn 0;\n+error:\n+\trxq_cleanup(&tmpl);\n+\tassert(ret > 0);\n+\treturn ret;\n+}\n+\n+/**\n+ * DPDK callback to configure a RX queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param idx\n+ *   RX queue index.\n+ * @param desc\n+ *   Number of descriptors to configure in queue.\n+ * @param socket\n+ *   NUMA socket on which memory must be allocated.\n+ * @param[in] conf\n+ *   Thresholds parameters.\n+ * @param mp\n+ *   Memory pool for buffer allocations.\n+ *\n+ * @return\n+ *   0 on success, negative errno value on failure.\n+ */\n+int\n+mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,\n+\t\t    unsigned int socket, const struct rte_eth_rxconf *conf,\n+\t\t    struct rte_mempool *mp)\n+{\n+\tstruct priv *priv = dev->data->dev_private;\n+\tstruct rxq *rxq = (*priv->rxqs)[idx];\n+\tint ret;\n+\n+\tpriv_lock(priv);\n+\tDEBUG(\"%p: configuring queue %u for %u descriptors\",\n+\t      (void *)dev, idx, desc);\n+\tif (idx >= priv->rxqs_n) {\n+\t\tERROR(\"%p: queue index out of range (%u >= %u)\",\n+\t\t      (void *)dev, idx, priv->rxqs_n);\n+\t\tpriv_unlock(priv);\n+\t\treturn -EOVERFLOW;\n+\t}\n+\tif (rxq != NULL) {\n+\t\tDEBUG(\"%p: reusing already allocated queue index %u (%p)\",\n+\t\t      (void *)dev, idx, (void *)rxq);\n+\t\tif (priv->started) {\n+\t\t\tpriv_unlock(priv);\n+\t\t\treturn -EEXIST;\n+\t\t}\n+\t\t(*priv->rxqs)[idx] = NULL;\n+\t\trxq_cleanup(rxq);\n+\t} else {\n+\t\trxq = rte_calloc_socket(\"RXQ\", 1, sizeof(*rxq), 0, socket);\n+\t\tif (rxq == NULL) {\n+\t\t\tERROR(\"%p: unable to allocate queue index %u\",\n+\t\t\t      (void *)dev, idx);\n+\t\t\tpriv_unlock(priv);\n+\t\t\treturn -ENOMEM;\n+\t\t}\n+\t}\n+\tret = rxq_setup(dev, rxq, desc, socket, conf, mp);\n+\tif (ret)\n+\t\trte_free(rxq);\n+\telse {\n+\t\tDEBUG(\"%p: adding RX queue %p to list\",\n+\t\t      (void *)dev, (void *)rxq);\n+\t\t(*priv->rxqs)[idx] = rxq;\n+\t\t/* Update receive callback. 
*/\n+\t\tdev->rx_pkt_burst = mlx5_rx_burst;\n+\t}\n+\tpriv_unlock(priv);\n+\treturn -ret;\n+}\n+\n+/**\n+ * DPDK callback to release a RX queue.\n+ *\n+ * @param dpdk_rxq\n+ *   Generic RX queue pointer.\n+ */\n+void\n+mlx5_rx_queue_release(void *dpdk_rxq)\n+{\n+\tstruct rxq *rxq = (struct rxq *)dpdk_rxq;\n+\tstruct priv *priv;\n+\tunsigned int i;\n+\n+\tif (rxq == NULL)\n+\t\treturn;\n+\tpriv = rxq->priv;\n+\tpriv_lock(priv);\n+\tassert(rxq != &priv->rxq_parent);\n+\tfor (i = 0; (i != priv->rxqs_n); ++i)\n+\t\tif ((*priv->rxqs)[i] == rxq) {\n+\t\t\tDEBUG(\"%p: removing RX queue %p from list\",\n+\t\t\t      (void *)priv->dev, (void *)rxq);\n+\t\t\t(*priv->rxqs)[i] = NULL;\n+\t\t\tbreak;\n+\t\t}\n+\trxq_cleanup(rxq);\n+\trte_free(rxq);\n+\tpriv_unlock(priv);\n+}\ndiff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c\nnew file mode 100644\nindex 0000000..0f1e541\n--- /dev/null\n+++ b/drivers/net/mlx5/mlx5_rxtx.c\n@@ -0,0 +1,496 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright 2015 6WIND S.A.\n+ *   Copyright 2015 Mellanox.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of 6WIND S.A. nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <assert.h>\n+#include <stdint.h>\n+#include <string.h>\n+#include <stdlib.h>\n+\n+/* Verbs header. */\n+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <infiniband/verbs.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+/* DPDK headers don't like -pedantic. 
*/\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <rte_mbuf.h>\n+#include <rte_mempool.h>\n+#include <rte_prefetch.h>\n+#include <rte_common.h>\n+#include <rte_branch_prediction.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+#include \"mlx5.h\"\n+#include \"mlx5_utils.h\"\n+#include \"mlx5_rxtx.h\"\n+#include \"mlx5_defs.h\"\n+\n+/**\n+ * Manage TX completions.\n+ *\n+ * When sending a burst, mlx5_tx_burst() posts several WRs.\n+ * To improve performance, a completion event is only required once every\n+ * MLX5_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information\n+ * for other WRs, but this information would not be used anyway.\n+ *\n+ * @param txq\n+ *   Pointer to TX queue structure.\n+ *\n+ * @return\n+ *   0 on success, -1 on failure.\n+ */\n+static int\n+txq_complete(struct txq *txq)\n+{\n+\tunsigned int elts_comp = txq->elts_comp;\n+\tunsigned int elts_tail = txq->elts_tail;\n+\tconst unsigned int elts_n = txq->elts_n;\n+\tint wcs_n;\n+\n+\tif (unlikely(elts_comp == 0))\n+\t\treturn 0;\n+#ifdef DEBUG_SEND\n+\tDEBUG(\"%p: processing %u work requests completions\",\n+\t      (void *)txq, elts_comp);\n+#endif\n+\twcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp);\n+\tif (unlikely(wcs_n == 0))\n+\t\treturn 0;\n+\tif (unlikely(wcs_n < 0)) {\n+\t\tDEBUG(\"%p: ibv_poll_cq() failed (wcs_n=%d)\",\n+\t\t      (void *)txq, wcs_n);\n+\t\treturn -1;\n+\t}\n+\telts_comp -= wcs_n;\n+\tassert(elts_comp <= txq->elts_comp);\n+\t/*\n+\t * Assume WC status is successful as nothing can be done about it\n+\t * anyway.\n+\t */\n+\telts_tail += wcs_n * txq->elts_comp_cd_init;\n+\tif (elts_tail >= elts_n)\n+\t\telts_tail -= elts_n;\n+\ttxq->elts_tail = elts_tail;\n+\ttxq->elts_comp = elts_comp;\n+\treturn 0;\n+}\n+\n+/**\n+ * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].\n+ * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,\n+ * remove an entry first.\n+ *\n+ * @param txq\n+ *   Pointer to TX queue structure.\n+ * @param[in] mp\n+ *   Memory Pool for which a Memory Region lkey must be returned.\n+ *\n+ * @return\n+ *   mr->lkey on success, (uint32_t)-1 on failure.\n+ */\n+static uint32_t\n+txq_mp2mr(struct txq *txq, struct rte_mempool *mp)\n+{\n+\tunsigned int i;\n+\tstruct ibv_mr *mr;\n+\n+\tfor (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {\n+\t\tif (unlikely(txq->mp2mr[i].mp == NULL)) {\n+\t\t\t/* Unknown MP, add a new MR for it. */\n+\t\t\tbreak;\n+\t\t}\n+\t\tif (txq->mp2mr[i].mp == mp) {\n+\t\t\tassert(txq->mp2mr[i].lkey != (uint32_t)-1);\n+\t\t\tassert(txq->mp2mr[i].mr->lkey == txq->mp2mr[i].lkey);\n+\t\t\treturn txq->mp2mr[i].lkey;\n+\t\t}\n+\t}\n+\t/* Add a new entry, register MR first. */\n+\tDEBUG(\"%p: discovered new memory pool %p\", (void *)txq, (void *)mp);\n+\tmr = ibv_reg_mr(txq->priv->pd,\n+\t\t\t(void *)mp->elt_va_start,\n+\t\t\t(mp->elt_va_end - mp->elt_va_start),\n+\t\t\t(IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));\n+\tif (unlikely(mr == NULL)) {\n+\t\tDEBUG(\"%p: unable to configure MR, ibv_reg_mr() failed.\",\n+\t\t      (void *)txq);\n+\t\treturn (uint32_t)-1;\n+\t}\n+\tif (unlikely(i == RTE_DIM(txq->mp2mr))) {\n+\t\t/* Table is full, remove oldest entry. */\n+\t\tDEBUG(\"%p: MR <-> MP table full, dropping oldest entry.\",\n+\t\t      (void *)txq);\n+\t\t--i;\n+\t\tclaim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));\n+\t\tmemmove(&txq->mp2mr[0], &txq->mp2mr[1],\n+\t\t\t(sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));\n+\t}\n+\t/* Store the new entry. 
*/\n+\ttxq->mp2mr[i].mp = mp;\n+\ttxq->mp2mr[i].mr = mr;\n+\ttxq->mp2mr[i].lkey = mr->lkey;\n+\tDEBUG(\"%p: new MR lkey for MP %p: 0x%08\" PRIu32,\n+\t      (void *)txq, (void *)mp, txq->mp2mr[i].lkey);\n+\treturn txq->mp2mr[i].lkey;\n+}\n+\n+/**\n+ * DPDK callback for TX.\n+ *\n+ * @param dpdk_txq\n+ *   Generic pointer to TX queue structure.\n+ * @param[in] pkts\n+ *   Packets to transmit.\n+ * @param pkts_n\n+ *   Number of packets in array.\n+ *\n+ * @return\n+ *   Number of packets successfully transmitted (<= pkts_n).\n+ */\n+uint16_t\n+mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)\n+{\n+\tstruct txq *txq = (struct txq *)dpdk_txq;\n+\tunsigned int elts_head = txq->elts_head;\n+\tconst unsigned int elts_tail = txq->elts_tail;\n+\tconst unsigned int elts_n = txq->elts_n;\n+\tunsigned int elts_comp_cd = txq->elts_comp_cd;\n+\tunsigned int elts_comp = 0;\n+\tunsigned int i;\n+\tunsigned int max;\n+\tint err;\n+\n+\tassert(elts_comp_cd != 0);\n+\ttxq_complete(txq);\n+\tmax = (elts_n - (elts_head - elts_tail));\n+\tif (max > elts_n)\n+\t\tmax -= elts_n;\n+\tassert(max >= 1);\n+\tassert(max <= elts_n);\n+\t/* Always leave one free entry in the ring. */\n+\t--max;\n+\tif (max == 0)\n+\t\treturn 0;\n+\tif (max > pkts_n)\n+\t\tmax = pkts_n;\n+\tfor (i = 0; (i != max); ++i) {\n+\t\tstruct rte_mbuf *buf = pkts[i];\n+\t\tunsigned int elts_head_next =\n+\t\t\t(((elts_head + 1) == elts_n) ? 0 : elts_head + 1);\n+\t\tstruct txq_elt *elt_next = &(*txq->elts)[elts_head_next];\n+\t\tstruct txq_elt *elt = &(*txq->elts)[elts_head];\n+\t\tunsigned int segs = NB_SEGS(buf);\n+\t\tuint32_t send_flags = 0;\n+\n+\t\t/* Clean up old buffer. */\n+\t\tif (likely(elt->buf != NULL)) {\n+\t\t\tstruct rte_mbuf *tmp = elt->buf;\n+\n+\t\t\t/* Faster than rte_pktmbuf_free(). */\n+\t\t\tdo {\n+\t\t\t\tstruct rte_mbuf *next = NEXT(tmp);\n+\n+\t\t\t\trte_pktmbuf_free_seg(tmp);\n+\t\t\t\ttmp = next;\n+\t\t\t} while (tmp != NULL);\n+\t\t}\n+\t\t/* Request TX completion. */\n+\t\tif (unlikely(--elts_comp_cd == 0)) {\n+\t\t\telts_comp_cd = txq->elts_comp_cd_init;\n+\t\t\t++elts_comp;\n+\t\t\tsend_flags |= IBV_EXP_QP_BURST_SIGNALED;\n+\t\t}\n+\t\tif (likely(segs == 1)) {\n+\t\t\tuintptr_t addr;\n+\t\t\tuint32_t length;\n+\t\t\tuint32_t lkey;\n+\n+\t\t\t/* Retrieve buffer information. */\n+\t\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n+\t\t\tlength = DATA_LEN(buf);\n+\t\t\t/* Retrieve Memory Region key for this memory pool. */\n+\t\t\tlkey = txq_mp2mr(txq, buf->pool);\n+\t\t\tif (unlikely(lkey == (uint32_t)-1)) {\n+\t\t\t\t/* MR does not exist. */\n+\t\t\t\tDEBUG(\"%p: unable to get MP <-> MR\"\n+\t\t\t\t      \" association\", (void *)txq);\n+\t\t\t\t/* Clean up TX element. */\n+\t\t\t\telt->buf = NULL;\n+\t\t\t\tgoto stop;\n+\t\t\t}\n+\t\t\t/* Update element. */\n+\t\t\telt->buf = buf;\n+\t\t\tif (txq->priv->vf)\n+\t\t\t\trte_prefetch0((volatile void *)\n+\t\t\t\t\t      (uintptr_t)addr);\n+\t\t\tRTE_MBUF_PREFETCH_TO_FREE(elt_next->buf);\n+\t\t\t/* Put packet into send queue. 
*/\n+#if MLX5_PMD_MAX_INLINE > 0\n+\t\t\tif (length <= txq->max_inline)\n+\t\t\t\terr = txq->if_qp->send_pending_inline\n+\t\t\t\t\t(txq->qp,\n+\t\t\t\t\t (void *)addr,\n+\t\t\t\t\t length,\n+\t\t\t\t\t send_flags);\n+\t\t\telse\n+#endif\n+\t\t\t\terr = txq->if_qp->send_pending\n+\t\t\t\t\t(txq->qp,\n+\t\t\t\t\t addr,\n+\t\t\t\t\t length,\n+\t\t\t\t\t lkey,\n+\t\t\t\t\t send_flags);\n+\t\t\tif (unlikely(err))\n+\t\t\t\tgoto stop;\n+\t\t} else {\n+\t\t\tDEBUG(\"%p: TX scattered buffers support not\"\n+\t\t\t      \" compiled in\", (void *)txq);\n+\t\t\tgoto stop;\n+\t\t}\n+\t\telts_head = elts_head_next;\n+\t}\n+stop:\n+\t/* Take a shortcut if nothing must be sent. */\n+\tif (unlikely(i == 0))\n+\t\treturn 0;\n+\t/* Ring QP doorbell. */\n+\terr = txq->if_qp->send_flush(txq->qp);\n+\tif (unlikely(err)) {\n+\t\t/* A nonzero value is not supposed to be returned.\n+\t\t * Nothing can be done about it. */\n+\t\tDEBUG(\"%p: send_flush() failed with error %d\",\n+\t\t      (void *)txq, err);\n+\t}\n+\ttxq->elts_head = elts_head;\n+\ttxq->elts_comp += elts_comp;\n+\ttxq->elts_comp_cd = elts_comp_cd;\n+\treturn i;\n+}\n+\n+/**\n+ * DPDK callback for RX.\n+ *\n+ * @param dpdk_rxq\n+ *   Generic pointer to RX queue structure.\n+ * @param[out] pkts\n+ *   Array to store received packets.\n+ * @param pkts_n\n+ *   Maximum number of packets in array.\n+ *\n+ * @return\n+ *   Number of packets successfully received (<= pkts_n).\n+ */\n+uint16_t\n+mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)\n+{\n+\tstruct rxq *rxq = (struct rxq *)dpdk_rxq;\n+\tstruct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp;\n+\tconst unsigned int elts_n = rxq->elts_n;\n+\tunsigned int elts_head = rxq->elts_head;\n+\tstruct ibv_sge sges[pkts_n];\n+\tunsigned int i;\n+\tunsigned int pkts_ret = 0;\n+\tint ret;\n+\n+\tfor (i = 0; (i != pkts_n); ++i) {\n+\t\tstruct rxq_elt *elt = &(*elts)[elts_head];\n+\t\tstruct ibv_recv_wr *wr = &elt->wr;\n+\t\tuint64_t wr_id = wr->wr_id;\n+\t\tunsigned int len;\n+\t\tstruct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr -\n+\t\t\tWR_ID(wr_id).offset);\n+\t\tstruct rte_mbuf *rep;\n+\t\tuint32_t flags;\n+\n+\t\t/* Sanity checks. */\n+\t\tassert(WR_ID(wr_id).id < rxq->elts_n);\n+\t\tassert(wr->sg_list == &elt->sge);\n+\t\tassert(wr->num_sge == 1);\n+\t\tassert(elts_head < rxq->elts_n);\n+\t\tassert(rxq->elts_head < rxq->elts_n);\n+\t\t/*\n+\t\t * Fetch initial bytes of packet descriptor into a\n+\t\t * cacheline while allocating rep.\n+\t\t */\n+\t\trte_prefetch0(seg);\n+\t\trte_prefetch0(&seg->cacheline1);\n+\t\tret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL,\n+\t\t\t\t\t\t    &flags);\n+\t\tif (unlikely(ret < 0)) {\n+\t\t\tstruct ibv_wc wc;\n+\t\t\tint wcs_n;\n+\n+\t\t\tDEBUG(\"rxq=%p, poll_length() failed (ret=%d)\",\n+\t\t\t      (void *)rxq, ret);\n+\t\t\t/* ibv_poll_cq() must be used in case of failure. */\n+\t\t\twcs_n = ibv_poll_cq(rxq->cq, 1, &wc);\n+\t\t\tif (unlikely(wcs_n == 0))\n+\t\t\t\tbreak;\n+\t\t\tif (unlikely(wcs_n < 0)) {\n+\t\t\t\tDEBUG(\"rxq=%p, ibv_poll_cq() failed (wcs_n=%d)\",\n+\t\t\t\t      (void *)rxq, wcs_n);\n+\t\t\t\tbreak;\n+\t\t\t}\n+\t\t\tassert(wcs_n == 1);\n+\t\t\tif (unlikely(wc.status != IBV_WC_SUCCESS)) {\n+\t\t\t\t/* Whatever, just repost the offending WR. */\n+\t\t\t\tDEBUG(\"rxq=%p, wr_id=%\" PRIu64 \": bad work\"\n+\t\t\t\t      \" completion status (%d): %s\",\n+\t\t\t\t      (void *)rxq, wc.wr_id, wc.status,\n+\t\t\t\t      ibv_wc_status_str(wc.status));\n+\t\t\t\t/* Add SGE to array for repost. 
*/\n+\t\t\t\tsges[i] = elt->sge;\n+\t\t\t\tgoto repost;\n+\t\t\t}\n+\t\t\tret = wc.byte_len;\n+\t\t}\n+\t\tif (ret == 0)\n+\t\t\tbreak;\n+\t\tlen = ret;\n+\t\trep = __rte_mbuf_raw_alloc(rxq->mp);\n+\t\tif (unlikely(rep == NULL)) {\n+\t\t\t/*\n+\t\t\t * Unable to allocate a replacement mbuf,\n+\t\t\t * repost WR.\n+\t\t\t */\n+\t\t\tDEBUG(\"rxq=%p, wr_id=%\" PRIu32 \":\"\n+\t\t\t      \" can't allocate a new mbuf\",\n+\t\t\t      (void *)rxq, WR_ID(wr_id).id);\n+\t\t\t/* Increment out of memory counters. */\n+\t\t\t++rxq->priv->dev->data->rx_mbuf_alloc_failed;\n+\t\t\tgoto repost;\n+\t\t}\n+\n+\t\t/* Reconfigure sge to use rep instead of seg. */\n+\t\telt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM;\n+\t\tassert(elt->sge.lkey == rxq->mr->lkey);\n+\t\tWR_ID(wr->wr_id).offset =\n+\t\t\t(((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) -\n+\t\t\t (uintptr_t)rep);\n+\t\tassert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id);\n+\n+\t\t/* Add SGE to array for repost. */\n+\t\tsges[i] = elt->sge;\n+\n+\t\t/* Update seg information. */\n+\t\tSET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM);\n+\t\tNB_SEGS(seg) = 1;\n+\t\tPORT(seg) = rxq->port_id;\n+\t\tNEXT(seg) = NULL;\n+\t\tPKT_LEN(seg) = len;\n+\t\tDATA_LEN(seg) = len;\n+\n+\t\t/* Return packet. */\n+\t\t*(pkts++) = seg;\n+\t\t++pkts_ret;\n+repost:\n+\t\tif (++elts_head >= elts_n)\n+\t\t\telts_head = 0;\n+\t\tcontinue;\n+\t}\n+\tif (unlikely(i == 0))\n+\t\treturn 0;\n+\t/* Repost WRs. */\n+#ifdef DEBUG_RECV\n+\tDEBUG(\"%p: reposting %u WRs\", (void *)rxq, i);\n+#endif\n+\tret = rxq->if_qp->recv_burst(rxq->qp, sges, i);\n+\tif (unlikely(ret)) {\n+\t\t/* Inability to repost WRs is fatal. */\n+\t\tDEBUG(\"%p: recv_burst(): failed (ret=%d)\",\n+\t\t      (void *)rxq->priv,\n+\t\t      ret);\n+\t\tabort();\n+\t}\n+\trxq->elts_head = elts_head;\n+\treturn pkts_ret;\n+}\n+\n+/**\n+ * Dummy DPDK callback for TX.\n+ *\n+ * This function is used to temporarily replace the real callback during\n+ * unsafe control operations on the queue, or in case of error.\n+ *\n+ * @param dpdk_txq\n+ *   Generic pointer to TX queue structure.\n+ * @param[in] pkts\n+ *   Packets to transmit.\n+ * @param pkts_n\n+ *   Number of packets in array.\n+ *\n+ * @return\n+ *   Number of packets successfully transmitted (<= pkts_n).\n+ */\n+uint16_t\n+removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)\n+{\n+\t(void)dpdk_txq;\n+\t(void)pkts;\n+\t(void)pkts_n;\n+\treturn 0;\n+}\n+\n+/**\n+ * Dummy DPDK callback for RX.\n+ *\n+ * This function is used to temporarily replace the real callback during\n+ * unsafe control operations on the queue, or in case of error.\n+ *\n+ * @param dpdk_rxq\n+ *   Generic pointer to RX queue structure.\n+ * @param[out] pkts\n+ *   Array to store received packets.\n+ * @param pkts_n\n+ *   Maximum number of packets in array.\n+ *\n+ * @return\n+ *   Number of packets successfully received (<= pkts_n).\n+ */\n+uint16_t\n+removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)\n+{\n+\t(void)dpdk_rxq;\n+\t(void)pkts;\n+\t(void)pkts_n;\n+\treturn 0;\n+}\ndiff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h\nnew file mode 100644\nindex 0000000..1459317\n--- /dev/null\n+++ b/drivers/net/mlx5/mlx5_rxtx.h\n@@ -0,0 +1,156 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright 2015 6WIND S.A.\n+ *   Copyright 2015 Mellanox.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * 
Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the name of 6WIND S.A. nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#ifndef RTE_PMD_MLX5_RXTX_H_\n+#define RTE_PMD_MLX5_RXTX_H_\n+\n+#include <stdint.h>\n+\n+/* Verbs header. */\n+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <infiniband/verbs.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+/* DPDK headers don't like -pedantic. */\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <rte_mbuf.h>\n+#include <rte_mempool.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+#include \"mlx5_utils.h\"\n+#include \"mlx5.h\"\n+#include \"mlx5_defs.h\"\n+\n+/* RX element. */\n+struct rxq_elt {\n+\tstruct ibv_recv_wr wr; /* Work Request. */\n+\tstruct ibv_sge sge; /* Scatter/Gather Element. */\n+\t/* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */\n+};\n+\n+struct priv;\n+\n+/* RX queue descriptor. */\n+struct rxq {\n+\tstruct priv *priv; /* Back pointer to private data. */\n+\tstruct rte_mempool *mp; /* Memory Pool for allocations. */\n+\tstruct ibv_mr *mr; /* Memory Region (for mp). */\n+\tstruct ibv_cq *cq; /* Completion Queue. */\n+\tstruct ibv_qp *qp; /* Queue Pair. */\n+\tstruct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */\n+\tstruct ibv_exp_cq_family *if_cq; /* CQ interface. */\n+\tunsigned int port_id; /* Port ID for incoming packets. */\n+\tunsigned int elts_n; /* (*elts)[] length. */\n+\tunsigned int elts_head; /* Current index in (*elts)[]. */\n+\tunion {\n+\t\tstruct rxq_elt (*no_sp)[]; /* RX elements. */\n+\t} elts;\n+\tuint32_t mb_len; /* Length of a mp-issued mbuf. */\n+\tunsigned int socket; /* CPU socket ID for allocations. */\n+\tstruct ibv_exp_res_domain *rd; /* Resource Domain. */\n+};\n+\n+/* TX element. */\n+struct txq_elt {\n+\tstruct rte_mbuf *buf;\n+};\n+\n+/* Linear buffer type. 
It is used when transmitting buffers with too many\n+ * segments that do not fit the hardware queue (see max_send_sge).\n+ * Extra segments are copied (linearized) in such buffers, replacing the\n+ * last SGE during TX.\n+ * The size is arbitrary but large enough to hold a jumbo frame with\n+ * 8 segments considering mbuf.buf_len is about 2048 bytes. */\n+typedef uint8_t linear_t[16384];\n+\n+/* TX queue descriptor. */\n+struct txq {\n+\tstruct priv *priv; /* Back pointer to private data. */\n+\tstruct {\n+\t\tstruct rte_mempool *mp; /* Cached Memory Pool. */\n+\t\tstruct ibv_mr *mr; /* Memory Region (for mp). */\n+\t\tuint32_t lkey; /* mr->lkey */\n+\t} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */\n+\tstruct ibv_cq *cq; /* Completion Queue. */\n+\tstruct ibv_qp *qp; /* Queue Pair. */\n+\tstruct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */\n+\tstruct ibv_exp_cq_family *if_cq; /* CQ interface. */\n+#if MLX5_PMD_MAX_INLINE > 0\n+\tuint32_t max_inline; /* Max inline send size <= MLX5_PMD_MAX_INLINE. */\n+#endif\n+\tunsigned int elts_n; /* (*elts)[] length. */\n+\tstruct txq_elt (*elts)[]; /* TX elements. */\n+\tunsigned int elts_head; /* Current index in (*elts)[]. */\n+\tunsigned int elts_tail; /* First element awaiting completion. */\n+\tunsigned int elts_comp; /* Number of completion requests. */\n+\tunsigned int elts_comp_cd; /* Countdown for next completion request. */\n+\tunsigned int elts_comp_cd_init; /* Initial value for countdown. */\n+\tlinear_t (*elts_linear)[]; /* Linearized buffers. */\n+\tstruct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */\n+\tunsigned int socket; /* CPU socket ID for allocations. */\n+\tstruct ibv_exp_res_domain *rd; /* Resource Domain. */\n+};\n+\n+/* mlx5_rxq.c */\n+\n+void rxq_cleanup(struct rxq *);\n+int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int,\n+\t      const struct rte_eth_rxconf *, struct rte_mempool *);\n+int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,\n+\t\t\tconst struct rte_eth_rxconf *, struct rte_mempool *);\n+void mlx5_rx_queue_release(void *);\n+\n+/* mlx5_txq.c */\n+\n+void txq_cleanup(struct txq *);\n+int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,\n+\t\t\tconst struct rte_eth_txconf *);\n+void mlx5_tx_queue_release(void *);\n+\n+/* mlx5_rxtx.c */\n+\n+uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);\n+uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t);\n+uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t);\n+uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);\n+\n+#endif /* RTE_PMD_MLX5_RXTX_H_ */\ndiff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c\nnew file mode 100644\nindex 0000000..2bae61f\n--- /dev/null\n+++ b/drivers/net/mlx5/mlx5_txq.c\n@@ -0,0 +1,512 @@\n+/*-\n+ *   BSD LICENSE\n+ *\n+ *   Copyright 2015 6WIND S.A.\n+ *   Copyright 2015 Mellanox.\n+ *\n+ *   Redistribution and use in source and binary forms, with or without\n+ *   modification, are permitted provided that the following conditions\n+ *   are met:\n+ *\n+ *     * Redistributions of source code must retain the above copyright\n+ *       notice, this list of conditions and the following disclaimer.\n+ *     * Redistributions in binary form must reproduce the above copyright\n+ *       notice, this list of conditions and the following disclaimer in\n+ *       the documentation and/or other materials provided with the\n+ *       distribution.\n+ *     * Neither the 
name of 6WIND S.A. nor the names of its\n+ *       contributors may be used to endorse or promote products derived\n+ *       from this software without specific prior written permission.\n+ *\n+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n+ *   \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+#include <stddef.h>\n+#include <assert.h>\n+#include <errno.h>\n+#include <string.h>\n+#include <stdint.h>\n+\n+/* Verbs header. */\n+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <infiniband/verbs.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+/* DPDK headers don't like -pedantic. */\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic ignored \"-pedantic\"\n+#endif\n+#include <rte_mbuf.h>\n+#include <rte_malloc.h>\n+#include <rte_ethdev.h>\n+#include <rte_common.h>\n+#ifdef PEDANTIC\n+#pragma GCC diagnostic error \"-pedantic\"\n+#endif\n+\n+#include \"mlx5_utils.h\"\n+#include \"mlx5.h\"\n+#include \"mlx5_rxtx.h\"\n+#include \"mlx5_autoconf.h\"\n+#include \"mlx5_defs.h\"\n+\n+/**\n+ * Allocate TX queue elements.\n+ *\n+ * @param txq\n+ *   Pointer to TX queue structure.\n+ * @param elts_n\n+ *   Number of elements to allocate.\n+ *\n+ * @return\n+ *   0 on success, errno value on failure.\n+ */\n+static int\n+txq_alloc_elts(struct txq *txq, unsigned int elts_n)\n+{\n+\tunsigned int i;\n+\tstruct txq_elt (*elts)[elts_n] =\n+\t\trte_calloc_socket(\"TXQ\", 1, sizeof(*elts), 0, txq->socket);\n+\tlinear_t (*elts_linear)[elts_n] =\n+\t\trte_calloc_socket(\"TXQ\", 1, sizeof(*elts_linear), 0,\n+\t\t\t\t  txq->socket);\n+\tstruct ibv_mr *mr_linear = NULL;\n+\tint ret = 0;\n+\n+\tif ((elts == NULL) || (elts_linear == NULL)) {\n+\t\tERROR(\"%p: can't allocate packets array\", (void *)txq);\n+\t\tret = ENOMEM;\n+\t\tgoto error;\n+\t}\n+\tmr_linear =\n+\t\tibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear),\n+\t\t\t   (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE));\n+\tif (mr_linear == NULL) {\n+\t\tERROR(\"%p: unable to configure MR, ibv_reg_mr() failed\",\n+\t\t      (void *)txq);\n+\t\tret = EINVAL;\n+\t\tgoto error;\n+\t}\n+\tfor (i = 0; (i != elts_n); ++i) {\n+\t\tstruct txq_elt *elt = &(*elts)[i];\n+\n+\t\telt->buf = NULL;\n+\t}\n+\tDEBUG(\"%p: allocated and configured %u WRs\", (void *)txq, elts_n);\n+\ttxq->elts_n = elts_n;\n+\ttxq->elts = elts;\n+\ttxq->elts_head = 0;\n+\ttxq->elts_tail = 0;\n+\ttxq->elts_comp = 0;\n+\t/* Request send completion every MLX5_PMD_TX_PER_COMP_REQ packets or\n+\t * at least 4 times per ring. 
*/\n+\ttxq->elts_comp_cd_init =\n+\t\t((MLX5_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ?\n+\t\t MLX5_PMD_TX_PER_COMP_REQ : (elts_n / 4));\n+\ttxq->elts_comp_cd = txq->elts_comp_cd_init;\n+\ttxq->elts_linear = elts_linear;\n+\ttxq->mr_linear = mr_linear;\n+\tassert(ret == 0);\n+\treturn 0;\n+error:\n+\tif (mr_linear != NULL)\n+\t\tclaim_zero(ibv_dereg_mr(mr_linear));\n+\n+\trte_free(elts_linear);\n+\trte_free(elts);\n+\n+\tDEBUG(\"%p: failed, freed everything\", (void *)txq);\n+\tassert(ret > 0);\n+\treturn ret;\n+}\n+\n+/**\n+ * Free TX queue elements.\n+ *\n+ * @param txq\n+ *   Pointer to TX queue structure.\n+ */\n+static void\n+txq_free_elts(struct txq *txq)\n+{\n+\tunsigned int i;\n+\tunsigned int elts_n = txq->elts_n;\n+\tstruct txq_elt (*elts)[elts_n] = txq->elts;\n+\tlinear_t (*elts_linear)[elts_n] = txq->elts_linear;\n+\tstruct ibv_mr *mr_linear = txq->mr_linear;\n+\n+\tDEBUG(\"%p: freeing WRs\", (void *)txq);\n+\ttxq->elts_n = 0;\n+\ttxq->elts = NULL;\n+\ttxq->elts_linear = NULL;\n+\ttxq->mr_linear = NULL;\n+\tif (mr_linear != NULL)\n+\t\tclaim_zero(ibv_dereg_mr(mr_linear));\n+\n+\trte_free(elts_linear);\n+\tif (elts == NULL)\n+\t\treturn;\n+\tfor (i = 0; (i != RTE_DIM(*elts)); ++i) {\n+\t\tstruct txq_elt *elt = &(*elts)[i];\n+\n+\t\tif (elt->buf == NULL)\n+\t\t\tcontinue;\n+\t\trte_pktmbuf_free(elt->buf);\n+\t}\n+\trte_free(elts);\n+}\n+\n+/**\n+ * Clean up a TX queue.\n+ *\n+ * Destroy objects, free allocated memory and reset the structure for reuse.\n+ *\n+ * @param txq\n+ *   Pointer to TX queue structure.\n+ */\n+void\n+txq_cleanup(struct txq *txq)\n+{\n+\tstruct ibv_exp_release_intf_params params;\n+\tsize_t i;\n+\n+\tDEBUG(\"cleaning up %p\", (void *)txq);\n+\ttxq_free_elts(txq);\n+\tif (txq->if_qp != NULL) {\n+\t\tassert(txq->priv != NULL);\n+\t\tassert(txq->priv->ctx != NULL);\n+\t\tassert(txq->qp != NULL);\n+\t\tparams = (struct ibv_exp_release_intf_params){\n+\t\t\t.comp_mask = 0,\n+\t\t};\n+\t\tclaim_zero(ibv_exp_release_intf(txq->priv->ctx,\n+\t\t\t\t\t\ttxq->if_qp,\n+\t\t\t\t\t\t&params));\n+\t}\n+\tif (txq->if_cq != NULL) {\n+\t\tassert(txq->priv != NULL);\n+\t\tassert(txq->priv->ctx != NULL);\n+\t\tassert(txq->cq != NULL);\n+\t\tparams = (struct ibv_exp_release_intf_params){\n+\t\t\t.comp_mask = 0,\n+\t\t};\n+\t\tclaim_zero(ibv_exp_release_intf(txq->priv->ctx,\n+\t\t\t\t\t\ttxq->if_cq,\n+\t\t\t\t\t\t&params));\n+\t}\n+\tif (txq->qp != NULL)\n+\t\tclaim_zero(ibv_destroy_qp(txq->qp));\n+\tif (txq->cq != NULL)\n+\t\tclaim_zero(ibv_destroy_cq(txq->cq));\n+\tif (txq->rd != NULL) {\n+\t\tstruct ibv_exp_destroy_res_domain_attr attr = {\n+\t\t\t.comp_mask = 0,\n+\t\t};\n+\n+\t\tassert(txq->priv != NULL);\n+\t\tassert(txq->priv->ctx != NULL);\n+\t\tclaim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx,\n+\t\t\t\t\t\t      txq->rd,\n+\t\t\t\t\t\t      &attr));\n+\t}\n+\tfor (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {\n+\t\tif (txq->mp2mr[i].mp == NULL)\n+\t\t\tbreak;\n+\t\tassert(txq->mp2mr[i].mr != NULL);\n+\t\tclaim_zero(ibv_dereg_mr(txq->mp2mr[i].mr));\n+\t}\n+\tmemset(txq, 0, sizeof(*txq));\n+}\n+\n+/**\n+ * Configure a TX queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param txq\n+ *   Pointer to TX queue structure.\n+ * @param desc\n+ *   Number of descriptors to configure in queue.\n+ * @param socket\n+ *   NUMA socket on which memory must be allocated.\n+ * @param[in] conf\n+ *   Thresholds parameters.\n+ *\n+ * @return\n+ *   0 on success, errno value on failure.\n+ */\n+static int\n+txq_setup(struct rte_eth_dev *dev, struct txq *txq, 
uint16_t desc,\n+\t  unsigned int socket, const struct rte_eth_txconf *conf)\n+{\n+\tstruct priv *priv = dev->data->dev_private;\n+\tstruct txq tmpl = {\n+\t\t.priv = priv,\n+\t\t.socket = socket\n+\t};\n+\tunion {\n+\t\tstruct ibv_exp_query_intf_params params;\n+\t\tstruct ibv_exp_qp_init_attr init;\n+\t\tstruct ibv_exp_res_domain_init_attr rd;\n+\t\tstruct ibv_exp_cq_init_attr cq;\n+\t\tstruct ibv_exp_qp_attr mod;\n+\t} attr;\n+\tenum ibv_exp_query_intf_status status;\n+\tint ret = 0;\n+\n+\t(void)conf; /* Thresholds configuration (ignored). */\n+\tif ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) {\n+\t\tERROR(\"%p: invalid number of TX descriptors (must be a\"\n+\t\t      \" multiple of %d)\", (void *)dev, MLX5_PMD_SGE_WR_N);\n+\t\treturn EINVAL;\n+\t}\n+\tdesc /= MLX5_PMD_SGE_WR_N;\n+\t/* MRs will be registered in mp2mr[] later. */\n+\tattr.rd = (struct ibv_exp_res_domain_init_attr){\n+\t\t.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |\n+\t\t\t      IBV_EXP_RES_DOMAIN_MSG_MODEL),\n+\t\t.thread_model = IBV_EXP_THREAD_SINGLE,\n+\t\t.msg_model = IBV_EXP_MSG_HIGH_BW,\n+\t};\n+\ttmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd);\n+\tif (tmpl.rd == NULL) {\n+\t\tret = ENOMEM;\n+\t\tERROR(\"%p: RD creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tattr.cq = (struct ibv_exp_cq_init_attr){\n+\t\t.comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN,\n+\t\t.res_domain = tmpl.rd,\n+\t};\n+\ttmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq);\n+\tif (tmpl.cq == NULL) {\n+\t\tret = ENOMEM;\n+\t\tERROR(\"%p: CQ creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tDEBUG(\"priv->device_attr.max_qp_wr is %d\",\n+\t      priv->device_attr.max_qp_wr);\n+\tDEBUG(\"priv->device_attr.max_sge is %d\",\n+\t      priv->device_attr.max_sge);\n+\tattr.init = (struct ibv_exp_qp_init_attr){\n+\t\t/* CQ to be associated with the send queue. */\n+\t\t.send_cq = tmpl.cq,\n+\t\t/* CQ to be associated with the receive queue. */\n+\t\t.recv_cq = tmpl.cq,\n+\t\t.cap = {\n+\t\t\t/* Max number of outstanding WRs. */\n+\t\t\t.max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?\n+\t\t\t\t\tpriv->device_attr.max_qp_wr :\n+\t\t\t\t\tdesc),\n+\t\t\t/* Max number of scatter/gather elements in a WR. */\n+\t\t\t.max_send_sge = ((priv->device_attr.max_sge <\n+\t\t\t\t\t  MLX5_PMD_SGE_WR_N) ?\n+\t\t\t\t\t priv->device_attr.max_sge :\n+\t\t\t\t\t MLX5_PMD_SGE_WR_N),\n+#if MLX5_PMD_MAX_INLINE > 0\n+\t\t\t.max_inline_data = MLX5_PMD_MAX_INLINE,\n+#endif\n+\t\t},\n+\t\t.qp_type = IBV_QPT_RAW_PACKET,\n+\t\t/* Do *NOT* enable this, completions events are managed per\n+\t\t * TX burst. */\n+\t\t.sq_sig_all = 0,\n+\t\t.pd = priv->pd,\n+\t\t.res_domain = tmpl.rd,\n+\t\t.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |\n+\t\t\t      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),\n+\t};\n+\ttmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);\n+\tif (tmpl.qp == NULL) {\n+\t\tret = (errno ? errno : EINVAL);\n+\t\tERROR(\"%p: QP creation failure: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+#if MLX5_PMD_MAX_INLINE > 0\n+\t/* ibv_create_qp() updates this value. */\n+\ttmpl.max_inline = attr.init.cap.max_inline_data;\n+#endif\n+\tattr.mod = (struct ibv_exp_qp_attr){\n+\t\t/* Move the QP to this state. */\n+\t\t.qp_state = IBV_QPS_INIT,\n+\t\t/* Primary port number. 
*/\n+\t\t.port_num = priv->port\n+\t};\n+\tret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,\n+\t\t\t\t(IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));\n+\tif (ret) {\n+\t\tERROR(\"%p: QP state to IBV_QPS_INIT failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tret = txq_alloc_elts(&tmpl, desc);\n+\tif (ret) {\n+\t\tERROR(\"%p: TXQ allocation failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tattr.mod = (struct ibv_exp_qp_attr){\n+\t\t.qp_state = IBV_QPS_RTR\n+\t};\n+\tret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);\n+\tif (ret) {\n+\t\tERROR(\"%p: QP state to IBV_QPS_RTR failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tattr.mod.qp_state = IBV_QPS_RTS;\n+\tret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);\n+\tif (ret) {\n+\t\tERROR(\"%p: QP state to IBV_QPS_RTS failed: %s\",\n+\t\t      (void *)dev, strerror(ret));\n+\t\tgoto error;\n+\t}\n+\tattr.params = (struct ibv_exp_query_intf_params){\n+\t\t.intf_scope = IBV_EXP_INTF_GLOBAL,\n+\t\t.intf = IBV_EXP_INTF_CQ,\n+\t\t.obj = tmpl.cq,\n+\t};\n+\ttmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status);\n+\tif (tmpl.if_cq == NULL) {\n+\t\tret = EINVAL;\n+\t\tERROR(\"%p: CQ interface family query failed with status %d\",\n+\t\t      (void *)dev, status);\n+\t\tgoto error;\n+\t}\n+\tattr.params = (struct ibv_exp_query_intf_params){\n+\t\t.intf_scope = IBV_EXP_INTF_GLOBAL,\n+\t\t.intf = IBV_EXP_INTF_QP_BURST,\n+\t\t.obj = tmpl.qp,\n+\t};\n+\ttmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status);\n+\tif (tmpl.if_qp == NULL) {\n+\t\tret = EINVAL;\n+\t\tERROR(\"%p: QP interface family query failed with status %d\",\n+\t\t      (void *)dev, status);\n+\t\tgoto error;\n+\t}\n+\t/* Clean up txq in case we're reinitializing it. 
*/\n+\tDEBUG(\"%p: cleaning-up old txq just in case\", (void *)txq);\n+\ttxq_cleanup(txq);\n+\t*txq = tmpl;\n+\tDEBUG(\"%p: txq updated with %p\", (void *)txq, (void *)&tmpl);\n+\tassert(ret == 0);\n+\treturn 0;\n+error:\n+\ttxq_cleanup(&tmpl);\n+\tassert(ret > 0);\n+\treturn ret;\n+}\n+\n+/**\n+ * DPDK callback to configure a TX queue.\n+ *\n+ * @param dev\n+ *   Pointer to Ethernet device structure.\n+ * @param idx\n+ *   TX queue index.\n+ * @param desc\n+ *   Number of descriptors to configure in queue.\n+ * @param socket\n+ *   NUMA socket on which memory must be allocated.\n+ * @param[in] conf\n+ *   Thresholds parameters.\n+ *\n+ * @return\n+ *   0 on success, negative errno value on failure.\n+ */\n+int\n+mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,\n+\t\t    unsigned int socket, const struct rte_eth_txconf *conf)\n+{\n+\tstruct priv *priv = dev->data->dev_private;\n+\tstruct txq *txq = (*priv->txqs)[idx];\n+\tint ret;\n+\n+\tpriv_lock(priv);\n+\tDEBUG(\"%p: configuring queue %u for %u descriptors\",\n+\t      (void *)dev, idx, desc);\n+\tif (idx >= priv->txqs_n) {\n+\t\tERROR(\"%p: queue index out of range (%u >= %u)\",\n+\t\t      (void *)dev, idx, priv->txqs_n);\n+\t\tpriv_unlock(priv);\n+\t\treturn -EOVERFLOW;\n+\t}\n+\tif (txq != NULL) {\n+\t\tDEBUG(\"%p: reusing already allocated queue index %u (%p)\",\n+\t\t      (void *)dev, idx, (void *)txq);\n+\t\tif (priv->started) {\n+\t\t\tpriv_unlock(priv);\n+\t\t\treturn -EEXIST;\n+\t\t}\n+\t\t(*priv->txqs)[idx] = NULL;\n+\t\ttxq_cleanup(txq);\n+\t} else {\n+\t\ttxq = rte_calloc_socket(\"TXQ\", 1, sizeof(*txq), 0, socket);\n+\t\tif (txq == NULL) {\n+\t\t\tERROR(\"%p: unable to allocate queue index %u\",\n+\t\t\t      (void *)dev, idx);\n+\t\t\tpriv_unlock(priv);\n+\t\t\treturn -ENOMEM;\n+\t\t}\n+\t}\n+\tret = txq_setup(dev, txq, desc, socket, conf);\n+\tif (ret)\n+\t\trte_free(txq);\n+\telse {\n+\t\tDEBUG(\"%p: adding TX queue %p to list\",\n+\t\t      (void *)dev, (void *)txq);\n+\t\t(*priv->txqs)[idx] = txq;\n+\t\t/* Update send callback. */\n+\t\tdev->tx_pkt_burst = mlx5_tx_burst;\n+\t}\n+\tpriv_unlock(priv);\n+\treturn -ret;\n+}\n+\n+/**\n+ * DPDK callback to release a TX queue.\n+ *\n+ * @param dpdk_txq\n+ *   Generic TX queue pointer.\n+ */\n+void\n+mlx5_tx_queue_release(void *dpdk_txq)\n+{\n+\tstruct txq *txq = (struct txq *)dpdk_txq;\n+\tstruct priv *priv;\n+\tunsigned int i;\n+\n+\tif (txq == NULL)\n+\t\treturn;\n+\tpriv = txq->priv;\n+\tpriv_lock(priv);\n+\tfor (i = 0; (i != priv->txqs_n); ++i)\n+\t\tif ((*priv->txqs)[i] == txq) {\n+\t\t\tDEBUG(\"%p: removing TX queue %p from list\",\n+\t\t\t      (void *)priv->dev, (void *)txq);\n+\t\t\t(*priv->txqs)[i] = NULL;\n+\t\t\tbreak;\n+\t\t}\n+\ttxq_cleanup(txq);\n+\trte_free(txq);\n+\tpriv_unlock(priv);\n+}\ndiff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h\nindex cc6aab6..e48e6b6 100644\n--- a/drivers/net/mlx5/mlx5_utils.h\n+++ b/drivers/net/mlx5/mlx5_utils.h\n@@ -140,10 +140,21 @@ pmd_drv_log_basename(const char *s)\n #define WARN(...) PMD_DRV_LOG(WARNING, __VA_ARGS__)\n #define ERROR(...) PMD_DRV_LOG(ERR, __VA_ARGS__)\n \n+/* Convenience macros for accessing mbuf fields. */\n+#define NEXT(m) ((m)->next)\n+#define DATA_LEN(m) ((m)->data_len)\n+#define PKT_LEN(m) ((m)->pkt_len)\n+#define DATA_OFF(m) ((m)->data_off)\n+#define SET_DATA_OFF(m, o) ((m)->data_off = (o))\n+#define NB_SEGS(m) ((m)->nb_segs)\n+#define PORT(m) ((m)->port)\n+\n /* Allocate a buffer on the stack and fill it with a printf format string. 
*/\n #define MKSTR(name, ...) \\\n \tchar name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \\\n \t\\\n \tsnprintf(name, sizeof(name), __VA_ARGS__)\n \n+#define WR_ID(o) (((wr_id_t *)&(o))->data)\n+\n #endif /* RTE_PMD_MLX5_UTILS_H_ */\n",
    "prefixes": [
        "dpdk-dev",
        "v2",
        "02/13"
    ]
}
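
The TX queue code in the patch above sizes its completion countdown so that a send completion is requested every MLX5_PMD_TX_PER_COMP_REQ packets, but at least four times per ring (see txq_alloc_elts() and the elts_comp_cd / elts_comp_cd_init fields). A minimal standalone C sketch of that scheme follows; TX_PER_COMP_REQ, comp_cd_init() and the ring size 256 are illustrative stand-ins for this sketch, not identifiers or values taken from the patch:

#include <stdio.h>

/* Hypothetical stand-in for the PMD's MLX5_PMD_TX_PER_COMP_REQ constant. */
#define TX_PER_COMP_REQ 64u

/* Mirrors the patch's initialization: request a completion every
 * per_comp_req packets, or at least 4 times per ring, whichever
 * is more frequent. */
static unsigned int
comp_cd_init(unsigned int elts_n, unsigned int per_comp_req)
{
	return (per_comp_req < (elts_n / 4)) ? per_comp_req : (elts_n / 4);
}

int
main(void)
{
	unsigned int elts_n = 256; /* ring size (number of TX elements) */
	unsigned int cd_init = comp_cd_init(elts_n, TX_PER_COMP_REQ);
	unsigned int cd = cd_init;
	unsigned int completions = 0;
	unsigned int pkt;

	/* Simulate one ring's worth of transmitted packets: each send
	 * decrements the countdown; when it reaches zero, the PMD would
	 * post a signaled send and reset the countdown. */
	for (pkt = 0; pkt < elts_n; ++pkt) {
		if (--cd == 0) {
			++completions;
			cd = cd_init;
		}
	}
	printf("cd_init=%u, completions per ring=%u\n",
	       cd_init, completions);
	return 0;
}

With these assumed values the countdown initializes to 64 and yields four signaled sends per 256-element ring, matching the "at least 4 times per ring" comment in txq_alloc_elts(). Batching completion requests this way amortizes the cost of completion processing across many packets while keeping elts_tail from lagging too far behind elts_head.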