From patchwork Mon Oct 5 17:52:58 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Adrien Mazarguil X-Patchwork-Id: 7429 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 634089214; Mon, 5 Oct 2015 19:53:41 +0200 (CEST) Received: from mail-wi0-f181.google.com (mail-wi0-f181.google.com [209.85.212.181]) by dpdk.org (Postfix) with ESMTP id E04CD9212 for ; Mon, 5 Oct 2015 19:53:39 +0200 (CEST) Received: by wiclk2 with SMTP id lk2so132406094wic.0 for ; Mon, 05 Oct 2015 10:53:39 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=13PIKZ87j3Ky2aSj+Wd9SE19f5zcdQ+Vn83tEhA+tmw=; b=FaqLupqJdFFAhFT7FzVSkclReVDmpOprvQlt/YtPXG5l1Iyv3nVsEGZfCYsfwukkGv 0xQByMTsGfSvipveVThu0dpw66Uy4/obG3nV0x57DmnRtZe16wBMOTwUQYIJ3xwI0I8Y R84M215eo4tluvEAwGwfcYNzTkKyrEszWROhRKE43mRELsvqjUqziny07WX1MfGQhq/b 9fJlBpGfSbWONouJ7TBb3YTMCNxyP06pkyByIzJ9L/91HuqZUHbahBkVpM/zeymwTWct 0oCc4Lc4GeSyIWj5iVmuQxoeFMJ6Ms4oWVwUdVa6YPBq3t824PiW0XcjasPqHG1UqHwk 2gSA== X-Gm-Message-State: ALoCoQki9J5gi+Mm/R1Phrc5bXzZBlTjN7Ds73LIWt2mc6qOQzI6e83AIqwliOFxL+KgtNeCi8xd X-Received: by 10.180.186.10 with SMTP id fg10mr13337877wic.30.1444067619503; Mon, 05 Oct 2015 10:53:39 -0700 (PDT) Received: from 6wind.com (guy78-3-82-239-227-177.fbx.proxad.net. [82.239.227.177]) by smtp.gmail.com with ESMTPSA id bk4sm28040816wjc.1.2015.10.05.10.53.37 (version=TLSv1.2 cipher=RC4-SHA bits=128/128); Mon, 05 Oct 2015 10:53:39 -0700 (PDT) From: Adrien Mazarguil To: dev@dpdk.org Date: Mon, 5 Oct 2015 19:52:58 +0200 Message-Id: <1444067589-29513-3-git-send-email-adrien.mazarguil@6wind.com> X-Mailer: git-send-email 2.1.0 In-Reply-To: <1444067589-29513-1-git-send-email-adrien.mazarguil@6wind.com> References: <1444067589-29513-1-git-send-email-adrien.mazarguil@6wind.com> Subject: [dpdk-dev] [PATCH 02/13] mlx5: add non-scattered TX and RX support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" RSS implementation with parent/child QPs comes from mlx4 and is temporary. Signed-off-by: Adrien Mazarguil Signed-off-by: Nelio Laranjeiro --- config/common_bsdapp | 3 + config/common_linuxapp | 3 + drivers/net/mlx5/Makefile | 15 + drivers/net/mlx5/mlx5.c | 40 +++ drivers/net/mlx5/mlx5.h | 25 ++ drivers/net/mlx5/mlx5_defs.h | 24 ++ drivers/net/mlx5/mlx5_rxq.c | 682 ++++++++++++++++++++++++++++++++++++++++++ drivers/net/mlx5/mlx5_rxtx.c | 495 ++++++++++++++++++++++++++++++ drivers/net/mlx5/mlx5_rxtx.h | 161 ++++++++++ drivers/net/mlx5/mlx5_txq.c | 512 +++++++++++++++++++++++++++++++ drivers/net/mlx5/mlx5_utils.h | 11 + 11 files changed, 1971 insertions(+) create mode 100644 drivers/net/mlx5/mlx5_rxq.c create mode 100644 drivers/net/mlx5/mlx5_rxtx.c create mode 100644 drivers/net/mlx5/mlx5_rxtx.h create mode 100644 drivers/net/mlx5/mlx5_txq.c diff --git a/config/common_bsdapp b/config/common_bsdapp index 1e6885f..3b50ff9 100644 --- a/config/common_bsdapp +++ b/config/common_bsdapp @@ -218,6 +218,9 @@ CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1 # CONFIG_RTE_LIBRTE_MLX5_PMD=n CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N=4 +CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE=0 +CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 # # Compile burst-oriented Broadcom PMD driver diff --git a/config/common_linuxapp b/config/common_linuxapp index 7da7ba7..eed8fc0 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -216,6 +216,9 @@ CONFIG_RTE_LIBRTE_MLX4_SOFT_COUNTERS=1 # CONFIG_RTE_LIBRTE_MLX5_PMD=n CONFIG_RTE_LIBRTE_MLX5_DEBUG=n +CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N=4 +CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE=0 +CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE=8 # # Compile burst-oriented Broadcom PMD driver diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index 6e63073..7b9c57b 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -42,6 +42,9 @@ LIB = librte_pmd_mlx5.a # Sources. SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_ethdev.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mac.c @@ -79,6 +82,18 @@ else CFLAGS += -DNDEBUG -UPEDANTIC endif +ifdef CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N +CFLAGS += -DMLX5_PMD_SGE_WR_N=$(CONFIG_RTE_LIBRTE_MLX5_SGE_WR_N) +endif + +ifdef CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE +CFLAGS += -DMLX5_PMD_MAX_INLINE=$(CONFIG_RTE_LIBRTE_MLX5_MAX_INLINE) +endif + +ifdef CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE +CFLAGS += -DMLX5_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX5_TX_MP_CACHE) +endif + include $(RTE_SDK)/mk/rte.lib.mk # Generate and clean-up mlx5_autoconf.h. diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index f6fe8a6..31ce5ec 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -63,6 +63,7 @@ #include "mlx5.h" #include "mlx5_utils.h" +#include "mlx5_rxtx.h" #include "mlx5_autoconf.h" /** @@ -77,11 +78,46 @@ static void mlx5_dev_close(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; + void *tmp; + unsigned int i; priv_lock(priv); DEBUG("%p: closing device \"%s\"", (void *)dev, ((priv->ctx != NULL) ? priv->ctx->device->name : "")); + /* Prevent crashes when queues are still in use. */ + dev->rx_pkt_burst = removed_rx_burst; + dev->tx_pkt_burst = removed_tx_burst; + if (priv->rxqs != NULL) { + /* XXX race condition if mlx5_rx_burst() is still running. */ + usleep(1000); + for (i = 0; (i != priv->rxqs_n); ++i) { + tmp = (*priv->rxqs)[i]; + if (tmp == NULL) + continue; + (*priv->rxqs)[i] = NULL; + rxq_cleanup(tmp); + rte_free(tmp); + } + priv->rxqs_n = 0; + priv->rxqs = NULL; + } + if (priv->txqs != NULL) { + /* XXX race condition if mlx5_tx_burst() is still running. */ + usleep(1000); + for (i = 0; (i != priv->txqs_n); ++i) { + tmp = (*priv->txqs)[i]; + if (tmp == NULL) + continue; + (*priv->txqs)[i] = NULL; + txq_cleanup(tmp); + rte_free(tmp); + } + priv->txqs_n = 0; + priv->txqs = NULL; + } + if (priv->rss) + rxq_cleanup(&priv->rxq_parent); if (priv->pd != NULL) { assert(priv->ctx != NULL); claim_zero(ibv_dealloc_pd(priv->pd)); @@ -94,6 +130,10 @@ mlx5_dev_close(struct rte_eth_dev *dev) static const struct eth_dev_ops mlx5_dev_ops = { .dev_close = mlx5_dev_close, + .rx_queue_setup = mlx5_rx_queue_setup, + .tx_queue_setup = mlx5_tx_queue_setup, + .rx_queue_release = mlx5_rx_queue_release, + .tx_queue_release = mlx5_tx_queue_release, }; static struct { diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index ef7975d..7e60e70 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -63,6 +63,7 @@ #endif #include "mlx5_utils.h" +#include "mlx5_rxtx.h" #include "mlx5_autoconf.h" #include "mlx5_defs.h" @@ -108,9 +109,33 @@ struct priv { unsigned int rss:1; /* RSS is enabled. */ unsigned int vf:1; /* This is a VF device. */ unsigned int max_rss_tbl_sz; /* Maximum number of RSS queues. */ + /* RX/TX queues. */ + struct rxq rxq_parent; /* Parent queue when RSS is enabled. */ + unsigned int rxqs_n; /* RX queues array size. */ + unsigned int txqs_n; /* TX queues array size. */ + struct rxq *(*rxqs)[]; /* RX queues. */ + struct txq *(*txqs)[]; /* TX queues. */ rte_spinlock_t lock; /* Lock for control functions. */ }; +/* Work Request ID data type (64 bit). */ +typedef union { + struct { + uint32_t id; + uint16_t offset; + } data; + uint64_t raw; +} wr_id_t; + +/* Compile-time check. */ +static inline void wr_id_t_check(void) +{ + wr_id_t check[1 + (2 * -!(sizeof(wr_id_t) == sizeof(uint64_t)))]; + + (void)check; + (void)wr_id_t_check; +} + /** * Lock private structure to protect it from concurrent access in the * control path. diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index 987ddcf..4f13a4e 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -50,4 +50,28 @@ /* Maximum number of simultaneous VLAN filters supported. See above. */ #define MLX5_MAX_VLAN_IDS 127 +/* Request send completion once in every 64 sends, might be less. */ +#define MLX5_PMD_TX_PER_COMP_REQ 64 + +/* Maximum number of Scatter/Gather Elements per Work Request. */ +#ifndef MLX5_PMD_SGE_WR_N +#define MLX5_PMD_SGE_WR_N 4 +#endif + +/* Maximum size for inline data. */ +#ifndef MLX5_PMD_MAX_INLINE +#define MLX5_PMD_MAX_INLINE 0 +#endif + +/* + * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP + * from which buffers are to be transmitted will have to be mapped by this + * driver to their own Memory Region (MR). This is a slow operation. + * + * This value is always 1 for RX queues. + */ +#ifndef MLX5_PMD_TX_MP_CACHE +#define MLX5_PMD_TX_MP_CACHE 8 +#endif + #endif /* RTE_PMD_MLX5_DEFS_H_ */ diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c new file mode 100644 index 0000000..01cc649 --- /dev/null +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -0,0 +1,682 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +/* Verbs header. */ +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +/* DPDK headers don't like -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#include +#include +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +#include "mlx5.h" +#include "mlx5_rxtx.h" +#include "mlx5_utils.h" +#include "mlx5_defs.h" + +/** + * Allocate RX queue elements. + * + * @param rxq + * Pointer to RX queue structure. + * @param elts_n + * Number of elements to allocate. + * @param[in] pool + * If not NULL, fetch buffers from this array instead of allocating them + * with rte_pktmbuf_alloc(). + * + * @return + * 0 on success, errno value on failure. + */ +static int +rxq_alloc_elts(struct rxq *rxq, unsigned int elts_n, struct rte_mbuf **pool) +{ + unsigned int i; + struct rxq_elt (*elts)[elts_n] = + rte_calloc_socket("RXQ elements", 1, sizeof(*elts), 0, + rxq->socket); + int ret = 0; + + if (elts == NULL) { + ERROR("%p: can't allocate packets array", (void *)rxq); + ret = ENOMEM; + goto error; + } + /* For each WR (packet). */ + for (i = 0; (i != elts_n); ++i) { + struct rxq_elt *elt = &(*elts)[i]; + struct ibv_recv_wr *wr = &elt->wr; + struct ibv_sge *sge = &(*elts)[i].sge; + struct rte_mbuf *buf; + + if (pool != NULL) { + buf = *(pool++); + assert(buf != NULL); + rte_pktmbuf_reset(buf); + } else + buf = rte_pktmbuf_alloc(rxq->mp); + if (buf == NULL) { + assert(pool == NULL); + ERROR("%p: empty mbuf pool", (void *)rxq); + ret = ENOMEM; + goto error; + } + /* Configure WR. Work request ID contains its own index in + * the elts array and the offset between SGE buffer header and + * its data. */ + WR_ID(wr->wr_id).id = i; + WR_ID(wr->wr_id).offset = + (((uintptr_t)buf->buf_addr + RTE_PKTMBUF_HEADROOM) - + (uintptr_t)buf); + wr->next = &(*elts)[(i + 1)].wr; + wr->sg_list = sge; + wr->num_sge = 1; + /* Headroom is reserved by rte_pktmbuf_alloc(). */ + assert(DATA_OFF(buf) == RTE_PKTMBUF_HEADROOM); + /* Buffer is supposed to be empty. */ + assert(rte_pktmbuf_data_len(buf) == 0); + assert(rte_pktmbuf_pkt_len(buf) == 0); + /* sge->addr must be able to store a pointer. */ + assert(sizeof(sge->addr) >= sizeof(uintptr_t)); + /* SGE keeps its headroom. */ + sge->addr = (uintptr_t) + ((uint8_t *)buf->buf_addr + RTE_PKTMBUF_HEADROOM); + sge->length = (buf->buf_len - RTE_PKTMBUF_HEADROOM); + sge->lkey = rxq->mr->lkey; + /* Redundant check for tailroom. */ + assert(sge->length == rte_pktmbuf_tailroom(buf)); + /* Make sure elts index and SGE mbuf pointer can be deduced + * from WR ID. */ + if ((WR_ID(wr->wr_id).id != i) || + ((void *)((uintptr_t)sge->addr - + WR_ID(wr->wr_id).offset) != buf)) { + ERROR("%p: cannot store index and offset in WR ID", + (void *)rxq); + sge->addr = 0; + rte_pktmbuf_free(buf); + ret = EOVERFLOW; + goto error; + } + } + /* The last WR pointer must be NULL. */ + (*elts)[(i - 1)].wr.next = NULL; + DEBUG("%p: allocated and configured %u single-segment WRs", + (void *)rxq, elts_n); + rxq->elts_n = elts_n; + rxq->elts_head = 0; + rxq->elts.no_sp = elts; + assert(ret == 0); + return 0; +error: + if (elts != NULL) { + assert(pool == NULL); + for (i = 0; (i != RTE_DIM(*elts)); ++i) { + struct rxq_elt *elt = &(*elts)[i]; + struct rte_mbuf *buf; + + if (elt->sge.addr == 0) + continue; + assert(WR_ID(elt->wr.wr_id).id == i); + buf = (void *)((uintptr_t)elt->sge.addr - + WR_ID(elt->wr.wr_id).offset); + rte_pktmbuf_free_seg(buf); + } + rte_free(elts); + } + DEBUG("%p: failed, freed everything", (void *)rxq); + assert(ret > 0); + return ret; +} + +/** + * Free RX queue elements. + * + * @param rxq + * Pointer to RX queue structure. + */ +static void +rxq_free_elts(struct rxq *rxq) +{ + unsigned int i; + unsigned int elts_n = rxq->elts_n; + struct rxq_elt (*elts)[elts_n] = rxq->elts.no_sp; + + DEBUG("%p: freeing WRs", (void *)rxq); + rxq->elts_n = 0; + rxq->elts.no_sp = NULL; + if (elts == NULL) + return; + for (i = 0; (i != RTE_DIM(*elts)); ++i) { + struct rxq_elt *elt = &(*elts)[i]; + struct rte_mbuf *buf; + + if (elt->sge.addr == 0) + continue; + assert(WR_ID(elt->wr.wr_id).id == i); + buf = (void *)((uintptr_t)elt->sge.addr - + WR_ID(elt->wr.wr_id).offset); + rte_pktmbuf_free_seg(buf); + } + rte_free(elts); +} + +/** + * Clean up a RX queue. + * + * Destroy objects, free allocated memory and reset the structure for reuse. + * + * @param rxq + * Pointer to RX queue structure. + */ +void +rxq_cleanup(struct rxq *rxq) +{ + struct ibv_exp_release_intf_params params; + + DEBUG("cleaning up %p", (void *)rxq); + rxq_free_elts(rxq); + if (rxq->if_qp != NULL) { + assert(rxq->priv != NULL); + assert(rxq->priv->ctx != NULL); + assert(rxq->qp != NULL); + params = (struct ibv_exp_release_intf_params){ + .comp_mask = 0, + }; + claim_zero(ibv_exp_release_intf(rxq->priv->ctx, + rxq->if_qp, + ¶ms)); + } + if (rxq->if_cq != NULL) { + assert(rxq->priv != NULL); + assert(rxq->priv->ctx != NULL); + assert(rxq->cq != NULL); + params = (struct ibv_exp_release_intf_params){ + .comp_mask = 0, + }; + claim_zero(ibv_exp_release_intf(rxq->priv->ctx, + rxq->if_cq, + ¶ms)); + } + if (rxq->qp != NULL) { + claim_zero(ibv_destroy_qp(rxq->qp)); + } + if (rxq->cq != NULL) + claim_zero(ibv_destroy_cq(rxq->cq)); + if (rxq->rd != NULL) { + struct ibv_exp_destroy_res_domain_attr attr = { + .comp_mask = 0, + }; + + assert(rxq->priv != NULL); + assert(rxq->priv->ctx != NULL); + claim_zero(ibv_exp_destroy_res_domain(rxq->priv->ctx, + rxq->rd, + &attr)); + } + if (rxq->mr != NULL) + claim_zero(ibv_dereg_mr(rxq->mr)); + memset(rxq, 0, sizeof(*rxq)); +} + +/** + * Allocate a Queue Pair. + * Optionally setup inline receive if supported. + * + * @param priv + * Pointer to private structure. + * @param cq + * Completion queue to associate with QP. + * @param desc + * Number of descriptors in QP (hint only). + * + * @return + * QP pointer or NULL in case of error. + */ +static struct ibv_qp * +rxq_setup_qp(struct priv *priv, struct ibv_cq *cq, uint16_t desc, + struct ibv_exp_res_domain *rd) +{ + struct ibv_exp_qp_init_attr attr = { + /* CQ to be associated with the send queue. */ + .send_cq = cq, + /* CQ to be associated with the receive queue. */ + .recv_cq = cq, + .cap = { + /* Max number of outstanding WRs. */ + .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ? + priv->device_attr.max_qp_wr : + desc), + /* Max number of scatter/gather elements in a WR. */ + .max_recv_sge = ((priv->device_attr.max_sge < + MLX5_PMD_SGE_WR_N) ? + priv->device_attr.max_sge : + MLX5_PMD_SGE_WR_N), + }, + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_RES_DOMAIN), + .pd = priv->pd, + .res_domain = rd, + }; + + return ibv_exp_create_qp(priv->ctx, &attr); +} + +#ifdef RSS_SUPPORT + +/** + * Allocate a RSS Queue Pair. + * Optionally setup inline receive if supported. + * + * @param priv + * Pointer to private structure. + * @param cq + * Completion queue to associate with QP. + * @param desc + * Number of descriptors in QP (hint only). + * @param parent + * If nonzero, create a parent QP, otherwise a child. + * + * @return + * QP pointer or NULL in case of error. + */ +static struct ibv_qp * +rxq_setup_qp_rss(struct priv *priv, struct ibv_cq *cq, uint16_t desc, + int parent, struct ibv_exp_res_domain *rd) +{ + struct ibv_exp_qp_init_attr attr = { + /* CQ to be associated with the send queue. */ + .send_cq = cq, + /* CQ to be associated with the receive queue. */ + .recv_cq = cq, + .cap = { + /* Max number of outstanding WRs. */ + .max_recv_wr = ((priv->device_attr.max_qp_wr < desc) ? + priv->device_attr.max_qp_wr : + desc), + /* Max number of scatter/gather elements in a WR. */ + .max_recv_sge = ((priv->device_attr.max_sge < + MLX5_PMD_SGE_WR_N) ? + priv->device_attr.max_sge : + MLX5_PMD_SGE_WR_N), + }, + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_RES_DOMAIN | + IBV_EXP_QP_INIT_ATTR_QPG), + .pd = priv->pd, + .res_domain = rd, + }; + + if (parent) { + attr.qpg.qpg_type = IBV_EXP_QPG_PARENT; + /* TSS isn't necessary. */ + attr.qpg.parent_attrib.tss_child_count = 0; + attr.qpg.parent_attrib.rss_child_count = priv->rxqs_n; + DEBUG("initializing parent RSS queue"); + } else { + attr.qpg.qpg_type = IBV_EXP_QPG_CHILD_RX; + attr.qpg.qpg_parent = priv->rxq_parent.qp; + DEBUG("initializing child RSS queue"); + } + return ibv_exp_create_qp(priv->ctx, &attr); +} + +#endif /* RSS_SUPPORT */ + +/** + * Configure a RX queue. + * + * @param dev + * Pointer to Ethernet device structure. + * @param rxq + * Pointer to RX queue structure. + * @param desc + * Number of descriptors to configure in queue. + * @param socket + * NUMA socket on which memory must be allocated. + * @param[in] conf + * Thresholds parameters. + * @param mp + * Memory pool for buffer allocations. + * + * @return + * 0 on success, errno value on failure. + */ +int +rxq_setup(struct rte_eth_dev *dev, struct rxq *rxq, uint16_t desc, + unsigned int socket, const struct rte_eth_rxconf *conf, + struct rte_mempool *mp) +{ + struct priv *priv = dev->data->dev_private; + struct rxq tmpl = { + .priv = priv, + .mp = mp, + .socket = socket + }; + struct ibv_exp_qp_attr mod; + union { + struct ibv_exp_query_intf_params params; + struct ibv_exp_cq_init_attr cq; + struct ibv_exp_res_domain_init_attr rd; + } attr; + enum ibv_exp_query_intf_status status; + struct ibv_recv_wr *bad_wr; + struct rte_mbuf *buf; + int ret = 0; + int parent = (rxq == &priv->rxq_parent); + + (void)conf; /* Thresholds configuration (ignored). */ + /* + * If this is a parent queue, hardware must support RSS and + * RSS must be enabled. + */ + assert((!parent) || ((priv->hw_rss) && (priv->rss))); + if (parent) { + /* Even if unused, ibv_create_cq() requires at least one + * descriptor. */ + desc = 1; + goto skip_mr; + } + if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) { + ERROR("%p: invalid number of RX descriptors (must be a" + " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N); + return EINVAL; + } + /* Get mbuf length. */ + buf = rte_pktmbuf_alloc(mp); + if (buf == NULL) { + ERROR("%p: unable to allocate mbuf", (void *)dev); + return ENOMEM; + } + tmpl.mb_len = buf->buf_len; + assert((rte_pktmbuf_headroom(buf) + + rte_pktmbuf_tailroom(buf)) == tmpl.mb_len); + assert(rte_pktmbuf_headroom(buf) == RTE_PKTMBUF_HEADROOM); + rte_pktmbuf_free(buf); + /* Use the entire RX mempool as the memory region. */ + tmpl.mr = ibv_reg_mr(priv->pd, + (void *)mp->elt_va_start, + (mp->elt_va_end - mp->elt_va_start), + (IBV_ACCESS_LOCAL_WRITE | + IBV_ACCESS_REMOTE_WRITE)); + if (tmpl.mr == NULL) { + ret = EINVAL; + ERROR("%p: MR creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } +skip_mr: + attr.rd = (struct ibv_exp_res_domain_init_attr){ + .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL | + IBV_EXP_RES_DOMAIN_MSG_MODEL), + .thread_model = IBV_EXP_THREAD_SINGLE, + .msg_model = IBV_EXP_MSG_HIGH_BW, + }; + tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd); + if (tmpl.rd == NULL) { + ret = ENOMEM; + ERROR("%p: RD creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } + attr.cq = (struct ibv_exp_cq_init_attr){ + .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN, + .res_domain = tmpl.rd, + }; + tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq); + if (tmpl.cq == NULL) { + ret = ENOMEM; + ERROR("%p: CQ creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } + DEBUG("priv->device_attr.max_qp_wr is %d", + priv->device_attr.max_qp_wr); + DEBUG("priv->device_attr.max_sge is %d", + priv->device_attr.max_sge); +#ifdef RSS_SUPPORT + if (priv->rss) + tmpl.qp = rxq_setup_qp_rss(priv, tmpl.cq, desc, parent, + tmpl.rd); + else +#endif /* RSS_SUPPORT */ + tmpl.qp = rxq_setup_qp(priv, tmpl.cq, desc, tmpl.rd); + if (tmpl.qp == NULL) { + ret = (errno ? errno : EINVAL); + ERROR("%p: QP creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } + mod = (struct ibv_exp_qp_attr){ + /* Move the QP to this state. */ + .qp_state = IBV_QPS_INIT, + /* Primary port number. */ + .port_num = priv->port + }; + ret = ibv_exp_modify_qp(tmpl.qp, &mod, + (IBV_EXP_QP_STATE | +#ifdef RSS_SUPPORT + (parent ? IBV_EXP_QP_GROUP_RSS : 0) | +#endif /* RSS_SUPPORT */ + IBV_EXP_QP_PORT)); + if (ret) { + ERROR("%p: QP state to IBV_QPS_INIT failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + /* Allocate descriptors for RX queues, except for the RSS parent. */ + if (parent) + goto skip_alloc; + ret = rxq_alloc_elts(&tmpl, desc, NULL); + if (ret) { + ERROR("%p: RXQ allocation failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + ret = ibv_post_recv(tmpl.qp, + &(*tmpl.elts.no_sp)[0].wr, + &bad_wr); + if (ret) { + ERROR("%p: ibv_post_recv() failed for WR %p: %s", + (void *)dev, + (void *)bad_wr, + strerror(ret)); + goto error; + } +skip_alloc: + mod = (struct ibv_exp_qp_attr){ + .qp_state = IBV_QPS_RTR + }; + ret = ibv_exp_modify_qp(tmpl.qp, &mod, IBV_EXP_QP_STATE); + if (ret) { + ERROR("%p: QP state to IBV_QPS_RTR failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + /* Save port ID. */ + tmpl.port_id = dev->data->port_id; + DEBUG("%p: RTE port ID: %u", (void *)rxq, tmpl.port_id); + attr.params = (struct ibv_exp_query_intf_params){ + .intf_scope = IBV_EXP_INTF_GLOBAL, + .intf = IBV_EXP_INTF_CQ, + .obj = tmpl.cq, + }; + tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status); + if (tmpl.if_cq == NULL) { + ERROR("%p: CQ interface family query failed with status %d", + (void *)dev, status); + goto error; + } + attr.params = (struct ibv_exp_query_intf_params){ + .intf_scope = IBV_EXP_INTF_GLOBAL, + .intf = IBV_EXP_INTF_QP_BURST, + .obj = tmpl.qp, + }; + tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status); + if (tmpl.if_qp == NULL) { + ERROR("%p: QP interface family query failed with status %d", + (void *)dev, status); + goto error; + } + /* Clean up rxq in case we're reinitializing it. */ + DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq); + rxq_cleanup(rxq); + *rxq = tmpl; + DEBUG("%p: rxq updated with %p", (void *)rxq, (void *)&tmpl); + assert(ret == 0); + return 0; +error: + rxq_cleanup(&tmpl); + assert(ret > 0); + return ret; +} + +/** + * DPDK callback to configure a RX queue. + * + * @param dev + * Pointer to Ethernet device structure. + * @param idx + * RX queue index. + * @param desc + * Number of descriptors to configure in queue. + * @param socket + * NUMA socket on which memory must be allocated. + * @param[in] conf + * Thresholds parameters. + * @param mp + * Memory pool for buffer allocations. + * + * @return + * 0 on success, negative errno value on failure. + */ +int +mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_rxconf *conf, + struct rte_mempool *mp) +{ + struct priv *priv = dev->data->dev_private; + struct rxq *rxq = (*priv->rxqs)[idx]; + int ret; + + priv_lock(priv); + DEBUG("%p: configuring queue %u for %u descriptors", + (void *)dev, idx, desc); + if (idx >= priv->rxqs_n) { + ERROR("%p: queue index out of range (%u >= %u)", + (void *)dev, idx, priv->rxqs_n); + priv_unlock(priv); + return -EOVERFLOW; + } + if (rxq != NULL) { + DEBUG("%p: reusing already allocated queue index %u (%p)", + (void *)dev, idx, (void *)rxq); + if (priv->started) { + priv_unlock(priv); + return -EEXIST; + } + (*priv->rxqs)[idx] = NULL; + rxq_cleanup(rxq); + } else { + rxq = rte_calloc_socket("RXQ", 1, sizeof(*rxq), 0, socket); + if (rxq == NULL) { + ERROR("%p: unable to allocate queue index %u", + (void *)dev, idx); + priv_unlock(priv); + return -ENOMEM; + } + } + ret = rxq_setup(dev, rxq, desc, socket, conf, mp); + if (ret) + rte_free(rxq); + else { + DEBUG("%p: adding RX queue %p to list", + (void *)dev, (void *)rxq); + (*priv->rxqs)[idx] = rxq; + /* Update receive callback. */ + dev->rx_pkt_burst = mlx5_rx_burst; + } + priv_unlock(priv); + return -ret; +} + +/** + * DPDK callback to release a RX queue. + * + * @param dpdk_rxq + * Generic RX queue pointer. + */ +void +mlx5_rx_queue_release(void *dpdk_rxq) +{ + struct rxq *rxq = (struct rxq *)dpdk_rxq; + struct priv *priv; + unsigned int i; + + if (rxq == NULL) + return; + priv = rxq->priv; + priv_lock(priv); + assert(rxq != &priv->rxq_parent); + for (i = 0; (i != priv->rxqs_n); ++i) + if ((*priv->rxqs)[i] == rxq) { + DEBUG("%p: removing RX queue %p from list", + (void *)priv->dev, (void *)rxq); + (*priv->rxqs)[i] = NULL; + break; + } + rxq_cleanup(rxq); + rte_free(rxq); + priv_unlock(priv); +} diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c new file mode 100644 index 0000000..40bddf0 --- /dev/null +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -0,0 +1,495 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +/* Verbs header. */ +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +/* DPDK headers don't like -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#include +#include +#include +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +#include "mlx5.h" +#include "mlx5_utils.h" +#include "mlx5_rxtx.h" +#include "mlx5_defs.h" + +/** + * Manage TX completions. + * + * When sending a burst, mlx5_tx_burst() posts several WRs. + * To improve performance, a completion event is only required once every + * MLX5_PMD_TX_PER_COMP_REQ sends. Doing so discards completion information + * for other WRs, but this information would not be used anyway. + * + * @param txq + * Pointer to TX queue structure. + * + * @return + * 0 on success, -1 on failure. + */ +static int +txq_complete(struct txq *txq) +{ + unsigned int elts_comp = txq->elts_comp; + unsigned int elts_tail = txq->elts_tail; + const unsigned int elts_n = txq->elts_n; + int wcs_n; + + if (unlikely(elts_comp == 0)) + return 0; +#ifdef DEBUG_SEND + DEBUG("%p: processing %u work requests completions", + (void *)txq, elts_comp); +#endif + wcs_n = txq->if_cq->poll_cnt(txq->cq, elts_comp); + if (unlikely(wcs_n == 0)) + return 0; + if (unlikely(wcs_n < 0)) { + DEBUG("%p: ibv_poll_cq() failed (wcs_n=%d)", + (void *)txq, wcs_n); + return -1; + } + elts_comp -= wcs_n; + assert(elts_comp <= txq->elts_comp); + /* + * Assume WC status is successful as nothing can be done about it + * anyway. + */ + elts_tail += wcs_n * txq->elts_comp_cd_init; + if (elts_tail >= elts_n) + elts_tail -= elts_n; + txq->elts_tail = elts_tail; + txq->elts_comp = elts_comp; + return 0; +} + +/** + * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[]. + * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full, + * remove an entry first. + * + * @param txq + * Pointer to TX queue structure. + * @param[in] mp + * Memory Pool for which a Memory Region lkey must be returned. + * + * @return + * mr->lkey on success, (uint32_t)-1 on failure. + */ +static uint32_t +txq_mp2mr(struct txq *txq, struct rte_mempool *mp) +{ + unsigned int i; + struct ibv_mr *mr; + + for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { + if (unlikely(txq->mp2mr[i].mp == NULL)) { + /* Unknown MP, add a new MR for it. */ + break; + } + if (txq->mp2mr[i].mp == mp) { + assert(txq->mp2mr[i].lkey != (uint32_t)-1); + assert(txq->mp2mr[i].mr->lkey == txq->mp2mr[i].lkey); + return txq->mp2mr[i].lkey; + } + } + /* Add a new entry, register MR first. */ + DEBUG("%p: discovered new memory pool %p", (void *)txq, (void *)mp); + mr = ibv_reg_mr(txq->priv->pd, + (void *)mp->elt_va_start, + (mp->elt_va_end - mp->elt_va_start), + (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); + if (unlikely(mr == NULL)) { + DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", + (void *)txq); + return (uint32_t)-1; + } + if (unlikely(i == RTE_DIM(txq->mp2mr))) { + /* Table is full, remove oldest entry. */ + DEBUG("%p: MR <-> MP table full, dropping oldest entry.", + (void *)txq); + --i; + claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr)); + memmove(&txq->mp2mr[0], &txq->mp2mr[1], + (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0]))); + } + /* Store the new entry. */ + txq->mp2mr[i].mp = mp; + txq->mp2mr[i].mr = mr; + txq->mp2mr[i].lkey = mr->lkey; + DEBUG("%p: new MR lkey for MP %p: 0x%08" PRIu32, + (void *)txq, (void *)mp, txq->mp2mr[i].lkey); + return txq->mp2mr[i].lkey; +} + +/** + * DPDK callback for TX. + * + * @param dpdk_txq + * Generic pointer to TX queue structure. + * @param[in] pkts + * Packets to transmit. + * @param pkts_n + * Number of packets in array. + * + * @return + * Number of packets successfully transmitted (<= pkts_n). + */ +uint16_t +mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + struct txq *txq = (struct txq *)dpdk_txq; + unsigned int elts_head = txq->elts_head; + const unsigned int elts_tail = txq->elts_tail; + const unsigned int elts_n = txq->elts_n; + unsigned int elts_comp_cd = txq->elts_comp_cd; + unsigned int elts_comp = 0; + unsigned int i; + unsigned int max; + int err; + + assert(elts_comp_cd != 0); + txq_complete(txq); + max = (elts_n - (elts_head - elts_tail)); + if (max > elts_n) + max -= elts_n; + assert(max >= 1); + assert(max <= elts_n); + /* Always leave one free entry in the ring. */ + --max; + if (max == 0) + return 0; + if (max > pkts_n) + max = pkts_n; + for (i = 0; (i != max); ++i) { + struct rte_mbuf *buf = pkts[i]; + unsigned int elts_head_next = + (((elts_head + 1) == elts_n) ? 0 : elts_head + 1); + struct txq_elt *elt_next = &(*txq->elts)[elts_head_next]; + struct txq_elt *elt = &(*txq->elts)[elts_head]; + unsigned int segs = NB_SEGS(buf); + uint32_t send_flags = 0; + + /* Clean up old buffer. */ + if (likely(elt->buf != NULL)) { + struct rte_mbuf *tmp = elt->buf; + + /* Faster than rte_pktmbuf_free(). */ + do { + struct rte_mbuf *next = NEXT(tmp); + + rte_pktmbuf_free_seg(tmp); + tmp = next; + } while (tmp != NULL); + } + /* Request TX completion. */ + if (unlikely(--elts_comp_cd == 0)) { + elts_comp_cd = txq->elts_comp_cd_init; + ++elts_comp; + send_flags |= IBV_EXP_QP_BURST_SIGNALED; + } + if (likely(segs == 1)) { + uintptr_t addr; + uint32_t length; + uint32_t lkey; + + /* Retrieve buffer information. */ + addr = rte_pktmbuf_mtod(buf, uintptr_t); + length = DATA_LEN(buf); + /* Retrieve Memory Region key for this memory pool. */ + lkey = txq_mp2mr(txq, buf->pool); + if (unlikely(lkey == (uint32_t)-1)) { + /* MR does not exist. */ + DEBUG("%p: unable to get MP <-> MR" + " association", (void *)txq); + /* Clean up TX element. */ + elt->buf = NULL; + goto stop; + } + /* Update element. */ + elt->buf = buf; + if (txq->priv->vf) + rte_prefetch0((volatile void *) + (uintptr_t)addr); + RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf); + /* Put packet into send queue. */ +#if MLX5_PMD_MAX_INLINE > 0 + if (length <= txq->max_inline) + err = txq->if_qp->send_pending_inline + (txq->qp, + (void *)addr, + length, + send_flags); + else +#endif + err = txq->if_qp->send_pending + (txq->qp, + addr, + length, + lkey, + send_flags); + if (unlikely(err)) + goto stop; + } else { + DEBUG("%p: TX scattered buffers support not" + " compiled in", (void *)txq); + goto stop; + } + elts_head = elts_head_next; + } +stop: + /* Take a shortcut if nothing must be sent. */ + if (unlikely(i == 0)) + return 0; + /* Ring QP doorbell. */ + err = txq->if_qp->send_flush(txq->qp); + if (unlikely(err)) { + /* A nonzero value is not supposed to be returned. + * Nothing can be done about it. */ + DEBUG("%p: send_flush() failed with error %d", + (void *)txq, err); + } + txq->elts_head = elts_head; + txq->elts_comp += elts_comp; + txq->elts_comp_cd = elts_comp_cd; + return i; +} + +/** + * DPDK callback for RX. + * + * @param dpdk_rxq + * Generic pointer to RX queue structure. + * @param[out] pkts + * Array to store received packets. + * @param pkts_n + * Maximum number of packets in array. + * + * @return + * Number of packets successfully received (<= pkts_n). + */ +uint16_t +mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + struct rxq *rxq = (struct rxq *)dpdk_rxq; + struct rxq_elt (*elts)[rxq->elts_n] = rxq->elts.no_sp; + const unsigned int elts_n = rxq->elts_n; + unsigned int elts_head = rxq->elts_head; + struct ibv_sge sges[pkts_n]; + unsigned int i; + unsigned int pkts_ret = 0; + int ret; + + for (i = 0; (i != pkts_n); ++i) { + struct rxq_elt *elt = &(*elts)[elts_head]; + struct ibv_recv_wr *wr = &elt->wr; + uint64_t wr_id = wr->wr_id; + unsigned int len; + struct rte_mbuf *seg = (void *)((uintptr_t)elt->sge.addr - + WR_ID(wr_id).offset); + struct rte_mbuf *rep; + uint32_t flags; + + /* Sanity checks. */ + assert(WR_ID(wr_id).id < rxq->elts_n); + assert(wr->sg_list == &elt->sge); + assert(wr->num_sge == 1); + assert(elts_head < rxq->elts_n); + assert(rxq->elts_head < rxq->elts_n); + ret = rxq->if_cq->poll_length_flags(rxq->cq, NULL, NULL, + &flags); + if (unlikely(ret < 0)) { + struct ibv_wc wc; + int wcs_n; + + DEBUG("rxq=%p, poll_length() failed (ret=%d)", + (void *)rxq, ret); + /* ibv_poll_cq() must be used in case of failure. */ + wcs_n = ibv_poll_cq(rxq->cq, 1, &wc); + if (unlikely(wcs_n == 0)) + break; + if (unlikely(wcs_n < 0)) { + DEBUG("rxq=%p, ibv_poll_cq() failed (wcs_n=%d)", + (void *)rxq, wcs_n); + break; + } + assert(wcs_n == 1); + if (unlikely(wc.status != IBV_WC_SUCCESS)) { + /* Whatever, just repost the offending WR. */ + DEBUG("rxq=%p, wr_id=%" PRIu64 ": bad work" + " completion status (%d): %s", + (void *)rxq, wc.wr_id, wc.status, + ibv_wc_status_str(wc.status)); + /* Add SGE to array for repost. */ + sges[i] = elt->sge; + goto repost; + } + ret = wc.byte_len; + } + if (ret == 0) + break; + len = ret; + /* + * Fetch initial bytes of packet descriptor into a + * cacheline while allocating rep. + */ + rte_prefetch0(seg); + rep = __rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(rep == NULL)) { + /* + * Unable to allocate a replacement mbuf, + * repost WR. + */ + DEBUG("rxq=%p, wr_id=%" PRIu32 ":" + " can't allocate a new mbuf", + (void *)rxq, WR_ID(wr_id).id); + /* Increment out of memory counters. */ + ++rxq->priv->dev->data->rx_mbuf_alloc_failed; + goto repost; + } + + /* Reconfigure sge to use rep instead of seg. */ + elt->sge.addr = (uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM; + assert(elt->sge.lkey == rxq->mr->lkey); + WR_ID(wr->wr_id).offset = + (((uintptr_t)rep->buf_addr + RTE_PKTMBUF_HEADROOM) - + (uintptr_t)rep); + assert(WR_ID(wr->wr_id).id == WR_ID(wr_id).id); + + /* Add SGE to array for repost. */ + sges[i] = elt->sge; + + /* Update seg information. */ + SET_DATA_OFF(seg, RTE_PKTMBUF_HEADROOM); + NB_SEGS(seg) = 1; + PORT(seg) = rxq->port_id; + NEXT(seg) = NULL; + PKT_LEN(seg) = len; + DATA_LEN(seg) = len; + + /* Return packet. */ + *(pkts++) = seg; + ++pkts_ret; +repost: + if (++elts_head >= elts_n) + elts_head = 0; + continue; + } + if (unlikely(i == 0)) + return 0; + /* Repost WRs. */ +#ifdef DEBUG_RECV + DEBUG("%p: reposting %u WRs", (void *)rxq, i); +#endif + ret = rxq->if_qp->recv_burst(rxq->qp, sges, i); + if (unlikely(ret)) { + /* Inability to repost WRs is fatal. */ + DEBUG("%p: recv_burst(): failed (ret=%d)", + (void *)rxq->priv, + ret); + abort(); + } + rxq->elts_head = elts_head; + return pkts_ret; +} + +/** + * Dummy DPDK callback for TX. + * + * This function is used to temporarily replace the real callback during + * unsafe control operations on the queue, or in case of error. + * + * @param dpdk_txq + * Generic pointer to TX queue structure. + * @param[in] pkts + * Packets to transmit. + * @param pkts_n + * Number of packets in array. + * + * @return + * Number of packets successfully transmitted (<= pkts_n). + */ +uint16_t +removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + (void)dpdk_txq; + (void)pkts; + (void)pkts_n; + return 0; +} + +/** + * Dummy DPDK callback for RX. + * + * This function is used to temporarily replace the real callback during + * unsafe control operations on the queue, or in case of error. + * + * @param dpdk_rxq + * Generic pointer to RX queue structure. + * @param[out] pkts + * Array to store received packets. + * @param pkts_n + * Maximum number of packets in array. + * + * @return + * Number of packets successfully received (<= pkts_n). + */ +uint16_t +removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + (void)dpdk_rxq; + (void)pkts; + (void)pkts_n; + return 0; +} diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h new file mode 100644 index 0000000..08a4911 --- /dev/null +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -0,0 +1,161 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_PMD_MLX5_RXTX_H_ +#define RTE_PMD_MLX5_RXTX_H_ + +#include + +/* Verbs header. */ +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +/* DPDK headers don't like -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +#include "mlx5_utils.h" +#include "mlx5.h" +#include "mlx5_defs.h" + +/* RX element. */ +struct rxq_elt { + struct ibv_recv_wr wr; /* Work Request. */ + struct ibv_sge sge; /* Scatter/Gather Element. */ + /* mbuf pointer is derived from WR_ID(wr.wr_id).offset. */ +}; + +struct priv; + +/* RX queue descriptor. */ +struct rxq { + struct priv *priv; /* Back pointer to private data. */ + struct rte_mempool *mp; /* Memory Pool for allocations. */ + struct ibv_mr *mr; /* Memory Region (for mp). */ + struct ibv_cq *cq; /* Completion Queue. */ + struct ibv_qp *qp; /* Queue Pair. */ + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */ + struct ibv_exp_cq_family *if_cq; /* CQ interface. */ + /* + * Each VLAN ID requires a separate flow steering rule. + */ + BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES); + struct ibv_flow *mac_flow[MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS]; + unsigned int port_id; /* Port ID for incoming packets. */ + unsigned int elts_n; /* (*elts)[] length. */ + unsigned int elts_head; /* Current index in (*elts)[]. */ + union { + struct rxq_elt (*no_sp)[]; /* RX elements. */ + } elts; + uint32_t mb_len; /* Length of a mp-issued mbuf. */ + unsigned int socket; /* CPU socket ID for allocations. */ + struct ibv_exp_res_domain *rd; /* Resource Domain. */ +}; + +/* TX element. */ +struct txq_elt { + struct rte_mbuf *buf; +}; + +/* Linear buffer type. It is used when transmitting buffers with too many + * segments that do not fit the hardware queue (see max_send_sge). + * Extra segments are copied (linearized) in such buffers, replacing the + * last SGE during TX. + * The size is arbitrary but large enough to hold a jumbo frame with + * 8 segments considering mbuf.buf_len is about 2048 bytes. */ +typedef uint8_t linear_t[16384]; + +/* TX queue descriptor. */ +struct txq { + struct priv *priv; /* Back pointer to private data. */ + struct { + struct rte_mempool *mp; /* Cached Memory Pool. */ + struct ibv_mr *mr; /* Memory Region (for mp). */ + uint32_t lkey; /* mr->lkey */ + } mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */ + struct ibv_cq *cq; /* Completion Queue. */ + struct ibv_qp *qp; /* Queue Pair. */ + struct ibv_exp_qp_burst_family *if_qp; /* QP burst interface. */ + struct ibv_exp_cq_family *if_cq; /* CQ interface. */ +#if MLX5_PMD_MAX_INLINE > 0 + uint32_t max_inline; /* Max inline send size <= MLX5_PMD_MAX_INLINE. */ +#endif + unsigned int elts_n; /* (*elts)[] length. */ + struct txq_elt (*elts)[]; /* TX elements. */ + unsigned int elts_head; /* Current index in (*elts)[]. */ + unsigned int elts_tail; /* First element awaiting completion. */ + unsigned int elts_comp; /* Number of completion requests. */ + unsigned int elts_comp_cd; /* Countdown for next completion request. */ + unsigned int elts_comp_cd_init; /* Initial value for countdown. */ + linear_t (*elts_linear)[]; /* Linearized buffers. */ + struct ibv_mr *mr_linear; /* Memory Region for linearized buffers. */ + unsigned int socket; /* CPU socket ID for allocations. */ + struct ibv_exp_res_domain *rd; /* Resource Domain. */ +}; + +/* mlx5_rxq.c */ + +void rxq_cleanup(struct rxq *); +int rxq_setup(struct rte_eth_dev *, struct rxq *, uint16_t, unsigned int, + const struct rte_eth_rxconf *, struct rte_mempool *); +int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int, + const struct rte_eth_rxconf *, struct rte_mempool *); +void mlx5_rx_queue_release(void *); + +/* mlx5_txq.c */ + +void txq_cleanup(struct txq *); +int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int, + const struct rte_eth_txconf *); +void mlx5_tx_queue_release(void *); + +/* mlx5_rxtx.c */ + +uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t); +uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t); +uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t); +uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t); + +#endif /* RTE_PMD_MLX5_RXTX_H_ */ diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c new file mode 100644 index 0000000..2bae61f --- /dev/null +++ b/drivers/net/mlx5/mlx5_txq.c @@ -0,0 +1,512 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +/* Verbs header. */ +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +/* DPDK headers don't like -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-pedantic" +#endif +#include +#include +#include +#include +#ifdef PEDANTIC +#pragma GCC diagnostic error "-pedantic" +#endif + +#include "mlx5_utils.h" +#include "mlx5.h" +#include "mlx5_rxtx.h" +#include "mlx5_autoconf.h" +#include "mlx5_defs.h" + +/** + * Allocate TX queue elements. + * + * @param txq + * Pointer to TX queue structure. + * @param elts_n + * Number of elements to allocate. + * + * @return + * 0 on success, errno value on failure. + */ +static int +txq_alloc_elts(struct txq *txq, unsigned int elts_n) +{ + unsigned int i; + struct txq_elt (*elts)[elts_n] = + rte_calloc_socket("TXQ", 1, sizeof(*elts), 0, txq->socket); + linear_t (*elts_linear)[elts_n] = + rte_calloc_socket("TXQ", 1, sizeof(*elts_linear), 0, + txq->socket); + struct ibv_mr *mr_linear = NULL; + int ret = 0; + + if ((elts == NULL) || (elts_linear == NULL)) { + ERROR("%p: can't allocate packets array", (void *)txq); + ret = ENOMEM; + goto error; + } + mr_linear = + ibv_reg_mr(txq->priv->pd, elts_linear, sizeof(*elts_linear), + (IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE)); + if (mr_linear == NULL) { + ERROR("%p: unable to configure MR, ibv_reg_mr() failed", + (void *)txq); + ret = EINVAL; + goto error; + } + for (i = 0; (i != elts_n); ++i) { + struct txq_elt *elt = &(*elts)[i]; + + elt->buf = NULL; + } + DEBUG("%p: allocated and configured %u WRs", (void *)txq, elts_n); + txq->elts_n = elts_n; + txq->elts = elts; + txq->elts_head = 0; + txq->elts_tail = 0; + txq->elts_comp = 0; + /* Request send completion every MLX5_PMD_TX_PER_COMP_REQ packets or + * at least 4 times per ring. */ + txq->elts_comp_cd_init = + ((MLX5_PMD_TX_PER_COMP_REQ < (elts_n / 4)) ? + MLX5_PMD_TX_PER_COMP_REQ : (elts_n / 4)); + txq->elts_comp_cd = txq->elts_comp_cd_init; + txq->elts_linear = elts_linear; + txq->mr_linear = mr_linear; + assert(ret == 0); + return 0; +error: + if (mr_linear != NULL) + claim_zero(ibv_dereg_mr(mr_linear)); + + rte_free(elts_linear); + rte_free(elts); + + DEBUG("%p: failed, freed everything", (void *)txq); + assert(ret > 0); + return ret; +} + +/** + * Free TX queue elements. + * + * @param txq + * Pointer to TX queue structure. + */ +static void +txq_free_elts(struct txq *txq) +{ + unsigned int i; + unsigned int elts_n = txq->elts_n; + struct txq_elt (*elts)[elts_n] = txq->elts; + linear_t (*elts_linear)[elts_n] = txq->elts_linear; + struct ibv_mr *mr_linear = txq->mr_linear; + + DEBUG("%p: freeing WRs", (void *)txq); + txq->elts_n = 0; + txq->elts = NULL; + txq->elts_linear = NULL; + txq->mr_linear = NULL; + if (mr_linear != NULL) + claim_zero(ibv_dereg_mr(mr_linear)); + + rte_free(elts_linear); + if (elts == NULL) + return; + for (i = 0; (i != RTE_DIM(*elts)); ++i) { + struct txq_elt *elt = &(*elts)[i]; + + if (elt->buf == NULL) + continue; + rte_pktmbuf_free(elt->buf); + } + rte_free(elts); +} + +/** + * Clean up a TX queue. + * + * Destroy objects, free allocated memory and reset the structure for reuse. + * + * @param txq + * Pointer to TX queue structure. + */ +void +txq_cleanup(struct txq *txq) +{ + struct ibv_exp_release_intf_params params; + size_t i; + + DEBUG("cleaning up %p", (void *)txq); + txq_free_elts(txq); + if (txq->if_qp != NULL) { + assert(txq->priv != NULL); + assert(txq->priv->ctx != NULL); + assert(txq->qp != NULL); + params = (struct ibv_exp_release_intf_params){ + .comp_mask = 0, + }; + claim_zero(ibv_exp_release_intf(txq->priv->ctx, + txq->if_qp, + ¶ms)); + } + if (txq->if_cq != NULL) { + assert(txq->priv != NULL); + assert(txq->priv->ctx != NULL); + assert(txq->cq != NULL); + params = (struct ibv_exp_release_intf_params){ + .comp_mask = 0, + }; + claim_zero(ibv_exp_release_intf(txq->priv->ctx, + txq->if_cq, + ¶ms)); + } + if (txq->qp != NULL) + claim_zero(ibv_destroy_qp(txq->qp)); + if (txq->cq != NULL) + claim_zero(ibv_destroy_cq(txq->cq)); + if (txq->rd != NULL) { + struct ibv_exp_destroy_res_domain_attr attr = { + .comp_mask = 0, + }; + + assert(txq->priv != NULL); + assert(txq->priv->ctx != NULL); + claim_zero(ibv_exp_destroy_res_domain(txq->priv->ctx, + txq->rd, + &attr)); + } + for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { + if (txq->mp2mr[i].mp == NULL) + break; + assert(txq->mp2mr[i].mr != NULL); + claim_zero(ibv_dereg_mr(txq->mp2mr[i].mr)); + } + memset(txq, 0, sizeof(*txq)); +} + +/** + * Configure a TX queue. + * + * @param dev + * Pointer to Ethernet device structure. + * @param txq + * Pointer to TX queue structure. + * @param desc + * Number of descriptors to configure in queue. + * @param socket + * NUMA socket on which memory must be allocated. + * @param[in] conf + * Thresholds parameters. + * + * @return + * 0 on success, errno value on failure. + */ +static int +txq_setup(struct rte_eth_dev *dev, struct txq *txq, uint16_t desc, + unsigned int socket, const struct rte_eth_txconf *conf) +{ + struct priv *priv = dev->data->dev_private; + struct txq tmpl = { + .priv = priv, + .socket = socket + }; + union { + struct ibv_exp_query_intf_params params; + struct ibv_exp_qp_init_attr init; + struct ibv_exp_res_domain_init_attr rd; + struct ibv_exp_cq_init_attr cq; + struct ibv_exp_qp_attr mod; + } attr; + enum ibv_exp_query_intf_status status; + int ret = 0; + + (void)conf; /* Thresholds configuration (ignored). */ + if ((desc == 0) || (desc % MLX5_PMD_SGE_WR_N)) { + ERROR("%p: invalid number of TX descriptors (must be a" + " multiple of %d)", (void *)dev, MLX5_PMD_SGE_WR_N); + return EINVAL; + } + desc /= MLX5_PMD_SGE_WR_N; + /* MRs will be registered in mp2mr[] later. */ + attr.rd = (struct ibv_exp_res_domain_init_attr){ + .comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL | + IBV_EXP_RES_DOMAIN_MSG_MODEL), + .thread_model = IBV_EXP_THREAD_SINGLE, + .msg_model = IBV_EXP_MSG_HIGH_BW, + }; + tmpl.rd = ibv_exp_create_res_domain(priv->ctx, &attr.rd); + if (tmpl.rd == NULL) { + ret = ENOMEM; + ERROR("%p: RD creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } + attr.cq = (struct ibv_exp_cq_init_attr){ + .comp_mask = IBV_EXP_CQ_INIT_ATTR_RES_DOMAIN, + .res_domain = tmpl.rd, + }; + tmpl.cq = ibv_exp_create_cq(priv->ctx, desc, NULL, NULL, 0, &attr.cq); + if (tmpl.cq == NULL) { + ret = ENOMEM; + ERROR("%p: CQ creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } + DEBUG("priv->device_attr.max_qp_wr is %d", + priv->device_attr.max_qp_wr); + DEBUG("priv->device_attr.max_sge is %d", + priv->device_attr.max_sge); + attr.init = (struct ibv_exp_qp_init_attr){ + /* CQ to be associated with the send queue. */ + .send_cq = tmpl.cq, + /* CQ to be associated with the receive queue. */ + .recv_cq = tmpl.cq, + .cap = { + /* Max number of outstanding WRs. */ + .max_send_wr = ((priv->device_attr.max_qp_wr < desc) ? + priv->device_attr.max_qp_wr : + desc), + /* Max number of scatter/gather elements in a WR. */ + .max_send_sge = ((priv->device_attr.max_sge < + MLX5_PMD_SGE_WR_N) ? + priv->device_attr.max_sge : + MLX5_PMD_SGE_WR_N), +#if MLX5_PMD_MAX_INLINE > 0 + .max_inline_data = MLX5_PMD_MAX_INLINE, +#endif + }, + .qp_type = IBV_QPT_RAW_PACKET, + /* Do *NOT* enable this, completions events are managed per + * TX burst. */ + .sq_sig_all = 0, + .pd = priv->pd, + .res_domain = tmpl.rd, + .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_RES_DOMAIN), + }; + tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init); + if (tmpl.qp == NULL) { + ret = (errno ? errno : EINVAL); + ERROR("%p: QP creation failure: %s", + (void *)dev, strerror(ret)); + goto error; + } +#if MLX5_PMD_MAX_INLINE > 0 + /* ibv_create_qp() updates this value. */ + tmpl.max_inline = attr.init.cap.max_inline_data; +#endif + attr.mod = (struct ibv_exp_qp_attr){ + /* Move the QP to this state. */ + .qp_state = IBV_QPS_INIT, + /* Primary port number. */ + .port_num = priv->port + }; + ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, + (IBV_EXP_QP_STATE | IBV_EXP_QP_PORT)); + if (ret) { + ERROR("%p: QP state to IBV_QPS_INIT failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + ret = txq_alloc_elts(&tmpl, desc); + if (ret) { + ERROR("%p: TXQ allocation failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + attr.mod = (struct ibv_exp_qp_attr){ + .qp_state = IBV_QPS_RTR + }; + ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE); + if (ret) { + ERROR("%p: QP state to IBV_QPS_RTR failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + attr.mod.qp_state = IBV_QPS_RTS; + ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE); + if (ret) { + ERROR("%p: QP state to IBV_QPS_RTS failed: %s", + (void *)dev, strerror(ret)); + goto error; + } + attr.params = (struct ibv_exp_query_intf_params){ + .intf_scope = IBV_EXP_INTF_GLOBAL, + .intf = IBV_EXP_INTF_CQ, + .obj = tmpl.cq, + }; + tmpl.if_cq = ibv_exp_query_intf(priv->ctx, &attr.params, &status); + if (tmpl.if_cq == NULL) { + ret = EINVAL; + ERROR("%p: CQ interface family query failed with status %d", + (void *)dev, status); + goto error; + } + attr.params = (struct ibv_exp_query_intf_params){ + .intf_scope = IBV_EXP_INTF_GLOBAL, + .intf = IBV_EXP_INTF_QP_BURST, + .obj = tmpl.qp, + }; + tmpl.if_qp = ibv_exp_query_intf(priv->ctx, &attr.params, &status); + if (tmpl.if_qp == NULL) { + ret = EINVAL; + ERROR("%p: QP interface family query failed with status %d", + (void *)dev, status); + goto error; + } + /* Clean up txq in case we're reinitializing it. */ + DEBUG("%p: cleaning-up old txq just in case", (void *)txq); + txq_cleanup(txq); + *txq = tmpl; + DEBUG("%p: txq updated with %p", (void *)txq, (void *)&tmpl); + assert(ret == 0); + return 0; +error: + txq_cleanup(&tmpl); + assert(ret > 0); + return ret; +} + +/** + * DPDK callback to configure a TX queue. + * + * @param dev + * Pointer to Ethernet device structure. + * @param idx + * TX queue index. + * @param desc + * Number of descriptors to configure in queue. + * @param socket + * NUMA socket on which memory must be allocated. + * @param[in] conf + * Thresholds parameters. + * + * @return + * 0 on success, negative errno value on failure. + */ +int +mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_txconf *conf) +{ + struct priv *priv = dev->data->dev_private; + struct txq *txq = (*priv->txqs)[idx]; + int ret; + + priv_lock(priv); + DEBUG("%p: configuring queue %u for %u descriptors", + (void *)dev, idx, desc); + if (idx >= priv->txqs_n) { + ERROR("%p: queue index out of range (%u >= %u)", + (void *)dev, idx, priv->txqs_n); + priv_unlock(priv); + return -EOVERFLOW; + } + if (txq != NULL) { + DEBUG("%p: reusing already allocated queue index %u (%p)", + (void *)dev, idx, (void *)txq); + if (priv->started) { + priv_unlock(priv); + return -EEXIST; + } + (*priv->txqs)[idx] = NULL; + txq_cleanup(txq); + } else { + txq = rte_calloc_socket("TXQ", 1, sizeof(*txq), 0, socket); + if (txq == NULL) { + ERROR("%p: unable to allocate queue index %u", + (void *)dev, idx); + priv_unlock(priv); + return -ENOMEM; + } + } + ret = txq_setup(dev, txq, desc, socket, conf); + if (ret) + rte_free(txq); + else { + DEBUG("%p: adding TX queue %p to list", + (void *)dev, (void *)txq); + (*priv->txqs)[idx] = txq; + /* Update send callback. */ + dev->tx_pkt_burst = mlx5_tx_burst; + } + priv_unlock(priv); + return -ret; +} + +/** + * DPDK callback to release a TX queue. + * + * @param dpdk_txq + * Generic TX queue pointer. + */ +void +mlx5_tx_queue_release(void *dpdk_txq) +{ + struct txq *txq = (struct txq *)dpdk_txq; + struct priv *priv; + unsigned int i; + + if (txq == NULL) + return; + priv = txq->priv; + priv_lock(priv); + for (i = 0; (i != priv->txqs_n); ++i) + if ((*priv->txqs)[i] == txq) { + DEBUG("%p: removing TX queue %p from list", + (void *)priv->dev, (void *)txq); + (*priv->txqs)[i] = NULL; + break; + } + txq_cleanup(txq); + rte_free(txq); + priv_unlock(priv); +} diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index cc6aab6..e48e6b6 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -140,10 +140,21 @@ pmd_drv_log_basename(const char *s) #define WARN(...) PMD_DRV_LOG(WARNING, __VA_ARGS__) #define ERROR(...) PMD_DRV_LOG(ERR, __VA_ARGS__) +/* Convenience macros for accessing mbuf fields. */ +#define NEXT(m) ((m)->next) +#define DATA_LEN(m) ((m)->data_len) +#define PKT_LEN(m) ((m)->pkt_len) +#define DATA_OFF(m) ((m)->data_off) +#define SET_DATA_OFF(m, o) ((m)->data_off = (o)) +#define NB_SEGS(m) ((m)->nb_segs) +#define PORT(m) ((m)->port) + /* Allocate a buffer on the stack and fill it with a printf format string. */ #define MKSTR(name, ...) \ char name[snprintf(NULL, 0, __VA_ARGS__) + 1]; \ \ snprintf(name, sizeof(name), __VA_ARGS__) +#define WR_ID(o) (((wr_id_t *)&(o))->data) + #endif /* RTE_PMD_MLX5_UTILS_H_ */