@@ -254,6 +254,29 @@ Run-time configuration
- x86_64 with ConnectX-4, ConnectX-4 LX and ConnectX-5.
- POWER8 and ARMv8 with ConnectX-4 LX and ConnectX-5.
+- ``mprq_en`` parameter [int]
+
+ A nonzero value enables configuring Multi-Packet Rx queues. An Rx queue is
+ configured as a Multi-Packet RQ if the total number of Rx queues is
+ ``rxqs_min_mprq`` or more and Rx scatter isn't configured. Enabled by default.
+
+ Multi-Packet Rx Queue (MPRQ a.k.a. Striding RQ) can further save PCIe
+ bandwidth by posting a single large buffer for multiple packets. Instead of
+ posting one buffer per packet, one large buffer is posted to receive multiple
+ packets on it. An MPRQ buffer consists of multiple fixed-size strides and
+ each stride receives one packet.
+
+- ``mprq_max_memcpy_len`` parameter [int]
+
+ The maximum size of packet to be memcpy'd when Multi-Packet Rx queue is used.
+ An Rx packet is memcpy'd to a user-provided mbuf if its size is less than or
+ equal to this value. Otherwise, the packet is referenced by mbuf indirection
+ and the Mempool for the direct mbufs is allocated and managed by the PMD.
+ The default value is 128.
+
+- ``rxqs_min_mprq`` parameter [int]
+
+ Rx queues are configured as Multi-Packet RQ if the total number of Rx queues
+ is greater than or equal to this value. The default value is 12. See the
+ usage example below.
+
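+ For example, the three parameters above can be set via devargs on the
+ ``testpmd`` command line (the PCI address and queue counts are only
+ illustrative)::
+
+ testpmd -w 0000:03:00.0,mprq_en=1,mprq_max_memcpy_len=128,rxqs_min_mprq=12 \
+ -- --rxq=16 --txq=4 -i
+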
- ``txq_inline`` parameter [int]
Amount of data to be inlined during TX operations. Improves latency.
@@ -44,6 +44,18 @@
/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
+/* Device parameter to enable Multi-Packet Rx queue. */
+#define MLX5_RX_MPRQ_EN "mprq_en"
+
+/* Device parameter to limit the size of memcpy'd packet. */
+#define MLX5_RX_MPRQ_MAX_MEMCPY_LEN "mprq_max_memcpy_len"
+
+/*
+ * Device parameter to set the minimum number of Rx queues to configure
+ * Multi-Packet Rx queue.
+ */
+#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"
+
/* Device parameter to configure inline send. */
#define MLX5_TXQ_INLINE "txq_inline"
@@ -393,6 +405,12 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
}
if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
config->cqe_comp = !!tmp;
+ } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
+ config->mprq = !!tmp;
+ } else if (strcmp(MLX5_RX_MPRQ_MAX_MEMCPY_LEN, key) == 0) {
+ config->mprq_max_memcpy_len = tmp;
+ } else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
+ config->rxqs_mprq = tmp;
} else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
config->txq_inline = tmp;
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
@@ -431,6 +449,9 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
{
const char **params = (const char *[]){
MLX5_RXQ_CQE_COMP_EN,
+ MLX5_RX_MPRQ_EN,
+ MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
+ MLX5_RXQS_MIN_MPRQ,
MLX5_TXQ_INLINE,
MLX5_TXQS_MIN_INLINE,
MLX5_TXQ_MPW_EN,
@@ -600,6 +621,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
unsigned int mps;
unsigned int cqe_comp;
unsigned int tunnel_en = 0;
+ unsigned int mprq = 0;
int idx;
int i;
struct mlx5dv_context attrs_out = {0};
@@ -674,6 +696,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS;
#endif
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+ attrs_out.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
+#endif
mlx5_glue->dv_query_device(attr_ctx, &attrs_out);
if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
@@ -687,6 +712,37 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
DRV_LOG(DEBUG, "MPW isn't supported");
mps = MLX5_MPW_DISABLED;
}
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+ if (attrs_out.comp_mask & MLX5DV_CONTEXT_MASK_STRIDING_RQ) {
+ struct mlx5dv_striding_rq_caps mprq_caps =
+ attrs_out.striding_rq_caps;
+
+ DRV_LOG(DEBUG, "\tmin_single_stride_log_num_of_bytes: %d",
+ mprq_caps.min_single_stride_log_num_of_bytes);
+ DRV_LOG(DEBUG, "\tmax_single_stride_log_num_of_bytes: %d",
+ mprq_caps.max_single_stride_log_num_of_bytes);
+ DRV_LOG(DEBUG, "\tmin_single_wqe_log_num_of_strides: %d",
+ mprq_caps.min_single_wqe_log_num_of_strides);
+ DRV_LOG(DEBUG, "\tmax_single_wqe_log_num_of_strides: %d",
+ mprq_caps.max_single_wqe_log_num_of_strides);
+ DRV_LOG(DEBUG, "\tsupported_qpts: %d",
+ mprq_caps.supported_qpts);
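+ /*
+ * Enable MPRQ only if the device's advertised stride size and
+ * stride count ranges contain the PMD's compile-time defaults.
+ */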
+ if (mprq_caps.min_single_stride_log_num_of_bytes <=
+ MLX5_MPRQ_MIN_STRIDE_SZ_N &&
+ mprq_caps.max_single_stride_log_num_of_bytes >=
+ MLX5_MPRQ_STRIDE_SZ_N &&
+ mprq_caps.min_single_wqe_log_num_of_strides <=
+ MLX5_MPRQ_MIN_STRIDE_NUM_N &&
+ mprq_caps.max_single_wqe_log_num_of_strides >=
+ MLX5_MPRQ_STRIDE_NUM_N) {
+ DRV_LOG(DEBUG, "Multi-Packet RQ is supported");
+ mprq = 1;
+ } else {
+ DRV_LOG(DEBUG, "Multi-Packet RQ isn't supported");
+ mprq = 0;
+ }
+ }
+#endif
if (RTE_CACHE_LINE_SIZE == 128 &&
!(attrs_out.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
cqe_comp = 0;
@@ -733,6 +789,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
.txq_inline = MLX5_ARG_UNSET,
.txqs_inline = MLX5_ARG_UNSET,
.inline_max_packet_sz = MLX5_ARG_UNSET,
+ .mprq = 1, /* Enabled by default. */
+ .mprq_max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN,
+ .rxqs_mprq = MLX5_MPRQ_MIN_RXQS,
};
len = snprintf(name, sizeof(name), PCI_PRI_FMT,
@@ -890,6 +949,12 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
DRV_LOG(WARNING, "Rx CQE compression isn't supported");
config.cqe_comp = 0;
}
+ if (config.mprq && !mprq) {
+ DRV_LOG(WARNING, "Multi-Packet RQ isn't supported");
+ config.mprq = 0;
+ }
+ DRV_LOG(INFO, "Multi-Packet RQ is %s",
+ config.mprq ? "enabled" : "disabled");
eth_dev = rte_eth_dev_allocate(name);
if (eth_dev == NULL) {
DRV_LOG(ERR, "can not allocate rte ethdev");
@@ -87,6 +87,9 @@ struct mlx5_dev_config {
unsigned int tx_vec_en:1; /* Tx vector is enabled. */
unsigned int rx_vec_en:1; /* Rx vector is enabled. */
unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
+ unsigned int mprq:1; /* Whether Multi-Packet RQ is enabled. */
+ unsigned int mprq_max_memcpy_len; /* Maximum packet size to memcpy. */
+ unsigned int rxqs_mprq; /* Queue count threshold for Multi-Packet RQ. */
unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int ind_table_max_size; /* Maximum indirection table size. */
int txq_inline; /* Maximum packet size for inlining. */
@@ -95,4 +95,24 @@
*/
#define MLX5_UAR_OFFSET (1ULL << 32)
+/* Log 2 of the size of a stride for Multi-Packet RQ. */
+#define MLX5_MPRQ_STRIDE_SZ_N 11
+#define MLX5_MPRQ_MIN_STRIDE_SZ_N 6
+
+/* Log 2 of the number of strides per WQE for Multi-Packet RQ. */
+#define MLX5_MPRQ_STRIDE_NUM_N 4
+#define MLX5_MPRQ_MIN_STRIDE_NUM_N 3
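+
+/*
+ * With the defaults above, a Multi-Packet RQ buffer consists of
+ * 2^4 = 16 strides of 2^11 = 2048 bytes, i.e. 32KB per WQE.
+ */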
+
+/* Two-byte shift is disabled for Multi-Packet RQ. */
+#define MLX5_MPRQ_TWO_BYTE_SHIFT 0
+
+/* Default maximum size of packet to be memcpy'd instead of indirection. */
+#define MLX5_MPRQ_MEMCPY_DEFAULT_LEN 128
+
+/* Minimum number of Rx queues to enable Multi-Packet RQ. */
+#define MLX5_MPRQ_MIN_RXQS 12
+
+/* Cache size of mempool for Multi-Packet RQ. */
+#define MLX5_MPRQ_MP_CACHE_SZ 16
+
#endif /* RTE_PMD_MLX5_DEFS_H_ */
@@ -464,6 +464,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
};
if (dev->rx_pkt_burst == mlx5_rx_burst ||
+ dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
dev->rx_pkt_burst == mlx5_rx_burst_vec)
return ptypes;
return NULL;
@@ -1116,6 +1117,8 @@ mlx5_select_rx_function(struct rte_eth_dev *dev)
rx_pkt_burst = mlx5_rx_burst_vec;
DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
dev->data->port_id);
+ } else if (mlx5_mprq_enabled(dev)) {
+ rx_pkt_burst = mlx5_rx_burst_mprq;
}
return rx_pkt_burst;
}
@@ -195,6 +195,21 @@ struct mlx5_mpw {
} data;
};
+/* WQE for Multi-Packet RQ. */
+struct mlx5_wqe_mprq {
+ struct mlx5_wqe_srq_next_seg next_seg;
+ struct mlx5_wqe_data_seg dseg;
+};
+
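+/*
+ * The byte_cnt field of a Multi-Packet RQ CQE packs three values:
+ * bits 0-15 hold the packet length, bits 16-30 the number of consumed
+ * strides and bit 31 flags a filler CQE (strides consumed, no packet).
+ */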
+#define MLX5_MPRQ_LEN_MASK 0x0000ffff
+#define MLX5_MPRQ_LEN_SHIFT 0
+#define MLX5_MPRQ_STRIDE_NUM_MASK 0x7fff0000
+#define MLX5_MPRQ_STRIDE_NUM_SHIFT 16
+#define MLX5_MPRQ_FILLER_MASK 0x80000000
+#define MLX5_MPRQ_FILLER_SHIFT 31
+
+#define MLX5_MPRQ_STRIDE_SHIFT_BYTE 2
+
/* CQ element structure - should be equal to the cache line size */
struct mlx5_cqe {
#if (RTE_CACHE_LINE_SIZE == 128)
@@ -55,7 +55,75 @@ uint8_t rss_hash_default_key[] = {
const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
/**
- * Allocate RX queue elements.
+ * Check whether Multi-Packet RQ can be enabled for the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+inline int
+mlx5_check_mprq_support(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ if (priv->config.mprq && priv->rxqs_n >= priv->config.rxqs_mprq)
+ return 1;
+ return -ENOTSUP;
+}
+
+/**
+ * Check whether Multi-Packet RQ is enabled for the Rx queue.
+ *
+ * @param rxq
+ * Pointer to receive queue structure.
+ *
+ * @return
+ * 0 if disabled, otherwise enabled.
+ */
+static inline int
+rxq_mprq_enabled(struct mlx5_rxq_data *rxq)
+{
+ return rxq->mprq_mp != NULL;
+}
+
+/**
+ * Check whether Multi-Packet RQ is enabled for the device.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ *
+ * @return
+ * 0 if disabled, otherwise enabled.
+ */
+inline int
+mlx5_mprq_enabled(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+ uint16_t i;
+ uint16_t n = 0;
+
+ if (mlx5_check_mprq_support(dev) < 0)
+ return 0;
+ /* All the configured queues should be enabled. */
+ for (i = 0; i < priv->rxqs_n; ++i) {
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+
+ if (!rxq)
+ continue;
+ if (rxq_mprq_enabled(rxq))
+ ++n;
+ }
+ /* Multi-Packet RQ can't be partially configured. */
+ assert(n == 0 || n == priv->rxqs_n);
+ return n == priv->rxqs_n;
+}
+
+/**
+ * Allocate RX queue elements for Multi-Packet RQ.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
@@ -63,8 +131,60 @@ const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
-rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
+static int
+rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
+{
+ struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+ unsigned int wqe_n = 1 << rxq->elts_n;
+ unsigned int i;
+ int err;
+
+ /* Allocate one buffer per WQE plus one extra for replenishment. */
+ for (i = 0; i <= wqe_n; ++i) {
+ struct rte_mbuf *buf;
+
+ if (rte_mempool_get(rxq->mprq_mp, (void **)&buf) < 0) {
+ DRV_LOG(ERR, "port %u empty mbuf pool",
+ rxq_ctrl->priv->dev->data->port_id);
+ rte_errno = ENOMEM;
+ goto error;
+ }
+ if (i < wqe_n)
+ (*rxq->elts)[i] = buf;
+ else
+ rxq->mprq_repl = buf;
+ PORT(buf) = rxq->port_id;
+ }
+ DRV_LOG(DEBUG,
+ "port %u Rx queue %u allocated and configured %u segments",
+ rxq_ctrl->priv->dev->data->port_id, rxq_ctrl->idx, wqe_n);
+ return 0;
+error:
+ err = rte_errno; /* Save rte_errno before cleanup. */
+ wqe_n = i;
+ for (i = 0; (i != wqe_n); ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_mempool_put(rxq->mprq_mp,
+ (*rxq->elts)[i]);
+ (*rxq->elts)[i] = NULL;
+ }
+ DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
+ rxq_ctrl->priv->dev->data->port_id, rxq_ctrl->idx);
+ rte_errno = err; /* Restore rte_errno. */
+ return -rte_errno;
+}
+
+/**
+ * Allocate RX queue elements for Single-Packet RQ.
+ *
+ * @param rxq_ctrl
+ * Pointer to RX queue structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
@@ -140,6 +260,22 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
}
/**
+ * Allocate RX queue elements.
+ *
+ * @param rxq_ctrl
+ * Pointer to RX queue structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
+{
+ return rxq_mprq_enabled(&rxq_ctrl->rxq) ?
+ rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
+}
+
+/**
* Free RX queue elements.
*
* @param rxq_ctrl
@@ -172,6 +308,10 @@ rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
rte_pktmbuf_free_seg((*rxq->elts)[i]);
(*rxq->elts)[i] = NULL;
}
+ if (rxq->mprq_repl != NULL) {
+ rte_pktmbuf_free_seg(rxq->mprq_repl);
+ rxq->mprq_repl = NULL;
+ }
}
/**
@@ -623,10 +763,16 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
struct ibv_cq_init_attr_ex ibv;
struct mlx5dv_cq_init_attr mlx5;
} cq;
- struct ibv_wq_init_attr wq;
+ struct {
+ struct ibv_wq_init_attr ibv;
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+ struct mlx5dv_wq_init_attr mlx5;
+#endif
+ } wq;
struct ibv_cq_ex cq_attr;
} attr;
- unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+ unsigned int cqe_n;
+ unsigned int wqe_n = 1 << rxq_data->elts_n;
struct mlx5_rxq_ibv *tmpl;
struct mlx5dv_cq cq_info;
struct mlx5dv_rwq rwq;
@@ -634,6 +780,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
int ret = 0;
struct mlx5dv_obj obj;
struct mlx5_dev_config *config = &priv->config;
+ const int mprq_en = rxq_mprq_enabled(rxq_data);
assert(rxq_data);
assert(!rxq_ctrl->ibv);
@@ -659,6 +806,19 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
goto error;
}
}
+ if (mprq_en) {
+ tmpl->mprq_mr = mlx5_mr_get(dev, rxq_data->mprq_mp);
+ if (!tmpl->mprq_mr) {
+ tmpl->mprq_mr = mlx5_mr_new(dev, rxq_data->mprq_mp);
+ if (!tmpl->mprq_mr) {
+ DRV_LOG(ERR,
+ "port %u Rx queue %u: "
+ "MR creation failure for Multi-Packet RQ",
+ dev->data->port_id, rxq_ctrl->idx);
+ goto error;
+ }
+ }
+ }
if (rxq_ctrl->irq) {
tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx);
if (!tmpl->channel) {
@@ -668,6 +828,10 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
goto error;
}
}
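+ /* Each stride can hold a packet, so size the CQ for one CQE per stride. */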
+ if (mprq_en)
+ cqe_n = wqe_n * (1 << MLX5_MPRQ_STRIDE_NUM_N) - 1;
+ else
+ cqe_n = wqe_n - 1;
attr.cq.ibv = (struct ibv_cq_init_attr_ex){
.cqe = cqe_n,
.channel = tmpl->channel,
@@ -705,11 +869,11 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
dev->data->port_id, priv->device_attr.orig_attr.max_sge);
- attr.wq = (struct ibv_wq_init_attr){
+ attr.wq.ibv = (struct ibv_wq_init_attr){
.wq_context = NULL, /* Could be useful in the future. */
.wq_type = IBV_WQT_RQ,
/* Max number of outstanding WRs. */
- .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+ .max_wr = wqe_n >> rxq_data->sges_n,
/* Max number of scatter/gather elements in a WR. */
.max_sge = 1 << rxq_data->sges_n,
.pd = priv->pd,
@@ -723,8 +887,8 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
};
/* By default, FCS (CRC) is stripped by hardware. */
if (rxq_data->crc_present) {
- attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
- attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+ attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
}
#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
if (config->hw_padding) {
- attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
- attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+ attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
}
}
#endif
- tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq);
+#ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT
+ attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){
+ .comp_mask = 0,
+ };
+ if (mprq_en) {
+ struct mlx5dv_striding_rq_init_attr *mprq_attr =
+ &attr.wq.mlx5.striding_rq_attrs;
+
+ attr.wq.mlx5.comp_mask |= MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
+ *mprq_attr = (struct mlx5dv_striding_rq_init_attr){
+ .single_stride_log_num_of_bytes = MLX5_MPRQ_STRIDE_SZ_N,
+ .single_wqe_log_num_of_strides = MLX5_MPRQ_STRIDE_NUM_N,
+ .two_byte_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT,
+ };
+ }
+ tmpl->wq = mlx5_glue->dv_create_wq(priv->ctx, &attr.wq.ibv,
+ &attr.wq.mlx5);
+#else
+ tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq.ibv);
+#endif
if (tmpl->wq == NULL) {
DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure",
dev->data->port_id, idx);
@@ -743,16 +926,14 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
* Make sure number of WRs*SGEs match expectations since a queue
* cannot allocate more than "desc" buffers.
*/
- if (((int)attr.wq.max_wr !=
- ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
- ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+ if (attr.wq.ibv.max_wr != (wqe_n >> rxq_data->sges_n) ||
+ attr.wq.ibv.max_sge != (1u << rxq_data->sges_n)) {
DRV_LOG(ERR,
"port %u Rx queue %u requested %u*%u but got %u*%u"
" WRs*SGEs",
dev->data->port_id, idx,
- ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
- (1 << rxq_data->sges_n),
- attr.wq.max_wr, attr.wq.max_sge);
+ wqe_n >> rxq_data->sges_n, (1 << rxq_data->sges_n),
+ attr.wq.ibv.max_wr, attr.wq.ibv.max_sge);
rte_errno = EINVAL;
goto error;
}
@@ -787,25 +968,38 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
goto error;
}
/* Fill the rings. */
- rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
- (uintptr_t)rwq.buf;
- for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+ rxq_data->wqes = rwq.buf;
+ for (i = 0; (i != wqe_n); ++i) {
+ volatile struct mlx5_wqe_data_seg *scat;
struct rte_mbuf *buf = (*rxq_data->elts)[i];
- volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
-
+ uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
+ uint32_t byte_count;
+ uint32_t lkey;
+
+ if (mprq_en) {
+ scat = &((volatile struct mlx5_wqe_mprq *)
+ rxq_data->wqes)[i].dseg;
+ byte_count = (1 << MLX5_MPRQ_STRIDE_SZ_N) *
+ (1 << MLX5_MPRQ_STRIDE_NUM_N);
+ lkey = tmpl->mprq_mr->lkey;
+ } else {
+ scat = &((volatile struct mlx5_wqe_data_seg *)
+ rxq_data->wqes)[i];
+ byte_count = DATA_LEN(buf);
+ lkey = tmpl->mr->lkey;
+ }
/* scat->addr must be able to store a pointer. */
assert(sizeof(scat->addr) >= sizeof(uintptr_t));
*scat = (struct mlx5_wqe_data_seg){
- .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
- uintptr_t)),
- .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
- .lkey = tmpl->mr->lkey,
+ .addr = rte_cpu_to_be_64(addr),
+ .byte_count = rte_cpu_to_be_32(byte_count),
+ .lkey = lkey
};
}
rxq_data->rq_db = rwq.dbrec;
rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
rxq_data->cq_ci = 0;
- rxq_data->rq_ci = 0;
+ rxq_data->strd_ci = 0;
rxq_data->rq_pi = 0;
rxq_data->zip = (struct rxq_zip){
.ai = 0,
@@ -816,7 +1010,7 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
rxq_data->cqn = cq_info.cqn;
rxq_data->cq_arm_sn = 0;
/* Update doorbell counter. */
- rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+ rxq_data->rq_ci = wqe_n >> rxq_data->sges_n;
rte_wmb();
*rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id,
@@ -835,6 +1029,8 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
claim_zero(mlx5_glue->destroy_cq(tmpl->cq));
if (tmpl->channel)
claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel));
+ if (tmpl->mprq_mr)
+ mlx5_mr_release(tmpl->mprq_mr);
if (tmpl->mr)
mlx5_mr_release(tmpl->mr);
priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
@@ -867,6 +1063,8 @@ mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
if (rxq_ctrl->ibv) {
mlx5_mr_get(dev, rxq_data->mp);
+ if (rxq_mprq_enabled(rxq_data))
+ mlx5_mr_get(dev, rxq_data->mprq_mp);
rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
dev->data->port_id, rxq_ctrl->idx,
@@ -896,6 +1094,11 @@ mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
ret = mlx5_mr_release(rxq_ibv->mr);
if (!ret)
rxq_ibv->mr = NULL;
+ if (rxq_mprq_enabled(&rxq_ibv->rxq_ctrl->rxq)) {
+ ret = mlx5_mr_release(rxq_ibv->mprq_mr);
+ if (!ret)
+ rxq_ibv->mprq_mr = NULL;
+ }
DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
rxq_ibv->rxq_ctrl->priv->dev->data->port_id,
rxq_ibv->rxq_ctrl->idx, rte_atomic32_read(&rxq_ibv->refcnt));
@@ -951,12 +1154,101 @@ mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv)
}
/**
+ * Callback function to initialize mbufs for Multi-Packet RQ.
+ */
+static inline void
+mlx5_mprq_mbuf_init(struct rte_mempool *mp, void *opaque_arg,
+ void *_m, unsigned int i)
+{
+ struct rte_mbuf *m = _m;
+
+ rte_pktmbuf_init(mp, opaque_arg, _m, i);
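+ /*
+ * Override the default buf_len so that it covers the whole set of
+ * strides in a WQE, matching the byte_count posted in the Rx WQE.
+ */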
+ m->buf_len =
+ (1 << MLX5_MPRQ_STRIDE_SZ_N) * (1 << MLX5_MPRQ_STRIDE_NUM_N);
+ rte_pktmbuf_reset_headroom(m);
+}
+
+/**
+ * Configure Rx queue as Multi-Packet RQ.
+ *
+ * @param rxq_ctrl
+ * Pointer to RX queue structure.
+ * @param idx
+ * RX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+rxq_configure_mprq(struct mlx5_rxq_ctrl *rxq_ctrl, uint16_t idx, uint16_t desc)
+{
+ struct priv *priv = rxq_ctrl->priv;
+ struct rte_eth_dev *dev = priv->dev;
+ struct mlx5_dev_config *config = &priv->config;
+ struct rte_mempool *mp;
+ char name[RTE_MEMPOOL_NAMESIZE];
+ unsigned int buf_len;
+ unsigned int obj_size;
+
+ assert(rxq_ctrl->rxq.sges_n == 0);
+ rxq_ctrl->rxq.strd_sz_n =
+ MLX5_MPRQ_STRIDE_SZ_N - MLX5_MPRQ_MIN_STRIDE_SZ_N;
+ rxq_ctrl->rxq.strd_num_n =
+ MLX5_MPRQ_STRIDE_NUM_N - MLX5_MPRQ_MIN_STRIDE_NUM_N;
+ rxq_ctrl->rxq.strd_shift_en = MLX5_MPRQ_TWO_BYTE_SHIFT;
+ rxq_ctrl->rxq.mprq_max_memcpy_len = config->mprq_max_memcpy_len;
+ buf_len = (1 << MLX5_MPRQ_STRIDE_SZ_N) * (1 << MLX5_MPRQ_STRIDE_NUM_N) +
+ RTE_PKTMBUF_HEADROOM;
+ obj_size = buf_len + sizeof(struct rte_mbuf);
+ snprintf(name, sizeof(name), "%s-mprq-%u", dev->data->name, idx);
+ /*
+ * Allocate per-queue Mempool for Multi-Packet RQ.
+ *
+ * Received packets can be either memcpy'd or indirectly referenced. In
+ * case of mbuf indirection, as it isn't possible to predict how the
+ * buffers will be queued by the application, there's no option but to
+ * speculatively prepare enough buffers in advance.
+ *
+ * In the data path, if this Mempool is depleted, the PMD will try to
+ * memcpy received packets into buffers provided by the application
+ * (rxq->mp) until this Mempool becomes available again.
+ */
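+ /* Speculatively prepare buffers for 4x the number of descriptors. */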
+ desc *= 4;
+ mp = rte_mempool_create(name, desc + MLX5_MPRQ_MP_CACHE_SZ,
+ obj_size, MLX5_MPRQ_MP_CACHE_SZ,
+ sizeof(struct rte_pktmbuf_pool_private),
+ NULL, NULL, NULL, NULL,
+ dev->device->numa_node,
+ MEMPOOL_F_SC_GET);
+ if (mp == NULL) {
+ DRV_LOG(ERR,
+ "port %u Rx queue %u: failed to allocate a mempool for"
+ " Multi-Packet RQ",
+ dev->data->port_id, idx);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+
+ rte_pktmbuf_pool_init(mp, NULL);
+ rte_mempool_obj_iter(mp, mlx5_mprq_mbuf_init, NULL);
+ rxq_ctrl->rxq.mprq_mp = mp;
+ DRV_LOG(DEBUG, "port %u Rx queue %u: Multi-Packet RQ is enabled",
+ dev->data->port_id, idx);
+ return 0;
+}
+
+/**
* Create a DPDK Rx queue.
*
* @param dev
* Pointer to Ethernet device.
* @param idx
- * TX queue index.
+ * RX queue index.
* @param desc
* Number of descriptors to configure in queue.
* @param socket
@@ -978,8 +1270,9 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
* Always allocate extra slots, even if eventually
* the vector Rx will not be used.
*/
- const uint16_t desc_n =
+ uint16_t desc_n =
desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+ const int mprq_en = mlx5_check_mprq_support(dev) > 0;
tmpl = rte_calloc_socket("RXQ", 1,
sizeof(*tmpl) +
@@ -989,13 +1282,35 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
rte_errno = ENOMEM;
return NULL;
}
+ tmpl->priv = priv;
tmpl->socket = socket;
if (priv->dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
- /* Enable scattered packets support for this queue if necessary. */
+ /*
+ * This Rx queue can be configured as a Multi-Packet RQ if all of the
+ * following conditions are met:
+ * - MPRQ is enabled.
+ * - The number of descs is more than the number of strides.
+ * - max_rx_pkt_len plus the mbuf headroom fits within a single stride.
+ *
+ * Otherwise, enable Rx scatter if necessary.
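+ *
+ * With the default 2^11-byte strides, the last condition limits MPRQ to
+ * max_rx_pkt_len <= 2048 - RTE_PKTMBUF_HEADROOM (1920 bytes with the
+ * default 128-byte headroom).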
+ */
assert(mb_len >= RTE_PKTMBUF_HEADROOM);
- if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
- (mb_len - RTE_PKTMBUF_HEADROOM)) {
+ if (mprq_en &&
+ desc >= (1U << MLX5_MPRQ_STRIDE_NUM_N) &&
+ dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ (1U << MLX5_MPRQ_STRIDE_SZ_N) - RTE_PKTMBUF_HEADROOM) {
+ int ret;
+
+ /* TODO: Rx scatter isn't supported yet. */
+ tmpl->rxq.sges_n = 0;
+ /* Trim the number of descs needed. */
+ desc >>= MLX5_MPRQ_STRIDE_NUM_N;
+ ret = rxq_configure_mprq(tmpl, idx, desc);
+ if (ret)
+ goto error;
+ } else if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ (mb_len - RTE_PKTMBUF_HEADROOM)) {
tmpl->rxq.sges_n = 0;
} else if (conf->offloads & DEV_RX_OFFLOAD_SCATTER) {
unsigned int size =
@@ -1073,7 +1388,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf &&
(!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS));
tmpl->rxq.port_id = dev->data->port_id;
- tmpl->priv = priv;
tmpl->rxq.mp = mp;
tmpl->rxq.stats.idx = idx;
tmpl->rxq.elts_n = log2above(desc);
@@ -1146,6 +1460,27 @@ mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id,
rxq_ctrl->idx, rte_atomic32_read(&rxq_ctrl->refcnt));
if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+ if (rxq_ctrl->rxq.mprq_mp != NULL) {
+ /* If a mbuf in the pool has an indirect mbuf attached
+ * and it is still in use by the application, destroying
+ * the Rx queue can spoil the packet. It is unlikely
+ * to happen but if an application dynamically creates
+ * and destroys queues while holding Rx packets, this
+ * can happen.
+ *
+ * TODO: It is unavoidable for now because the Mempool
+ * for Multi-Packet RQ isn't provided by the application
+ * but managed by the PMD.
+ */
+ if (!rte_mempool_full(rxq_ctrl->rxq.mprq_mp)) {
+ DRV_LOG(DEBUG,
+ "port %u Rx queue %u: "
+ "Mempool for Multi-Packet RQ is still in use",
+ dev->data->port_id, rxq_ctrl->idx);
+ return 1;
+ }
+ rte_mempool_free(rxq_ctrl->rxq.mprq_mp);
+ rxq_ctrl->rxq.mprq_mp = NULL;
+ }
LIST_REMOVE(rxq_ctrl, next);
rte_free(rxq_ctrl);
(*priv->rxqs)[idx] = NULL;
@@ -1840,7 +1840,8 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
while (pkts_n) {
unsigned int idx = rq_ci & wqe_cnt;
- volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
+ volatile struct mlx5_wqe_data_seg *wqe =
+ &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
struct rte_mbuf *rep = (*rxq->elts)[idx];
uint32_t rss_hash_res = 0;
@@ -1941,6 +1942,155 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
/**
+ * DPDK callback for RX with Multi-Packet RQ support.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_rxq_data *rxq = dpdk_rxq;
+ const unsigned int strd_n =
+ 1 << (rxq->strd_num_n + MLX5_MPRQ_MIN_STRIDE_NUM_N);
+ const unsigned int strd_sz =
+ 1 << (rxq->strd_sz_n + MLX5_MPRQ_MIN_STRIDE_SZ_N);
+ const unsigned int strd_shift =
+ MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
+ const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
+ const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
+ volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+ unsigned int i = 0;
+ uint16_t rq_ci = rxq->rq_ci;
+ uint16_t strd_idx = rxq->strd_ci;
+ struct rte_mbuf *buf = (*rxq->elts)[rq_ci & wq_mask];
+
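+ /*
+ * A WQE is a bundle of strides. Packets are delivered stride by
+ * stride and a WQE is recycled only once all of its strides have
+ * been consumed (strd_idx reaches strd_n).
+ */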
+ while (i < pkts_n) {
+ struct rte_mbuf *pkt;
+ int ret;
+ unsigned int len;
+ uint16_t consumed_strd;
+ uint32_t offset;
+ uint32_t byte_cnt;
+ uint32_t rss_hash_res = 0;
+
+ if (strd_idx == strd_n) {
+ /* Replace the mbuf only if the application still holds it. */
+ if (unlikely(rte_mbuf_refcnt_read(buf) > 1)) {
+ struct rte_mbuf *rep = rxq->mprq_repl;
+ volatile struct mlx5_wqe_data_seg *wqe =
+ &((volatile struct mlx5_wqe_mprq *)
+ rxq->wqes)[rq_ci & wq_mask].dseg;
+ uintptr_t addr;
+
+ /* Replace mbuf. */
+ (*rxq->elts)[rq_ci & wq_mask] = rep;
+ PORT(rep) = PORT(buf);
+ /* Release the old buffer. */
+ if (__rte_mbuf_refcnt_update(buf, -1) == 0) {
+ rte_mbuf_refcnt_set(buf, 1);
+ rte_mbuf_raw_free(buf);
+ }
+ /* Replace WQE. */
+ addr = rte_pktmbuf_mtod(rep, uintptr_t);
+ wqe->addr = rte_cpu_to_be_64(addr);
+ /* Stash a mbuf for next replacement. */
+ if (likely(!rte_mempool_get(rxq->mprq_mp,
+ (void **)&rep)))
+ rxq->mprq_repl = rep;
+ else
+ rxq->mprq_repl = NULL;
+ }
+ /* Advance to the next WQE. */
+ strd_idx = 0;
+ ++rq_ci;
+ buf = (*rxq->elts)[rq_ci & wq_mask];
+ }
+ cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
+ ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &rss_hash_res);
+ if (!ret)
+ break;
+ if (unlikely(ret == -1)) {
+ /* RX error, packet is likely too large. */
+ ++rxq->stats.idropped;
+ continue;
+ }
+ byte_cnt = ret;
+ offset = strd_idx * strd_sz + strd_shift;
+ consumed_strd = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
+ MLX5_MPRQ_STRIDE_NUM_SHIFT;
+ strd_idx += consumed_strd;
+ if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
+ continue;
+ pkt = rte_pktmbuf_alloc(rxq->mp);
+ if (unlikely(pkt == NULL)) {
+ ++rxq->stats.rx_nombuf;
+ break;
+ }
+ len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
+ assert((int)len >= (rxq->crc_present << 2));
+ if (rxq->crc_present)
+ len -= ETHER_CRC_LEN;
+ /*
+ * Memcpy packets to the target mbuf if:
+ * - The size of the packet doesn't exceed mprq_max_memcpy_len.
+ * - There's no stashed replacement buffer (the Mempool for
+ * Multi-Packet RQ is depleted).
+ */
+ if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
+ uintptr_t base = rte_pktmbuf_mtod(buf, uintptr_t);
+
+ rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
+ (void *)(base + offset), len);
+ /* Initialize the offload flag. */
+ pkt->ol_flags = 0;
+ } else {
+ /*
+ * IND_ATTACHED_MBUF will be set to pkt->ol_flags when
+ * attaching the mbuf and more offload flags will be
+ * added below by calling rxq_cq_to_mbuf(). Other fields
+ * will be overwritten.
+ */
+ rte_pktmbuf_attach_at(pkt, buf, offset,
+ consumed_strd * strd_sz);
+ assert(pkt->ol_flags == IND_ATTACHED_MBUF);
+ rte_pktmbuf_reset_headroom(pkt);
+ }
+ rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
+ PKT_LEN(pkt) = len;
+ DATA_LEN(pkt) = len;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment bytes counter. */
+ rxq->stats.ibytes += PKT_LEN(pkt);
+#endif
+ /* Return packet. */
+ *(pkts++) = pkt;
+ ++i;
+ }
+ /* Update the consumer index. */
+ rxq->rq_pi += i;
+ rxq->strd_ci = strd_idx;
+ rte_io_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ if (rq_ci != rxq->rq_ci) {
+ rxq->rq_ci = rq_ci;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Increment packets counter. */
+ rxq->stats.ipackets += i;
+#endif
+ return i;
+}
+
+/**
* Dummy DPDK callback for TX.
*
* This function is used to temporarily replace the real callback during
@@ -86,18 +86,25 @@ struct mlx5_rxq_data {
unsigned int elts_n:4; /* Log 2 of Mbufs. */
unsigned int rss_hash:1; /* RSS hash result is enabled. */
unsigned int mark:1; /* Marked flow available on the queue. */
- unsigned int :15; /* Remaining bits. */
+ unsigned int strd_sz_n:3; /* Log 2 of stride size. */
+ unsigned int strd_num_n:4; /* Log 2 of the number of strides. */
+ unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */
+ unsigned int :7; /* Remaining bits. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
uint16_t rq_ci;
+ uint16_t strd_ci; /* Stride index in a WQE for Multi-Packet RQ. */
uint16_t rq_pi;
uint16_t cq_ci;
- volatile struct mlx5_wqe_data_seg(*wqes)[];
+ uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
+ volatile void *wqes;
volatile struct mlx5_cqe(*cqes)[];
struct rxq_zip zip; /* Compressed context. */
struct rte_mbuf *(*elts)[];
struct rte_mempool *mp;
+ struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
+ struct rte_mbuf *mprq_repl; /* Stashed mbuf for replenish. */
struct mlx5_rxq_stats stats;
uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
@@ -115,6 +122,7 @@ struct mlx5_rxq_ibv {
struct ibv_wq *wq; /* Work Queue. */
struct ibv_comp_channel *channel;
struct mlx5_mr *mr; /* Memory Region (for mp). */
+ struct mlx5_mr *mprq_mr; /* Memory Region (for mprq_mp). */
};
/* RX queue control descriptor. */
@@ -213,6 +221,8 @@ struct mlx5_txq_ctrl {
extern uint8_t rss_hash_default_key[];
extern const size_t rss_hash_default_key_len;
+int mlx5_check_mprq_support(struct rte_eth_dev *dev);
+int mlx5_mprq_enabled(struct rte_eth_dev *dev);
void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl);
int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
unsigned int socket, const struct rte_eth_rxconf *conf,
@@ -236,6 +246,7 @@ int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_verify(struct rte_eth_dev *dev);
int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_new(struct rte_eth_dev *dev,
uint16_t queues[],
uint16_t queues_n);
@@ -291,6 +302,8 @@ uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
+uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n);
uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts,
@@ -278,6 +278,8 @@ mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
struct mlx5_rxq_ctrl *ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ if (mlx5_mprq_enabled(ctrl->priv->dev))
+ return -ENOTSUP;
if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
return -ENOTSUP;
return 1;
@@ -300,6 +302,8 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
if (!priv->config.rx_vec_en)
return -ENOTSUP;
+ if (mlx5_mprq_enabled(dev))
+ return -ENOTSUP;
/* All the configured queues should support. */
for (i = 0; i < priv->rxqs_n; ++i) {
struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
@@ -87,7 +87,8 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
const uint16_t q_mask = q_n - 1;
uint16_t elts_idx = rxq->rq_ci & q_mask;
struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
- volatile struct mlx5_wqe_data_seg *wq = &(*rxq->wqes)[elts_idx];
+ volatile struct mlx5_wqe_data_seg *wq =
+ &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
unsigned int i;
assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);