get:
Show a patch.

patch:
Partially update a patch (only the fields supplied in the request are changed).

put:
Update a patch (full update; omitted writable fields are reset).
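These operations map onto ordinary HTTP requests against the endpoint shown in the exchange below. As a minimal, illustrative sketch (Python with the requests library is an assumption here, not something Patchwork mandates; reads need no authentication, while PUT/PATCH require an API token), the GET request reproduced below could be issued as:

import requests

# Retrieve the patch record shown below; no authentication is needed for reads.
# Asking for application/json skips the browsable HTML renderer.
resp = requests.get(
    "http://patches.dpdk.org/api/patches/81710/",
    headers={"Accept": "application/json"},
    timeout=10,
)
resp.raise_for_status()
patch = resp.json()
print(patch["name"], "-", patch["state"])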

GET /api/patches/81710/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 81710,
    "url": "http://patches.dpdk.org/api/patches/81710/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20201021203030.19042-2-akozyrev@nvidia.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20201021203030.19042-2-akozyrev@nvidia.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20201021203030.19042-2-akozyrev@nvidia.com",
    "date": "2020-10-21T20:30:29",
    "name": "[v2,1/2] net/mlx5: refactor vectorized Rx routine",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "e1d208ddfa9f12a99658fd064fd102e1c0fce503",
    "submitter": {
        "id": 1873,
        "url": "http://patches.dpdk.org/api/people/1873/?format=api",
        "name": "Alexander Kozyrev",
        "email": "akozyrev@nvidia.com"
    },
    "delegate": {
        "id": 3268,
        "url": "http://patches.dpdk.org/api/users/3268/?format=api",
        "username": "rasland",
        "first_name": "Raslan",
        "last_name": "Darawsheh",
        "email": "rasland@nvidia.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20201021203030.19042-2-akozyrev@nvidia.com/mbox/",
    "series": [
        {
            "id": 13181,
            "url": "http://patches.dpdk.org/api/series/13181/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=13181",
            "date": "2020-10-21T20:30:28",
            "name": "net/mlx5: add vectorized mprq",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/13181/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/81710/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/81710/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 2D727A04DD;\n\tWed, 21 Oct 2020 22:31:11 +0200 (CEST)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 6ACB2A575;\n\tWed, 21 Oct 2020 22:30:55 +0200 (CEST)",
            "from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129])\n by dpdk.org (Postfix) with ESMTP id E5F24A573\n for <dev@dpdk.org>; Wed, 21 Oct 2020 22:30:53 +0200 (CEST)",
            "from Internal Mail-Server by MTLPINE1 (envelope-from\n akozyrev@nvidia.com) with SMTP; 21 Oct 2020 23:30:47 +0300",
            "from nvidia.com (pegasus02.mtr.labs.mlnx [10.210.16.122])\n by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 09LKUl31019906;\n Wed, 21 Oct 2020 23:30:47 +0300"
        ],
        "From": "Alexander Kozyrev <akozyrev@nvidia.com>",
        "To": "dev@dpdk.org",
        "Cc": "rasland@nvidia.com, matan@nvidia.com, viacheslavo@nvidia.com",
        "Date": "Wed, 21 Oct 2020 20:30:29 +0000",
        "Message-Id": "<20201021203030.19042-2-akozyrev@nvidia.com>",
        "X-Mailer": "git-send-email 2.24.1",
        "In-Reply-To": "<20201021203030.19042-1-akozyrev@nvidia.com>",
        "References": "<20200719041142.14485-1-akozyrev@mellanox.com>\n <20201021203030.19042-1-akozyrev@nvidia.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Subject": "[dpdk-dev] [PATCH v2 1/2] net/mlx5: refactor vectorized Rx routine",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Move the main processing cycle into a separate function:\nrxq_cq_process_v. Put the regular rxq_burst_v function\nto a non-arch specific file. Having all SIMD instructions\nin a single reusable block is a first preparatory step to\nimplement vectorized Rx burst for MPRQ feature.\n\nPass a pointer to the storage of mbufs directly to the\nrxq_copy_mbuf_v instead of calculating the pointer inside\nthis function. This is needed for the future vectorized Rx\nrouting which is going to pass a different pointer here.\n\nCalculate the number of packets to replenish inside the\nmlx5_rx_replenish_bulk_mbuf. Containing this logic in one\nplace allows us to do the same for MPRQ case.\n\nSigned-off-by: Alexander Kozyrev <akozyrev@nvidia.com>\nAcked-by: Slava Ovsiienko <viacheslavo@nvidia.com>\n---\n drivers/net/mlx5/mlx5_rxtx_vec.c         | 104 +++++++++++++++++++\n drivers/net/mlx5/mlx5_rxtx_vec.h         |  69 ++++++-------\n drivers/net/mlx5/mlx5_rxtx_vec_altivec.h | 106 ++++----------------\n drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 103 ++++---------------\n drivers/net/mlx5/mlx5_rxtx_vec_sse.h     | 121 +++++------------------\n 5 files changed, 204 insertions(+), 299 deletions(-)",
    "diff": "diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c\nindex f083038682..aa48775738 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec.c\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c\n@@ -77,6 +77,110 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,\n \treturn n;\n }\n \n+/**\n+ * Receive burst of packets. An errored completion also consumes a mbuf, but the\n+ * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed\n+ * before returning to application.\n+ *\n+ * @param rxq\n+ *   Pointer to RX queue structure.\n+ * @param[out] pkts\n+ *   Array to store received packets.\n+ * @param pkts_n\n+ *   Maximum number of packets in array.\n+ * @param[out] err\n+ *   Pointer to a flag. Set non-zero value if pkts array has at least one error\n+ *   packet to handle.\n+ * @param[out] no_cq\n+ *   Pointer to a boolean. Set true if no new CQE seen.\n+ *\n+ * @return\n+ *   Number of packets received including errors (<= pkts_n).\n+ */\n+static inline uint16_t\n+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,\n+\t    uint16_t pkts_n, uint64_t *err, bool *no_cq)\n+{\n+\tconst uint16_t q_n = 1 << rxq->cqe_n;\n+\tconst uint16_t q_mask = q_n - 1;\n+\tconst uint16_t e_n = 1 << rxq->elts_n;\n+\tconst uint16_t e_mask = e_n - 1;\n+\tvolatile struct mlx5_cqe *cq;\n+\tstruct rte_mbuf **elts;\n+\tuint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;\n+\tuint16_t nocmp_n = 0;\n+\tuint16_t rcvd_pkt = 0;\n+\tunsigned int cq_idx = rxq->cq_ci & q_mask;\n+\tunsigned int elts_idx;\n+\n+\tMLX5_ASSERT(rxq->sges_n == 0);\n+\tMLX5_ASSERT(rxq->cqe_n == rxq->elts_n);\n+\tcq = &(*rxq->cqes)[cq_idx];\n+\trte_prefetch0(cq);\n+\trte_prefetch0(cq + 1);\n+\trte_prefetch0(cq + 2);\n+\trte_prefetch0(cq + 3);\n+\tpkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);\n+\tmlx5_rx_replenish_bulk_mbuf(rxq);\n+\t/* See if there're unreturned mbufs from compressed CQE. */\n+\trcvd_pkt = rxq->decompressed;\n+\tif (rcvd_pkt > 0) {\n+\t\trcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);\n+\t\trxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],\n+\t\t\t\tpkts, rcvd_pkt);\n+\t\trxq->rq_pi += rcvd_pkt;\n+\t\trxq->decompressed -= rcvd_pkt;\n+\t\tpkts += rcvd_pkt;\n+\t}\n+\telts_idx = rxq->rq_pi & e_mask;\n+\telts = &(*rxq->elts)[elts_idx];\n+\t/* Not to overflow pkts array. */\n+\tpkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);\n+\t/* Not to cross queue end. */\n+\tpkts_n = RTE_MIN(pkts_n, q_n - elts_idx);\n+\tpkts_n = RTE_MIN(pkts_n, q_n - cq_idx);\n+\tif (!pkts_n) {\n+\t\t*no_cq = !rcvd_pkt;\n+\t\treturn rcvd_pkt;\n+\t}\n+\t/* At this point, there shouldn't be any remaining packets. */\n+\tMLX5_ASSERT(rxq->decompressed == 0);\n+\t/* Process all the CQEs */\n+\tnocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);\n+\t/* If no new CQE seen, return without updating cq_db. */\n+\tif (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {\n+\t\t*no_cq = true;\n+\t\treturn rcvd_pkt;\n+\t}\n+\t/* Update the consumer indexes for non-compressed CQEs. */\n+\tMLX5_ASSERT(nocmp_n <= pkts_n);\n+\trxq->cq_ci += nocmp_n;\n+\trxq->rq_pi += nocmp_n;\n+\trcvd_pkt += nocmp_n;\n+\t/* Decompress the last CQE if compressed. */\n+\tif (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {\n+\t\tMLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));\n+\t\trxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],\n+\t\t\t\t\t\t\t&elts[nocmp_n]);\n+\t\trxq->cq_ci += rxq->decompressed;\n+\t\t/* Return more packets if needed. 
*/\n+\t\tif (nocmp_n < pkts_n) {\n+\t\t\tuint16_t n = rxq->decompressed;\n+\n+\t\t\tn = RTE_MIN(n, pkts_n - nocmp_n);\n+\t\t\trxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],\n+\t\t\t\t\t&pkts[nocmp_n], n);\n+\t\t\trxq->rq_pi += n;\n+\t\t\trcvd_pkt += n;\n+\t\t\trxq->decompressed -= n;\n+\t\t}\n+\t}\n+\trte_io_wmb();\n+\t*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);\n+\t*no_cq = !rcvd_pkt;\n+\treturn rcvd_pkt;\n+}\n+\n /**\n  * DPDK callback for vectorized RX.\n  *\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h\nindex a8d6c4f411..ce27074b08 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec.h\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h\n@@ -73,53 +73,54 @@ S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==\n  *\n  * @param rxq\n  *   Pointer to RX queue structure.\n- * @param n\n- *   Number of buffers to be replenished.\n  */\n static inline void\n-mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)\n+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)\n {\n \tconst uint16_t q_n = 1 << rxq->elts_n;\n \tconst uint16_t q_mask = q_n - 1;\n+\tuint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);\n \tuint16_t elts_idx = rxq->rq_ci & q_mask;\n \tstruct rte_mbuf **elts = &(*rxq->elts)[elts_idx];\n \tvolatile struct mlx5_wqe_data_seg *wq =\n \t\t&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];\n \tunsigned int i;\n \n-\tMLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));\n-\tMLX5_ASSERT(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));\n-\tMLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >\n-\t\t    MLX5_VPMD_DESCS_PER_LOOP);\n-\t/* Not to cross queue end. */\n-\tn = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);\n-\tif (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {\n-\t\trxq->stats.rx_nombuf += n;\n-\t\treturn;\n-\t}\n-\tfor (i = 0; i < n; ++i) {\n-\t\tvoid *buf_addr;\n+\tif (n >= rxq->rq_repl_thresh) {\n+\t\tMLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));\n+\t\tMLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >\n+\t\t\t    MLX5_VPMD_DESCS_PER_LOOP);\n+\t\t/* Not to cross queue end. */\n+\t\tn = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);\n+\t\tif (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {\n+\t\t\trxq->stats.rx_nombuf += n;\n+\t\t\treturn;\n+\t\t}\n+\t\tfor (i = 0; i < n; ++i) {\n+\t\t\tvoid *buf_addr;\n \n-\t\t/*\n-\t\t * In order to support the mbufs with external attached\n-\t\t * data buffer we should use the buf_addr pointer instead of\n-\t\t * rte_mbuf_buf_addr(). It touches the mbuf itself and may\n-\t\t * impact the performance.\n-\t\t */\n-\t\tbuf_addr = elts[i]->buf_addr;\n-\t\twq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +\n-\t\t\t\t\t      RTE_PKTMBUF_HEADROOM);\n-\t\t/* If there's only one MR, no need to replace LKey in WQE. */\n-\t\tif (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))\n-\t\t\twq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);\n+\t\t\t/*\n+\t\t\t * In order to support the mbufs with external attached\n+\t\t\t * data buffer we should use the buf_addr pointer\n+\t\t\t * instead of rte_mbuf_buf_addr(). It touches the mbuf\n+\t\t\t * itself and may impact the performance.\n+\t\t\t */\n+\t\t\tbuf_addr = elts[i]->buf_addr;\n+\t\t\twq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +\n+\t\t\t\t\t\t      RTE_PKTMBUF_HEADROOM);\n+\t\t\t/* If there's a single MR, no need to replace LKey. 
*/\n+\t\t\tif (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)\n+\t\t\t\t     > 1))\n+\t\t\t\twq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);\n+\t\t}\n+\t\trxq->rq_ci += n;\n+\t\t/* Prevent overflowing into consumed mbufs. */\n+\t\telts_idx = rxq->rq_ci & q_mask;\n+\t\tfor (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)\n+\t\t\t(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;\n+\t\trte_io_wmb();\n+\t\t*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);\n \t}\n-\trxq->rq_ci += n;\n-\t/* Prevent overflowing into consumed mbufs. */\n-\telts_idx = rxq->rq_ci & q_mask;\n-\tfor (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)\n-\t\t(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;\n-\trte_io_wmb();\n-\t*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);\n }\n \n #endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h\nindex 6bf0c9b540..cf3a795843 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec_altivec.h\n@@ -33,18 +33,16 @@\n /**\n  * Store free buffers to RX SW ring.\n  *\n- * @param rxq\n- *   Pointer to RX queue structure.\n+ * @param elts\n+ *   Pointer to SW ring to be filled.\n  * @param pkts\n  *   Pointer to array of packets to be stored.\n  * @param pkts_n\n  *   Number of packets to be stored.\n  */\n static inline void\n-rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)\n+rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)\n {\n-\tconst uint16_t q_mask = (1 << rxq->elts_n) - 1;\n-\tstruct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];\n \tunsigned int pos;\n \tuint16_t p = n & -2;\n \n@@ -550,14 +548,17 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,\n \t\t(vector unsigned char *)&pkts[3]->rearm_data);\n }\n \n-\n /**\n- * Receive burst of packets. An errored completion also consumes a mbuf, but the\n- * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed\n- * before returning to application.\n+ * Process a non-compressed completion and fill in mbufs in RX SW ring\n+ * with data extracted from the title completion descriptor.\n  *\n  * @param rxq\n  *   Pointer to RX queue structure.\n+ * @param cq\n+ *   Pointer to completion array having a non-compressed completion at first.\n+ * @param elts\n+ *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from\n+ *   the title completion descriptor to be copied to the rest of mbufs.\n  * @param[out] pkts\n  *   Array to store received packets.\n  * @param pkts_n\n@@ -565,28 +566,23 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,\n  * @param[out] err\n  *   Pointer to a flag. Set non-zero value if pkts array has at least one error\n  *   packet to handle.\n- * @param[out] no_cq\n- *  Pointer to a boolean. Set true if no new CQE seen.\n+ * @param[out] comp\n+ *   Pointer to a index. 
Set it to the first compressed completion if any.\n  *\n  * @return\n- *   Number of packets received including errors (<= pkts_n).\n+ *   Number of CQEs successfully processed.\n  */\n static inline uint16_t\n-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n-\t    uint64_t *err, bool *no_cq)\n+rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,\n+\t\t struct rte_mbuf **elts, struct rte_mbuf **pkts,\n+\t\t uint16_t pkts_n, uint64_t *err, uint64_t *comp)\n {\n \tconst uint16_t q_n = 1 << rxq->cqe_n;\n \tconst uint16_t q_mask = q_n - 1;\n-\tvolatile struct mlx5_cqe *cq;\n-\tstruct rte_mbuf **elts;\n \tunsigned int pos;\n-\tuint64_t n;\n-\tuint16_t repl_n;\n+\tuint64_t n = 0;\n \tuint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;\n \tuint16_t nocmp_n = 0;\n-\tuint16_t rcvd_pkt = 0;\n-\tunsigned int cq_idx = rxq->cq_ci & q_mask;\n-\tunsigned int elts_idx;\n \tunsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));\n \tconst vector unsigned char zero = (vector unsigned char){0};\n \tconst vector unsigned char ones = vec_splat_u8(-1);\n@@ -638,41 +634,6 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n \tconst vector unsigned short cqe_sel_mask2 =\n \t\t(vector unsigned short){0, 0, 0xffff, 0, 0, 0, 0, 0};\n \n-\tMLX5_ASSERT(rxq->sges_n == 0);\n-\tMLX5_ASSERT(rxq->cqe_n == rxq->elts_n);\n-\tcq = &(*rxq->cqes)[cq_idx];\n-\trte_prefetch0(cq);\n-\trte_prefetch0(cq + 1);\n-\trte_prefetch0(cq + 2);\n-\trte_prefetch0(cq + 3);\n-\tpkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);\n-\n-\trepl_n = q_n - (rxq->rq_ci - rxq->rq_pi);\n-\tif (repl_n >= rxq->rq_repl_thresh)\n-\t\tmlx5_rx_replenish_bulk_mbuf(rxq, repl_n);\n-\t/* See if there're unreturned mbufs from compressed CQE. */\n-\trcvd_pkt = rxq->decompressed;\n-\tif (rcvd_pkt > 0) {\n-\t\trcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);\n-\t\trxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);\n-\t\trxq->rq_pi += rcvd_pkt;\n-\t\trxq->decompressed -= rcvd_pkt;\n-\t\tpkts += rcvd_pkt;\n-\t}\n-\telts_idx = rxq->rq_pi & q_mask;\n-\telts = &(*rxq->elts)[elts_idx];\n-\t/* Not to overflow pkts array. */\n-\tpkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);\n-\t/* Not to cross queue end. */\n-\tpkts_n = RTE_MIN(pkts_n, q_n - elts_idx);\n-\tpkts_n = RTE_MIN(pkts_n, q_n - cq_idx);\n-\tif (!pkts_n) {\n-\t\t*no_cq = !rcvd_pkt;\n-\t\treturn rcvd_pkt;\n-\t}\n-\t/* At this point, there shouldn't be any remaining packets. */\n-\tMLX5_ASSERT(rxq->decompressed == 0);\n-\n \t/*\n \t * A. load first Qword (8bytes) in one loop.\n \t * B. copy 4 mbuf pointers from elts ring to returing pkts.\n@@ -1101,40 +1062,13 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n \t\tif (n != MLX5_VPMD_DESCS_PER_LOOP)\n \t\t\tbreak;\n \t}\n-\t/* If no new CQE seen, return without updating cq_db. */\n-\tif (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {\n-\t\t*no_cq = true;\n-\t\treturn rcvd_pkt;\n-\t}\n-\t/* Update the consumer indexes for non-compressed CQEs. */\n-\tMLX5_ASSERT(nocmp_n <= pkts_n);\n-\trxq->cq_ci += nocmp_n;\n-\trxq->rq_pi += nocmp_n;\n-\trcvd_pkt += nocmp_n;\n #ifdef MLX5_PMD_SOFT_COUNTERS\n \trxq->stats.ipackets += nocmp_n;\n \trxq->stats.ibytes += rcvd_byte;\n #endif\n-\t/* Decompress the last CQE if compressed. 
*/\n-\tif (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {\n-\t\tMLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));\n-\t\trxq->decompressed =\n-\t\t\trxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]);\n-\t\t/* Return more packets if needed. */\n-\t\tif (nocmp_n < pkts_n) {\n-\t\t\tuint16_t n = rxq->decompressed;\n-\n-\t\t\tn = RTE_MIN(n, pkts_n - nocmp_n);\n-\t\t\trxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);\n-\t\t\trxq->rq_pi += n;\n-\t\t\trcvd_pkt += n;\n-\t\t\trxq->decompressed -= n;\n-\t\t}\n-\t}\n-\trte_compiler_barrier();\n-\t*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);\n-\t*no_cq = !rcvd_pkt;\n-\treturn rcvd_pkt;\n+\tif (comp_idx == n)\n+\t\t*comp = comp_idx;\n+\treturn nocmp_n;\n }\n \n #endif /* RTE_PMD_MLX5_RXTX_VEC_ALTIVEC_H_ */\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h\nindex d122dad4fe..47b6692942 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h\n@@ -29,18 +29,16 @@\n /**\n  * Store free buffers to RX SW ring.\n  *\n- * @param rxq\n- *   Pointer to RX queue structure.\n+ * @param elts\n+ *   Pointer to SW ring to be filled.\n  * @param pkts\n  *   Pointer to array of packets to be stored.\n  * @param pkts_n\n  *   Number of packets to be stored.\n  */\n static inline void\n-rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)\n+rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)\n {\n-\tconst uint16_t q_mask = (1 << rxq->elts_n) - 1;\n-\tstruct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];\n \tunsigned int pos;\n \tuint16_t p = n & -2;\n \n@@ -368,12 +366,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,\n }\n \n /**\n- * Receive burst of packets. An errored completion also consumes a mbuf, but the\n- * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed\n- * before returning to application.\n+ * Process a non-compressed completion and fill in mbufs in RX SW ring\n+ * with data extracted from the title completion descriptor.\n  *\n  * @param rxq\n  *   Pointer to RX queue structure.\n+ * @param cq\n+ *   Pointer to completion array having a non-compressed completion at first.\n+ * @param elts\n+ *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from\n+ *   the title completion descriptor to be copied to the rest of mbufs.\n  * @param[out] pkts\n  *   Array to store received packets.\n  * @param pkts_n\n@@ -381,28 +383,23 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,\n  * @param[out] err\n  *   Pointer to a flag. Set non-zero value if pkts array has at least one error\n  *   packet to handle.\n- * @param[out] no_cq\n- *   Pointer to a boolean. Set true if no new CQE seen.\n+ * @param[out] comp\n+ *   Pointer to a index. 
Set it to the first compressed completion if any.\n  *\n  * @return\n- *   Number of packets received including errors (<= pkts_n).\n+ *   Number of CQEs successfully processed.\n  */\n static inline uint16_t\n-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n-\t    uint64_t *err, bool *no_cq)\n+rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,\n+\t\t struct rte_mbuf **elts, struct rte_mbuf **pkts,\n+\t\t uint16_t pkts_n, uint64_t *err, uint64_t *comp)\n {\n \tconst uint16_t q_n = 1 << rxq->cqe_n;\n \tconst uint16_t q_mask = q_n - 1;\n-\tvolatile struct mlx5_cqe *cq;\n-\tstruct rte_mbuf **elts;\n \tunsigned int pos;\n-\tuint64_t n;\n-\tuint16_t repl_n;\n+\tuint64_t n = 0;\n \tuint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;\n \tuint16_t nocmp_n = 0;\n-\tuint16_t rcvd_pkt = 0;\n-\tunsigned int cq_idx = rxq->cq_ci & q_mask;\n-\tunsigned int elts_idx;\n \tconst uint16x4_t ownership = vdup_n_u16(!(rxq->cq_ci & (q_mask + 1)));\n \tconst uint16x4_t owner_check = vcreate_u16(0x0001000100010001);\n \tconst uint16x4_t opcode_check = vcreate_u16(0x00f000f000f000f0);\n@@ -463,39 +460,6 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n \t};\n \tconst uint32x4_t flow_mark_adj = { 0, 0, 0, rxq->mark * (-1) };\n \n-\tMLX5_ASSERT(rxq->sges_n == 0);\n-\tMLX5_ASSERT(rxq->cqe_n == rxq->elts_n);\n-\tcq = &(*rxq->cqes)[cq_idx];\n-\trte_prefetch_non_temporal(cq);\n-\trte_prefetch_non_temporal(cq + 1);\n-\trte_prefetch_non_temporal(cq + 2);\n-\trte_prefetch_non_temporal(cq + 3);\n-\tpkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);\n-\trepl_n = q_n - (rxq->rq_ci - rxq->rq_pi);\n-\tif (repl_n >= rxq->rq_repl_thresh)\n-\t\tmlx5_rx_replenish_bulk_mbuf(rxq, repl_n);\n-\t/* See if there're unreturned mbufs from compressed CQE. */\n-\trcvd_pkt = rxq->decompressed;\n-\tif (rcvd_pkt > 0) {\n-\t\trcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);\n-\t\trxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);\n-\t\trxq->rq_pi += rcvd_pkt;\n-\t\tpkts += rcvd_pkt;\n-\t\trxq->decompressed -= rcvd_pkt;\n-\t}\n-\telts_idx = rxq->rq_pi & q_mask;\n-\telts = &(*rxq->elts)[elts_idx];\n-\t/* Not to overflow pkts array. */\n-\tpkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);\n-\t/* Not to cross queue end. */\n-\tpkts_n = RTE_MIN(pkts_n, q_n - elts_idx);\n-\tpkts_n = RTE_MIN(pkts_n, q_n - cq_idx);\n-\tif (!pkts_n) {\n-\t\t*no_cq = !rcvd_pkt;\n-\t\treturn rcvd_pkt;\n-\t}\n-\t/* At this point, there shouldn't be any remained packets. */\n-\tMLX5_ASSERT(rxq->decompressed == 0);\n \t/*\n \t * Note that vectors have reverse order - {v3, v2, v1, v0}, because\n \t * there's no instruction to count trailing zeros. __builtin_clzl() is\n@@ -773,40 +737,13 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n \t\tif (n != MLX5_VPMD_DESCS_PER_LOOP)\n \t\t\tbreak;\n \t}\n-\t/* If no new CQE seen, return without updating cq_db. */\n-\tif (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {\n-\t\t*no_cq = true;\n-\t\treturn rcvd_pkt;\n-\t}\n-\t/* Update the consumer indexes for non-compressed CQEs. */\n-\tMLX5_ASSERT(nocmp_n <= pkts_n);\n-\trxq->cq_ci += nocmp_n;\n-\trxq->rq_pi += nocmp_n;\n-\trcvd_pkt += nocmp_n;\n #ifdef MLX5_PMD_SOFT_COUNTERS\n \trxq->stats.ipackets += nocmp_n;\n \trxq->stats.ibytes += rcvd_byte;\n #endif\n-\t/* Decompress the last CQE if compressed. 
*/\n-\tif (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {\n-\t\tMLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));\n-\t\trxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],\n-\t\t\t\t\t\t\t&elts[nocmp_n]);\n-\t\t/* Return more packets if needed. */\n-\t\tif (nocmp_n < pkts_n) {\n-\t\t\tuint16_t n = rxq->decompressed;\n-\n-\t\t\tn = RTE_MIN(n, pkts_n - nocmp_n);\n-\t\t\trxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);\n-\t\t\trxq->rq_pi += n;\n-\t\t\trcvd_pkt += n;\n-\t\t\trxq->decompressed -= n;\n-\t\t}\n-\t}\n-\trte_io_wmb();\n-\t*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);\n-\t*no_cq = !rcvd_pkt;\n-\treturn rcvd_pkt;\n+\tif (comp_idx == n)\n+\t\t*comp = comp_idx;\n+\treturn nocmp_n;\n }\n \n #endif /* RTE_PMD_MLX5_RXTX_VEC_NEON_H_ */\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h\nindex 0bbcbeefff..59662fa12d 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h\n@@ -31,18 +31,16 @@\n /**\n  * Store free buffers to RX SW ring.\n  *\n- * @param rxq\n- *   Pointer to RX queue structure.\n+ * @param elts\n+ *   Pointer to SW ring to be filled.\n  * @param pkts\n  *   Pointer to array of packets to be stored.\n  * @param pkts_n\n  *   Number of packets to be stored.\n  */\n static inline void\n-rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)\n+rxq_copy_mbuf_v(struct rte_mbuf **elts, struct rte_mbuf **pkts, uint16_t n)\n {\n-\tconst uint16_t q_mask = (1 << rxq->elts_n) - 1;\n-\tstruct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];\n \tunsigned int pos;\n \tuint16_t p = n & -2;\n \n@@ -227,7 +225,6 @@ rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,\n \trxq->stats.ipackets += mcqe_n;\n \trxq->stats.ibytes += rcvd_byte;\n #endif\n-\trxq->cq_ci += mcqe_n;\n \treturn mcqe_n;\n }\n \n@@ -293,9 +290,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],\n \tpinfo1 = _mm_unpackhi_epi32(cqes[2], cqes[3]);\n \tptype = _mm_unpacklo_epi64(pinfo0, pinfo1);\n \tif (rxq->mark) {\n-\t\tconst __m128i pinfo_ft_mask =\n-\t\t\t_mm_set_epi32(0xffffff00, 0xffffff00,\n-\t\t\t\t      0xffffff00, 0xffffff00);\n+\t\tconst __m128i pinfo_ft_mask = _mm_set1_epi32(0xffffff00);\n \t\tconst __m128i fdir_flags = _mm_set1_epi32(PKT_RX_FDIR);\n \t\t__m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID);\n \t\t__m128i flow_tag, invalid_mask;\n@@ -373,12 +368,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],\n }\n \n /**\n- * Receive burst of packets. An errored completion also consumes a mbuf, but the\n- * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed\n- * before returning to application.\n+ * Process a non-compressed completion and fill in mbufs in RX SW ring\n+ * with data extracted from the title completion descriptor.\n  *\n  * @param rxq\n  *   Pointer to RX queue structure.\n+ * @param cq\n+ *   Pointer to completion array having a non-compressed completion at first.\n+ * @param elts\n+ *   Pointer to SW ring to be filled. The first mbuf has to be pre-built from\n+ *   the title completion descriptor to be copied to the rest of mbufs.\n  * @param[out] pkts\n  *   Array to store received packets.\n  * @param pkts_n\n@@ -386,37 +385,28 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],\n  * @param[out] err\n  *   Pointer to a flag. Set non-zero value if pkts array has at least one error\n  *   packet to handle.\n- * @param[out] no_cq\n- *   Pointer to a boolean. 
Set true if no new CQE seen.\n+ * @param[out] comp\n+ *   Pointer to a index. Set it to the first compressed completion if any.\n  *\n  * @return\n- *   Number of packets received including errors (<= pkts_n).\n+ *   Number of CQEs successfully processed.\n  */\n static inline uint16_t\n-rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n-\t    uint64_t *err, bool *no_cq)\n+rxq_cq_process_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,\n+\t\t struct rte_mbuf **elts, struct rte_mbuf **pkts,\n+\t\t uint16_t pkts_n, uint64_t *err, uint64_t *comp)\n {\n \tconst uint16_t q_n = 1 << rxq->cqe_n;\n \tconst uint16_t q_mask = q_n - 1;\n-\tvolatile struct mlx5_cqe *cq;\n-\tstruct rte_mbuf **elts;\n \tunsigned int pos;\n-\tuint64_t n;\n-\tuint16_t repl_n;\n+\tuint64_t n = 0;\n \tuint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;\n \tuint16_t nocmp_n = 0;\n-\tuint16_t rcvd_pkt = 0;\n-\tunsigned int cq_idx = rxq->cq_ci & q_mask;\n-\tunsigned int elts_idx;\n \tunsigned int ownership = !!(rxq->cq_ci & (q_mask + 1));\n-\tconst __m128i owner_check =\n-\t\t_mm_set_epi64x(0x0100000001000000LL, 0x0100000001000000LL);\n-\tconst __m128i opcode_check =\n-\t\t_mm_set_epi64x(0xf0000000f0000000LL, 0xf0000000f0000000LL);\n-\tconst __m128i format_check =\n-\t\t_mm_set_epi64x(0x0c0000000c000000LL, 0x0c0000000c000000LL);\n-\tconst __m128i resp_err_check =\n-\t\t_mm_set_epi64x(0xe0000000e0000000LL, 0xe0000000e0000000LL);\n+\tconst __m128i owner_check =\t_mm_set1_epi64x(0x0100000001000000LL);\n+\tconst __m128i opcode_check = _mm_set1_epi64x(0xf0000000f0000000LL);\n+\tconst __m128i format_check = _mm_set1_epi64x(0x0c0000000c000000LL);\n+\tconst __m128i resp_err_check = _mm_set1_epi64x(0xe0000000e0000000LL);\n #ifdef MLX5_PMD_SOFT_COUNTERS\n \tuint32_t rcvd_byte = 0;\n \t/* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */\n@@ -448,40 +438,6 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n \t\t\t      0,\n \t\t\t      rxq->crc_present * RTE_ETHER_CRC_LEN);\n \tconst __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);\n-\n-\tMLX5_ASSERT(rxq->sges_n == 0);\n-\tMLX5_ASSERT(rxq->cqe_n == rxq->elts_n);\n-\tcq = &(*rxq->cqes)[cq_idx];\n-\trte_prefetch0(cq);\n-\trte_prefetch0(cq + 1);\n-\trte_prefetch0(cq + 2);\n-\trte_prefetch0(cq + 3);\n-\tpkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);\n-\trepl_n = q_n - (rxq->rq_ci - rxq->rq_pi);\n-\tif (repl_n >= rxq->rq_repl_thresh)\n-\t\tmlx5_rx_replenish_bulk_mbuf(rxq, repl_n);\n-\t/* See if there're unreturned mbufs from compressed CQE. */\n-\trcvd_pkt = rxq->decompressed;\n-\tif (rcvd_pkt > 0) {\n-\t\trcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);\n-\t\trxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);\n-\t\trxq->rq_pi += rcvd_pkt;\n-\t\trxq->decompressed -= rcvd_pkt;\n-\t\tpkts += rcvd_pkt;\n-\t}\n-\telts_idx = rxq->rq_pi & q_mask;\n-\telts = &(*rxq->elts)[elts_idx];\n-\t/* Not to overflow pkts array. */\n-\tpkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);\n-\t/* Not to cross queue end. */\n-\tpkts_n = RTE_MIN(pkts_n, q_n - elts_idx);\n-\tpkts_n = RTE_MIN(pkts_n, q_n - cq_idx);\n-\tif (!pkts_n) {\n-\t\t*no_cq = !rcvd_pkt;\n-\t\treturn rcvd_pkt;\n-\t}\n-\t/* At this point, there shouldn't be any remained packets. */\n-\tMLX5_ASSERT(rxq->decompressed == 0);\n \t/*\n \t * A. load first Qword (8bytes) in one loop.\n \t * B. 
copy 4 mbuf pointers from elts ring to returing pkts.\n@@ -718,40 +674,13 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,\n \t\tif (n != MLX5_VPMD_DESCS_PER_LOOP)\n \t\t\tbreak;\n \t}\n-\t/* If no new CQE seen, return without updating cq_db. */\n-\tif (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {\n-\t\t*no_cq = true;\n-\t\treturn rcvd_pkt;\n-\t}\n-\t/* Update the consumer indexes for non-compressed CQEs. */\n-\tMLX5_ASSERT(nocmp_n <= pkts_n);\n-\trxq->cq_ci += nocmp_n;\n-\trxq->rq_pi += nocmp_n;\n-\trcvd_pkt += nocmp_n;\n #ifdef MLX5_PMD_SOFT_COUNTERS\n \trxq->stats.ipackets += nocmp_n;\n \trxq->stats.ibytes += rcvd_byte;\n #endif\n-\t/* Decompress the last CQE if compressed. */\n-\tif (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {\n-\t\tMLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));\n-\t\trxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],\n-\t\t\t\t\t\t\t&elts[nocmp_n]);\n-\t\t/* Return more packets if needed. */\n-\t\tif (nocmp_n < pkts_n) {\n-\t\t\tuint16_t n = rxq->decompressed;\n-\n-\t\t\tn = RTE_MIN(n, pkts_n - nocmp_n);\n-\t\t\trxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);\n-\t\t\trxq->rq_pi += n;\n-\t\t\trcvd_pkt += n;\n-\t\t\trxq->decompressed -= n;\n-\t\t}\n-\t}\n-\trte_compiler_barrier();\n-\t*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);\n-\t*no_cq = !rcvd_pkt;\n-\treturn rcvd_pkt;\n+\tif (comp_idx == n)\n+\t\t*comp = comp_idx;\n+\treturn nocmp_n;\n }\n \n #endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */\n",
    "prefixes": [
        "v2",
        "1/2"
    ]
}
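The body above also carries URLs that can be followed directly: "mbox" points at the raw patch in mbox form, and "checks" expands the aggregate "check": "success" value into individual CI results. A short sketch, again assuming Python/requests and assuming the per-check field names ("context", "state") used by the Patchwork checks API; the local filename is arbitrary:

import requests

# URLs copied verbatim from the "mbox" and "checks" fields above.
MBOX_URL = "http://patches.dpdk.org/project/dpdk/patch/20201021203030.19042-2-akozyrev@nvidia.com/mbox/"
CHECKS_URL = "http://patches.dpdk.org/api/patches/81710/checks/"

# Save the raw patch so it can be applied locally, e.g. with git am.
mbox = requests.get(MBOX_URL, timeout=10)
mbox.raise_for_status()
with open("v2-1-2-net-mlx5-refactor-vectorized-rx.mbox", "wb") as f:
    f.write(mbox.content)

# List the individual CI results behind the aggregate "check": "success".
checks = requests.get(CHECKS_URL, headers={"Accept": "application/json"}, timeout=10)
checks.raise_for_status()
for check in checks.json():
    print(check["context"], check["state"])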