get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/90187/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 90187,
    "url": "http://patches.dpdk.org/api/patches/90187/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20210331073749.1382377-3-suanmingm@nvidia.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20210331073749.1382377-3-suanmingm@nvidia.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20210331073749.1382377-3-suanmingm@nvidia.com",
    "date": "2021-03-31T07:37:47",
    "name": "[v4,2/4] regex/mlx5: add data path scattered mbuf process",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "cc54317ca552fe8be3075354f280f4ab549cb037",
    "submitter": {
        "id": 1887,
        "url": "http://patches.dpdk.org/api/people/1887/?format=api",
        "name": "Suanming Mou",
        "email": "suanmingm@nvidia.com"
    },
    "delegate": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20210331073749.1382377-3-suanmingm@nvidia.com/mbox/",
    "series": [
        {
            "id": 15999,
            "url": "http://patches.dpdk.org/api/series/15999/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=15999",
            "date": "2021-03-31T07:37:45",
            "name": "regex/mlx5: support scattered mbuf",
            "version": 4,
            "mbox": "http://patches.dpdk.org/series/15999/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/90187/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/90187/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 4E9BFA034F;\n\tWed, 31 Mar 2021 09:38:49 +0200 (CEST)",
            "from [217.70.189.124] (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 651DF140E25;\n\tWed, 31 Mar 2021 09:38:05 +0200 (CEST)",
            "from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129])\n by mails.dpdk.org (Postfix) with ESMTP id 71096140E0D\n for <dev@dpdk.org>; Wed, 31 Mar 2021 09:38:00 +0200 (CEST)",
            "from Internal Mail-Server by MTLPINE1 (envelope-from\n suanmingm@nvidia.com) with SMTP; 31 Mar 2021 10:37:55 +0300",
            "from nvidia.com (mtbc-r640-03.mtbc.labs.mlnx [10.75.70.8])\n by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id 12V7boeG002108;\n Wed, 31 Mar 2021 10:37:54 +0300"
        ],
        "From": "Suanming Mou <suanmingm@nvidia.com>",
        "To": "orika@nvidia.com",
        "Cc": "dev@dpdk.org, viacheslavo@nvidia.com, matan@nvidia.com, rasland@nvidia.com",
        "Date": "Wed, 31 Mar 2021 10:37:47 +0300",
        "Message-Id": "<20210331073749.1382377-3-suanmingm@nvidia.com>",
        "X-Mailer": "git-send-email 2.25.1",
        "In-Reply-To": "<20210331073749.1382377-1-suanmingm@nvidia.com>",
        "References": "<20210309235732.3952418-1-suanmingm@nvidia.com>\n <20210331073749.1382377-1-suanmingm@nvidia.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "Subject": "[dpdk-dev] [PATCH v4 2/4] regex/mlx5: add data path scattered mbuf\n process",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "UMR(User-Mode Memory Registration) WQE can present data buffers\nscattered within multiple mbufs with single indirect mkey. Take\nadvantage of the UMR WQE, scattered mbuf in one operation can be\npresented to an indirect mkey. The RegEx which only accepts one\nmkey can now process the whole scattered mbuf in one operation.\n\nThe maximum scattered mbuf can be supported in one UMR WQE is now\ndefined as 64. The mbufs from multiple operations can be combined\ninto one UMR WQE as well if there is enough space in the KLM array,\nsince the operations can address their own mbuf's content by the\nmkey's address and length. However, one operation's scattered mbuf's\ncan't be placed in two different UMR WQE's KLM array, if the UMR\nWQE's KLM does not has enough free space for one operation, the\nextra UMR WQE will be engaged.\n\nIn case the UMR WQE's indirect mkey will be over wrapped by the SQ's\nWQE move, the mkey's index used by the UMR WQE should be the index\nof last the RegEX WQE in the operations. As one operation consumes\none WQE set, build the RegEx WQE by reverse helps address the mkey\nmore efficiently. Once the operations in one burst consumes multiple\nmkeys, when the mkey KLM array is full, the reverse WQE set index\nwill always be the last of the new mkey's for the new UMR WQE.\n\nIn GGA mode, the SQ WQE's memory layout becomes UMR/NOP and RegEx\nWQE by interleave. The UMR and RegEx WQE can be called as WQE set.\nThe SQ's pi and ci will also be increased as WQE set not as WQE.\n\nFor operations don't have scattered mbuf, uses the mbuf's mkey directly,\nthe WQE set combination is NOP + RegEx.\nFor operations have scattered mubf but share the UMR WQE with others,\nthe WQE set combination is NOP + RegEx.\nFor operations complete the UMR WQE, the WQE set combination is UMR +\nRegEx.\n\nSigned-off-by: Suanming Mou <suanmingm@nvidia.com>\nAcked-by: Ori Kam <orika@nvidia.com>\n---\n doc/guides/regexdevs/mlx5.rst            |   5 +\n doc/guides/rel_notes/release_21_05.rst   |   4 +\n drivers/regex/mlx5/mlx5_regex.c          |   9 +\n drivers/regex/mlx5/mlx5_regex.h          |  26 +-\n drivers/regex/mlx5/mlx5_regex_control.c  |  43 ++-\n drivers/regex/mlx5/mlx5_regex_fastpath.c | 378 +++++++++++++++++++++--\n 6 files changed, 407 insertions(+), 58 deletions(-)",
    "diff": "diff --git a/doc/guides/regexdevs/mlx5.rst b/doc/guides/regexdevs/mlx5.rst\nindex faaa6ac11d..45a0b96980 100644\n--- a/doc/guides/regexdevs/mlx5.rst\n+++ b/doc/guides/regexdevs/mlx5.rst\n@@ -35,6 +35,11 @@ be specified as device parameter. The RegEx device can be probed and used with\n other Mellanox devices, by adding more options in the class.\n For example: ``class=net:regex`` will probe both the net PMD and the RegEx PMD.\n \n+Features\n+--------\n+\n+- Multi segments mbuf support.\n+\n Supported NICs\n --------------\n \ndiff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst\nindex 3c76148b11..c3d6b8e8ae 100644\n--- a/doc/guides/rel_notes/release_21_05.rst\n+++ b/doc/guides/rel_notes/release_21_05.rst\n@@ -119,6 +119,10 @@ New Features\n   * Added command to display Rx queue used descriptor count.\n     ``show port (port_id) rxq (queue_id) desc used count``\n \n+* **Updated Mellanox RegEx PMD.**\n+\n+  * Added support for multi segments mbuf.\n+\n \n Removed Items\n -------------\ndiff --git a/drivers/regex/mlx5/mlx5_regex.c b/drivers/regex/mlx5/mlx5_regex.c\nindex ac5b205fa9..82c485e50c 100644\n--- a/drivers/regex/mlx5/mlx5_regex.c\n+++ b/drivers/regex/mlx5/mlx5_regex.c\n@@ -199,6 +199,13 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,\n \t}\n \tpriv->regexdev->dev_ops = &mlx5_regexdev_ops;\n \tpriv->regexdev->enqueue = mlx5_regexdev_enqueue;\n+#ifdef HAVE_MLX5_UMR_IMKEY\n+\tif (!attr.umr_indirect_mkey_disabled &&\n+\t    !attr.umr_modify_entity_size_disabled)\n+\t\tpriv->has_umr = 1;\n+\tif (priv->has_umr)\n+\t\tpriv->regexdev->enqueue = mlx5_regexdev_enqueue_gga;\n+#endif\n \tpriv->regexdev->dequeue = mlx5_regexdev_dequeue;\n \tpriv->regexdev->device = (struct rte_device *)pci_dev;\n \tpriv->regexdev->data->dev_private = priv;\n@@ -213,6 +220,8 @@ mlx5_regex_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,\n \t    rte_errno = ENOMEM;\n \t\tgoto error;\n \t}\n+\tDRV_LOG(INFO, \"RegEx GGA is %s.\",\n+\t\tpriv->has_umr ? \"supported\" : \"unsupported\");\n \treturn 0;\n \n error:\ndiff --git a/drivers/regex/mlx5/mlx5_regex.h b/drivers/regex/mlx5/mlx5_regex.h\nindex a2b3f0d9f3..51a2101e53 100644\n--- a/drivers/regex/mlx5/mlx5_regex.h\n+++ b/drivers/regex/mlx5/mlx5_regex.h\n@@ -15,6 +15,7 @@\n #include <mlx5_common_devx.h>\n \n #include \"mlx5_rxp.h\"\n+#include \"mlx5_regex_utils.h\"\n \n struct mlx5_regex_sq {\n \tuint16_t log_nb_desc; /* Log 2 number of desc for this object. */\n@@ -40,6 +41,7 @@ struct mlx5_regex_qp {\n \tstruct mlx5_regex_job *jobs;\n \tstruct ibv_mr *metadata;\n \tstruct ibv_mr *outputs;\n+\tstruct ibv_mr *imkey_addr; /* Indirect mkey array region. */\n \tsize_t ci, pi;\n \tstruct mlx5_mr_ctrl mr_ctrl;\n };\n@@ -71,8 +73,29 @@ struct mlx5_regex_priv {\n \tstruct mlx5_mr_share_cache mr_scache; /* Global shared MR cache. */\n \tuint8_t is_bf2; /* The device is BF2 device. */\n \tuint8_t sq_ts_format; /* Whether SQ supports timestamp formats. */\n+\tuint8_t has_umr; /* The device supports UMR. */\n };\n \n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+static inline int\n+regex_get_pdn(void *pd, uint32_t *pdn)\n+{\n+\tstruct mlx5dv_obj obj;\n+\tstruct mlx5dv_pd pd_info;\n+\tint ret = 0;\n+\n+\tobj.pd.in = pd;\n+\tobj.pd.out = &pd_info;\n+\tret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);\n+\tif (ret) {\n+\t\tDRV_LOG(DEBUG, \"Fail to get PD object info\");\n+\t\treturn ret;\n+\t}\n+\t*pdn = pd_info.pdn;\n+\treturn 0;\n+}\n+#endif\n+\n /* mlx5_regex.c */\n int mlx5_regex_start(struct rte_regexdev *dev);\n int mlx5_regex_stop(struct rte_regexdev *dev);\n@@ -108,5 +131,6 @@ uint16_t mlx5_regexdev_enqueue(struct rte_regexdev *dev, uint16_t qp_id,\n \t\t       struct rte_regex_ops **ops, uint16_t nb_ops);\n uint16_t mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t qp_id,\n \t\t       struct rte_regex_ops **ops, uint16_t nb_ops);\n-\n+uint16_t mlx5_regexdev_enqueue_gga(struct rte_regexdev *dev, uint16_t qp_id,\n+\t\t       struct rte_regex_ops **ops, uint16_t nb_ops);\n #endif /* MLX5_REGEX_H */\ndiff --git a/drivers/regex/mlx5/mlx5_regex_control.c b/drivers/regex/mlx5/mlx5_regex_control.c\nindex 55fbb419ed..eef0fe579d 100644\n--- a/drivers/regex/mlx5/mlx5_regex_control.c\n+++ b/drivers/regex/mlx5/mlx5_regex_control.c\n@@ -27,6 +27,9 @@\n \n #define MLX5_REGEX_NUM_WQE_PER_PAGE (4096/64)\n \n+#define MLX5_REGEX_WQE_LOG_NUM(has_umr, log_desc) \\\n+\t\t((has_umr) ? ((log_desc) + 2) : (log_desc))\n+\n /**\n  * Returns the number of qp obj to be created.\n  *\n@@ -91,26 +94,6 @@ regex_ctrl_create_cq(struct mlx5_regex_priv *priv, struct mlx5_regex_cq *cq)\n \treturn 0;\n }\n \n-#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n-static int\n-regex_get_pdn(void *pd, uint32_t *pdn)\n-{\n-\tstruct mlx5dv_obj obj;\n-\tstruct mlx5dv_pd pd_info;\n-\tint ret = 0;\n-\n-\tobj.pd.in = pd;\n-\tobj.pd.out = &pd_info;\n-\tret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);\n-\tif (ret) {\n-\t\tDRV_LOG(DEBUG, \"Fail to get PD object info\");\n-\t\treturn ret;\n-\t}\n-\t*pdn = pd_info.pdn;\n-\treturn 0;\n-}\n-#endif\n-\n /**\n  * Destroy the SQ object.\n  *\n@@ -168,14 +151,16 @@ regex_ctrl_create_sq(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,\n \tint ret;\n \n \tsq->log_nb_desc = log_nb_desc;\n+\tsq->sqn = q_ind;\n \tsq->ci = 0;\n \tsq->pi = 0;\n \tret = regex_get_pdn(priv->pd, &pd_num);\n \tif (ret)\n \t\treturn ret;\n \tattr.wq_attr.pd = pd_num;\n-\tret = mlx5_devx_sq_create(priv->ctx, &sq->sq_obj, log_nb_desc, &attr,\n-\t\t\t\t  SOCKET_ID_ANY);\n+\tret = mlx5_devx_sq_create(priv->ctx, &sq->sq_obj,\n+\t\t\tMLX5_REGEX_WQE_LOG_NUM(priv->has_umr, log_nb_desc),\n+\t\t\t&attr, SOCKET_ID_ANY);\n \tif (ret) {\n \t\tDRV_LOG(ERR, \"Can't create SQ object.\");\n \t\trte_errno = ENOMEM;\n@@ -225,10 +210,18 @@ mlx5_regex_qp_setup(struct rte_regexdev *dev, uint16_t qp_ind,\n \n \tqp = &priv->qps[qp_ind];\n \tqp->flags = cfg->qp_conf_flags;\n-\tqp->cq.log_nb_desc = rte_log2_u32(cfg->nb_desc);\n-\tqp->nb_desc = 1 << qp->cq.log_nb_desc;\n+\tlog_desc = rte_log2_u32(cfg->nb_desc);\n+\t/*\n+\t * UMR mode requires two WQEs(UMR and RegEx WQE) for one descriptor.\n+\t * For CQ, expand the CQE number multiple with 2.\n+\t * For SQ, the UMR and RegEx WQE for one descriptor consumes 4 WQEBBS,\n+\t * expand the WQE number multiple with 4.\n+\t */\n+\tqp->cq.log_nb_desc = log_desc + (!!priv->has_umr);\n+\tqp->nb_desc = 1 << log_desc;\n \tif (qp->flags & RTE_REGEX_QUEUE_PAIR_CFG_OOS_F)\n-\t\tqp->nb_obj = regex_ctrl_get_nb_obj(qp->nb_desc);\n+\t\tqp->nb_obj = regex_ctrl_get_nb_obj\n+\t\t\t(1 << MLX5_REGEX_WQE_LOG_NUM(priv->has_umr, log_desc));\n \telse\n \t\tqp->nb_obj = 1;\n \tqp->sqs = rte_malloc(NULL,\ndiff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c b/drivers/regex/mlx5/mlx5_regex_fastpath.c\nindex beaea7b63f..4f9402c583 100644\n--- a/drivers/regex/mlx5/mlx5_regex_fastpath.c\n+++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c\n@@ -32,6 +32,15 @@\n #define MLX5_REGEX_WQE_GATHER_OFFSET 32\n #define MLX5_REGEX_WQE_SCATTER_OFFSET 48\n #define MLX5_REGEX_METADATA_OFF 32\n+#define MLX5_REGEX_UMR_WQE_SIZE 192\n+/* The maximum KLMs can be added to one UMR indirect mkey. */\n+#define MLX5_REGEX_MAX_KLM_NUM 128\n+/* The KLM array size for one job. */\n+#define MLX5_REGEX_KLMS_SIZE \\\n+\t((MLX5_REGEX_MAX_KLM_NUM) * sizeof(struct mlx5_klm))\n+/* In WQE set mode, the pi should be quarter of the MLX5_REGEX_MAX_WQE_INDEX. */\n+#define MLX5_REGEX_UMR_SQ_PI_IDX(pi, ops) \\\n+\t(((pi) + (ops)) & (MLX5_REGEX_MAX_WQE_INDEX >> 2))\n \n static inline uint32_t\n sq_size_get(struct mlx5_regex_sq *sq)\n@@ -49,6 +58,8 @@ struct mlx5_regex_job {\n \tuint64_t user_id;\n \tvolatile uint8_t *output;\n \tvolatile uint8_t *metadata;\n+\tstruct mlx5_klm *imkey_array; /* Indirect mkey's KLM array. */\n+\tstruct mlx5_devx_obj *imkey; /* UMR WQE's indirect meky. */\n } __rte_cached_aligned;\n \n static inline void\n@@ -99,12 +110,13 @@ set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,\n }\n \n static inline void\n-prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,\n-\t struct mlx5_regex_sq *sq, struct rte_regex_ops *op,\n-\t struct mlx5_regex_job *job)\n+__prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,\n+\t   struct rte_regex_ops *op, struct mlx5_regex_job *job,\n+\t   size_t pi, struct mlx5_klm *klm)\n {\n-\tsize_t wqe_offset = (sq->pi & (sq_size_get(sq) - 1)) * MLX5_SEND_WQE_BB;\n-\tuint32_t lkey;\n+\tsize_t wqe_offset = (pi & (sq_size_get(sq) - 1)) *\n+\t\t\t    (MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +\n+\t\t\t    (priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);\n \tuint16_t group0 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F ?\n \t\t\t\top->group_id0 : 0;\n \tuint16_t group1 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F ?\n@@ -122,14 +134,11 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,\n \t\t\t       RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F |\n \t\t\t       RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F)))\n \t\tgroup0 = op->group_id0;\n-\tlkey = mlx5_mr_addr2mr_bh(priv->pd, 0,\n-\t\t\t\t  &priv->mr_scache, &qp->mr_ctrl,\n-\t\t\t\t  rte_pktmbuf_mtod(op->mbuf, uintptr_t),\n-\t\t\t\t  !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF));\n \tuint8_t *wqe = (uint8_t *)(uintptr_t)sq->sq_obj.wqes + wqe_offset;\n \tint ds = 4; /*  ctrl + meta + input + output */\n \n-\tset_wqe_ctrl_seg((struct mlx5_wqe_ctrl_seg *)wqe, sq->pi,\n+\tset_wqe_ctrl_seg((struct mlx5_wqe_ctrl_seg *)wqe,\n+\t\t\t (priv->has_umr ? (pi * 4 + 3) : pi),\n \t\t\t MLX5_OPCODE_MMO, MLX5_OPC_MOD_MMO_REGEX,\n \t\t\t sq->sq_obj.sq->id, 0, ds, 0, 0);\n \tset_regex_ctrl_seg(wqe + 12, 0, group0, group1, group2, group3,\n@@ -137,36 +146,54 @@ prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,\n \tstruct mlx5_wqe_data_seg *input_seg =\n \t\t(struct mlx5_wqe_data_seg *)(wqe +\n \t\t\t\t\t     MLX5_REGEX_WQE_GATHER_OFFSET);\n-\tinput_seg->byte_count =\n-\t\trte_cpu_to_be_32(rte_pktmbuf_data_len(op->mbuf));\n-\tinput_seg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(op->mbuf,\n-\t\t\t\t\t\t\t    uintptr_t));\n-\tinput_seg->lkey = lkey;\n+\tinput_seg->byte_count = rte_cpu_to_be_32(klm->byte_count);\n+\tinput_seg->addr = rte_cpu_to_be_64(klm->address);\n+\tinput_seg->lkey = klm->mkey;\n \tjob->user_id = op->user_id;\n+}\n+\n+static inline void\n+prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,\n+\t struct mlx5_regex_sq *sq, struct rte_regex_ops *op,\n+\t struct mlx5_regex_job *job)\n+{\n+\tstruct mlx5_klm klm;\n+\n+\tklm.byte_count = rte_pktmbuf_data_len(op->mbuf);\n+\tklm.mkey = mlx5_mr_addr2mr_bh(priv->pd, 0,\n+\t\t\t\t  &priv->mr_scache, &qp->mr_ctrl,\n+\t\t\t\t  rte_pktmbuf_mtod(op->mbuf, uintptr_t),\n+\t\t\t\t  !!(op->mbuf->ol_flags & EXT_ATTACHED_MBUF));\n+\tklm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);\n+\t__prep_one(priv, sq, op, job, sq->pi, &klm);\n \tsq->db_pi = sq->pi;\n \tsq->pi = (sq->pi + 1) & MLX5_REGEX_MAX_WQE_INDEX;\n }\n \n static inline void\n-send_doorbell(struct mlx5dv_devx_uar *uar, struct mlx5_regex_sq *sq)\n+send_doorbell(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq)\n {\n+\tstruct mlx5dv_devx_uar *uar = priv->uar;\n \tsize_t wqe_offset = (sq->db_pi & (sq_size_get(sq) - 1)) *\n-\t\tMLX5_SEND_WQE_BB;\n+\t\t(MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +\n+\t\t(priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);\n \tuint8_t *wqe = (uint8_t *)(uintptr_t)sq->sq_obj.wqes + wqe_offset;\n-\t((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;\n+\t/* Or the fm_ce_se instead of set, avoid the fence be cleared. */\n+\t((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;\n \tuint64_t *doorbell_addr =\n \t\t(uint64_t *)((uint8_t *)uar->base_addr + 0x800);\n \trte_io_wmb();\n-\tsq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32((sq->db_pi + 1) &\n-\t\t\t\t\t\t MLX5_REGEX_MAX_WQE_INDEX);\n+\tsq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32((priv->has_umr ?\n+\t\t\t\t\t(sq->db_pi * 4 + 3) : sq->db_pi) &\n+\t\t\t\t\tMLX5_REGEX_MAX_WQE_INDEX);\n \trte_wmb();\n \t*doorbell_addr = *(volatile uint64_t *)wqe;\n \trte_wmb();\n }\n \n static inline int\n-can_send(struct mlx5_regex_sq *sq) {\n-\treturn ((uint16_t)(sq->pi - sq->ci) < sq_size_get(sq));\n+get_free(struct mlx5_regex_sq *sq) {\n+\treturn (sq_size_get(sq) - (uint16_t)(sq->pi - sq->ci));\n }\n \n static inline uint32_t\n@@ -174,6 +201,211 @@ job_id_get(uint32_t qid, size_t sq_size, size_t index) {\n \treturn qid * sq_size + (index & (sq_size - 1));\n }\n \n+#ifdef HAVE_MLX5_UMR_IMKEY\n+static inline int\n+mkey_klm_available(struct mlx5_klm *klm, uint32_t pos, uint32_t new)\n+{\n+\treturn (klm && ((pos + new) <= MLX5_REGEX_MAX_KLM_NUM));\n+}\n+\n+static inline void\n+complete_umr_wqe(struct mlx5_regex_qp *qp, struct mlx5_regex_sq *sq,\n+\t\t struct mlx5_regex_job *mkey_job,\n+\t\t size_t umr_index, uint32_t klm_size, uint32_t total_len)\n+{\n+\tsize_t wqe_offset = (umr_index & (sq_size_get(sq) - 1)) *\n+\t\t(MLX5_SEND_WQE_BB * 4);\n+\tstruct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)\n+\t\t\t\t   (uintptr_t)sq->sq_obj.wqes + wqe_offset);\n+\tstruct mlx5_wqe_umr_ctrl_seg *ucseg =\n+\t\t\t\t(struct mlx5_wqe_umr_ctrl_seg *)(wqe + 1);\n+\tstruct mlx5_wqe_mkey_context_seg *mkc =\n+\t\t\t\t(struct mlx5_wqe_mkey_context_seg *)(ucseg + 1);\n+\tstruct mlx5_klm *iklm = (struct mlx5_klm *)(mkc + 1);\n+\tuint16_t klm_align = RTE_ALIGN(klm_size, 4);\n+\n+\tmemset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);\n+\t/* Set WQE control seg. Non-inline KLM UMR WQE size must be 9 WQE_DS. */\n+\tset_wqe_ctrl_seg(wqe, (umr_index * 4), MLX5_OPCODE_UMR,\n+\t\t\t 0, sq->sq_obj.sq->id, 0, 9, 0,\n+\t\t\t rte_cpu_to_be_32(mkey_job->imkey->id));\n+\t/* Set UMR WQE control seg. */\n+\tucseg->mkey_mask |= rte_cpu_to_be_64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN |\n+\t\t\t\tMLX5_WQE_UMR_CTRL_FLAG_TRNSLATION_OFFSET |\n+\t\t\t\tMLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE);\n+\tucseg->klm_octowords = rte_cpu_to_be_16(klm_align);\n+\t/* Set mkey context seg. */\n+\tmkc->len = rte_cpu_to_be_64(total_len);\n+\tmkc->qpn_mkey = rte_cpu_to_be_32(0xffffff00 |\n+\t\t\t\t\t(mkey_job->imkey->id & 0xff));\n+\t/* Set UMR pointer to data seg. */\n+\tiklm->address = rte_cpu_to_be_64\n+\t\t\t\t((uintptr_t)((char *)mkey_job->imkey_array));\n+\tiklm->mkey = rte_cpu_to_be_32(qp->imkey_addr->lkey);\n+\tiklm->byte_count = rte_cpu_to_be_32(klm_align);\n+\t/* Clear the padding memory. */\n+\tmemset((uint8_t *)&mkey_job->imkey_array[klm_size], 0,\n+\t       sizeof(struct mlx5_klm) * (klm_align - klm_size));\n+\n+\t/* Add the following RegEx WQE with fence. */\n+\twqe = (struct mlx5_wqe_ctrl_seg *)\n+\t\t\t\t(((uint8_t *)wqe) + MLX5_REGEX_UMR_WQE_SIZE);\n+\twqe->fm_ce_se |= MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;\n+}\n+\n+static inline void\n+prep_nop_regex_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_sq *sq,\n+\t\t       struct rte_regex_ops *op, struct mlx5_regex_job *job,\n+\t\t       size_t pi, struct mlx5_klm *klm)\n+{\n+\tsize_t wqe_offset = (pi & (sq_size_get(sq) - 1)) *\n+\t\t\t    (MLX5_SEND_WQE_BB << 2);\n+\tstruct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)\n+\t\t\t\t   (uintptr_t)sq->sq_obj.wqes + wqe_offset);\n+\n+\t/* Clear the WQE memory used as UMR WQE previously. */\n+\tif ((rte_be_to_cpu_32(wqe->opmod_idx_opcode) & 0xff) != MLX5_OPCODE_NOP)\n+\t\tmemset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);\n+\t/* UMR WQE size is 9 DS, align nop WQE to 3 WQEBBS(12 DS). */\n+\tset_wqe_ctrl_seg(wqe, pi * 4, MLX5_OPCODE_NOP, 0, sq->sq_obj.sq->id,\n+\t\t\t 0, 12, 0, 0);\n+\t__prep_one(priv, sq, op, job, pi, klm);\n+}\n+\n+static inline void\n+prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,\n+\t struct mlx5_regex_sq *sq, struct rte_regex_ops **op, size_t nb_ops)\n+{\n+\tstruct mlx5_regex_job *job = NULL;\n+\tsize_t sqid = sq->sqn, mkey_job_id = 0;\n+\tsize_t left_ops = nb_ops;\n+\tuint32_t klm_num = 0, len;\n+\tstruct mlx5_klm *mkey_klm = NULL;\n+\tstruct mlx5_klm klm;\n+\n+\tsqid = sq->sqn;\n+\twhile (left_ops--)\n+\t\trte_prefetch0(op[left_ops]);\n+\tleft_ops = nb_ops;\n+\t/*\n+\t * Build the WQE set by reverse. In case the burst may consume\n+\t * multiple mkeys, build the WQE set as normal will hard to\n+\t * address the last mkey index, since we will only know the last\n+\t * RegEx WQE's index when finishes building.\n+\t */\n+\twhile (left_ops--) {\n+\t\tstruct rte_mbuf *mbuf = op[left_ops]->mbuf;\n+\t\tsize_t pi = MLX5_REGEX_UMR_SQ_PI_IDX(sq->pi, left_ops);\n+\n+\t\tif (mbuf->nb_segs > 1) {\n+\t\t\tsize_t scatter_size = 0;\n+\n+\t\t\tif (!mkey_klm_available(mkey_klm, klm_num,\n+\t\t\t\t\t\tmbuf->nb_segs)) {\n+\t\t\t\t/*\n+\t\t\t\t * The mkey's KLM is full, create the UMR\n+\t\t\t\t * WQE in the next WQE set.\n+\t\t\t\t */\n+\t\t\t\tif (mkey_klm)\n+\t\t\t\t\tcomplete_umr_wqe(qp, sq,\n+\t\t\t\t\t\t&qp->jobs[mkey_job_id],\n+\t\t\t\t\t\tMLX5_REGEX_UMR_SQ_PI_IDX(pi, 1),\n+\t\t\t\t\t\tklm_num, len);\n+\t\t\t\t/*\n+\t\t\t\t * Get the indircet mkey and KLM array index\n+\t\t\t\t * from the last WQE set.\n+\t\t\t\t */\n+\t\t\t\tmkey_job_id = job_id_get(sqid,\n+\t\t\t\t\t\t\t sq_size_get(sq), pi);\n+\t\t\t\tmkey_klm = qp->jobs[mkey_job_id].imkey_array;\n+\t\t\t\tklm_num = 0;\n+\t\t\t\tlen = 0;\n+\t\t\t}\n+\t\t\t/* Build RegEx WQE's data segment KLM. */\n+\t\t\tklm.address = len;\n+\t\t\tklm.mkey = rte_cpu_to_be_32\n+\t\t\t\t\t(qp->jobs[mkey_job_id].imkey->id);\n+\t\t\twhile (mbuf) {\n+\t\t\t\t/* Build indirect mkey seg's KLM. */\n+\t\t\t\tmkey_klm->mkey = mlx5_mr_addr2mr_bh(priv->pd,\n+\t\t\t\t\tNULL, &priv->mr_scache, &qp->mr_ctrl,\n+\t\t\t\t\trte_pktmbuf_mtod(mbuf, uintptr_t),\n+\t\t\t\t\t!!(mbuf->ol_flags & EXT_ATTACHED_MBUF));\n+\t\t\t\tmkey_klm->address = rte_cpu_to_be_64\n+\t\t\t\t\t(rte_pktmbuf_mtod(mbuf, uintptr_t));\n+\t\t\t\tmkey_klm->byte_count = rte_cpu_to_be_32\n+\t\t\t\t\t\t(rte_pktmbuf_data_len(mbuf));\n+\t\t\t\t/*\n+\t\t\t\t * Save the mbuf's total size for RegEx data\n+\t\t\t\t * segment.\n+\t\t\t\t */\n+\t\t\t\tscatter_size += rte_pktmbuf_data_len(mbuf);\n+\t\t\t\tmkey_klm++;\n+\t\t\t\tklm_num++;\n+\t\t\t\tmbuf = mbuf->next;\n+\t\t\t}\n+\t\t\tlen += scatter_size;\n+\t\t\tklm.byte_count = scatter_size;\n+\t\t} else {\n+\t\t\t/* The single mubf case. Build the KLM directly. */\n+\t\t\tklm.mkey = mlx5_mr_addr2mr_bh(priv->pd, NULL,\n+\t\t\t\t\t&priv->mr_scache, &qp->mr_ctrl,\n+\t\t\t\t\trte_pktmbuf_mtod(mbuf, uintptr_t),\n+\t\t\t\t\t!!(mbuf->ol_flags & EXT_ATTACHED_MBUF));\n+\t\t\tklm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);\n+\t\t\tklm.byte_count = rte_pktmbuf_data_len(mbuf);\n+\t\t}\n+\t\tjob = &qp->jobs[job_id_get(sqid, sq_size_get(sq), pi)];\n+\t\t/*\n+\t\t * Build the nop + RegEx WQE set by default. The fist nop WQE\n+\t\t * will be updated later as UMR WQE if scattered mubf exist.\n+\t\t */\n+\t\tprep_nop_regex_wqe_set(priv, sq, op[left_ops], job, pi, &klm);\n+\t}\n+\t/*\n+\t * Scattered mbuf have been added to the KLM array. Complete the build\n+\t * of UMR WQE, update the first nop WQE as UMR WQE.\n+\t */\n+\tif (mkey_klm)\n+\t\tcomplete_umr_wqe(qp, sq, &qp->jobs[mkey_job_id], sq->pi,\n+\t\t\t\t klm_num, len);\n+\tsq->db_pi = MLX5_REGEX_UMR_SQ_PI_IDX(sq->pi, nb_ops - 1);\n+\tsq->pi = MLX5_REGEX_UMR_SQ_PI_IDX(sq->pi, nb_ops);\n+}\n+\n+uint16_t\n+mlx5_regexdev_enqueue_gga(struct rte_regexdev *dev, uint16_t qp_id,\n+\t\t\t  struct rte_regex_ops **ops, uint16_t nb_ops)\n+{\n+\tstruct mlx5_regex_priv *priv = dev->data->dev_private;\n+\tstruct mlx5_regex_qp *queue = &priv->qps[qp_id];\n+\tstruct mlx5_regex_sq *sq;\n+\tsize_t sqid, nb_left = nb_ops, nb_desc;\n+\n+\twhile ((sqid = ffs(queue->free_sqs))) {\n+\t\tsqid--; /* ffs returns 1 for bit 0 */\n+\t\tsq = &queue->sqs[sqid];\n+\t\tnb_desc = get_free(sq);\n+\t\tif (nb_desc) {\n+\t\t\t/* The ops be handled can't exceed nb_ops. */\n+\t\t\tif (nb_desc > nb_left)\n+\t\t\t\tnb_desc = nb_left;\n+\t\t\telse\n+\t\t\t\tqueue->free_sqs &= ~(1 << sqid);\n+\t\t\tprep_regex_umr_wqe_set(priv, queue, sq, ops, nb_desc);\n+\t\t\tsend_doorbell(priv, sq);\n+\t\t\tnb_left -= nb_desc;\n+\t\t}\n+\t\tif (!nb_left)\n+\t\t\tbreak;\n+\t\tops += nb_desc;\n+\t}\n+\tnb_ops -= nb_left;\n+\tqueue->pi += nb_ops;\n+\treturn nb_ops;\n+}\n+#endif\n+\n uint16_t\n mlx5_regexdev_enqueue(struct rte_regexdev *dev, uint16_t qp_id,\n \t\t      struct rte_regex_ops **ops, uint16_t nb_ops)\n@@ -186,17 +418,17 @@ mlx5_regexdev_enqueue(struct rte_regexdev *dev, uint16_t qp_id,\n \twhile ((sqid = ffs(queue->free_sqs))) {\n \t\tsqid--; /* ffs returns 1 for bit 0 */\n \t\tsq = &queue->sqs[sqid];\n-\t\twhile (can_send(sq)) {\n+\t\twhile (get_free(sq)) {\n \t\t\tjob_id = job_id_get(sqid, sq_size_get(sq), sq->pi);\n \t\t\tprep_one(priv, queue, sq, ops[i], &queue->jobs[job_id]);\n \t\t\ti++;\n \t\t\tif (unlikely(i == nb_ops)) {\n-\t\t\t\tsend_doorbell(priv->uar, sq);\n+\t\t\t\tsend_doorbell(priv, sq);\n \t\t\t\tgoto out;\n \t\t\t}\n \t\t}\n \t\tqueue->free_sqs &= ~(1 << sqid);\n-\t\tsend_doorbell(priv->uar, sq);\n+\t\tsend_doorbell(priv, sq);\n \t}\n \n out:\n@@ -308,6 +540,10 @@ mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t qp_id,\n \t\t\t  MLX5_REGEX_MAX_WQE_INDEX;\n \t\tsize_t sqid = cqe->rsvd3[2];\n \t\tstruct mlx5_regex_sq *sq = &queue->sqs[sqid];\n+\n+\t\t/* UMR mode WQE counter move as WQE set(4 WQEBBS).*/\n+\t\tif (priv->has_umr)\n+\t\t\twq_counter >>= 2;\n \t\twhile (sq->ci != wq_counter) {\n \t\t\tif (unlikely(i == nb_ops)) {\n \t\t\t\t/* Return without updating cq->ci */\n@@ -316,7 +552,9 @@ mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t qp_id,\n \t\t\tuint32_t job_id = job_id_get(sqid, sq_size_get(sq),\n \t\t\t\t\t\t     sq->ci);\n \t\t\textract_result(ops[i], &queue->jobs[job_id]);\n-\t\t\tsq->ci = (sq->ci + 1) & MLX5_REGEX_MAX_WQE_INDEX;\n+\t\t\tsq->ci = (sq->ci + 1) & (priv->has_umr ?\n+\t\t\t\t (MLX5_REGEX_MAX_WQE_INDEX >> 2) :\n+\t\t\t\t  MLX5_REGEX_MAX_WQE_INDEX);\n \t\t\ti++;\n \t\t}\n \t\tcq->ci = (cq->ci + 1) & 0xffffff;\n@@ -331,7 +569,7 @@ mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t qp_id,\n }\n \n static void\n-setup_sqs(struct mlx5_regex_qp *queue)\n+setup_sqs(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *queue)\n {\n \tsize_t sqid, entry;\n \tuint32_t job_id;\n@@ -342,6 +580,14 @@ setup_sqs(struct mlx5_regex_qp *queue)\n \t\t\tjob_id = sqid * sq_size_get(sq) + entry;\n \t\t\tstruct mlx5_regex_job *job = &queue->jobs[job_id];\n \n+\t\t\t/* Fill UMR WQE with NOP in advanced. */\n+\t\t\tif (priv->has_umr) {\n+\t\t\t\tset_wqe_ctrl_seg\n+\t\t\t\t\t((struct mlx5_wqe_ctrl_seg *)wqe,\n+\t\t\t\t\t entry * 2, MLX5_OPCODE_NOP, 0,\n+\t\t\t\t\t sq->sq_obj.sq->id, 0, 12, 0, 0);\n+\t\t\t\twqe += MLX5_REGEX_UMR_WQE_SIZE;\n+\t\t\t}\n \t\t\tset_metadata_seg((struct mlx5_wqe_metadata_seg *)\n \t\t\t\t\t (wqe + MLX5_REGEX_WQE_METADATA_OFFSET),\n \t\t\t\t\t 0, queue->metadata->lkey,\n@@ -358,8 +604,9 @@ setup_sqs(struct mlx5_regex_qp *queue)\n }\n \n static int\n-setup_buffers(struct mlx5_regex_qp *qp, struct ibv_pd *pd)\n+setup_buffers(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp)\n {\n+\tstruct ibv_pd *pd = priv->pd;\n \tuint32_t i;\n \tint err;\n \n@@ -395,6 +642,24 @@ setup_buffers(struct mlx5_regex_qp *qp, struct ibv_pd *pd)\n \t\tgoto err_output;\n \t}\n \n+\tif (priv->has_umr) {\n+\t\tptr = rte_calloc(__func__, qp->nb_desc, MLX5_REGEX_KLMS_SIZE,\n+\t\t\t\t MLX5_REGEX_KLMS_SIZE);\n+\t\tif (!ptr) {\n+\t\t\terr = -ENOMEM;\n+\t\t\tgoto err_imkey;\n+\t\t}\n+\t\tqp->imkey_addr = mlx5_glue->reg_mr(pd, ptr,\n+\t\t\t\t\tMLX5_REGEX_KLMS_SIZE * qp->nb_desc,\n+\t\t\t\t\tIBV_ACCESS_LOCAL_WRITE);\n+\t\tif (!qp->imkey_addr) {\n+\t\t\trte_free(ptr);\n+\t\t\tDRV_LOG(ERR, \"Failed to register output\");\n+\t\t\terr = -EINVAL;\n+\t\t\tgoto err_imkey;\n+\t\t}\n+\t}\n+\n \t/* distribute buffers to jobs */\n \tfor (i = 0; i < qp->nb_desc; i++) {\n \t\tqp->jobs[i].output =\n@@ -403,9 +668,18 @@ setup_buffers(struct mlx5_regex_qp *qp, struct ibv_pd *pd)\n \t\tqp->jobs[i].metadata =\n \t\t\t(uint8_t *)qp->metadata->addr +\n \t\t\t(i % qp->nb_desc) * MLX5_REGEX_METADATA_SIZE;\n+\t\tif (qp->imkey_addr)\n+\t\t\tqp->jobs[i].imkey_array = (struct mlx5_klm *)\n+\t\t\t\tqp->imkey_addr->addr +\n+\t\t\t\t(i % qp->nb_desc) * MLX5_REGEX_MAX_KLM_NUM;\n \t}\n+\n \treturn 0;\n \n+err_imkey:\n+\tptr = qp->outputs->addr;\n+\trte_free(ptr);\n+\tmlx5_glue->dereg_mr(qp->outputs);\n err_output:\n \tptr = qp->metadata->addr;\n \trte_free(ptr);\n@@ -417,23 +691,57 @@ int\n mlx5_regexdev_setup_fastpath(struct mlx5_regex_priv *priv, uint32_t qp_id)\n {\n \tstruct mlx5_regex_qp *qp = &priv->qps[qp_id];\n-\tint err;\n+\tstruct mlx5_klm klm = { 0 };\n+\tstruct mlx5_devx_mkey_attr attr = {\n+\t\t.klm_array = &klm,\n+\t\t.klm_num = 1,\n+\t\t.umr_en = 1,\n+\t};\n+\tuint32_t i;\n+\tint err = 0;\n \n \tqp->jobs = rte_calloc(__func__, qp->nb_desc, sizeof(*qp->jobs), 64);\n \tif (!qp->jobs)\n \t\treturn -ENOMEM;\n-\terr = setup_buffers(qp, priv->pd);\n+\terr = setup_buffers(priv, qp);\n \tif (err) {\n \t\trte_free(qp->jobs);\n \t\treturn err;\n \t}\n-\tsetup_sqs(qp);\n-\treturn 0;\n+\n+\tsetup_sqs(priv, qp);\n+\n+\tif (priv->has_umr) {\n+#ifdef HAVE_IBV_FLOW_DV_SUPPORT\n+\t\tif (regex_get_pdn(priv->pd, &attr.pd)) {\n+\t\t\terr = -rte_errno;\n+\t\t\tDRV_LOG(ERR, \"Failed to get pdn.\");\n+\t\t\tmlx5_regexdev_teardown_fastpath(priv, qp_id);\n+\t\t\treturn err;\n+\t\t}\n+#endif\n+\t\tfor (i = 0; i < qp->nb_desc; i++) {\n+\t\t\tattr.klm_num = MLX5_REGEX_MAX_KLM_NUM;\n+\t\t\tattr.klm_array = qp->jobs[i].imkey_array;\n+\t\t\tqp->jobs[i].imkey = mlx5_devx_cmd_mkey_create(priv->ctx,\n+\t\t\t\t\t\t\t\t      &attr);\n+\t\t\tif (!qp->jobs[i].imkey) {\n+\t\t\t\terr = -rte_errno;\n+\t\t\t\tDRV_LOG(ERR, \"Failed to allocate imkey.\");\n+\t\t\t\tmlx5_regexdev_teardown_fastpath(priv, qp_id);\n+\t\t\t}\n+\t\t}\n+\t}\n+\treturn err;\n }\n \n static void\n free_buffers(struct mlx5_regex_qp *qp)\n {\n+\tif (qp->imkey_addr) {\n+\t\tmlx5_glue->dereg_mr(qp->imkey_addr);\n+\t\trte_free(qp->imkey_addr->addr);\n+\t}\n \tif (qp->metadata) {\n \t\tmlx5_glue->dereg_mr(qp->metadata);\n \t\trte_free(qp->metadata->addr);\n@@ -448,8 +756,14 @@ void\n mlx5_regexdev_teardown_fastpath(struct mlx5_regex_priv *priv, uint32_t qp_id)\n {\n \tstruct mlx5_regex_qp *qp = &priv->qps[qp_id];\n+\tuint32_t i;\n \n \tif (qp) {\n+\t\tfor (i = 0; i < qp->nb_desc; i++) {\n+\t\t\tif (qp->jobs[i].imkey)\n+\t\t\t\tclaim_zero(mlx5_devx_cmd_destroy\n+\t\t\t\t\t\t\t(qp->jobs[i].imkey));\n+\t\t}\n \t\tfree_buffers(qp);\n \t\tif (qp->jobs)\n \t\t\trte_free(qp->jobs);\n",
    "prefixes": [
        "v4",
        "2/4"
    ]
}