get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/56452/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 56452,
    "url": "http://patches.dpdk.org/api/patches/56452/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/1563199161-29745-2-git-send-email-viacheslavo@mellanox.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1563199161-29745-2-git-send-email-viacheslavo@mellanox.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1563199161-29745-2-git-send-email-viacheslavo@mellanox.com",
    "date": "2019-07-15T13:59:15",
    "name": "[v2,1/7] net/mlx5: remove Tx datapath implementation",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "5899d82fca01fdc4682061c852481a70a25508ed",
    "submitter": {
        "id": 1102,
        "url": "http://patches.dpdk.org/api/people/1102/?format=api",
        "name": "Slava Ovsiienko",
        "email": "viacheslavo@mellanox.com"
    },
    "delegate": {
        "id": 3268,
        "url": "http://patches.dpdk.org/api/users/3268/?format=api",
        "username": "rasland",
        "first_name": "Raslan",
        "last_name": "Darawsheh",
        "email": "rasland@nvidia.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/1563199161-29745-2-git-send-email-viacheslavo@mellanox.com/mbox/",
    "series": [
        {
            "id": 5500,
            "url": "http://patches.dpdk.org/api/series/5500/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=5500",
            "date": "2019-07-15T13:59:14",
            "name": "net/mlx5: consolidate Tx datapath",
            "version": 2,
            "mbox": "http://patches.dpdk.org/series/5500/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/56452/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/56452/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 4B6DC1B957;\n\tMon, 15 Jul 2019 15:59:39 +0200 (CEST)",
            "from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129])\n\tby dpdk.org (Postfix) with ESMTP id B53531B957\n\tfor <dev@dpdk.org>; Mon, 15 Jul 2019 15:59:37 +0200 (CEST)",
            "from Internal Mail-Server by MTLPINE2 (envelope-from\n\tviacheslavo@mellanox.com)\n\twith ESMTPS (AES256-SHA encrypted); 15 Jul 2019 16:59:33 +0300",
            "from pegasus12.mtr.labs.mlnx. (pegasus12.mtr.labs.mlnx\n\t[10.210.17.40])\n\tby labmailer.mlnx (8.13.8/8.13.8) with ESMTP id x6FDxOEK013758;\n\tMon, 15 Jul 2019 16:59:33 +0300"
        ],
        "From": "Viacheslav Ovsiienko <viacheslavo@mellanox.com>",
        "To": "dev@dpdk.org",
        "Cc": "yskoh@mellanox.com",
        "Date": "Mon, 15 Jul 2019 13:59:15 +0000",
        "Message-Id": "<1563199161-29745-2-git-send-email-viacheslavo@mellanox.com>",
        "X-Mailer": "git-send-email 1.8.3.1",
        "In-Reply-To": "<1563199161-29745-1-git-send-email-viacheslavo@mellanox.com>",
        "References": "<1562257767-19035-2-git-send-email-viacheslavo@mellanox.com>\n\t<1563199161-29745-1-git-send-email-viacheslavo@mellanox.com>",
        "Subject": "[dpdk-dev] [PATCH v2 1/7] net/mlx5: remove Tx datapath\n\timplementation",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "This patch removes the existing Tx datapath code\nas preparation step before introducing the new\nimplementation. The following entities are being\nremoved:\n\n- obsolete devargs\n- tx_burst() routines\n- related PRM definitions\n- SQ configuration code\n- Tx routine selection code\n- incompatible Tx completion code\n\nSigned-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>\n---\n drivers/net/mlx5/mlx5.c               |   43 -\n drivers/net/mlx5/mlx5.h               |    5 -\n drivers/net/mlx5/mlx5_defs.h          |   16 -\n drivers/net/mlx5/mlx5_ethdev.c        |   58 --\n drivers/net/mlx5/mlx5_prm.h           |   77 --\n drivers/net/mlx5/mlx5_rxtx.c          | 1434 +--------------------------------\n drivers/net/mlx5/mlx5_rxtx.h          |  273 -------\n drivers/net/mlx5/mlx5_rxtx_vec.c      |  175 ----\n drivers/net/mlx5/mlx5_rxtx_vec_neon.h |  289 -------\n drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |  284 -------\n drivers/net/mlx5/mlx5_txq.c           |  110 +--\n 11 files changed, 35 insertions(+), 2729 deletions(-)",
    "diff": "diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c\nindex d93f92d..42b36a7 100644\n--- a/drivers/net/mlx5/mlx5.c\n+++ b/drivers/net/mlx5/mlx5.c\n@@ -68,33 +68,15 @@\n /* Device parameter to set the minimum number of Rx queues to enable MPRQ. */\n #define MLX5_RXQS_MIN_MPRQ \"rxqs_min_mprq\"\n \n-/* Device parameter to configure inline send. */\n-#define MLX5_TXQ_INLINE \"txq_inline\"\n-\n /*\n  * Device parameter to configure the number of TX queues threshold for\n  * enabling inline send.\n  */\n #define MLX5_TXQS_MIN_INLINE \"txqs_min_inline\"\n \n-/*\n- * Device parameter to configure the number of TX queues threshold for\n- * enabling vectorized Tx.\n- */\n-#define MLX5_TXQS_MAX_VEC \"txqs_max_vec\"\n-\n /* Device parameter to enable multi-packet send WQEs. */\n #define MLX5_TXQ_MPW_EN \"txq_mpw_en\"\n \n-/* Device parameter to include 2 dsegs in the title WQEBB. */\n-#define MLX5_TXQ_MPW_HDR_DSEG_EN \"txq_mpw_hdr_dseg_en\"\n-\n-/* Device parameter to limit the size of inlining packet. */\n-#define MLX5_TXQ_MAX_INLINE_LEN \"txq_max_inline_len\"\n-\n-/* Device parameter to enable hardware Tx vector. */\n-#define MLX5_TX_VEC_EN \"tx_vec_en\"\n-\n /* Device parameter to enable hardware Rx vector. */\n #define MLX5_RX_VEC_EN \"rx_vec_en\"\n \n@@ -902,20 +884,10 @@ struct mlx5_dev_spawn_data {\n \t\tconfig->mprq.max_memcpy_len = tmp;\n \t} else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {\n \t\tconfig->mprq.min_rxqs_num = tmp;\n-\t} else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {\n-\t\tconfig->txq_inline = tmp;\n \t} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {\n \t\tconfig->txqs_inline = tmp;\n-\t} else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {\n-\t\tconfig->txqs_vec = tmp;\n \t} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {\n \t\tconfig->mps = !!tmp;\n-\t} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {\n-\t\tconfig->mpw_hdr_dseg = !!tmp;\n-\t} else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {\n-\t\tconfig->inline_max_packet_sz = tmp;\n-\t} else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {\n-\t\tconfig->tx_vec_en = !!tmp;\n \t} else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {\n \t\tconfig->rx_vec_en = !!tmp;\n \t} else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {\n@@ -960,13 +932,8 @@ struct mlx5_dev_spawn_data {\n \t\tMLX5_RX_MPRQ_LOG_STRIDE_NUM,\n \t\tMLX5_RX_MPRQ_MAX_MEMCPY_LEN,\n \t\tMLX5_RXQS_MIN_MPRQ,\n-\t\tMLX5_TXQ_INLINE,\n \t\tMLX5_TXQS_MIN_INLINE,\n-\t\tMLX5_TXQS_MAX_VEC,\n \t\tMLX5_TXQ_MPW_EN,\n-\t\tMLX5_TXQ_MPW_HDR_DSEG_EN,\n-\t\tMLX5_TXQ_MAX_INLINE_LEN,\n-\t\tMLX5_TX_VEC_EN,\n \t\tMLX5_RX_VEC_EN,\n \t\tMLX5_L3_VXLAN_EN,\n \t\tMLX5_VF_NL_EN,\n@@ -1914,12 +1881,8 @@ struct mlx5_dev_spawn_data {\n \tdev_config = (struct mlx5_dev_config){\n \t\t.hw_padding = 0,\n \t\t.mps = MLX5_ARG_UNSET,\n-\t\t.tx_vec_en = 1,\n \t\t.rx_vec_en = 1,\n-\t\t.txq_inline = MLX5_ARG_UNSET,\n \t\t.txqs_inline = MLX5_ARG_UNSET,\n-\t\t.txqs_vec = MLX5_ARG_UNSET,\n-\t\t.inline_max_packet_sz = MLX5_ARG_UNSET,\n \t\t.vf_nl_en = 1,\n \t\t.mr_ext_memseg_en = 1,\n \t\t.mprq = {\n@@ -1932,9 +1895,6 @@ struct mlx5_dev_spawn_data {\n \t};\n \t/* Device specific configuration. 
*/\n \tswitch (pci_dev->id.device_id) {\n-\tcase PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:\n-\t\tdev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;\n-\t\tbreak;\n \tcase PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:\n \tcase PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:\n \tcase PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:\n@@ -1944,9 +1904,6 @@ struct mlx5_dev_spawn_data {\n \tdefault:\n \t\tbreak;\n \t}\n-\t/* Set architecture-dependent default value if unset. */\n-\tif (dev_config.txqs_vec == MLX5_ARG_UNSET)\n-\t\tdev_config.txqs_vec = MLX5_VPMD_MAX_TXQS;\n \tfor (i = 0; i != ns; ++i) {\n \t\tuint32_t restore;\n \ndiff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h\nindex 5af3f41..b8986fc 100644\n--- a/drivers/net/mlx5/mlx5.h\n+++ b/drivers/net/mlx5/mlx5.h\n@@ -189,9 +189,7 @@ struct mlx5_dev_config {\n \tunsigned int cqe_comp:1; /* CQE compression is enabled. */\n \tunsigned int cqe_pad:1; /* CQE padding is enabled. */\n \tunsigned int tso:1; /* Whether TSO is supported. */\n-\tunsigned int tx_vec_en:1; /* Tx vector is enabled. */\n \tunsigned int rx_vec_en:1; /* Rx vector is enabled. */\n-\tunsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */\n \tunsigned int mr_ext_memseg_en:1;\n \t/* Whether memseg should be extended for MR creation. */\n \tunsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */\n@@ -215,10 +213,7 @@ struct mlx5_dev_config {\n \tunsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */\n \tunsigned int ind_table_max_size; /* Maximum indirection table size. */\n \tunsigned int max_dump_files_num; /* Maximum dump files per queue. */\n-\tint txq_inline; /* Maximum packet size for inlining. */\n \tint txqs_inline; /* Queue number threshold for inlining. */\n-\tint txqs_vec; /* Queue number threshold for vectorized Tx. */\n-\tint inline_max_packet_sz; /* Max packet size for inlining. */\n \tstruct mlx5_hca_attr hca_attr; /* HCA attributes. */\n };\n \ndiff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h\nindex 13801a5..6861304 100644\n--- a/drivers/net/mlx5/mlx5_defs.h\n+++ b/drivers/net/mlx5/mlx5_defs.h\n@@ -60,15 +60,6 @@\n /* Maximum Packet headers size (L2+L3+L4) for TSO. */\n #define MLX5_MAX_TSO_HEADER 192\n \n-/* Default maximum number of Tx queues for vectorized Tx. */\n-#if defined(RTE_ARCH_ARM64)\n-#define MLX5_VPMD_MAX_TXQS 8\n-#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16\n-#else\n-#define MLX5_VPMD_MAX_TXQS 4\n-#define MLX5_VPMD_MAX_TXQS_BLUEFIELD MLX5_VPMD_MAX_TXQS\n-#endif\n-\n /* Threshold of buffer replenishment for vectorized Rx. */\n #define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \\\n \t(RTE_MIN(MLX5_VPMD_RX_MAX_BURST, (unsigned int)(n) >> 2))\n@@ -76,13 +67,6 @@\n /* Maximum size of burst for vectorized Rx. */\n #define MLX5_VPMD_RX_MAX_BURST 64U\n \n-/*\n- * Maximum size of burst for vectorized Tx. This is related to the maximum size\n- * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.\n- * Careful when changing, large value can cause WQE DS to overlap.\n- */\n-#define MLX5_VPMD_TX_MAX_BURST        32U\n-\n /* Number of packets vectorized Rx can simultaneously process in a loop. 
*/\n #define MLX5_VPMD_DESCS_PER_LOOP      4\n \ndiff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c\nindex eeefe4d..3c3253d 100644\n--- a/drivers/net/mlx5/mlx5_ethdev.c\n+++ b/drivers/net/mlx5/mlx5_ethdev.c\n@@ -1577,64 +1577,6 @@ int mlx5_fw_version_get(struct rte_eth_dev *dev, char *fw_ver, size_t fw_size)\n }\n \n /**\n- * Configure the TX function to use.\n- *\n- * @param dev\n- *   Pointer to private data structure.\n- *\n- * @return\n- *   Pointer to selected Tx burst function.\n- */\n-eth_tx_burst_t\n-mlx5_select_tx_function(struct rte_eth_dev *dev)\n-{\n-\tstruct mlx5_priv *priv = dev->data->dev_private;\n-\teth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;\n-\tstruct mlx5_dev_config *config = &priv->config;\n-\tuint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;\n-\tint tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |\n-\t\t\t\t    DEV_TX_OFFLOAD_VXLAN_TNL_TSO |\n-\t\t\t\t    DEV_TX_OFFLOAD_GRE_TNL_TSO |\n-\t\t\t\t    DEV_TX_OFFLOAD_IP_TNL_TSO |\n-\t\t\t\t    DEV_TX_OFFLOAD_UDP_TNL_TSO));\n-\tint swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |\n-\t\t\t\t    DEV_TX_OFFLOAD_UDP_TNL_TSO |\n-\t\t\t\t    DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));\n-\tint vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);\n-\n-\tassert(priv != NULL);\n-\t/* Select appropriate TX function. */\n-\tif (vlan_insert || tso || swp)\n-\t\treturn tx_pkt_burst;\n-\tif (config->mps == MLX5_MPW_ENHANCED) {\n-\t\tif (mlx5_check_vec_tx_support(dev) > 0) {\n-\t\t\tif (mlx5_check_raw_vec_tx_support(dev) > 0)\n-\t\t\t\ttx_pkt_burst = mlx5_tx_burst_raw_vec;\n-\t\t\telse\n-\t\t\t\ttx_pkt_burst = mlx5_tx_burst_vec;\n-\t\t\tDRV_LOG(DEBUG,\n-\t\t\t\t\"port %u selected enhanced MPW Tx vectorized\"\n-\t\t\t\t\" function\",\n-\t\t\t\tdev->data->port_id);\n-\t\t} else {\n-\t\t\ttx_pkt_burst = mlx5_tx_burst_empw;\n-\t\t\tDRV_LOG(DEBUG,\n-\t\t\t\t\"port %u selected enhanced MPW Tx function\",\n-\t\t\t\tdev->data->port_id);\n-\t\t}\n-\t} else if (config->mps && (config->txq_inline > 0)) {\n-\t\ttx_pkt_burst = mlx5_tx_burst_mpw_inline;\n-\t\tDRV_LOG(DEBUG, \"port %u selected MPW inline Tx function\",\n-\t\t\tdev->data->port_id);\n-\t} else if (config->mps) {\n-\t\ttx_pkt_burst = mlx5_tx_burst_mpw;\n-\t\tDRV_LOG(DEBUG, \"port %u selected MPW Tx function\",\n-\t\t\tdev->data->port_id);\n-\t}\n-\treturn tx_pkt_burst;\n-}\n-\n-/**\n  * Configure the RX function to use.\n  *\n  * @param dev\ndiff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h\nindex fe171f1..ff5dfbb 100644\n--- a/drivers/net/mlx5/mlx5_prm.h\n+++ b/drivers/net/mlx5/mlx5_prm.h\n@@ -39,32 +39,12 @@\n /* Invalidate a CQE. */\n #define MLX5_CQE_INVALIDATE (MLX5_CQE_INVALID << 4)\n \n-/* Maximum number of packets a multi-packet WQE can handle. */\n-#define MLX5_MPW_DSEG_MAX 5\n-\n /* WQE DWORD size */\n #define MLX5_WQE_DWORD_SIZE 16\n \n /* WQE size */\n #define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)\n \n-/* Max size of a WQE session. */\n-#define MLX5_WQE_SIZE_MAX 960U\n-\n-/* Compute the number of DS. */\n-#define MLX5_WQE_DS(n) \\\n-\t(((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)\n-\n-/* Room for inline data in multi-packet WQE. */\n-#define MLX5_MWQE64_INL_DATA 28\n-\n-/* Default minimum number of Tx queues for inlining packets. */\n-#define MLX5_EMPW_MIN_TXQS 8\n-\n-/* Default max packet length to be inlined. 
*/\n-#define MLX5_EMPW_MAX_INLINE_LEN (4U * MLX5_WQE_SIZE)\n-\n-\n #define MLX5_OPC_MOD_ENHANCED_MPSW 0\n #define MLX5_OPCODE_ENHANCED_MPSW 0x29\n \n@@ -164,47 +144,11 @@ enum mlx5_completion_mode {\n \tMLX5_COMP_CQE_AND_EQE = 0x3,\n };\n \n-/* Subset of struct mlx5_wqe_eth_seg. */\n-struct mlx5_wqe_eth_seg_small {\n-\tuint32_t rsvd0;\n-\tuint8_t\tcs_flags;\n-\tuint8_t\trsvd1;\n-\tuint16_t mss;\n-\tuint32_t flow_table_metadata;\n-\tuint16_t inline_hdr_sz;\n-\tuint8_t inline_hdr[2];\n-} __rte_aligned(MLX5_WQE_DWORD_SIZE);\n-\n-struct mlx5_wqe_inl_small {\n-\tuint32_t byte_cnt;\n-\tuint8_t raw;\n-} __rte_aligned(MLX5_WQE_DWORD_SIZE);\n-\n-struct mlx5_wqe_ctrl {\n-\tuint32_t ctrl0;\n-\tuint32_t ctrl1;\n-\tuint32_t ctrl2;\n-\tuint32_t ctrl3;\n-} __rte_aligned(MLX5_WQE_DWORD_SIZE);\n-\n /* Small common part of the WQE. */\n struct mlx5_wqe {\n \tuint32_t ctrl[4];\n-\tstruct mlx5_wqe_eth_seg_small eseg;\n-};\n-\n-/* Vectorize WQE header. */\n-struct mlx5_wqe_v {\n-\trte_v128u32_t ctrl;\n-\trte_v128u32_t eseg;\n };\n \n-/* WQE. */\n-struct mlx5_wqe64 {\n-\tstruct mlx5_wqe hdr;\n-\tuint8_t raw[32];\n-} __rte_aligned(MLX5_WQE_SIZE);\n-\n /* MPW mode. */\n enum mlx5_mpw_mode {\n \tMLX5_MPW_DISABLED,\n@@ -212,27 +156,6 @@ enum mlx5_mpw_mode {\n \tMLX5_MPW_ENHANCED, /* Enhanced Multi-Packet Send WQE, a.k.a MPWv2. */\n };\n \n-/* MPW session status. */\n-enum mlx5_mpw_state {\n-\tMLX5_MPW_STATE_OPENED,\n-\tMLX5_MPW_INL_STATE_OPENED,\n-\tMLX5_MPW_ENHANCED_STATE_OPENED,\n-\tMLX5_MPW_STATE_CLOSED,\n-};\n-\n-/* MPW session descriptor. */\n-struct mlx5_mpw {\n-\tenum mlx5_mpw_state state;\n-\tunsigned int pkts_n;\n-\tunsigned int len;\n-\tunsigned int total_len;\n-\tvolatile struct mlx5_wqe *wqe;\n-\tunion {\n-\t\tvolatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];\n-\t\tvolatile uint8_t *raw;\n-\t} data;\n-};\n-\n /* WQE for Multi-Packet RQ. */\n struct mlx5_wqe_mprq {\n \tstruct mlx5_wqe_srq_next_seg next_seg;\ndiff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c\nindex c1dc8c4..f2d6918 100644\n--- a/drivers/net/mlx5/mlx5_rxtx.c\n+++ b/drivers/net/mlx5/mlx5_rxtx.c\n@@ -288,140 +288,6 @@\n }\n \n /**\n- * Return the size of tailroom of WQ.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param addr\n- *   Pointer to tail of WQ.\n- *\n- * @return\n- *   Size of tailroom.\n- */\n-static inline size_t\n-tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)\n-{\n-\tsize_t tailroom;\n-\ttailroom = (uintptr_t)(txq->wqes) +\n-\t\t   (1 << txq->wqe_n) * MLX5_WQE_SIZE -\n-\t\t   (uintptr_t)addr;\n-\treturn tailroom;\n-}\n-\n-/**\n- * Copy data to tailroom of circular queue.\n- *\n- * @param dst\n- *   Pointer to destination.\n- * @param src\n- *   Pointer to source.\n- * @param n\n- *   Number of bytes to copy.\n- * @param base\n- *   Pointer to head of queue.\n- * @param tailroom\n- *   Size of tailroom from dst.\n- *\n- * @return\n- *   Pointer after copied data.\n- */\n-static inline void *\n-mlx5_copy_to_wq(void *dst, const void *src, size_t n,\n-\t\tvoid *base, size_t tailroom)\n-{\n-\tvoid *ret;\n-\n-\tif (n > tailroom) {\n-\t\trte_memcpy(dst, src, tailroom);\n-\t\trte_memcpy(base, (void *)((uintptr_t)src + tailroom),\n-\t\t\t   n - tailroom);\n-\t\tret = (uint8_t *)base + n - tailroom;\n-\t} else {\n-\t\trte_memcpy(dst, src, n);\n-\t\tret = (n == tailroom) ? 
base : (uint8_t *)dst + n;\n-\t}\n-\treturn ret;\n-}\n-\n-/**\n- * Inline TSO headers into WQE.\n- *\n- * @return\n- *   0 on success, negative errno value on failure.\n- */\n-static int\n-inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf,\n-\t   uint32_t *length,\n-\t   uintptr_t *addr,\n-\t   uint16_t *pkt_inline_sz,\n-\t   uint8_t **raw,\n-\t   uint16_t *max_wqe,\n-\t   uint16_t *tso_segsz,\n-\t   uint16_t *tso_header_sz)\n-{\n-\tuintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) +\n-\t\t\t\t    (1 << txq->wqe_n) * MLX5_WQE_SIZE);\n-\tunsigned int copy_b;\n-\tuint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;\n-\tconst uint8_t tunneled = txq->tunnel_en && (buf->ol_flags &\n-\t\t\t\t PKT_TX_TUNNEL_MASK);\n-\tuint16_t n_wqe;\n-\n-\t*tso_segsz = buf->tso_segsz;\n-\t*tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len;\n-\tif (unlikely(*tso_segsz == 0 || *tso_header_sz == 0)) {\n-\t\ttxq->stats.oerrors++;\n-\t\treturn -EINVAL;\n-\t}\n-\tif (tunneled)\n-\t\t*tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;\n-\t/* First seg must contain all TSO headers. */\n-\tif (unlikely(*tso_header_sz > MLX5_MAX_TSO_HEADER) ||\n-\t\t     *tso_header_sz > DATA_LEN(buf)) {\n-\t\ttxq->stats.oerrors++;\n-\t\treturn -EINVAL;\n-\t}\n-\tcopy_b = *tso_header_sz - *pkt_inline_sz;\n-\tif (!copy_b || ((end - (uintptr_t)*raw) < copy_b))\n-\t\treturn -EAGAIN;\n-\tn_wqe = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;\n-\tif (unlikely(*max_wqe < n_wqe))\n-\t\treturn -EINVAL;\n-\t*max_wqe -= n_wqe;\n-\trte_memcpy((void *)*raw, (void *)*addr, copy_b);\n-\t*length -= copy_b;\n-\t*addr += copy_b;\n-\tcopy_b = MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE;\n-\t*pkt_inline_sz += copy_b;\n-\t*raw += copy_b;\n-\treturn 0;\n-}\n-\n-/**\n- * DPDK callback to check the status of a tx descriptor.\n- *\n- * @param tx_queue\n- *   The tx queue.\n- * @param[in] offset\n- *   The index of the descriptor in the ring.\n- *\n- * @return\n- *   The status of the tx descriptor.\n- */\n-int\n-mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)\n-{\n-\tstruct mlx5_txq_data *txq = tx_queue;\n-\tuint16_t used;\n-\n-\tmlx5_tx_complete(txq);\n-\tused = txq->elts_head - txq->elts_tail;\n-\tif (offset < used)\n-\t\treturn RTE_ETH_TX_DESC_FULL;\n-\treturn RTE_ETH_TX_DESC_DONE;\n-}\n-\n-/**\n  * Internal function to compute the number of used descriptors in an RX queue\n  *\n  * @param rxq\n@@ -655,7 +521,7 @@\n \t\t\t\t\t\t    (1 << txq->cqe_n));\n \t\t\tmlx5_dump_debug_information(name, \"MLX5 Error SQ:\",\n \t\t\t\t\t\t    (const void *)((uintptr_t)\n-\t\t\t\t\t\t    tx_mlx5_wqe(txq, 0)),\n+\t\t\t\t\t\t    txq->wqes),\n \t\t\t\t\t\t    MLX5_WQE_SIZE *\n \t\t\t\t\t\t    (1 << txq->wqe_n));\n \t\t\ttxq_ctrl->dump_file_n++;\n@@ -683,1247 +549,6 @@\n }\n \n /**\n- * DPDK callback for TX.\n- *\n- * @param dpdk_txq\n- *   Generic pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-uint16_t\n-mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)\n-{\n-\tstruct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tunsigned int i = 0;\n-\tunsigned int j = 0;\n-\tunsigned int k = 0;\n-\tuint16_t max_elts;\n-\tuint16_t max_wqe;\n-\tunsigned int comp;\n-\tvolatile struct mlx5_wqe_ctrl *last_wqe = NULL;\n-\tunsigned 
int segs_n = 0;\n-\tconst unsigned int max_inline = txq->max_inline;\n-\tuint64_t addr_64;\n-\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\t/* Prefetch first packet cacheline. */\n-\trte_prefetch0(*pkts);\n-\t/* Start processing. */\n-\tmlx5_tx_complete(txq);\n-\tmax_elts = (elts_n - (elts_head - txq->elts_tail));\n-\tmax_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);\n-\tif (unlikely(!max_wqe))\n-\t\treturn 0;\n-\tdo {\n-\t\tstruct rte_mbuf *buf = *pkts; /* First_seg. */\n-\t\tuint8_t *raw;\n-\t\tvolatile struct mlx5_wqe_v *wqe = NULL;\n-\t\tvolatile rte_v128u32_t *dseg = NULL;\n-\t\tuint32_t length;\n-\t\tunsigned int ds = 0;\n-\t\tunsigned int sg = 0; /* counter of additional segs attached. */\n-\t\tuintptr_t addr;\n-\t\tuint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;\n-\t\tuint16_t tso_header_sz = 0;\n-\t\tuint16_t ehdr;\n-\t\tuint8_t cs_flags;\n-\t\tuint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);\n-\t\tuint32_t swp_offsets = 0;\n-\t\tuint8_t swp_types = 0;\n-\t\trte_be32_t metadata;\n-\t\tuint16_t tso_segsz = 0;\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\tuint32_t total_length = 0;\n-#endif\n-\t\tint ret;\n-\n-\t\tsegs_n = buf->nb_segs;\n-\t\t/*\n-\t\t * Make sure there is enough room to store this packet and\n-\t\t * that one ring entry remains unused.\n-\t\t */\n-\t\tassert(segs_n);\n-\t\tif (max_elts < segs_n)\n-\t\t\tbreak;\n-\t\tmax_elts -= segs_n;\n-\t\tsg = --segs_n;\n-\t\tif (unlikely(--max_wqe == 0))\n-\t\t\tbreak;\n-\t\twqe = (volatile struct mlx5_wqe_v *)\n-\t\t\ttx_mlx5_wqe(txq, txq->wqe_ci);\n-\t\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));\n-\t\tif (pkts_n - i > 1)\n-\t\t\trte_prefetch0(*(pkts + 1));\n-\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n-\t\tlength = DATA_LEN(buf);\n-\t\tehdr = (((uint8_t *)addr)[1] << 8) |\n-\t\t       ((uint8_t *)addr)[0];\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\ttotal_length = length;\n-#endif\n-\t\tif (length < (MLX5_WQE_DWORD_SIZE + 2)) {\n-\t\t\ttxq->stats.oerrors++;\n-\t\t\tbreak;\n-\t\t}\n-\t\t/* Update element. */\n-\t\t(*txq->elts)[elts_head & elts_m] = buf;\n-\t\t/* Prefetch next buffer data. */\n-\t\tif (pkts_n - i > 1)\n-\t\t\trte_prefetch0(\n-\t\t\t    rte_pktmbuf_mtod(*(pkts + 1), volatile void *));\n-\t\tcs_flags = txq_ol_cksum_to_cs(buf);\n-\t\ttxq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);\n-\t\traw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;\n-\t\t/* Copy metadata from mbuf if valid */\n-\t\tmetadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :\n-\t\t\t\t\t\t\t     0;\n-\t\t/* Replace the Ethernet type by the VLAN if necessary. */\n-\t\tif (buf->ol_flags & PKT_TX_VLAN_PKT) {\n-\t\t\tuint32_t vlan = rte_cpu_to_be_32(0x81000000 |\n-\t\t\t\t\t\t\t buf->vlan_tci);\n-\t\t\tunsigned int len = 2 * RTE_ETHER_ADDR_LEN - 2;\n-\n-\t\t\taddr += 2;\n-\t\t\tlength -= 2;\n-\t\t\t/* Copy Destination and source mac address. */\n-\t\t\tmemcpy((uint8_t *)raw, ((uint8_t *)addr), len);\n-\t\t\t/* Copy VLAN. */\n-\t\t\tmemcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));\n-\t\t\t/* Copy missing two bytes to end the DSeg. 
*/\n-\t\t\tmemcpy((uint8_t *)raw + len + sizeof(vlan),\n-\t\t\t       ((uint8_t *)addr) + len, 2);\n-\t\t\taddr += len + 2;\n-\t\t\tlength -= (len + 2);\n-\t\t} else {\n-\t\t\tmemcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,\n-\t\t\t       MLX5_WQE_DWORD_SIZE);\n-\t\t\tlength -= pkt_inline_sz;\n-\t\t\taddr += pkt_inline_sz;\n-\t\t}\n-\t\traw += MLX5_WQE_DWORD_SIZE;\n-\t\tif (tso) {\n-\t\t\tret = inline_tso(txq, buf, &length,\n-\t\t\t\t\t &addr, &pkt_inline_sz,\n-\t\t\t\t\t &raw, &max_wqe,\n-\t\t\t\t\t &tso_segsz, &tso_header_sz);\n-\t\t\tif (ret == -EINVAL) {\n-\t\t\t\tbreak;\n-\t\t\t} else if (ret == -EAGAIN) {\n-\t\t\t\t/* NOP WQE. */\n-\t\t\t\twqe->ctrl = (rte_v128u32_t){\n-\t\t\t\t\trte_cpu_to_be_32(txq->wqe_ci << 8),\n-\t\t\t\t\trte_cpu_to_be_32(txq->qp_num_8s | 1),\n-\t\t\t\t\trte_cpu_to_be_32\n-\t\t\t\t\t\t(MLX5_COMP_ONLY_FIRST_ERR <<\n-\t\t\t\t\t\t MLX5_COMP_MODE_OFFSET),\n-\t\t\t\t\t0,\n-\t\t\t\t};\n-\t\t\t\tds = 1;\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\t\t\ttotal_length = 0;\n-#endif\n-\t\t\t\tk++;\n-\t\t\t\tgoto next_wqe;\n-\t\t\t}\n-\t\t}\n-\t\t/* Inline if enough room. */\n-\t\tif (max_inline || tso) {\n-\t\t\tuint32_t inl = 0;\n-\t\t\tuintptr_t end = (uintptr_t)\n-\t\t\t\t(((uintptr_t)txq->wqes) +\n-\t\t\t\t (1 << txq->wqe_n) * MLX5_WQE_SIZE);\n-\t\t\tunsigned int inline_room = max_inline *\n-\t\t\t\t\t\t   RTE_CACHE_LINE_SIZE -\n-\t\t\t\t\t\t   (pkt_inline_sz - 2) -\n-\t\t\t\t\t\t   !!tso * sizeof(inl);\n-\t\t\tuintptr_t addr_end;\n-\t\t\tunsigned int copy_b;\n-\n-pkt_inline:\n-\t\t\taddr_end = RTE_ALIGN_FLOOR(addr + inline_room,\n-\t\t\t\t\t\t   RTE_CACHE_LINE_SIZE);\n-\t\t\tcopy_b = (addr_end > addr) ?\n-\t\t\t\t RTE_MIN((addr_end - addr), length) : 0;\n-\t\t\tif (copy_b && ((end - (uintptr_t)raw) >\n-\t\t\t\t       (copy_b + sizeof(inl)))) {\n-\t\t\t\t/*\n-\t\t\t\t * One Dseg remains in the current WQE.  
To\n-\t\t\t\t * keep the computation positive, it is\n-\t\t\t\t * removed after the bytes to Dseg conversion.\n-\t\t\t\t */\n-\t\t\t\tuint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;\n-\n-\t\t\t\tif (unlikely(max_wqe < n))\n-\t\t\t\t\tbreak;\n-\t\t\t\tmax_wqe -= n;\n-\t\t\t\tif (tso) {\n-\t\t\t\t\tassert(inl == 0);\n-\t\t\t\t\tinl = rte_cpu_to_be_32(copy_b |\n-\t\t\t\t\t\t\t       MLX5_INLINE_SEG);\n-\t\t\t\t\trte_memcpy((void *)raw,\n-\t\t\t\t\t\t   (void *)&inl, sizeof(inl));\n-\t\t\t\t\traw += sizeof(inl);\n-\t\t\t\t\tpkt_inline_sz += sizeof(inl);\n-\t\t\t\t}\n-\t\t\t\trte_memcpy((void *)raw, (void *)addr, copy_b);\n-\t\t\t\taddr += copy_b;\n-\t\t\t\tlength -= copy_b;\n-\t\t\t\tpkt_inline_sz += copy_b;\n-\t\t\t}\n-\t\t\t/*\n-\t\t\t * 2 DWORDs consumed by the WQE header + ETH segment +\n-\t\t\t * the size of the inline part of the packet.\n-\t\t\t */\n-\t\t\tds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);\n-\t\t\tif (length > 0) {\n-\t\t\t\tif (ds % (MLX5_WQE_SIZE /\n-\t\t\t\t\t  MLX5_WQE_DWORD_SIZE) == 0) {\n-\t\t\t\t\tif (unlikely(--max_wqe == 0))\n-\t\t\t\t\t\tbreak;\n-\t\t\t\t\tdseg = (volatile rte_v128u32_t *)\n-\t\t\t\t\t       tx_mlx5_wqe(txq, txq->wqe_ci +\n-\t\t\t\t\t\t\t   ds / 4);\n-\t\t\t\t} else {\n-\t\t\t\t\tdseg = (volatile rte_v128u32_t *)\n-\t\t\t\t\t\t((uintptr_t)wqe +\n-\t\t\t\t\t\t (ds * MLX5_WQE_DWORD_SIZE));\n-\t\t\t\t}\n-\t\t\t\tgoto use_dseg;\n-\t\t\t} else if (!segs_n) {\n-\t\t\t\tgoto next_pkt;\n-\t\t\t} else {\n-\t\t\t\t/*\n-\t\t\t\t * Further inline the next segment only for\n-\t\t\t\t * non-TSO packets.\n-\t\t\t\t */\n-\t\t\t\tif (!tso) {\n-\t\t\t\t\traw += copy_b;\n-\t\t\t\t\tinline_room -= copy_b;\n-\t\t\t\t} else {\n-\t\t\t\t\tinline_room = 0;\n-\t\t\t\t}\n-\t\t\t\t/* Move to the next segment. */\n-\t\t\t\t--segs_n;\n-\t\t\t\tbuf = buf->next;\n-\t\t\t\tassert(buf);\n-\t\t\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n-\t\t\t\tlength = DATA_LEN(buf);\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\t\t\ttotal_length += length;\n-#endif\n-\t\t\t\t(*txq->elts)[++elts_head & elts_m] = buf;\n-\t\t\t\tgoto pkt_inline;\n-\t\t\t}\n-\t\t} else {\n-\t\t\t/*\n-\t\t\t * No inline has been done in the packet, only the\n-\t\t\t * Ethernet Header as been stored.\n-\t\t\t */\n-\t\t\tdseg = (volatile rte_v128u32_t *)\n-\t\t\t\t((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));\n-\t\t\tds = 3;\n-use_dseg:\n-\t\t\t/* Add the remaining packet as a simple ds. */\n-\t\t\taddr_64 = rte_cpu_to_be_64(addr);\n-\t\t\t*dseg = (rte_v128u32_t){\n-\t\t\t\trte_cpu_to_be_32(length),\n-\t\t\t\tmlx5_tx_mb2mr(txq, buf),\n-\t\t\t\taddr_64,\n-\t\t\t\taddr_64 >> 32,\n-\t\t\t};\n-\t\t\t++ds;\n-\t\t\tif (!segs_n)\n-\t\t\t\tgoto next_pkt;\n-\t\t}\n-next_seg:\n-\t\tassert(buf);\n-\t\tassert(ds);\n-\t\tassert(wqe);\n-\t\t/*\n-\t\t * Spill on next WQE when the current one does not have\n-\t\t * enough room left. Size of WQE must a be a multiple\n-\t\t * of data segment size.\n-\t\t */\n-\t\tassert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));\n-\t\tif (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {\n-\t\t\tif (unlikely(--max_wqe == 0))\n-\t\t\t\tbreak;\n-\t\t\tdseg = (volatile rte_v128u32_t *)\n-\t\t\t       tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);\n-\t\t\trte_prefetch0(tx_mlx5_wqe(txq,\n-\t\t\t\t\t\t  txq->wqe_ci + ds / 4 + 1));\n-\t\t} else {\n-\t\t\t++dseg;\n-\t\t}\n-\t\t++ds;\n-\t\tbuf = buf->next;\n-\t\tassert(buf);\n-\t\tlength = DATA_LEN(buf);\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\ttotal_length += length;\n-#endif\n-\t\t/* Store segment information. 
*/\n-\t\taddr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));\n-\t\t*dseg = (rte_v128u32_t){\n-\t\t\trte_cpu_to_be_32(length),\n-\t\t\tmlx5_tx_mb2mr(txq, buf),\n-\t\t\taddr_64,\n-\t\t\taddr_64 >> 32,\n-\t\t};\n-\t\t(*txq->elts)[++elts_head & elts_m] = buf;\n-\t\tif (--segs_n)\n-\t\t\tgoto next_seg;\n-next_pkt:\n-\t\tif (ds > MLX5_DSEG_MAX) {\n-\t\t\ttxq->stats.oerrors++;\n-\t\t\tbreak;\n-\t\t}\n-\t\t++elts_head;\n-\t\t++pkts;\n-\t\t++i;\n-\t\tj += sg;\n-\t\t/* Initialize known and common part of the WQE structure. */\n-\t\tif (tso) {\n-\t\t\twqe->ctrl = (rte_v128u32_t){\n-\t\t\t\trte_cpu_to_be_32((txq->wqe_ci << 8) |\n-\t\t\t\t\t\t MLX5_OPCODE_TSO),\n-\t\t\t\trte_cpu_to_be_32(txq->qp_num_8s | ds),\n-\t\t\t\trte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<\n-\t\t\t\t\t\t MLX5_COMP_MODE_OFFSET),\n-\t\t\t\t0,\n-\t\t\t};\n-\t\t\twqe->eseg = (rte_v128u32_t){\n-\t\t\t\tswp_offsets,\n-\t\t\t\tcs_flags | (swp_types << 8) |\n-\t\t\t\t(rte_cpu_to_be_16(tso_segsz) << 16),\n-\t\t\t\tmetadata,\n-\t\t\t\t(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),\n-\t\t\t};\n-\t\t} else {\n-\t\t\twqe->ctrl = (rte_v128u32_t){\n-\t\t\t\trte_cpu_to_be_32((txq->wqe_ci << 8) |\n-\t\t\t\t\t\t MLX5_OPCODE_SEND),\n-\t\t\t\trte_cpu_to_be_32(txq->qp_num_8s | ds),\n-\t\t\t\trte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<\n-\t\t\t\t\t\t MLX5_COMP_MODE_OFFSET),\n-\t\t\t\t0,\n-\t\t\t};\n-\t\t\twqe->eseg = (rte_v128u32_t){\n-\t\t\t\tswp_offsets,\n-\t\t\t\tcs_flags | (swp_types << 8),\n-\t\t\t\tmetadata,\n-\t\t\t\t(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),\n-\t\t\t};\n-\t\t}\n-next_wqe:\n-\t\ttxq->wqe_ci += (ds + 3) / 4;\n-\t\t/* Save the last successful WQE for completion request */\n-\t\tlast_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\t/* Increment sent bytes counter. */\n-\t\ttxq->stats.obytes += total_length;\n-#endif\n-\t} while (i < pkts_n);\n-\t/* Take a shortcut if nothing must be sent. */\n-\tif (unlikely((i + k) == 0))\n-\t\treturn 0;\n-\ttxq->elts_head += (i + j);\n-\t/* Check whether completion threshold has been reached. */\n-\tcomp = txq->elts_comp + i + j + k;\n-\tif (comp >= MLX5_TX_COMP_THRESH) {\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\t/* Request completion on last WQE. */\n-\t\tlast_wqe->ctrl2 = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<\n-\t\t\t\t\t\t   MLX5_COMP_MODE_OFFSET);\n-\t\t/* Save elts_head in unused \"immediate\" field of WQE. */\n-\t\tlast_wqe->ctrl3 = txq->elts_head;\n-\t\ttxq->elts_comp = 0;\n-\t} else {\n-\t\ttxq->elts_comp = comp;\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t/* Increment sent packets counter. */\n-\ttxq->stats.opackets += i;\n-#endif\n-\t/* Ring QP doorbell. 
*/\n-\tmlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);\n-\treturn i;\n-}\n-\n-/**\n- * Open a MPW session.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param mpw\n- *   Pointer to MPW session structure.\n- * @param length\n- *   Packet length.\n- */\n-static inline void\n-mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)\n-{\n-\tuint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);\n-\tvolatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =\n-\t\t(volatile struct mlx5_wqe_data_seg (*)[])\n-\t\ttx_mlx5_wqe(txq, idx + 1);\n-\n-\tmpw->state = MLX5_MPW_STATE_OPENED;\n-\tmpw->pkts_n = 0;\n-\tmpw->len = length;\n-\tmpw->total_len = 0;\n-\tmpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);\n-\tmpw->wqe->eseg.mss = rte_cpu_to_be_16(length);\n-\tmpw->wqe->eseg.inline_hdr_sz = 0;\n-\tmpw->wqe->eseg.rsvd0 = 0;\n-\tmpw->wqe->eseg.rsvd1 = 0;\n-\tmpw->wqe->eseg.flow_table_metadata = 0;\n-\tmpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |\n-\t\t\t\t\t     (txq->wqe_ci << 8) |\n-\t\t\t\t\t     MLX5_OPCODE_TSO);\n-\tmpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<\n-\t\t\t\t\t     MLX5_COMP_MODE_OFFSET);\n-\tmpw->wqe->ctrl[3] = 0;\n-\tmpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)\n-\t\t(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));\n-\tmpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)\n-\t\t(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));\n-\tmpw->data.dseg[2] = &(*dseg)[0];\n-\tmpw->data.dseg[3] = &(*dseg)[1];\n-\tmpw->data.dseg[4] = &(*dseg)[2];\n-}\n-\n-/**\n- * Close a MPW session.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param mpw\n- *   Pointer to MPW session structure.\n- */\n-static inline void\n-mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)\n-{\n-\tunsigned int num = mpw->pkts_n;\n-\n-\t/*\n-\t * Store size in multiple of 16 bytes. Control and Ethernet segments\n-\t * count as 2.\n-\t */\n-\tmpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));\n-\tmpw->state = MLX5_MPW_STATE_CLOSED;\n-\tif (num < 3)\n-\t\t++txq->wqe_ci;\n-\telse\n-\t\ttxq->wqe_ci += 2;\n-\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));\n-\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));\n-}\n-\n-/**\n- * DPDK callback for TX with MPW support.\n- *\n- * @param dpdk_txq\n- *   Generic pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-uint16_t\n-mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)\n-{\n-\tstruct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tunsigned int i = 0;\n-\tunsigned int j = 0;\n-\tuint16_t max_elts;\n-\tuint16_t max_wqe;\n-\tunsigned int comp;\n-\tstruct mlx5_mpw mpw = {\n-\t\t.state = MLX5_MPW_STATE_CLOSED,\n-\t};\n-\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\t/* Prefetch first packet cacheline. */\n-\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));\n-\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));\n-\t/* Start processing. 
*/\n-\tmlx5_tx_complete(txq);\n-\tmax_elts = (elts_n - (elts_head - txq->elts_tail));\n-\tmax_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);\n-\tif (unlikely(!max_wqe))\n-\t\treturn 0;\n-\tdo {\n-\t\tstruct rte_mbuf *buf = *(pkts++);\n-\t\tuint32_t length;\n-\t\tunsigned int segs_n = buf->nb_segs;\n-\t\tuint32_t cs_flags;\n-\t\trte_be32_t metadata;\n-\n-\t\t/*\n-\t\t * Make sure there is enough room to store this packet and\n-\t\t * that one ring entry remains unused.\n-\t\t */\n-\t\tassert(segs_n);\n-\t\tif (max_elts < segs_n)\n-\t\t\tbreak;\n-\t\t/* Do not bother with large packets MPW cannot handle. */\n-\t\tif (segs_n > MLX5_MPW_DSEG_MAX) {\n-\t\t\ttxq->stats.oerrors++;\n-\t\t\tbreak;\n-\t\t}\n-\t\tmax_elts -= segs_n;\n-\t\t--pkts_n;\n-\t\tcs_flags = txq_ol_cksum_to_cs(buf);\n-\t\t/* Copy metadata from mbuf if valid */\n-\t\tmetadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :\n-\t\t\t\t\t\t\t     0;\n-\t\t/* Retrieve packet information. */\n-\t\tlength = PKT_LEN(buf);\n-\t\tassert(length);\n-\t\t/* Start new session if packet differs. */\n-\t\tif ((mpw.state == MLX5_MPW_STATE_OPENED) &&\n-\t\t    ((mpw.len != length) ||\n-\t\t     (segs_n != 1) ||\n-\t\t     (mpw.wqe->eseg.flow_table_metadata != metadata) ||\n-\t\t     (mpw.wqe->eseg.cs_flags != cs_flags)))\n-\t\t\tmlx5_mpw_close(txq, &mpw);\n-\t\tif (mpw.state == MLX5_MPW_STATE_CLOSED) {\n-\t\t\t/*\n-\t\t\t * Multi-Packet WQE consumes at most two WQE.\n-\t\t\t * mlx5_mpw_new() expects to be able to use such\n-\t\t\t * resources.\n-\t\t\t */\n-\t\t\tif (unlikely(max_wqe < 2))\n-\t\t\t\tbreak;\n-\t\t\tmax_wqe -= 2;\n-\t\t\tmlx5_mpw_new(txq, &mpw, length);\n-\t\t\tmpw.wqe->eseg.cs_flags = cs_flags;\n-\t\t\tmpw.wqe->eseg.flow_table_metadata = metadata;\n-\t\t}\n-\t\t/* Multi-segment packets must be alone in their MPW. */\n-\t\tassert((segs_n == 1) || (mpw.pkts_n == 0));\n-#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)\n-\t\tlength = 0;\n-#endif\n-\t\tdo {\n-\t\t\tvolatile struct mlx5_wqe_data_seg *dseg;\n-\t\t\tuintptr_t addr;\n-\n-\t\t\tassert(buf);\n-\t\t\t(*txq->elts)[elts_head++ & elts_m] = buf;\n-\t\t\tdseg = mpw.data.dseg[mpw.pkts_n];\n-\t\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n-\t\t\t*dseg = (struct mlx5_wqe_data_seg){\n-\t\t\t\t.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),\n-\t\t\t\t.lkey = mlx5_tx_mb2mr(txq, buf),\n-\t\t\t\t.addr = rte_cpu_to_be_64(addr),\n-\t\t\t};\n-#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)\n-\t\t\tlength += DATA_LEN(buf);\n-#endif\n-\t\t\tbuf = buf->next;\n-\t\t\t++mpw.pkts_n;\n-\t\t\t++j;\n-\t\t} while (--segs_n);\n-\t\tassert(length == mpw.len);\n-\t\tif (mpw.pkts_n == MLX5_MPW_DSEG_MAX)\n-\t\t\tmlx5_mpw_close(txq, &mpw);\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\t/* Increment sent bytes counter. */\n-\t\ttxq->stats.obytes += length;\n-#endif\n-\t\t++i;\n-\t} while (pkts_n);\n-\t/* Take a shortcut if nothing must be sent. */\n-\tif (unlikely(i == 0))\n-\t\treturn 0;\n-\t/* Check whether completion threshold has been reached. */\n-\t/* \"j\" includes both packets and segments. */\n-\tcomp = txq->elts_comp + j;\n-\tif (comp >= MLX5_TX_COMP_THRESH) {\n-\t\tvolatile struct mlx5_wqe *wqe = mpw.wqe;\n-\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\t/* Request completion on last WQE. */\n-\t\twqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<\n-\t\t\t\t\t\tMLX5_COMP_MODE_OFFSET);\n-\t\t/* Save elts_head in unused \"immediate\" field of WQE. 
*/\n-\t\twqe->ctrl[3] = elts_head;\n-\t\ttxq->elts_comp = 0;\n-\t} else {\n-\t\ttxq->elts_comp = comp;\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t/* Increment sent packets counter. */\n-\ttxq->stats.opackets += i;\n-#endif\n-\t/* Ring QP doorbell. */\n-\tif (mpw.state == MLX5_MPW_STATE_OPENED)\n-\t\tmlx5_mpw_close(txq, &mpw);\n-\tmlx5_tx_dbrec(txq, mpw.wqe);\n-\ttxq->elts_head = elts_head;\n-\treturn i;\n-}\n-\n-/**\n- * Open a MPW inline session.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param mpw\n- *   Pointer to MPW session structure.\n- * @param length\n- *   Packet length.\n- */\n-static inline void\n-mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,\n-\t\t    uint32_t length)\n-{\n-\tuint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);\n-\tstruct mlx5_wqe_inl_small *inl;\n-\n-\tmpw->state = MLX5_MPW_INL_STATE_OPENED;\n-\tmpw->pkts_n = 0;\n-\tmpw->len = length;\n-\tmpw->total_len = 0;\n-\tmpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);\n-\tmpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |\n-\t\t\t\t\t     (txq->wqe_ci << 8) |\n-\t\t\t\t\t     MLX5_OPCODE_TSO);\n-\tmpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<\n-\t\t\t\t\t     MLX5_COMP_MODE_OFFSET);\n-\tmpw->wqe->ctrl[3] = 0;\n-\tmpw->wqe->eseg.mss = rte_cpu_to_be_16(length);\n-\tmpw->wqe->eseg.inline_hdr_sz = 0;\n-\tmpw->wqe->eseg.cs_flags = 0;\n-\tmpw->wqe->eseg.rsvd0 = 0;\n-\tmpw->wqe->eseg.rsvd1 = 0;\n-\tmpw->wqe->eseg.flow_table_metadata = 0;\n-\tinl = (struct mlx5_wqe_inl_small *)\n-\t\t(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);\n-\tmpw->data.raw = (uint8_t *)&inl->raw;\n-}\n-\n-/**\n- * Close a MPW inline session.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param mpw\n- *   Pointer to MPW session structure.\n- */\n-static inline void\n-mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)\n-{\n-\tunsigned int size;\n-\tstruct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)\n-\t\t(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));\n-\n-\tsize = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;\n-\t/*\n-\t * Store size in multiple of 16 bytes. 
Control and Ethernet segments\n-\t * count as 2.\n-\t */\n-\tmpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |\n-\t\t\t\t\t     MLX5_WQE_DS(size));\n-\tmpw->state = MLX5_MPW_STATE_CLOSED;\n-\tinl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);\n-\ttxq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;\n-}\n-\n-/**\n- * DPDK callback for TX with MPW inline support.\n- *\n- * @param dpdk_txq\n- *   Generic pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-uint16_t\n-mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t\t uint16_t pkts_n)\n-{\n-\tstruct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tunsigned int i = 0;\n-\tunsigned int j = 0;\n-\tuint16_t max_elts;\n-\tuint16_t max_wqe;\n-\tunsigned int comp;\n-\tunsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;\n-\tstruct mlx5_mpw mpw = {\n-\t\t.state = MLX5_MPW_STATE_CLOSED,\n-\t};\n-\t/*\n-\t * Compute the maximum number of WQE which can be consumed by inline\n-\t * code.\n-\t * - 2 DSEG for:\n-\t *   - 1 control segment,\n-\t *   - 1 Ethernet segment,\n-\t * - N Dseg from the inline request.\n-\t */\n-\tconst unsigned int wqe_inl_n =\n-\t\t((2 * MLX5_WQE_DWORD_SIZE +\n-\t\t  txq->max_inline * RTE_CACHE_LINE_SIZE) +\n-\t\t RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;\n-\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\t/* Prefetch first packet cacheline. */\n-\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));\n-\trte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));\n-\t/* Start processing. */\n-\tmlx5_tx_complete(txq);\n-\tmax_elts = (elts_n - (elts_head - txq->elts_tail));\n-\tdo {\n-\t\tstruct rte_mbuf *buf = *(pkts++);\n-\t\tuintptr_t addr;\n-\t\tuint32_t length;\n-\t\tunsigned int segs_n = buf->nb_segs;\n-\t\tuint8_t cs_flags;\n-\t\trte_be32_t metadata;\n-\n-\t\t/*\n-\t\t * Make sure there is enough room to store this packet and\n-\t\t * that one ring entry remains unused.\n-\t\t */\n-\t\tassert(segs_n);\n-\t\tif (max_elts < segs_n)\n-\t\t\tbreak;\n-\t\t/* Do not bother with large packets MPW cannot handle. */\n-\t\tif (segs_n > MLX5_MPW_DSEG_MAX) {\n-\t\t\ttxq->stats.oerrors++;\n-\t\t\tbreak;\n-\t\t}\n-\t\tmax_elts -= segs_n;\n-\t\t--pkts_n;\n-\t\t/*\n-\t\t * Compute max_wqe in case less WQE were consumed in previous\n-\t\t * iteration.\n-\t\t */\n-\t\tmax_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);\n-\t\tcs_flags = txq_ol_cksum_to_cs(buf);\n-\t\t/* Copy metadata from mbuf if valid */\n-\t\tmetadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :\n-\t\t\t\t\t\t\t     0;\n-\t\t/* Retrieve packet information. */\n-\t\tlength = PKT_LEN(buf);\n-\t\t/* Start new session if packet differs. 
*/\n-\t\tif (mpw.state == MLX5_MPW_STATE_OPENED) {\n-\t\t\tif ((mpw.len != length) ||\n-\t\t\t    (segs_n != 1) ||\n-\t\t\t    (mpw.wqe->eseg.flow_table_metadata != metadata) ||\n-\t\t\t    (mpw.wqe->eseg.cs_flags != cs_flags))\n-\t\t\t\tmlx5_mpw_close(txq, &mpw);\n-\t\t} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {\n-\t\t\tif ((mpw.len != length) ||\n-\t\t\t    (segs_n != 1) ||\n-\t\t\t    (length > inline_room) ||\n-\t\t\t    (mpw.wqe->eseg.flow_table_metadata != metadata) ||\n-\t\t\t    (mpw.wqe->eseg.cs_flags != cs_flags)) {\n-\t\t\t\tmlx5_mpw_inline_close(txq, &mpw);\n-\t\t\t\tinline_room =\n-\t\t\t\t\ttxq->max_inline * RTE_CACHE_LINE_SIZE;\n-\t\t\t}\n-\t\t}\n-\t\tif (mpw.state == MLX5_MPW_STATE_CLOSED) {\n-\t\t\tif ((segs_n != 1) ||\n-\t\t\t    (length > inline_room)) {\n-\t\t\t\t/*\n-\t\t\t\t * Multi-Packet WQE consumes at most two WQE.\n-\t\t\t\t * mlx5_mpw_new() expects to be able to use\n-\t\t\t\t * such resources.\n-\t\t\t\t */\n-\t\t\t\tif (unlikely(max_wqe < 2))\n-\t\t\t\t\tbreak;\n-\t\t\t\tmax_wqe -= 2;\n-\t\t\t\tmlx5_mpw_new(txq, &mpw, length);\n-\t\t\t\tmpw.wqe->eseg.cs_flags = cs_flags;\n-\t\t\t\tmpw.wqe->eseg.flow_table_metadata = metadata;\n-\t\t\t} else {\n-\t\t\t\tif (unlikely(max_wqe < wqe_inl_n))\n-\t\t\t\t\tbreak;\n-\t\t\t\tmax_wqe -= wqe_inl_n;\n-\t\t\t\tmlx5_mpw_inline_new(txq, &mpw, length);\n-\t\t\t\tmpw.wqe->eseg.cs_flags = cs_flags;\n-\t\t\t\tmpw.wqe->eseg.flow_table_metadata = metadata;\n-\t\t\t}\n-\t\t}\n-\t\t/* Multi-segment packets must be alone in their MPW. */\n-\t\tassert((segs_n == 1) || (mpw.pkts_n == 0));\n-\t\tif (mpw.state == MLX5_MPW_STATE_OPENED) {\n-\t\t\tassert(inline_room ==\n-\t\t\t       txq->max_inline * RTE_CACHE_LINE_SIZE);\n-#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)\n-\t\t\tlength = 0;\n-#endif\n-\t\t\tdo {\n-\t\t\t\tvolatile struct mlx5_wqe_data_seg *dseg;\n-\n-\t\t\t\tassert(buf);\n-\t\t\t\t(*txq->elts)[elts_head++ & elts_m] = buf;\n-\t\t\t\tdseg = mpw.data.dseg[mpw.pkts_n];\n-\t\t\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n-\t\t\t\t*dseg = (struct mlx5_wqe_data_seg){\n-\t\t\t\t\t.byte_count =\n-\t\t\t\t\t       rte_cpu_to_be_32(DATA_LEN(buf)),\n-\t\t\t\t\t.lkey = mlx5_tx_mb2mr(txq, buf),\n-\t\t\t\t\t.addr = rte_cpu_to_be_64(addr),\n-\t\t\t\t};\n-#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)\n-\t\t\t\tlength += DATA_LEN(buf);\n-#endif\n-\t\t\t\tbuf = buf->next;\n-\t\t\t\t++mpw.pkts_n;\n-\t\t\t\t++j;\n-\t\t\t} while (--segs_n);\n-\t\t\tassert(length == mpw.len);\n-\t\t\tif (mpw.pkts_n == MLX5_MPW_DSEG_MAX)\n-\t\t\t\tmlx5_mpw_close(txq, &mpw);\n-\t\t} else {\n-\t\t\tunsigned int max;\n-\n-\t\t\tassert(mpw.state == MLX5_MPW_INL_STATE_OPENED);\n-\t\t\tassert(length <= inline_room);\n-\t\t\tassert(length == DATA_LEN(buf));\n-\t\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n-\t\t\t(*txq->elts)[elts_head++ & elts_m] = buf;\n-\t\t\t/* Maximum number of bytes before wrapping. 
*/\n-\t\t\tmax = ((((uintptr_t)(txq->wqes)) +\n-\t\t\t\t(1 << txq->wqe_n) *\n-\t\t\t\tMLX5_WQE_SIZE) -\n-\t\t\t       (uintptr_t)mpw.data.raw);\n-\t\t\tif (length > max) {\n-\t\t\t\trte_memcpy((void *)(uintptr_t)mpw.data.raw,\n-\t\t\t\t\t   (void *)addr,\n-\t\t\t\t\t   max);\n-\t\t\t\tmpw.data.raw = (volatile void *)txq->wqes;\n-\t\t\t\trte_memcpy((void *)(uintptr_t)mpw.data.raw,\n-\t\t\t\t\t   (void *)(addr + max),\n-\t\t\t\t\t   length - max);\n-\t\t\t\tmpw.data.raw += length - max;\n-\t\t\t} else {\n-\t\t\t\trte_memcpy((void *)(uintptr_t)mpw.data.raw,\n-\t\t\t\t\t   (void *)addr,\n-\t\t\t\t\t   length);\n-\n-\t\t\t\tif (length == max)\n-\t\t\t\t\tmpw.data.raw =\n-\t\t\t\t\t\t(volatile void *)txq->wqes;\n-\t\t\t\telse\n-\t\t\t\t\tmpw.data.raw += length;\n-\t\t\t}\n-\t\t\t++mpw.pkts_n;\n-\t\t\tmpw.total_len += length;\n-\t\t\t++j;\n-\t\t\tif (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {\n-\t\t\t\tmlx5_mpw_inline_close(txq, &mpw);\n-\t\t\t\tinline_room =\n-\t\t\t\t\ttxq->max_inline * RTE_CACHE_LINE_SIZE;\n-\t\t\t} else {\n-\t\t\t\tinline_room -= length;\n-\t\t\t}\n-\t\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\t/* Increment sent bytes counter. */\n-\t\ttxq->stats.obytes += length;\n-#endif\n-\t\t++i;\n-\t} while (pkts_n);\n-\t/* Take a shortcut if nothing must be sent. */\n-\tif (unlikely(i == 0))\n-\t\treturn 0;\n-\t/* Check whether completion threshold has been reached. */\n-\t/* \"j\" includes both packets and segments. */\n-\tcomp = txq->elts_comp + j;\n-\tif (comp >= MLX5_TX_COMP_THRESH) {\n-\t\tvolatile struct mlx5_wqe *wqe = mpw.wqe;\n-\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\t/* Request completion on last WQE. */\n-\t\twqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<\n-\t\t\t\t\t\tMLX5_COMP_MODE_OFFSET);\n-\t\t/* Save elts_head in unused \"immediate\" field of WQE. */\n-\t\twqe->ctrl[3] = elts_head;\n-\t\ttxq->elts_comp = 0;\n-\t} else {\n-\t\ttxq->elts_comp = comp;\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t/* Increment sent packets counter. */\n-\ttxq->stats.opackets += i;\n-#endif\n-\t/* Ring QP doorbell. */\n-\tif (mpw.state == MLX5_MPW_INL_STATE_OPENED)\n-\t\tmlx5_mpw_inline_close(txq, &mpw);\n-\telse if (mpw.state == MLX5_MPW_STATE_OPENED)\n-\t\tmlx5_mpw_close(txq, &mpw);\n-\tmlx5_tx_dbrec(txq, mpw.wqe);\n-\ttxq->elts_head = elts_head;\n-\treturn i;\n-}\n-\n-/**\n- * Open an Enhanced MPW session.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param mpw\n- *   Pointer to MPW session structure.\n- * @param length\n- *   Packet length.\n- */\n-static inline void\n-mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)\n-{\n-\tuint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);\n-\n-\tmpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;\n-\tmpw->pkts_n = 0;\n-\tmpw->total_len = sizeof(struct mlx5_wqe);\n-\tmpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);\n-\tmpw->wqe->ctrl[0] =\n-\t\trte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |\n-\t\t\t\t (txq->wqe_ci << 8) |\n-\t\t\t\t MLX5_OPCODE_ENHANCED_MPSW);\n-\tmpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<\n-\t\t\t\t\t     MLX5_COMP_MODE_OFFSET);\n-\tmpw->wqe->ctrl[3] = 0;\n-\tmemset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);\n-\tif (unlikely(padding)) {\n-\t\tuintptr_t addr = (uintptr_t)(mpw->wqe + 1);\n-\n-\t\t/* Pad the first 2 DWORDs with zero-length inline header. 
*/\n-\t\t*(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);\n-\t\t*(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =\n-\t\t\trte_cpu_to_be_32(MLX5_INLINE_SEG);\n-\t\tmpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;\n-\t\t/* Start from the next WQEBB. */\n-\t\tmpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));\n-\t} else {\n-\t\tmpw->data.raw = (volatile void *)(mpw->wqe + 1);\n-\t}\n-}\n-\n-/**\n- * Close an Enhanced MPW session.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param mpw\n- *   Pointer to MPW session structure.\n- *\n- * @return\n- *   Number of consumed WQEs.\n- */\n-static inline uint16_t\n-mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)\n-{\n-\tuint16_t ret;\n-\n-\t/* Store size in multiple of 16 bytes. Control and Ethernet segments\n-\t * count as 2.\n-\t */\n-\tmpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |\n-\t\t\t\t\t     MLX5_WQE_DS(mpw->total_len));\n-\tmpw->state = MLX5_MPW_STATE_CLOSED;\n-\tret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;\n-\ttxq->wqe_ci += ret;\n-\treturn ret;\n-}\n-\n-/**\n- * TX with Enhanced MPW support.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-static inline uint16_t\n-txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,\n-\t       uint16_t pkts_n)\n-{\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tunsigned int i = 0;\n-\tunsigned int j = 0;\n-\tuint16_t max_elts;\n-\tuint16_t max_wqe;\n-\tunsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;\n-\tunsigned int mpw_room = 0;\n-\tunsigned int inl_pad = 0;\n-\tuint32_t inl_hdr;\n-\tuint64_t addr_64;\n-\tstruct mlx5_mpw mpw = {\n-\t\t.state = MLX5_MPW_STATE_CLOSED,\n-\t};\n-\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\t/* Start processing. */\n-\tmlx5_tx_complete(txq);\n-\tmax_elts = (elts_n - (elts_head - txq->elts_tail));\n-\tmax_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);\n-\tif (unlikely(!max_wqe))\n-\t\treturn 0;\n-\tdo {\n-\t\tstruct rte_mbuf *buf = *(pkts++);\n-\t\tuintptr_t addr;\n-\t\tunsigned int do_inline = 0; /* Whether inline is possible. */\n-\t\tuint32_t length;\n-\t\tuint8_t cs_flags;\n-\t\trte_be32_t metadata;\n-\n-\t\t/* Multi-segmented packet is handled in slow-path outside. */\n-\t\tassert(NB_SEGS(buf) == 1);\n-\t\t/* Make sure there is enough room to store this packet. */\n-\t\tif (max_elts - j == 0)\n-\t\t\tbreak;\n-\t\tcs_flags = txq_ol_cksum_to_cs(buf);\n-\t\t/* Copy metadata from mbuf if valid */\n-\t\tmetadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :\n-\t\t\t\t\t\t\t     0;\n-\t\t/* Retrieve packet information. 
*/\n-\t\tlength = PKT_LEN(buf);\n-\t\t/* Start new session if:\n-\t\t * - multi-segment packet\n-\t\t * - no space left even for a dseg\n-\t\t * - next packet can be inlined with a new WQE\n-\t\t * - cs_flag differs\n-\t\t */\n-\t\tif (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {\n-\t\t\tif ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >\n-\t\t\t     mpw_room) ||\n-\t\t\t    (length <= txq->inline_max_packet_sz &&\n-\t\t\t     inl_pad + sizeof(inl_hdr) + length >\n-\t\t\t     mpw_room) ||\n-\t\t\t     (mpw.wqe->eseg.flow_table_metadata != metadata) ||\n-\t\t\t    (mpw.wqe->eseg.cs_flags != cs_flags))\n-\t\t\t\tmax_wqe -= mlx5_empw_close(txq, &mpw);\n-\t\t}\n-\t\tif (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {\n-\t\t\t/* In Enhanced MPW, inline as much as the budget is\n-\t\t\t * allowed. The remaining space is to be filled with\n-\t\t\t * dsegs. If the title WQEBB isn't padded, it will have\n-\t\t\t * 2 dsegs there.\n-\t\t\t */\n-\t\t\tmpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,\n-\t\t\t\t\t   (max_inline ? max_inline :\n-\t\t\t\t\t    pkts_n * MLX5_WQE_DWORD_SIZE) +\n-\t\t\t\t\t   MLX5_WQE_SIZE);\n-\t\t\tif (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))\n-\t\t\t\tbreak;\n-\t\t\t/* Don't pad the title WQEBB to not waste WQ. */\n-\t\t\tmlx5_empw_new(txq, &mpw, 0);\n-\t\t\tmpw_room -= mpw.total_len;\n-\t\t\tinl_pad = 0;\n-\t\t\tdo_inline = length <= txq->inline_max_packet_sz &&\n-\t\t\t\t    sizeof(inl_hdr) + length <= mpw_room &&\n-\t\t\t\t    !txq->mpw_hdr_dseg;\n-\t\t\tmpw.wqe->eseg.cs_flags = cs_flags;\n-\t\t\tmpw.wqe->eseg.flow_table_metadata = metadata;\n-\t\t} else {\n-\t\t\t/* Evaluate whether the next packet can be inlined.\n-\t\t\t * Inlininig is possible when:\n-\t\t\t * - length is less than configured value\n-\t\t\t * - length fits for remaining space\n-\t\t\t * - not required to fill the title WQEBB with dsegs\n-\t\t\t */\n-\t\t\tdo_inline =\n-\t\t\t\tlength <= txq->inline_max_packet_sz &&\n-\t\t\t\tinl_pad + sizeof(inl_hdr) + length <=\n-\t\t\t\t mpw_room &&\n-\t\t\t\t(!txq->mpw_hdr_dseg ||\n-\t\t\t\t mpw.total_len >= MLX5_WQE_SIZE);\n-\t\t}\n-\t\tif (max_inline && do_inline) {\n-\t\t\t/* Inline packet into WQE. */\n-\t\t\tunsigned int max;\n-\n-\t\t\tassert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);\n-\t\t\tassert(length == DATA_LEN(buf));\n-\t\t\tinl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);\n-\t\t\taddr = rte_pktmbuf_mtod(buf, uintptr_t);\n-\t\t\tmpw.data.raw = (volatile void *)\n-\t\t\t\t((uintptr_t)mpw.data.raw + inl_pad);\n-\t\t\tmax = tx_mlx5_wq_tailroom(txq,\n-\t\t\t\t\t(void *)(uintptr_t)mpw.data.raw);\n-\t\t\t/* Copy inline header. */\n-\t\t\tmpw.data.raw = (volatile void *)\n-\t\t\t\tmlx5_copy_to_wq(\n-\t\t\t\t\t  (void *)(uintptr_t)mpw.data.raw,\n-\t\t\t\t\t  &inl_hdr,\n-\t\t\t\t\t  sizeof(inl_hdr),\n-\t\t\t\t\t  (void *)(uintptr_t)txq->wqes,\n-\t\t\t\t\t  max);\n-\t\t\tmax = tx_mlx5_wq_tailroom(txq,\n-\t\t\t\t\t(void *)(uintptr_t)mpw.data.raw);\n-\t\t\t/* Copy packet data. */\n-\t\t\tmpw.data.raw = (volatile void *)\n-\t\t\t\tmlx5_copy_to_wq(\n-\t\t\t\t\t  (void *)(uintptr_t)mpw.data.raw,\n-\t\t\t\t\t  (void *)addr,\n-\t\t\t\t\t  length,\n-\t\t\t\t\t  (void *)(uintptr_t)txq->wqes,\n-\t\t\t\t\t  max);\n-\t\t\t++mpw.pkts_n;\n-\t\t\tmpw.total_len += (inl_pad + sizeof(inl_hdr) + length);\n-\t\t\t/* No need to get completion as the entire packet is\n-\t\t\t * copied to WQ. Free the buf right away.\n-\t\t\t */\n-\t\t\trte_pktmbuf_free_seg(buf);\n-\t\t\tmpw_room -= (inl_pad + sizeof(inl_hdr) + length);\n-\t\t\t/* Add pad in the next packet if any. 
*/\n-\t\t\tinl_pad = (((uintptr_t)mpw.data.raw +\n-\t\t\t\t\t(MLX5_WQE_DWORD_SIZE - 1)) &\n-\t\t\t\t\t~(MLX5_WQE_DWORD_SIZE - 1)) -\n-\t\t\t\t  (uintptr_t)mpw.data.raw;\n-\t\t} else {\n-\t\t\t/* No inline. Load a dseg of packet pointer. */\n-\t\t\tvolatile rte_v128u32_t *dseg;\n-\n-\t\t\tassert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);\n-\t\t\tassert((inl_pad + sizeof(*dseg)) <= mpw_room);\n-\t\t\tassert(length == DATA_LEN(buf));\n-\t\t\tif (!tx_mlx5_wq_tailroom(txq,\n-\t\t\t\t\t(void *)((uintptr_t)mpw.data.raw\n-\t\t\t\t\t\t+ inl_pad)))\n-\t\t\t\tdseg = (volatile void *)txq->wqes;\n-\t\t\telse\n-\t\t\t\tdseg = (volatile void *)\n-\t\t\t\t\t((uintptr_t)mpw.data.raw +\n-\t\t\t\t\t inl_pad);\n-\t\t\t(*txq->elts)[elts_head++ & elts_m] = buf;\n-\t\t\taddr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,\n-\t\t\t\t\t\t\t\t    uintptr_t));\n-\t\t\t*dseg = (rte_v128u32_t) {\n-\t\t\t\trte_cpu_to_be_32(length),\n-\t\t\t\tmlx5_tx_mb2mr(txq, buf),\n-\t\t\t\taddr_64,\n-\t\t\t\taddr_64 >> 32,\n-\t\t\t};\n-\t\t\tmpw.data.raw = (volatile void *)(dseg + 1);\n-\t\t\tmpw.total_len += (inl_pad + sizeof(*dseg));\n-\t\t\t++j;\n-\t\t\t++mpw.pkts_n;\n-\t\t\tmpw_room -= (inl_pad + sizeof(*dseg));\n-\t\t\tinl_pad = 0;\n-\t\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\t/* Increment sent bytes counter. */\n-\t\ttxq->stats.obytes += length;\n-#endif\n-\t\t++i;\n-\t} while (i < pkts_n);\n-\t/* Take a shortcut if nothing must be sent. */\n-\tif (unlikely(i == 0))\n-\t\treturn 0;\n-\t/* Check whether completion threshold has been reached. */\n-\tif (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||\n-\t\t\t(uint16_t)(txq->wqe_ci - txq->mpw_comp) >=\n-\t\t\t (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {\n-\t\tvolatile struct mlx5_wqe *wqe = mpw.wqe;\n-\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\t/* Request completion on last WQE. */\n-\t\twqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<\n-\t\t\t\t\t\tMLX5_COMP_MODE_OFFSET);\n-\t\t/* Save elts_head in unused \"immediate\" field of WQE. */\n-\t\twqe->ctrl[3] = elts_head;\n-\t\ttxq->elts_comp = 0;\n-\t\ttxq->mpw_comp = txq->wqe_ci;\n-\t} else {\n-\t\ttxq->elts_comp += j;\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t/* Increment sent packets counter. */\n-\ttxq->stats.opackets += i;\n-#endif\n-\tif (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)\n-\t\tmlx5_empw_close(txq, &mpw);\n-\t/* Ring QP doorbell. 
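/*
 * Sketch of the padding computation just above: round the raw write
 * pointer up to the next 16-byte segment boundary and keep the gap as
 * "inl_pad" for the next packet. A self-contained version:
 */
#include <stdint.h>

#define SEG_ALIGN 16u /* MLX5_WQE_DWORD_SIZE in the driver */

static inline uint32_t pad_to_seg(uintptr_t raw)
{
        uintptr_t up = (raw + (SEG_ALIGN - 1)) & ~(uintptr_t)(SEG_ALIGN - 1);

        return (uint32_t)(up - raw); /* bytes of padding owed */
}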
*/\n-\tmlx5_tx_dbrec(txq, mpw.wqe);\n-\ttxq->elts_head = elts_head;\n-\treturn i;\n-}\n-\n-/**\n- * DPDK callback for TX with Enhanced MPW support.\n- *\n- * @param dpdk_txq\n- *   Generic pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-uint16_t\n-mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)\n-{\n-\tstruct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;\n-\tuint16_t nb_tx = 0;\n-\n-\twhile (pkts_n > nb_tx) {\n-\t\tuint16_t n;\n-\t\tuint16_t ret;\n-\n-\t\tn = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);\n-\t\tif (n) {\n-\t\t\tret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);\n-\t\t\tif (!ret)\n-\t\t\t\tbreak;\n-\t\t\tnb_tx += ret;\n-\t\t}\n-\t\tn = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);\n-\t\tif (n) {\n-\t\t\tret = txq_burst_empw(txq, &pkts[nb_tx], n);\n-\t\t\tif (!ret)\n-\t\t\t\tbreak;\n-\t\t\tnb_tx += ret;\n-\t\t}\n-\t}\n-\treturn nb_tx;\n-}\n-\n-/**\n  * Translate RX completion flags to packet type.\n  *\n  * @param[in] rxq\n@@ -2867,22 +1492,6 @@\n  */\n \n __rte_weak uint16_t\n-mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,\n-\t\t      struct rte_mbuf **pkts __rte_unused,\n-\t\t      uint16_t pkts_n __rte_unused)\n-{\n-\treturn 0;\n-}\n-\n-__rte_weak uint16_t\n-mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,\n-\t\t  struct rte_mbuf **pkts __rte_unused,\n-\t\t  uint16_t pkts_n __rte_unused)\n-{\n-\treturn 0;\n-}\n-\n-__rte_weak uint16_t\n mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,\n \t\t  struct rte_mbuf **pkts __rte_unused,\n \t\t  uint16_t pkts_n __rte_unused)\n@@ -2891,25 +1500,50 @@\n }\n \n __rte_weak int\n-mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)\n+mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)\n {\n \treturn -ENOTSUP;\n }\n \n __rte_weak int\n-mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)\n+mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)\n {\n \treturn -ENOTSUP;\n }\n \n-__rte_weak int\n-mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)\n+/**\n+ * DPDK callback to check the status of a tx descriptor.\n+ *\n+ * @param tx_queue\n+ *   The tx queue.\n+ * @param[in] offset\n+ *   The index of the descriptor in the ring.\n+ *\n+ * @return\n+ *   The status of the tx descriptor.\n+ */\n+int\n+mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)\n {\n-\treturn -ENOTSUP;\n+\t(void)tx_queue;\n+\t(void)offset;\n+\treturn RTE_ETH_TX_DESC_FULL;\n }\n \n-__rte_weak int\n-mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)\n+/**\n+ * Configure the TX function to use.\n+ *\n+ * @param dev\n+ *   Pointer to private data structure.\n+ *\n+ * @return\n+ *   Pointer to selected Tx burst function.\n+ */\n+eth_tx_burst_t\n+mlx5_select_tx_function(struct rte_eth_dev *dev)\n {\n-\treturn -ENOTSUP;\n+\t(void)dev;\n+\treturn removed_tx_burst;\n }\n+\n+\ndiff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h\nindex 3d79c18..acde09d 100644\n--- a/drivers/net/mlx5/mlx5_rxtx.h\n+++ b/drivers/net/mlx5/mlx5_rxtx.h\n@@ -329,14 +329,6 @@ struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,\n void mlx5_set_ptype_table(void);\n void mlx5_set_cksum_table(void);\n void mlx5_set_swp_types_table(void);\n-uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t       uint16_t pkts_n);\n-uint16_t 
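/*
 * The mlx5_tx_burst_empw() dispatcher above alternates between two paths:
 * a contiguous run of multi-segment packets goes to the legacy routine, a
 * run of single-segment packets to eMPW. A generic sketch of that pattern
 * (the callback types are hypothetical, not DPDK API):
 */
#include <stdint.h>

typedef uint16_t (*count_fn)(void **pkts, uint16_t n);
typedef uint16_t (*burst_fn)(void **pkts, uint16_t n);

static uint16_t dispatch_burst(void **pkts, uint16_t pkts_n,
                               count_fn count_multi, burst_fn slow,
                               count_fn count_single, burst_fn fast)
{
        uint16_t nb_tx = 0;

        while (pkts_n > nb_tx) {
                uint16_t n = count_multi(&pkts[nb_tx], pkts_n - nb_tx);

                if (n) {
                        uint16_t ret = slow(&pkts[nb_tx], n);

                        if (!ret)
                                break; /* queue exhausted, give up early */
                        nb_tx += ret;
                }
                n = count_single(&pkts[nb_tx], pkts_n - nb_tx);
                if (n) {
                        uint16_t ret = fast(&pkts[nb_tx], n);

                        if (!ret)
                                break;
                        nb_tx += ret;
                }
        }
        return nb_tx;
}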
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t\t   uint16_t pkts_n);\n-uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t\t\t  uint16_t pkts_n);\n-uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t\t    uint16_t pkts_n);\n __rte_noinline uint16_t mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,\n \t\t\t\t\tvolatile struct mlx5_err_cqe *err_cqe);\n uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);\n@@ -360,14 +352,8 @@ int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,\n \t\t\tconst struct mlx5_mp_arg_queue_state_modify *sm);\n \n /* Vectorized version of mlx5_rxtx.c */\n-int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);\n-int mlx5_check_vec_tx_support(struct rte_eth_dev *dev);\n int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);\n int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);\n-uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t\t       uint16_t pkts_n);\n-uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t\t   uint16_t pkts_n);\n uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,\n \t\t\t   uint16_t pkts_n);\n \n@@ -478,122 +464,6 @@ enum mlx5_cqe_status {\n }\n \n /**\n- * Return the address of the WQE.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param  wqe_ci\n- *   WQE consumer index.\n- *\n- * @return\n- *   WQE address.\n- */\n-static inline uintptr_t *\n-tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)\n-{\n-\tci &= ((1 << txq->wqe_n) - 1);\n-\treturn (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);\n-}\n-\n-/**\n- * Handle the next CQE.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- *\n- * @return\n- *   The last Tx buffer element to free.\n- */\n-static __rte_always_inline uint16_t\n-mlx5_tx_cqe_handle(struct mlx5_txq_data *txq)\n-{\n-\tconst unsigned int cqe_n = 1 << txq->cqe_n;\n-\tconst unsigned int cqe_cnt = cqe_n - 1;\n-\tuint16_t last_elts;\n-\tunion {\n-\t\tvolatile struct mlx5_cqe *cqe;\n-\t\tvolatile struct mlx5_err_cqe *err_cqe;\n-\t} u = {\n-\t\t.cqe =  &(*txq->cqes)[txq->cq_ci & cqe_cnt],\n-\t};\n-\tint ret = check_cqe(u.cqe, cqe_n, txq->cq_ci);\n-\n-\tif (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {\n-\t\tif (unlikely(ret == MLX5_CQE_STATUS_ERR))\n-\t\t\tlast_elts = mlx5_tx_error_cqe_handle(txq, u.err_cqe);\n-\t\telse\n-\t\t\t/* Do not release buffers. */\n-\t\t\treturn txq->elts_tail;\n-\t} else {\n-\t\tuint16_t new_wqe_pi = rte_be_to_cpu_16(u.cqe->wqe_counter);\n-\t\tvolatile struct mlx5_wqe_ctrl *ctrl =\n-\t\t\t\t(volatile struct mlx5_wqe_ctrl *)\n-\t\t\t\t\ttx_mlx5_wqe(txq, new_wqe_pi);\n-\n-\t\t/* Release completion burst buffers. 
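/*
 * tx_mlx5_wqe() above resolves a WQE address with a power-of-two mask:
 * the consumer index wraps around the ring and each slot is one 64-byte
 * WQEBB. A minimal sketch with illustrative parameter names:
 */
#include <stddef.h>
#include <stdint.h>

static inline void *wqe_addr(void *base, uint16_t ci, unsigned int log2_n)
{
        const uint16_t mask = (uint16_t)((1u << log2_n) - 1);

        /* wrap the index, then scale by the 64-byte WQEBB size */
        return (uint8_t *)base + (size_t)(ci & mask) * 64u;
}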
*/\n-\t\tlast_elts = ctrl->ctrl3;\n-\t\ttxq->wqe_pi = new_wqe_pi;\n-\t\ttxq->cq_ci++;\n-\t}\n-\trte_compiler_barrier();\n-\t*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);\n-\treturn last_elts;\n-}\n-\n-/**\n- * Manage TX completions.\n- *\n- * When sending a burst, mlx5_tx_burst() posts several WRs.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- */\n-static __rte_always_inline void\n-mlx5_tx_complete(struct mlx5_txq_data *txq)\n-{\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tuint16_t elts_free = txq->elts_tail;\n-\tuint16_t elts_tail;\n-\tstruct rte_mbuf *m, *free[elts_n];\n-\tstruct rte_mempool *pool = NULL;\n-\tunsigned int blk_n = 0;\n-\n-\telts_tail = mlx5_tx_cqe_handle(txq);\n-\tassert((elts_tail & elts_m) < (1 << txq->wqe_n));\n-\t/* Free buffers. */\n-\twhile (elts_free != elts_tail) {\n-\t\tm = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);\n-\t\tif (likely(m != NULL)) {\n-\t\t\tif (likely(m->pool == pool)) {\n-\t\t\t\tfree[blk_n++] = m;\n-\t\t\t} else {\n-\t\t\t\tif (likely(pool != NULL))\n-\t\t\t\t\trte_mempool_put_bulk(pool,\n-\t\t\t\t\t\t\t     (void *)free,\n-\t\t\t\t\t\t\t     blk_n);\n-\t\t\t\tfree[0] = m;\n-\t\t\t\tpool = m->pool;\n-\t\t\t\tblk_n = 1;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tif (blk_n)\n-\t\trte_mempool_put_bulk(pool, (void *)free, blk_n);\n-#ifndef NDEBUG\n-\telts_free = txq->elts_tail;\n-\t/* Poisoning. */\n-\twhile (elts_free != elts_tail) {\n-\t\tmemset(&(*txq->elts)[elts_free & elts_m],\n-\t\t       0x66,\n-\t\t       sizeof((*txq->elts)[elts_free & elts_m]));\n-\t\t++elts_free;\n-\t}\n-#endif\n-\ttxq->elts_tail = elts_tail;\n-}\n-\n-/**\n  * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the\n  * cloned mbuf is allocated is returned instead.\n  *\n@@ -710,147 +580,4 @@ enum mlx5_cqe_status {\n \tmlx5_tx_dbrec_cond_wmb(txq, wqe, 1);\n }\n \n-/**\n- * Convert mbuf to Verb SWP.\n- *\n- * @param txq_data\n- *   Pointer to the Tx queue.\n- * @param buf\n- *   Pointer to the mbuf.\n- * @param offsets\n- *   Pointer to the SWP header offsets.\n- * @param swp_types\n- *   Pointer to the SWP header types.\n- */\n-static __rte_always_inline void\n-txq_mbuf_to_swp(struct mlx5_txq_data *txq, struct rte_mbuf *buf,\n-\t\tuint8_t *offsets, uint8_t *swp_types)\n-{\n-\tconst uint64_t vlan = buf->ol_flags & PKT_TX_VLAN_PKT;\n-\tconst uint64_t tunnel = buf->ol_flags & PKT_TX_TUNNEL_MASK;\n-\tconst uint64_t tso = buf->ol_flags & PKT_TX_TCP_SEG;\n-\tconst uint64_t csum_flags = buf->ol_flags & PKT_TX_L4_MASK;\n-\tconst uint64_t inner_ip =\n-\t\tbuf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6);\n-\tconst uint64_t ol_flags_mask = PKT_TX_L4_MASK | PKT_TX_IPV6 |\n-\t\t\t\t       PKT_TX_OUTER_IPV6;\n-\tuint16_t idx;\n-\tuint16_t off;\n-\n-\tif (likely(!txq->swp_en || (tunnel != PKT_TX_TUNNEL_UDP &&\n-\t\t\t\t    tunnel != PKT_TX_TUNNEL_IP)))\n-\t\treturn;\n-\t/*\n-\t * The index should have:\n-\t * bit[0:1] = PKT_TX_L4_MASK\n-\t * bit[4] = PKT_TX_IPV6\n-\t * bit[8] = PKT_TX_OUTER_IPV6\n-\t * bit[9] = PKT_TX_OUTER_UDP\n-\t */\n-\tidx = (buf->ol_flags & ol_flags_mask) >> 52;\n-\tif (tunnel == PKT_TX_TUNNEL_UDP)\n-\t\tidx |= 1 << 9;\n-\t*swp_types = mlx5_swp_types_table[idx];\n-\t/*\n-\t * Set offsets for SW parser. Since ConnectX-5, SW parser just\n-\t * complements HW parser. SW parser starts to engage only if HW parser\n-\t * can't reach a header. For the older devices, HW parser will not kick\n-\t * in if any of SWP offsets is set. 
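/*
 * mlx5_tx_complete() above frees completed mbufs in batches, flushing a
 * batch whenever the mempool changes so a single bulk put can be issued
 * per pool. A generic sketch of that batching idiom; struct obj and the
 * put_bulk callback are stand-ins, not DPDK API:
 */
#include <stddef.h>

struct obj { void *pool; };

static void free_batched(struct obj **objs, size_t n,
                         void (*put_bulk)(void *pool, struct obj **o, size_t cnt))
{
        struct obj *batch[64];
        void *pool = NULL;
        size_t blk_n = 0;

        for (size_t i = 0; i < n; i++) {
                struct obj *m = objs[i];

                if (blk_n && m->pool == pool && blk_n < 64) {
                        batch[blk_n++] = m; /* same pool: extend the batch */
                        continue;
                }
                if (blk_n)
                        put_bulk(pool, batch, blk_n); /* pool changed: flush */
                batch[0] = m;
                pool = m->pool;
                blk_n = 1;
        }
        if (blk_n)
                put_bulk(pool, batch, blk_n); /* flush the tail */
}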
Therefore, all of the L3 offsets\n-\t * should be set regardless of HW offload.\n-\t */\n-\toff = buf->outer_l2_len + (vlan ? sizeof(struct rte_vlan_hdr) : 0);\n-\toffsets[1] = off >> 1; /* Outer L3 offset. */\n-\toff += buf->outer_l3_len;\n-\tif (tunnel == PKT_TX_TUNNEL_UDP)\n-\t\toffsets[0] = off >> 1; /* Outer L4 offset. */\n-\tif (inner_ip) {\n-\t\toff += buf->l2_len;\n-\t\toffsets[3] = off >> 1; /* Inner L3 offset. */\n-\t\tif (csum_flags == PKT_TX_TCP_CKSUM || tso ||\n-\t\t    csum_flags == PKT_TX_UDP_CKSUM) {\n-\t\t\toff += buf->l3_len;\n-\t\t\toffsets[2] = off >> 1; /* Inner L4 offset. */\n-\t\t}\n-\t}\n-}\n-\n-/**\n- * Convert the Checksum offloads to Verbs.\n- *\n- * @param buf\n- *   Pointer to the mbuf.\n- *\n- * @return\n- *   Converted checksum flags.\n- */\n-static __rte_always_inline uint8_t\n-txq_ol_cksum_to_cs(struct rte_mbuf *buf)\n-{\n-\tuint32_t idx;\n-\tuint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);\n-\tconst uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |\n-\t\t\t\t       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;\n-\n-\t/*\n-\t * The index should have:\n-\t * bit[0] = PKT_TX_TCP_SEG\n-\t * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM\n-\t * bit[4] = PKT_TX_IP_CKSUM\n-\t * bit[8] = PKT_TX_OUTER_IP_CKSUM\n-\t * bit[9] = tunnel\n-\t */\n-\tidx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);\n-\treturn mlx5_cksum_table[idx];\n-}\n-\n-/**\n- * Count the number of contiguous single segment packets.\n- *\n- * @param pkts\n- *   Pointer to array of packets.\n- * @param pkts_n\n- *   Number of packets.\n- *\n- * @return\n- *   Number of contiguous single segment packets.\n- */\n-static __rte_always_inline unsigned int\n-txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)\n-{\n-\tunsigned int pos;\n-\n-\tif (!pkts_n)\n-\t\treturn 0;\n-\t/* Count the number of contiguous single segment packets. */\n-\tfor (pos = 0; pos < pkts_n; ++pos)\n-\t\tif (NB_SEGS(pkts[pos]) > 1)\n-\t\t\tbreak;\n-\treturn pos;\n-}\n-\n-/**\n- * Count the number of contiguous multi-segment packets.\n- *\n- * @param pkts\n- *   Pointer to array of packets.\n- * @param pkts_n\n- *   Number of packets.\n- *\n- * @return\n- *   Number of contiguous multi-segment packets.\n- */\n-static __rte_always_inline unsigned int\n-txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)\n-{\n-\tunsigned int pos;\n-\n-\tif (!pkts_n)\n-\t\treturn 0;\n-\t/* Count the number of contiguous multi-segment packets. 
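/*
 * txq_ol_cksum_to_cs() above turns scattered offload flag bits into a
 * dense lookup-table index by masking and shifting them down, then OR-ing
 * a tunnel bit on top. A sketch with assumed bit positions (50..58 mirror
 * the layout described in the comment, but are not authoritative):
 */
#include <stdint.h>

#define F_TSO      (1ULL << 50)
#define F_L4_MASK  (3ULL << 52)
#define F_IP_CSUM  (1ULL << 54)
#define F_OUTER_IP (1ULL << 58)

static inline uint32_t cs_table_index(uint64_t ol_flags, int is_tunnel)
{
        const uint64_t mask = F_TSO | F_L4_MASK | F_IP_CSUM | F_OUTER_IP;

        /* bit 50 lands at bit 0; the tunnel indicator takes bit 9 */
        return (uint32_t)((ol_flags & mask) >> 50) |
               ((uint32_t)!!is_tunnel << 9);
}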
*/\n-\tfor (pos = 0; pos < pkts_n; ++pos)\n-\t\tif (NB_SEGS(pkts[pos]) == 1)\n-\t\t\tbreak;\n-\treturn pos;\n-}\n-\n #endif /* RTE_PMD_MLX5_RXTX_H_ */\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c\nindex 073044f..f6ec828 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec.c\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c\n@@ -40,138 +40,6 @@\n #endif\n \n /**\n- * Count the number of packets having same ol_flags and same metadata (if\n- * PKT_TX_METADATA is set in ol_flags), and calculate cs_flags.\n- *\n- * @param pkts\n- *   Pointer to array of packets.\n- * @param pkts_n\n- *   Number of packets.\n- * @param cs_flags\n- *   Pointer of flags to be returned.\n- * @param metadata\n- *   Pointer of metadata to be returned.\n- * @param txq_offloads\n- *   Offloads enabled on Tx queue\n- *\n- * @return\n- *   Number of packets having same ol_flags and metadata, if relevant.\n- */\n-static inline unsigned int\n-txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,\n-\t\t rte_be32_t *metadata, const uint64_t txq_offloads)\n-{\n-\tunsigned int pos;\n-\tconst uint64_t cksum_ol_mask =\n-\t\tPKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |\n-\t\tPKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |\n-\t\tPKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;\n-\trte_be32_t p0_metadata, pn_metadata;\n-\n-\tif (!pkts_n)\n-\t\treturn 0;\n-\tp0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?\n-\t\t\tpkts[0]->tx_metadata : 0;\n-\t/* Count the number of packets having same offload parameters. */\n-\tfor (pos = 1; pos < pkts_n; ++pos) {\n-\t\t/* Check if packet has same checksum flags. */\n-\t\tif ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&\n-\t\t    ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask))\n-\t\t\tbreak;\n-\t\t/* Check if packet has same metadata. 
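/*
 * txq_calc_offload() above measures how many leading packets share the
 * same checksum flags and metadata, so one WQE header can describe the
 * whole run. A reduced sketch; struct pkt is a stand-in for rte_mbuf:
 */
#include <stdint.h>

struct pkt { uint64_t ol_flags; uint32_t metadata; };

static unsigned int count_same_offload(struct pkt **pkts, uint16_t n,
                                       uint64_t cksum_mask)
{
        unsigned int pos;

        if (!n)
                return 0;
        for (pos = 1; pos < n; ++pos) {
                /* stop at the first packet whose flags or metadata differ */
                if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_mask)
                        break;
                if (pkts[pos]->metadata != pkts[0]->metadata)
                        break;
        }
        return pos;
}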
*/\n-\t\tif (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {\n-\t\t\tpn_metadata = pkts[pos]->ol_flags & PKT_TX_METADATA ?\n-\t\t\t\t\tpkts[pos]->tx_metadata : 0;\n-\t\t\tif (pn_metadata != p0_metadata)\n-\t\t\t\tbreak;\n-\t\t}\n-\t}\n-\t*cs_flags = txq_ol_cksum_to_cs(pkts[0]);\n-\t*metadata = p0_metadata;\n-\treturn pos;\n-}\n-\n-/**\n- * DPDK callback for vectorized TX.\n- *\n- * @param dpdk_txq\n- *   Generic pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-uint16_t\n-mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,\n-\t\t      uint16_t pkts_n)\n-{\n-\tstruct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;\n-\tuint16_t nb_tx = 0;\n-\n-\twhile (pkts_n > nb_tx) {\n-\t\tuint16_t n;\n-\t\tuint16_t ret;\n-\n-\t\tn = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);\n-\t\tret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);\n-\t\tnb_tx += ret;\n-\t\tif (!ret)\n-\t\t\tbreak;\n-\t}\n-\treturn nb_tx;\n-}\n-\n-/**\n- * DPDK callback for vectorized TX with multi-seg packets and offload.\n- *\n- * @param dpdk_txq\n- *   Generic pointer to TX queue structure.\n- * @param[in] pkts\n- *   Packets to transmit.\n- * @param pkts_n\n- *   Number of packets in array.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-uint16_t\n-mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)\n-{\n-\tstruct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;\n-\tuint16_t nb_tx = 0;\n-\n-\twhile (pkts_n > nb_tx) {\n-\t\tuint8_t cs_flags = 0;\n-\t\tuint16_t n;\n-\t\tuint16_t ret;\n-\t\trte_be32_t metadata = 0;\n-\n-\t\t/* Transmit multi-seg packets in the head of pkts list. */\n-\t\tif ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&\n-\t\t    NB_SEGS(pkts[nb_tx]) > 1)\n-\t\t\tnb_tx += txq_scatter_v(txq,\n-\t\t\t\t\t       &pkts[nb_tx],\n-\t\t\t\t\t       pkts_n - nb_tx);\n-\t\tn = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);\n-\t\tif (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)\n-\t\t\tn = txq_count_contig_single_seg(&pkts[nb_tx], n);\n-\t\tif (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |\n-\t\t\t\t     DEV_TX_OFFLOAD_MATCH_METADATA))\n-\t\t\tn = txq_calc_offload(&pkts[nb_tx], n,\n-\t\t\t\t\t     &cs_flags, &metadata,\n-\t\t\t\t\t     txq->offloads);\n-\t\tret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);\n-\t\tnb_tx += ret;\n-\t\tif (!ret)\n-\t\t\tbreak;\n-\t}\n-\treturn nb_tx;\n-}\n-\n-/**\n  * Skip error packets.\n  *\n  * @param rxq\n@@ -243,49 +111,6 @@\n }\n \n /**\n- * Check Tx queue flags are set for raw vectorized Tx.\n- *\n- * @param dev\n- *   Pointer to Ethernet device.\n- *\n- * @return\n- *   1 if supported, negative errno value if not.\n- */\n-int __attribute__((cold))\n-mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)\n-{\n-\tuint64_t offloads = dev->data->dev_conf.txmode.offloads;\n-\n-\t/* Doesn't support any offload. 
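/*
 * mlx5_tx_burst_raw_vec() above slices an arbitrarily long burst into
 * chunks the vector routine accepts (MLX5_VPMD_TX_MAX_BURST). A sketch of
 * the slicing loop; MAX_CHUNK and the send callback are placeholders:
 */
#include <stdint.h>

#define MAX_CHUNK 32u

static uint16_t burst_in_chunks(void **pkts, uint16_t pkts_n,
                                uint16_t (*send)(void **pkts, uint16_t n))
{
        uint16_t nb_tx = 0;

        while (pkts_n > nb_tx) {
                uint16_t n = (uint16_t)(pkts_n - nb_tx);

                if (n > MAX_CHUNK)
                        n = MAX_CHUNK;
                uint16_t ret = send(&pkts[nb_tx], n);

                nb_tx += ret;
                if (!ret)
                        break; /* nothing went out: the queue is full */
        }
        return nb_tx;
}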
*/\n-\tif (offloads)\n-\t\treturn -ENOTSUP;\n-\treturn 1;\n-}\n-\n-/**\n- * Check a device can support vectorized TX.\n- *\n- * @param dev\n- *   Pointer to Ethernet device.\n- *\n- * @return\n- *   1 if supported, negative errno value if not.\n- */\n-int __attribute__((cold))\n-mlx5_check_vec_tx_support(struct rte_eth_dev *dev)\n-{\n-\tstruct mlx5_priv *priv = dev->data->dev_private;\n-\tuint64_t offloads = dev->data->dev_conf.txmode.offloads;\n-\n-\tif (!priv->config.tx_vec_en ||\n-\t    priv->txqs_n > (unsigned int)priv->config.txqs_vec ||\n-\t    priv->config.mps != MLX5_MPW_ENHANCED ||\n-\t    offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)\n-\t\treturn -ENOTSUP;\n-\treturn 1;\n-}\n-\n-/**\n  * Check a RX queue can support vectorized RX.\n  *\n  * @param rxq\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h\nindex 1c7e3b4..9930286 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h\n@@ -27,295 +27,6 @@\n #pragma GCC diagnostic ignored \"-Wcast-qual\"\n \n /**\n- * Fill in buffer descriptors in a multi-packet send descriptor.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param dseg\n- *   Pointer to buffer descriptor to be written.\n- * @param pkts\n- *   Pointer to array of packets to be sent.\n- * @param n\n- *   Number of packets to be filled.\n- */\n-static inline void\n-txq_wr_dseg_v(struct mlx5_txq_data *txq, uint8_t *dseg,\n-\t      struct rte_mbuf **pkts, unsigned int n)\n-{\n-\tunsigned int pos;\n-\tuintptr_t addr;\n-\tconst uint8x16_t dseg_shuf_m = {\n-\t\t 3,  2,  1,  0, /* length, bswap32 */\n-\t\t 4,  5,  6,  7, /* lkey */\n-\t\t15, 14, 13, 12, /* addr, bswap64 */\n-\t\t11, 10,  9,  8\n-\t};\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\tuint32_t tx_byte = 0;\n-#endif\n-\n-\tfor (pos = 0; pos < n; ++pos, dseg += MLX5_WQE_DWORD_SIZE) {\n-\t\tuint8x16_t desc;\n-\t\tstruct rte_mbuf *pkt = pkts[pos];\n-\n-\t\taddr = rte_pktmbuf_mtod(pkt, uintptr_t);\n-\t\tdesc = vreinterpretq_u8_u32((uint32x4_t) {\n-\t\t\t\tDATA_LEN(pkt),\n-\t\t\t\tmlx5_tx_mb2mr(txq, pkt),\n-\t\t\t\taddr,\n-\t\t\t\taddr >> 32 });\n-\t\tdesc = vqtbl1q_u8(desc, dseg_shuf_m);\n-\t\tvst1q_u8(dseg, desc);\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\ttx_byte += DATA_LEN(pkt);\n-#endif\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\ttxq->stats.obytes += tx_byte;\n-#endif\n-}\n-\n-/**\n- * Send multi-segmented packets until it encounters a single segment packet in\n- * the pkts list.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param pkts\n- *   Pointer to array of packets to be sent.\n- * @param pkts_n\n- *   Number of packets to be sent.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-static uint16_t\n-txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,\n-\t      uint16_t pkts_n)\n-{\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tconst uint16_t wq_n = 1 << txq->wqe_n;\n-\tconst uint16_t wq_mask = wq_n - 1;\n-\tconst unsigned int nb_dword_per_wqebb =\n-\t\tMLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;\n-\tconst unsigned int nb_dword_in_hdr =\n-\t\tsizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;\n-\tunsigned int n;\n-\tvolatile struct mlx5_wqe *wqe = NULL;\n-\tbool metadata_ol =\n-\t\ttxq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA ? 
true : false;\n-\n-\tassert(elts_n > pkts_n);\n-\tmlx5_tx_complete(txq);\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\tfor (n = 0; n < pkts_n; ++n) {\n-\t\tstruct rte_mbuf *buf = pkts[n];\n-\t\tunsigned int segs_n = buf->nb_segs;\n-\t\tunsigned int ds = nb_dword_in_hdr;\n-\t\tunsigned int len = PKT_LEN(buf);\n-\t\tuint16_t wqe_ci = txq->wqe_ci;\n-\t\tconst uint8x16_t ctrl_shuf_m = {\n-\t\t\t3,  2,  1,  0, /* bswap32 */\n-\t\t\t7,  6,  5,  4, /* bswap32 */\n-\t\t\t11, 10,  9,  8, /* bswap32 */\n-\t\t\t12, 13, 14, 15\n-\t\t};\n-\t\tuint8_t cs_flags;\n-\t\tuint16_t max_elts;\n-\t\tuint16_t max_wqe;\n-\t\tuint8x16_t *t_wqe;\n-\t\tuint8_t *dseg;\n-\t\tuint8x16_t ctrl;\n-\t\trte_be32_t metadata =\n-\t\t\tmetadata_ol && (buf->ol_flags & PKT_TX_METADATA) ?\n-\t\t\tbuf->tx_metadata : 0;\n-\n-\t\tassert(segs_n);\n-\t\tmax_elts = elts_n - (elts_head - txq->elts_tail);\n-\t\tmax_wqe = wq_n - (txq->wqe_ci - txq->wqe_pi);\n-\t\t/*\n-\t\t * A MPW session consumes 2 WQEs at most to\n-\t\t * include MLX5_MPW_DSEG_MAX pointers.\n-\t\t */\n-\t\tif (segs_n == 1 ||\n-\t\t    max_elts < segs_n || max_wqe < 2)\n-\t\t\tbreak;\n-\t\twqe = &((volatile struct mlx5_wqe64 *)\n-\t\t\t txq->wqes)[wqe_ci & wq_mask].hdr;\n-\t\tcs_flags = txq_ol_cksum_to_cs(buf);\n-\t\t/* Title WQEBB pointer. */\n-\t\tt_wqe = (uint8x16_t *)wqe;\n-\t\tdseg = (uint8_t *)(wqe + 1);\n-\t\tdo {\n-\t\t\tif (!(ds++ % nb_dword_per_wqebb)) {\n-\t\t\t\tdseg = (uint8_t *)\n-\t\t\t\t\t&((volatile struct mlx5_wqe64 *)\n-\t\t\t\t\t   txq->wqes)[++wqe_ci & wq_mask];\n-\t\t\t}\n-\t\t\ttxq_wr_dseg_v(txq, dseg, &buf, 1);\n-\t\t\tdseg += MLX5_WQE_DWORD_SIZE;\n-\t\t\t(*txq->elts)[elts_head++ & elts_m] = buf;\n-\t\t\tbuf = buf->next;\n-\t\t} while (--segs_n);\n-\t\t++wqe_ci;\n-\t\t/* Fill CTRL in the header. */\n-\t\tctrl = vreinterpretq_u8_u32((uint32x4_t) {\n-\t\t\t\tMLX5_OPC_MOD_MPW << 24 |\n-\t\t\t\ttxq->wqe_ci << 8 | MLX5_OPCODE_TSO,\n-\t\t\t\ttxq->qp_num_8s | ds, 4, 0});\n-\t\tctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);\n-\t\tvst1q_u8((void *)t_wqe, ctrl);\n-\t\t/* Fill ESEG in the header. */\n-\t\tvst1q_u32((void *)(t_wqe + 1),\n-\t\t\t  ((uint32x4_t){ 0,\n-\t\t\t\t\t rte_cpu_to_be_16(len) << 16 | cs_flags,\n-\t\t\t\t\t metadata, 0 }));\n-\t\ttxq->wqe_ci = wqe_ci;\n-\t}\n-\tif (!n)\n-\t\treturn 0;\n-\ttxq->elts_comp += (uint16_t)(elts_head - txq->elts_head);\n-\ttxq->elts_head = elts_head;\n-\tif (txq->elts_comp >= MLX5_TX_COMP_THRESH) {\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\twqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<\n-\t\t\t\t\t\tMLX5_COMP_MODE_OFFSET);\n-\t\twqe->ctrl[3] = txq->elts_head;\n-\t\ttxq->elts_comp = 0;\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\ttxq->stats.opackets += n;\n-#endif\n-\tmlx5_tx_dbrec(txq, wqe);\n-\treturn n;\n-}\n-\n-/**\n- * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,\n- * it returns to make it processed by txq_scatter_v(). 
All the packets in\n- * the pkts list should be single segment packets having same offload flags.\n- * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param pkts\n- *   Pointer to array of packets to be sent.\n- * @param pkts_n\n- *   Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).\n- * @param cs_flags\n- *   Checksum offload flags to be written in the descriptor.\n- * @param metadata\n- *   Metadata value to be written in the descriptor.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-static inline uint16_t\n-txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,\n-\t    uint8_t cs_flags, rte_be32_t metadata)\n-{\n-\tstruct rte_mbuf **elts;\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tconst unsigned int nb_dword_per_wqebb =\n-\t\tMLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;\n-\tconst unsigned int nb_dword_in_hdr =\n-\t\tsizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;\n-\tunsigned int n = 0;\n-\tunsigned int pos;\n-\tuint16_t max_elts;\n-\tuint16_t max_wqe;\n-\tuint32_t comp_req;\n-\tconst uint16_t wq_n = 1 << txq->wqe_n;\n-\tconst uint16_t wq_mask = wq_n - 1;\n-\tuint16_t wq_idx = txq->wqe_ci & wq_mask;\n-\tvolatile struct mlx5_wqe64 *wq =\n-\t\t&((volatile struct mlx5_wqe64 *)txq->wqes)[wq_idx];\n-\tvolatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)wq;\n-\tconst uint8x16_t ctrl_shuf_m = {\n-\t\t 3,  2,  1,  0, /* bswap32 */\n-\t\t 7,  6,  5,  4, /* bswap32 */\n-\t\t11, 10,  9,  8, /* bswap32 */\n-\t\t12, 13, 14, 15\n-\t};\n-\tuint8x16_t *t_wqe;\n-\tuint8_t *dseg;\n-\tuint8x16_t ctrl;\n-\n-\t/* Make sure all packets can fit into a single WQE. */\n-\tassert(elts_n > pkts_n);\n-\tmlx5_tx_complete(txq);\n-\tmax_elts = (elts_n - (elts_head - txq->elts_tail));\n-\tmax_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);\n-\tpkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\telts = &(*txq->elts)[elts_head & elts_m];\n-\t/* Loop for available tailroom first. */\n-\tn = RTE_MIN(elts_n - (elts_head & elts_m), pkts_n);\n-\tfor (pos = 0; pos < (n & -2); pos += 2)\n-\t\tvst1q_u64((void *)&elts[pos], vld1q_u64((void *)&pkts[pos]));\n-\tif (n & 1)\n-\t\telts[pos] = pkts[pos];\n-\t/* Check if it crosses the end of the queue. */\n-\tif (unlikely(n < pkts_n)) {\n-\t\telts = &(*txq->elts)[0];\n-\t\tfor (pos = 0; pos < pkts_n - n; ++pos)\n-\t\t\telts[pos] = pkts[n + pos];\n-\t}\n-\ttxq->elts_head += pkts_n;\n-\t/* Save title WQEBB pointer. */\n-\tt_wqe = (uint8x16_t *)wqe;\n-\tdseg = (uint8_t *)(wqe + 1);\n-\t/* Calculate the number of entries to the end. */\n-\tn = RTE_MIN(\n-\t\t(wq_n - wq_idx) * nb_dword_per_wqebb - nb_dword_in_hdr,\n-\t\tpkts_n);\n-\t/* Fill DSEGs. */\n-\ttxq_wr_dseg_v(txq, dseg, pkts, n);\n-\t/* Check if it crosses the end of the queue. */\n-\tif (n < pkts_n) {\n-\t\tdseg = (uint8_t *)txq->wqes;\n-\t\ttxq_wr_dseg_v(txq, dseg, &pkts[n], pkts_n - n);\n-\t}\n-\tif (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {\n-\t\ttxq->elts_comp += pkts_n;\n-\t\tcomp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;\n-\t} else {\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\t/* Request a completion. 
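/*
 * txq_burst_v() above copies mbuf pointers into a circular SW ring; a run
 * that would pass the end of the ring is split into two copies. A generic
 * sketch of the wrap-around handling (ring_n must be a power of two):
 */
#include <stdint.h>
#include <string.h>

static void ring_write(void **ring, uint16_t ring_n, uint16_t head,
                       void **src, uint16_t n)
{
        const uint16_t mask = (uint16_t)(ring_n - 1);
        uint16_t first = (uint16_t)(ring_n - (head & mask));

        if (first > n)
                first = n;
        memcpy(&ring[head & mask], src, (size_t)first * sizeof(*src));
        if (n > first) /* wrapped: continue from slot 0 */
                memcpy(&ring[0], &src[first],
                       (size_t)(n - first) * sizeof(*src));
}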
*/\n-\t\ttxq->elts_comp = 0;\n-\t\tcomp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;\n-\t}\n-\t/* Fill CTRL in the header. */\n-\tctrl = vreinterpretq_u8_u32((uint32x4_t) {\n-\t\t\tMLX5_OPC_MOD_ENHANCED_MPSW << 24 |\n-\t\t\ttxq->wqe_ci << 8 | MLX5_OPCODE_ENHANCED_MPSW,\n-\t\t\ttxq->qp_num_8s | (pkts_n + 2),\n-\t\t\tcomp_req,\n-\t\t\ttxq->elts_head });\n-\tctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);\n-\tvst1q_u8((void *)t_wqe, ctrl);\n-\t/* Fill ESEG in the header. */\n-\tvst1q_u32((void *)(t_wqe + 1),\n-\t\t ((uint32x4_t) { 0, cs_flags, metadata, 0 }));\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\ttxq->stats.opackets += pkts_n;\n-#endif\n-\ttxq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /\n-\t\t       nb_dword_per_wqebb;\n-\t/* Ring QP doorbell. */\n-\tmlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);\n-\treturn pkts_n;\n-}\n-\n-/**\n  * Store free buffers to RX SW ring.\n  *\n  * @param rxq\ndiff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h\nindex 503ca0f..7bd254f 100644\n--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h\n+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h\n@@ -29,290 +29,6 @@\n #endif\n \n /**\n- * Fill in buffer descriptors in a multi-packet send descriptor.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param dseg\n- *   Pointer to buffer descriptor to be written.\n- * @param pkts\n- *   Pointer to array of packets to be sent.\n- * @param n\n- *   Number of packets to be filled.\n- */\n-static inline void\n-txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,\n-\t      struct rte_mbuf **pkts, unsigned int n)\n-{\n-\tunsigned int pos;\n-\tuintptr_t addr;\n-\tconst __m128i shuf_mask_dseg =\n-\t\t_mm_set_epi8(8,  9, 10, 11, /* addr, bswap64 */\n-\t\t\t    12, 13, 14, 15,\n-\t\t\t     7,  6,  5,  4, /* lkey */\n-\t\t\t     0,  1,  2,  3  /* length, bswap32 */);\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\tuint32_t tx_byte = 0;\n-#endif\n-\n-\tfor (pos = 0; pos < n; ++pos, ++dseg) {\n-\t\t__m128i desc;\n-\t\tstruct rte_mbuf *pkt = pkts[pos];\n-\n-\t\taddr = rte_pktmbuf_mtod(pkt, uintptr_t);\n-\t\tdesc = _mm_set_epi32(addr >> 32,\n-\t\t\t\t     addr,\n-\t\t\t\t     mlx5_tx_mb2mr(txq, pkt),\n-\t\t\t\t     DATA_LEN(pkt));\n-\t\tdesc = _mm_shuffle_epi8(desc, shuf_mask_dseg);\n-\t\t_mm_store_si128(dseg, desc);\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\t\ttx_byte += DATA_LEN(pkt);\n-#endif\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\ttxq->stats.obytes += tx_byte;\n-#endif\n-}\n-\n-/**\n- * Send multi-segmented packets until it encounters a single segment packet in\n- * the pkts list.\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param pkts\n- *   Pointer to array of packets to be sent.\n- * @param pkts_n\n- *   Number of packets to be sent.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-static uint16_t\n-txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,\n-\t      uint16_t pkts_n)\n-{\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tconst uint16_t wq_n = 1 << txq->wqe_n;\n-\tconst uint16_t wq_mask = wq_n - 1;\n-\tconst unsigned int nb_dword_per_wqebb =\n-\t\tMLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;\n-\tconst unsigned int nb_dword_in_hdr =\n-\t\tsizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;\n-\tunsigned int n;\n-\tvolatile struct mlx5_wqe *wqe = NULL;\n-\tbool metadata_ol =\n-\t\ttxq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA ? 
true : false;\n-\n-\tassert(elts_n > pkts_n);\n-\tmlx5_tx_complete(txq);\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\tfor (n = 0; n < pkts_n; ++n) {\n-\t\tstruct rte_mbuf *buf = pkts[n];\n-\t\tunsigned int segs_n = buf->nb_segs;\n-\t\tunsigned int ds = nb_dword_in_hdr;\n-\t\tunsigned int len = PKT_LEN(buf);\n-\t\tuint16_t wqe_ci = txq->wqe_ci;\n-\t\tconst __m128i shuf_mask_ctrl =\n-\t\t\t_mm_set_epi8(15, 14, 13, 12,\n-\t\t\t\t      8,  9, 10, 11, /* bswap32 */\n-\t\t\t\t      4,  5,  6,  7, /* bswap32 */\n-\t\t\t\t      0,  1,  2,  3  /* bswap32 */);\n-\t\tuint8_t cs_flags;\n-\t\tuint16_t max_elts;\n-\t\tuint16_t max_wqe;\n-\t\t__m128i *t_wqe, *dseg;\n-\t\t__m128i ctrl;\n-\t\trte_be32_t metadata =\n-\t\t\tmetadata_ol && (buf->ol_flags & PKT_TX_METADATA) ?\n-\t\t\tbuf->tx_metadata : 0;\n-\n-\t\tassert(segs_n);\n-\t\tmax_elts = elts_n - (elts_head - txq->elts_tail);\n-\t\tmax_wqe = wq_n - (txq->wqe_ci - txq->wqe_pi);\n-\t\t/*\n-\t\t * A MPW session consumes 2 WQEs at most to\n-\t\t * include MLX5_MPW_DSEG_MAX pointers.\n-\t\t */\n-\t\tif (segs_n == 1 ||\n-\t\t    max_elts < segs_n || max_wqe < 2)\n-\t\t\tbreak;\n-\t\tif (segs_n > MLX5_MPW_DSEG_MAX) {\n-\t\t\ttxq->stats.oerrors++;\n-\t\t\tbreak;\n-\t\t}\n-\t\twqe = &((volatile struct mlx5_wqe64 *)\n-\t\t\t txq->wqes)[wqe_ci & wq_mask].hdr;\n-\t\tcs_flags = txq_ol_cksum_to_cs(buf);\n-\t\t/* Title WQEBB pointer. */\n-\t\tt_wqe = (__m128i *)wqe;\n-\t\tdseg = (__m128i *)(wqe + 1);\n-\t\tdo {\n-\t\t\tif (!(ds++ % nb_dword_per_wqebb)) {\n-\t\t\t\tdseg = (__m128i *)\n-\t\t\t\t\t&((volatile struct mlx5_wqe64 *)\n-\t\t\t\t\t   txq->wqes)[++wqe_ci & wq_mask];\n-\t\t\t}\n-\t\t\ttxq_wr_dseg_v(txq, dseg++, &buf, 1);\n-\t\t\t(*txq->elts)[elts_head++ & elts_m] = buf;\n-\t\t\tbuf = buf->next;\n-\t\t} while (--segs_n);\n-\t\t++wqe_ci;\n-\t\t/* Fill CTRL in the header. */\n-\t\tctrl = _mm_set_epi32(0, 4, txq->qp_num_8s | ds,\n-\t\t\t\t     MLX5_OPC_MOD_MPW << 24 |\n-\t\t\t\t     txq->wqe_ci << 8 | MLX5_OPCODE_TSO);\n-\t\tctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);\n-\t\t_mm_store_si128(t_wqe, ctrl);\n-\t\t/* Fill ESEG in the header. */\n-\t\t_mm_store_si128(t_wqe + 1,\n-\t\t\t\t_mm_set_epi32(0, metadata,\n-\t\t\t\t\t      (rte_cpu_to_be_16(len) << 16) |\n-\t\t\t\t\t      cs_flags, 0));\n-\t\ttxq->wqe_ci = wqe_ci;\n-\t}\n-\tif (!n)\n-\t\treturn 0;\n-\ttxq->elts_comp += (uint16_t)(elts_head - txq->elts_head);\n-\ttxq->elts_head = elts_head;\n-\tif (txq->elts_comp >= MLX5_TX_COMP_THRESH) {\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\twqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<\n-\t\t\t\t\t\tMLX5_COMP_MODE_OFFSET);\n-\t\twqe->ctrl[3] = txq->elts_head;\n-\t\ttxq->elts_comp = 0;\n-\t}\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\ttxq->stats.opackets += n;\n-#endif\n-\tmlx5_tx_dbrec(txq, wqe);\n-\treturn n;\n-}\n-\n-/**\n- * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,\n- * it returns to make it processed by txq_scatter_v(). 
All the packets in\n- * the pkts list should be single segment packets having same offload flags.\n- * This must be checked by txq_count_contig_single_seg() and txq_calc_offload().\n- *\n- * @param txq\n- *   Pointer to TX queue structure.\n- * @param pkts\n- *   Pointer to array of packets to be sent.\n- * @param pkts_n\n- *   Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).\n- * @param cs_flags\n- *   Checksum offload flags to be written in the descriptor.\n- * @param metadata\n- *   Metadata value to be written in the descriptor.\n- *\n- * @return\n- *   Number of packets successfully transmitted (<= pkts_n).\n- */\n-static inline uint16_t\n-txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,\n-\t    uint8_t cs_flags, rte_be32_t metadata)\n-{\n-\tstruct rte_mbuf **elts;\n-\tuint16_t elts_head = txq->elts_head;\n-\tconst uint16_t elts_n = 1 << txq->elts_n;\n-\tconst uint16_t elts_m = elts_n - 1;\n-\tconst unsigned int nb_dword_per_wqebb =\n-\t\tMLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;\n-\tconst unsigned int nb_dword_in_hdr =\n-\t\tsizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;\n-\tunsigned int n = 0;\n-\tunsigned int pos;\n-\tuint16_t max_elts;\n-\tuint16_t max_wqe;\n-\tuint32_t comp_req;\n-\tconst uint16_t wq_n = 1 << txq->wqe_n;\n-\tconst uint16_t wq_mask = wq_n - 1;\n-\tuint16_t wq_idx = txq->wqe_ci & wq_mask;\n-\tvolatile struct mlx5_wqe64 *wq =\n-\t\t&((volatile struct mlx5_wqe64 *)txq->wqes)[wq_idx];\n-\tvolatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)wq;\n-\tconst __m128i shuf_mask_ctrl =\n-\t\t_mm_set_epi8(15, 14, 13, 12,\n-\t\t\t      8,  9, 10, 11, /* bswap32 */\n-\t\t\t      4,  5,  6,  7, /* bswap32 */\n-\t\t\t      0,  1,  2,  3  /* bswap32 */);\n-\t__m128i *t_wqe, *dseg;\n-\t__m128i ctrl;\n-\n-\t/* Make sure all packets can fit into a single WQE. */\n-\tassert(elts_n > pkts_n);\n-\tmlx5_tx_complete(txq);\n-\tmax_elts = (elts_n - (elts_head - txq->elts_tail));\n-\tmax_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);\n-\tpkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);\n-\tassert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);\n-\tif (unlikely(!pkts_n))\n-\t\treturn 0;\n-\telts = &(*txq->elts)[elts_head & elts_m];\n-\t/* Loop for available tailroom first. */\n-\tn = RTE_MIN(elts_n - (elts_head & elts_m), pkts_n);\n-\tfor (pos = 0; pos < (n & -2); pos += 2)\n-\t\t_mm_storeu_si128((__m128i *)&elts[pos],\n-\t\t\t\t _mm_loadu_si128((__m128i *)&pkts[pos]));\n-\tif (n & 1)\n-\t\telts[pos] = pkts[pos];\n-\t/* Check if it crosses the end of the queue. */\n-\tif (unlikely(n < pkts_n)) {\n-\t\telts = &(*txq->elts)[0];\n-\t\tfor (pos = 0; pos < pkts_n - n; ++pos)\n-\t\t\telts[pos] = pkts[n + pos];\n-\t}\n-\ttxq->elts_head += pkts_n;\n-\t/* Save title WQEBB pointer. */\n-\tt_wqe = (__m128i *)wqe;\n-\tdseg = (__m128i *)(wqe + 1);\n-\t/* Calculate the number of entries to the end. */\n-\tn = RTE_MIN(\n-\t\t(wq_n - wq_idx) * nb_dword_per_wqebb - nb_dword_in_hdr,\n-\t\tpkts_n);\n-\t/* Fill DSEGs. */\n-\ttxq_wr_dseg_v(txq, dseg, pkts, n);\n-\t/* Check if it crosses the end of the queue. */\n-\tif (n < pkts_n) {\n-\t\tdseg = (__m128i *)txq->wqes;\n-\t\ttxq_wr_dseg_v(txq, dseg, &pkts[n], pkts_n - n);\n-\t}\n-\tif (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {\n-\t\ttxq->elts_comp += pkts_n;\n-\t\tcomp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;\n-\t} else {\n-\t\t/* A CQE slot must always be available. */\n-\t\tassert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));\n-\t\t/* Request a completion. 
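/*
 * Both vector paths above elide the completion request until enough
 * packets have accumulated, amortizing CQE processing across bursts. A
 * sketch of that thresholding; COMP_THRESH stands in for the driver's
 * MLX5_TX_COMP_THRESH:
 */
#include <stdbool.h>
#include <stdint.h>

#define COMP_THRESH 32u

static inline bool need_completion(uint16_t *elts_comp, uint16_t just_sent)
{
        if (*elts_comp + just_sent < COMP_THRESH) {
                *elts_comp = (uint16_t)(*elts_comp + just_sent); /* defer */
                return false;
        }
        *elts_comp = 0; /* request a CQE for this WQE */
        return true;
}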
*/\n-\t\ttxq->elts_comp = 0;\n-\t\tcomp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;\n-\t}\n-\t/* Fill CTRL in the header. */\n-\tctrl = _mm_set_epi32(txq->elts_head, comp_req,\n-\t\t\t     txq->qp_num_8s | (pkts_n + 2),\n-\t\t\t     MLX5_OPC_MOD_ENHANCED_MPSW << 24 |\n-\t\t\t\ttxq->wqe_ci << 8 | MLX5_OPCODE_ENHANCED_MPSW);\n-\tctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);\n-\t_mm_store_si128(t_wqe, ctrl);\n-\t/* Fill ESEG in the header. */\n-\t_mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));\n-#ifdef MLX5_PMD_SOFT_COUNTERS\n-\ttxq->stats.opackets += pkts_n;\n-#endif\n-\ttxq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /\n-\t\t       nb_dword_per_wqebb;\n-\t/* Ring QP doorbell. */\n-\tmlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);\n-\treturn pkts_n;\n-}\n-\n-/**\n  * Store free buffers to RX SW ring.\n  *\n  * @param rxq\ndiff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c\nindex 7741095..0d2dbfa 100644\n--- a/drivers/net/mlx5/mlx5_txq.c\n+++ b/drivers/net/mlx5/mlx5_txq.c\n@@ -365,25 +365,6 @@\n }\n \n /**\n- * Check if the burst function is using eMPW.\n- *\n- * @param tx_pkt_burst\n- *   Tx burst function pointer.\n- *\n- * @return\n- *   1 if the burst function is using eMPW, 0 otherwise.\n- */\n-static int\n-is_empw_burst_func(eth_tx_burst_t tx_pkt_burst)\n-{\n-\tif (tx_pkt_burst == mlx5_tx_burst_raw_vec ||\n-\t    tx_pkt_burst == mlx5_tx_burst_vec ||\n-\t    tx_pkt_burst == mlx5_tx_burst_empw)\n-\t\treturn 1;\n-\treturn 0;\n-}\n-\n-/**\n  * Create the Tx queue Verbs object.\n  *\n  * @param dev\n@@ -414,7 +395,6 @@ struct mlx5_txq_ibv *\n \tstruct mlx5dv_cq cq_info;\n \tstruct mlx5dv_obj obj;\n \tconst int desc = 1 << txq_data->elts_n;\n-\teth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);\n \tint ret = 0;\n \n \tassert(txq_data);\n@@ -432,8 +412,6 @@ struct mlx5_txq_ibv *\n \t\t.comp_mask = 0,\n \t};\n \tcqe_n = desc / MLX5_TX_COMP_THRESH + 1;\n-\tif (is_empw_burst_func(tx_pkt_burst))\n-\t\tcqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;\n \ttmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);\n \tif (tmpl.cq == NULL) {\n \t\tDRV_LOG(ERR, \"port %u Tx queue %u CQ creation failure\",\n@@ -698,93 +676,7 @@ struct mlx5_txq_ibv *\n static void\n txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)\n {\n-\tstruct mlx5_priv *priv = txq_ctrl->priv;\n-\tstruct mlx5_dev_config *config = &priv->config;\n-\tconst unsigned int max_tso_inline =\n-\t\t((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /\n-\t\t RTE_CACHE_LINE_SIZE);\n-\tunsigned int txq_inline;\n-\tunsigned int txqs_inline;\n-\tunsigned int inline_max_packet_sz;\n-\teth_tx_burst_t tx_pkt_burst =\n-\t\tmlx5_select_tx_function(ETH_DEV(priv));\n-\tint is_empw_func = is_empw_burst_func(tx_pkt_burst);\n-\tint tso = !!(txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO |\n-\t\t\t\t\t       DEV_TX_OFFLOAD_VXLAN_TNL_TSO |\n-\t\t\t\t\t       DEV_TX_OFFLOAD_GRE_TNL_TSO |\n-\t\t\t\t\t       DEV_TX_OFFLOAD_IP_TNL_TSO |\n-\t\t\t\t\t       DEV_TX_OFFLOAD_UDP_TNL_TSO));\n-\n-\ttxq_inline = (config->txq_inline == MLX5_ARG_UNSET) ?\n-\t\t0 : config->txq_inline;\n-\ttxqs_inline = (config->txqs_inline == MLX5_ARG_UNSET) ?\n-\t\t0 : config->txqs_inline;\n-\tinline_max_packet_sz =\n-\t\t(config->inline_max_packet_sz == MLX5_ARG_UNSET) ?\n-\t\t0 : config->inline_max_packet_sz;\n-\tif (is_empw_func) {\n-\t\tif (config->txq_inline == MLX5_ARG_UNSET)\n-\t\t\ttxq_inline = MLX5_WQE_SIZE_MAX - MLX5_WQE_SIZE;\n-\t\tif (config->txqs_inline == 
MLX5_ARG_UNSET)\n-\t\t\ttxqs_inline = MLX5_EMPW_MIN_TXQS;\n-\t\tif (config->inline_max_packet_sz == MLX5_ARG_UNSET)\n-\t\t\tinline_max_packet_sz = MLX5_EMPW_MAX_INLINE_LEN;\n-\t\ttxq_ctrl->txq.mpw_hdr_dseg = config->mpw_hdr_dseg;\n-\t\ttxq_ctrl->txq.inline_max_packet_sz = inline_max_packet_sz;\n-\t}\n-\tif (txq_inline && priv->txqs_n >= txqs_inline) {\n-\t\tunsigned int ds_cnt;\n-\n-\t\ttxq_ctrl->txq.max_inline =\n-\t\t\t((txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /\n-\t\t\t RTE_CACHE_LINE_SIZE);\n-\t\tif (is_empw_func) {\n-\t\t\t/* To minimize the size of data set, avoid requesting\n-\t\t\t * too large WQ.\n-\t\t\t */\n-\t\t\ttxq_ctrl->max_inline_data =\n-\t\t\t\t((RTE_MIN(txq_inline,\n-\t\t\t\t\t  inline_max_packet_sz) +\n-\t\t\t\t  (RTE_CACHE_LINE_SIZE - 1)) /\n-\t\t\t\t RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;\n-\t\t} else {\n-\t\t\ttxq_ctrl->max_inline_data =\n-\t\t\t\ttxq_ctrl->txq.max_inline * RTE_CACHE_LINE_SIZE;\n-\t\t}\n-\t\t/*\n-\t\t * Check if the inline size is too large in a way which\n-\t\t * can make the WQE DS to overflow.\n-\t\t * Considering in calculation:\n-\t\t *      WQE CTRL (1 DS)\n-\t\t *      WQE ETH  (1 DS)\n-\t\t *      Inline part (N DS)\n-\t\t */\n-\t\tds_cnt = 2 + (txq_ctrl->txq.max_inline / MLX5_WQE_DWORD_SIZE);\n-\t\tif (ds_cnt > MLX5_DSEG_MAX) {\n-\t\t\tunsigned int max_inline = (MLX5_DSEG_MAX - 2) *\n-\t\t\t\t\t\t  MLX5_WQE_DWORD_SIZE;\n-\n-\t\t\tmax_inline = max_inline - (max_inline %\n-\t\t\t\t\t\t   RTE_CACHE_LINE_SIZE);\n-\t\t\tDRV_LOG(WARNING,\n-\t\t\t\t\"port %u txq inline is too large (%d) setting\"\n-\t\t\t\t\" it to the maximum possible: %d\\n\",\n-\t\t\t\tPORT_ID(priv), txq_inline, max_inline);\n-\t\t\ttxq_ctrl->txq.max_inline = max_inline /\n-\t\t\t\t\t\t   RTE_CACHE_LINE_SIZE;\n-\t\t}\n-\t}\n-\tif (tso) {\n-\t\ttxq_ctrl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;\n-\t\ttxq_ctrl->txq.max_inline = RTE_MAX(txq_ctrl->txq.max_inline,\n-\t\t\t\t\t\t   max_tso_inline);\n-\t\ttxq_ctrl->txq.tso_en = 1;\n-\t}\n-\ttxq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp;\n-\ttxq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO |\n-\t\t\t\t DEV_TX_OFFLOAD_UDP_TNL_TSO |\n-\t\t\t\t DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) &\n-\t\t\t\ttxq_ctrl->txq.offloads) && config->swp;\n+\t(void)txq_ctrl;\n }\n \n /**\n",
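/*
 * The txq_set_params() logic removed above clamps the configured inline
 * size so the WQE descriptor count cannot overflow: CTRL and ETH segments
 * take 2 DS, the inline part takes one DS per 16 bytes, and the total must
 * stay within MLX5_DSEG_MAX. A standalone sketch with assumed constants:
 */
#include <stdint.h>

#define DSEG_MAX  63u /* assumed MLX5_DSEG_MAX */
#define DS_SIZE   16u
#define CACHELINE 64u

static unsigned int clamp_inline(unsigned int txq_inline)
{
        unsigned int ds_cnt = 2 + txq_inline / DS_SIZE;

        if (ds_cnt > DSEG_MAX) {
                unsigned int max = (DSEG_MAX - 2) * DS_SIZE;

                max -= max % CACHELINE; /* keep it cache-line aligned */
                return max;
        }
        return txq_inline;
}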
    "prefixes": [
        "v2",
        "1/7"
    ]
}