Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/134901/?format=api
http://patches.dpdk.org/api/patches/134901/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/patch/20231207063514.2001192-3-wenzhuo.lu@intel.com/", "project": { "id": 1, "url": "http://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<20231207063514.2001192-3-wenzhuo.lu@intel.com>", "list_archive_url": "https://inbox.dpdk.org/dev/20231207063514.2001192-3-wenzhuo.lu@intel.com", "date": "2023-12-07T06:35:14", "name": "[2/2] common/idpf: enable AVX2 for single queue Tx", "commit_ref": null, "pull_url": null, "state": "superseded", "archived": true, "hash": "3a34721499569fedf362404e8a72373a4726023d", "submitter": { "id": 258, "url": "http://patches.dpdk.org/api/people/258/?format=api", "name": "Wenzhuo Lu", "email": "wenzhuo.lu@intel.com" }, "delegate": { "id": 1540, "url": "http://patches.dpdk.org/api/users/1540/?format=api", "username": "qzhan15", "first_name": "Qi", "last_name": "Zhang", "email": "qi.z.zhang@intel.com" }, "mbox": "http://patches.dpdk.org/project/dpdk/patch/20231207063514.2001192-3-wenzhuo.lu@intel.com/mbox/", "series": [ { "id": 30468, "url": "http://patches.dpdk.org/api/series/30468/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=30468", "date": "2023-12-07T06:35:12", "name": "enable AVX2 for IDPF single queue", "version": 1, "mbox": "http://patches.dpdk.org/series/30468/mbox/" } ], "comments": "http://patches.dpdk.org/api/patches/134901/comments/", "check": "fail", "checks": "http://patches.dpdk.org/api/patches/134901/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@inbox.dpdk.org", "Delivered-To": "patchwork@inbox.dpdk.org", "Received": [ "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id CBCDE43694;\n\tThu, 7 Dec 2023 07:12:35 +0100 (CET)", "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id 78C0F42EB2;\n\tThu, 7 Dec 2023 07:12:28 +0100 (CET)", "from mgamail.intel.com (mgamail.intel.com [134.134.136.31])\n by mails.dpdk.org (Postfix) with ESMTP id CB2EB40042\n for <dev@dpdk.org>; Thu, 7 Dec 2023 07:12:24 +0100 (CET)", "from orsmga002.jf.intel.com ([10.7.209.21])\n by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 06 Dec 2023 22:12:24 -0800", "from dpdk-wenzhuo-icelake.sh.intel.com ([10.67.111.210])\n by orsmga002.jf.intel.com with ESMTP; 06 Dec 2023 22:12:23 -0800" ], "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1701929545; x=1733465545;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=50015ZhHfs1UGekC2r+YbL/ZN99tw97CPBUUiLWh/rw=;\n b=NLQfxgSOCc7Jmd7J8wK4LdOKFMWuar+VEY3aZJZ9r7jYp3pyL5YZhvfu\n UeiUpJPPnwGWGRZ3/ee2rT3rgz6kUSDRpyHch/imrTirOpJRZWIns4rR+\n 8jSjslmnt6HwoTkI8XVOoiI3JQLol61Y4NUi9r3UodhJZ6onmdEFXAaAJ\n Yv6O8LIq1kgznw+ctpJl0CbhidxB0eZ78wIWT/dQRZv/B4Zy7uY/lnkP1\n mg8N7ApqkHloL+iD2EiXePn8d5iHT7FNOdG7XkTEt6+HykRUoMlzWh6B6\n BL29tR28pHPoAYnXyuxMxF0tdkU+06rDNn8Vinv6p64LrrmZlNRc4pxlR w==;", "X-IronPort-AV": [ "E=McAfee;i=\"6600,9927,10916\"; a=\"458507911\"", "E=Sophos;i=\"6.04,256,1695711600\"; d=\"scan'208\";a=\"458507911\"", "E=McAfee;i=\"6600,9927,10916\"; a=\"771599775\"", "E=Sophos;i=\"6.04,256,1695711600\"; d=\"scan'208\";a=\"771599775\"" ], "X-ExtLoop1": "1", "From": "Wenzhuo Lu <wenzhuo.lu@intel.com>", "To": "dev@dpdk.org", "Cc": "Wenzhuo Lu <wenzhuo.lu@intel.com>", "Subject": "[PATCH 2/2] common/idpf: enable AVX2 for single queue Tx", "Date": "Thu, 7 Dec 2023 06:35:14 +0000", "Message-Id": "<20231207063514.2001192-3-wenzhuo.lu@intel.com>", "X-Mailer": "git-send-email 2.25.1", "In-Reply-To": "<20231207063514.2001192-1-wenzhuo.lu@intel.com>", "References": "<20231207063514.2001192-1-wenzhuo.lu@intel.com>", "MIME-Version": "1.0", "Content-Transfer-Encoding": "8bit", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.29", "Precedence": "list", "List-Id": "DPDK patches and discussions <dev.dpdk.org>", "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://mails.dpdk.org/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org" }, "content": "In case some CPUs don't support AVX512. Enable AVX2 for them to\nget better per-core performance.\n\nSigned-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>\n---\n doc/guides/rel_notes/release_24_03.rst | 3 +\n drivers/common/idpf/idpf_common_device.h | 1 +\n drivers/common/idpf/idpf_common_rxtx.h | 4 +\n drivers/common/idpf/idpf_common_rxtx_avx2.c | 225 ++++++++++++++++++++\n drivers/common/idpf/version.map | 1 +\n drivers/net/idpf/idpf_rxtx.c | 14 ++\n 6 files changed, 248 insertions(+)", "diff": "diff --git a/doc/guides/rel_notes/release_24_03.rst b/doc/guides/rel_notes/release_24_03.rst\nindex e9c9717706..08c8ee07c3 100644\n--- a/doc/guides/rel_notes/release_24_03.rst\n+++ b/doc/guides/rel_notes/release_24_03.rst\n@@ -55,6 +55,9 @@ New Features\n Also, make sure to start the actual text at the margin.\n =======================================================\n \n+ * **Added support of vector instructions on IDPF.**\n+\n+ Added support of AVX2 instructions in IDPF single queue RX and TX path.\n \n Removed Items\n -------------\ndiff --git a/drivers/common/idpf/idpf_common_device.h b/drivers/common/idpf/idpf_common_device.h\nindex afe3d48798..60f8cab53a 100644\n--- a/drivers/common/idpf/idpf_common_device.h\n+++ b/drivers/common/idpf/idpf_common_device.h\n@@ -115,6 +115,7 @@ struct idpf_vport {\n \tbool rx_vec_allowed;\n \tbool tx_vec_allowed;\n \tbool rx_use_avx2;\n+\tbool tx_use_avx2;\n \tbool rx_use_avx512;\n \tbool tx_use_avx512;\n \ndiff --git a/drivers/common/idpf/idpf_common_rxtx.h b/drivers/common/idpf/idpf_common_rxtx.h\nindex 4d64063718..a92d328313 100644\n--- a/drivers/common/idpf/idpf_common_rxtx.h\n+++ b/drivers/common/idpf/idpf_common_rxtx.h\n@@ -306,5 +306,9 @@ __rte_internal\n uint16_t idpf_dp_singleq_recv_pkts_avx2(void *rx_queue,\n \t\t\t\t\tstruct rte_mbuf **rx_pkts,\n \t\t\t\t\tuint16_t nb_pkts);\n+__rte_internal\n+uint16_t idpf_dp_singleq_xmit_pkts_avx2(void *tx_queue,\n+\t\t\t\t\tstruct rte_mbuf **tx_pkts,\n+\t\t\t\t\tuint16_t nb_pkts);\n \n #endif /* _IDPF_COMMON_RXTX_H_ */\ndiff --git a/drivers/common/idpf/idpf_common_rxtx_avx2.c b/drivers/common/idpf/idpf_common_rxtx_avx2.c\nindex 0403cf118f..77e651b201 100644\n--- a/drivers/common/idpf/idpf_common_rxtx_avx2.c\n+++ b/drivers/common/idpf/idpf_common_rxtx_avx2.c\n@@ -607,3 +607,228 @@ idpf_dp_singleq_recv_pkts_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,\n {\n \treturn _idpf_singleq_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL);\n }\n+\n+static __rte_always_inline void\n+idpf_tx_backlog_entry(struct idpf_tx_entry *txep,\n+\t\t struct rte_mbuf **tx_pkts, uint16_t nb_pkts)\n+{\n+\tint i;\n+\n+\tfor (i = 0; i < (int)nb_pkts; ++i)\n+\t\ttxep[i].mbuf = tx_pkts[i];\n+}\n+\n+static __rte_always_inline int\n+idpf_singleq_tx_free_bufs_vec(struct idpf_tx_queue *txq)\n+{\n+\tstruct idpf_tx_entry *txep;\n+\tuint32_t n;\n+\tuint32_t i;\n+\tint nb_free = 0;\n+\tstruct rte_mbuf *m, *free[txq->rs_thresh];\n+\n+\t/* check DD bits on threshold descriptor */\n+\tif ((txq->tx_ring[txq->next_dd].qw1 &\n+\t\t\trte_cpu_to_le_64(IDPF_TXD_QW1_DTYPE_M)) !=\n+\t\t\trte_cpu_to_le_64(IDPF_TX_DESC_DTYPE_DESC_DONE))\n+\t\treturn 0;\n+\n+\tn = txq->rs_thresh;\n+\n+\t /* first buffer to free from S/W ring is at index\n+\t * next_dd - (rs_thresh-1)\n+\t */\n+\ttxep = &txq->sw_ring[txq->next_dd - (n - 1)];\n+\tm = rte_pktmbuf_prefree_seg(txep[0].mbuf);\n+\tif (likely(m)) {\n+\t\tfree[0] = m;\n+\t\tnb_free = 1;\n+\t\tfor (i = 1; i < n; i++) {\n+\t\t\tm = rte_pktmbuf_prefree_seg(txep[i].mbuf);\n+\t\t\tif (likely(m)) {\n+\t\t\t\tif (likely(m->pool == free[0]->pool)) {\n+\t\t\t\t\tfree[nb_free++] = m;\n+\t\t\t\t} else {\n+\t\t\t\t\trte_mempool_put_bulk(free[0]->pool,\n+\t\t\t\t\t\t\t (void *)free,\n+\t\t\t\t\t\t\t nb_free);\n+\t\t\t\t\tfree[0] = m;\n+\t\t\t\t\tnb_free = 1;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t\trte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);\n+\t} else {\n+\t\tfor (i = 1; i < n; i++) {\n+\t\t\tm = rte_pktmbuf_prefree_seg(txep[i].mbuf);\n+\t\t\tif (m)\n+\t\t\t\trte_mempool_put(m->pool, m);\n+\t\t}\n+\t}\n+\n+\t/* buffers were freed, update counters */\n+\ttxq->nb_free = (uint16_t)(txq->nb_free + txq->rs_thresh);\n+\ttxq->next_dd = (uint16_t)(txq->next_dd + txq->rs_thresh);\n+\tif (txq->next_dd >= txq->nb_tx_desc)\n+\t\ttxq->next_dd = (uint16_t)(txq->rs_thresh - 1);\n+\n+\treturn txq->rs_thresh;\n+}\n+\n+static inline void\n+idpf_singleq_vtx1(volatile struct idpf_base_tx_desc *txdp,\n+\t\t struct rte_mbuf *pkt, uint64_t flags)\n+{\n+\tuint64_t high_qw =\n+\t\t(IDPF_TX_DESC_DTYPE_DATA |\n+\t\t ((uint64_t)flags << IDPF_TXD_QW1_CMD_S) |\n+\t\t ((uint64_t)pkt->data_len << IDPF_TXD_QW1_TX_BUF_SZ_S));\n+\n+\t__m128i descriptor = _mm_set_epi64x(high_qw,\n+\t\t\t\tpkt->buf_iova + pkt->data_off);\n+\t_mm_store_si128((__m128i *)txdp, descriptor);\n+}\n+\n+static inline void\n+idpf_singleq_vtx(volatile struct idpf_base_tx_desc *txdp,\n+\t\t struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)\n+{\n+\tconst uint64_t hi_qw_tmpl = (IDPF_TX_DESC_DTYPE_DATA |\n+\t\t\t((uint64_t)flags << IDPF_TXD_QW1_CMD_S));\n+\n+\t/* if unaligned on 32-bit boundary, do one to align */\n+\tif (((uintptr_t)txdp & 0x1F) != 0 && nb_pkts != 0) {\n+\t\tidpf_singleq_vtx1(txdp, *pkt, flags);\n+\t\tnb_pkts--, txdp++, pkt++;\n+\t}\n+\n+\t/* do two at a time while possible, in bursts */\n+\tfor (; nb_pkts > 3; txdp += 4, pkt += 4, nb_pkts -= 4) {\n+\t\tuint64_t hi_qw3 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[3]->data_len <<\n+\t\t\t IDPF_TXD_QW1_TX_BUF_SZ_S);\n+\t\tuint64_t hi_qw2 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[2]->data_len <<\n+\t\t\t IDPF_TXD_QW1_TX_BUF_SZ_S);\n+\t\tuint64_t hi_qw1 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[1]->data_len <<\n+\t\t\t IDPF_TXD_QW1_TX_BUF_SZ_S);\n+\t\tuint64_t hi_qw0 =\n+\t\t\thi_qw_tmpl |\n+\t\t\t((uint64_t)pkt[0]->data_len <<\n+\t\t\t IDPF_TXD_QW1_TX_BUF_SZ_S);\n+\n+\t\t__m256i desc2_3 =\n+\t\t\t_mm256_set_epi64x\n+\t\t\t\t(hi_qw3,\n+\t\t\t\t pkt[3]->buf_iova + pkt[3]->data_off,\n+\t\t\t\t hi_qw2,\n+\t\t\t\t pkt[2]->buf_iova + pkt[2]->data_off);\n+\t\t__m256i desc0_1 =\n+\t\t\t_mm256_set_epi64x\n+\t\t\t\t(hi_qw1,\n+\t\t\t\t pkt[1]->buf_iova + pkt[1]->data_off,\n+\t\t\t\t hi_qw0,\n+\t\t\t\t pkt[0]->buf_iova + pkt[0]->data_off);\n+\t\t_mm256_store_si256((void *)(txdp + 2), desc2_3);\n+\t\t_mm256_store_si256((void *)txdp, desc0_1);\n+\t}\n+\n+\t/* do any last ones */\n+\twhile (nb_pkts) {\n+\t\tidpf_singleq_vtx1(txdp, *pkt, flags);\n+\t\ttxdp++, pkt++, nb_pkts--;\n+\t}\n+}\n+\n+static inline uint16_t\n+idpf_singleq_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t\t uint16_t nb_pkts)\n+{\n+\tstruct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;\n+\tvolatile struct idpf_base_tx_desc *txdp;\n+\tstruct idpf_tx_entry *txep;\n+\tuint16_t n, nb_commit, tx_id;\n+\tuint64_t flags = IDPF_TX_DESC_CMD_EOP;\n+\tuint64_t rs = IDPF_TX_DESC_CMD_RS | flags;\n+\n+\t/* cross rx_thresh boundary is not allowed */\n+\tnb_pkts = RTE_MIN(nb_pkts, txq->rs_thresh);\n+\n+\tif (txq->nb_free < txq->free_thresh)\n+\t\tidpf_singleq_tx_free_bufs_vec(txq);\n+\n+\tnb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_free, nb_pkts);\n+\tif (unlikely(nb_pkts == 0))\n+\t\treturn 0;\n+\n+\ttx_id = txq->tx_tail;\n+\ttxdp = &txq->tx_ring[tx_id];\n+\ttxep = &txq->sw_ring[tx_id];\n+\n+\ttxq->nb_free = (uint16_t)(txq->nb_free - nb_pkts);\n+\n+\tn = (uint16_t)(txq->nb_tx_desc - tx_id);\n+\tif (nb_commit >= n) {\n+\t\tidpf_tx_backlog_entry(txep, tx_pkts, n);\n+\n+\t\tidpf_singleq_vtx(txdp, tx_pkts, n - 1, flags);\n+\t\ttx_pkts += (n - 1);\n+\t\ttxdp += (n - 1);\n+\n+\t\tidpf_singleq_vtx1(txdp, *tx_pkts++, rs);\n+\n+\t\tnb_commit = (uint16_t)(nb_commit - n);\n+\n+\t\ttx_id = 0;\n+\t\ttxq->next_rs = (uint16_t)(txq->rs_thresh - 1);\n+\n+\t\t/* avoid reach the end of ring */\n+\t\ttxdp = &txq->tx_ring[tx_id];\n+\t\ttxep = &txq->sw_ring[tx_id];\n+\t}\n+\n+\tidpf_tx_backlog_entry(txep, tx_pkts, nb_commit);\n+\n+\tidpf_singleq_vtx(txdp, tx_pkts, nb_commit, flags);\n+\n+\ttx_id = (uint16_t)(tx_id + nb_commit);\n+\tif (tx_id > txq->next_rs) {\n+\t\ttxq->tx_ring[txq->next_rs].qw1 |=\n+\t\t\trte_cpu_to_le_64(((uint64_t)IDPF_TX_DESC_CMD_RS) <<\n+\t\t\t\t\t IDPF_TXD_QW1_CMD_S);\n+\t\ttxq->next_rs =\n+\t\t\t(uint16_t)(txq->next_rs + txq->rs_thresh);\n+\t}\n+\n+\ttxq->tx_tail = tx_id;\n+\n+\tIDPF_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);\n+\n+\treturn nb_pkts;\n+}\n+\n+uint16_t\n+idpf_dp_singleq_xmit_pkts_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,\n+\t\t\t uint16_t nb_pkts)\n+{\n+\tuint16_t nb_tx = 0;\n+\tstruct idpf_tx_queue *txq = (struct idpf_tx_queue *)tx_queue;\n+\n+\twhile (nb_pkts) {\n+\t\tuint16_t ret, num;\n+\n+\t\tnum = (uint16_t)RTE_MIN(nb_pkts, txq->rs_thresh);\n+\t\tret = idpf_singleq_xmit_fixed_burst_vec_avx2(tx_queue, &tx_pkts[nb_tx],\n+\t\t\t\t\t\t num);\n+\t\tnb_tx += ret;\n+\t\tnb_pkts -= ret;\n+\t\tif (ret < num)\n+\t\t\tbreak;\n+\t}\n+\n+\treturn nb_tx;\n+}\ndiff --git a/drivers/common/idpf/version.map b/drivers/common/idpf/version.map\nindex 4510aae6b3..eadcb9a2cf 100644\n--- a/drivers/common/idpf/version.map\n+++ b/drivers/common/idpf/version.map\n@@ -15,6 +15,7 @@ INTERNAL {\n \tidpf_dp_splitq_xmit_pkts;\n \tidpf_dp_splitq_xmit_pkts_avx512;\n \tidpf_dp_singleq_recv_pkts_avx2;\n+\tidpf_dp_singleq_xmit_pkts_avx2;\n \n \tidpf_qc_rx_thresh_check;\n \tidpf_qc_rx_queue_release;\ndiff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c\nindex b155c9ccd1..45c791515d 100644\n--- a/drivers/net/idpf/idpf_rxtx.c\n+++ b/drivers/net/idpf/idpf_rxtx.c\n@@ -884,6 +884,12 @@ idpf_set_tx_function(struct rte_eth_dev *dev)\n \tif (idpf_tx_vec_dev_check_default(dev) == IDPF_VECTOR_PATH &&\n \t rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {\n \t\tvport->tx_vec_allowed = true;\n+\n+\t\tif ((rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||\n+\t\t rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1) &&\n+\t\t rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256)\n+\t\t\tvport->tx_use_avx2 = true;\n+\n \t\tif (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)\n #ifdef CC_AVX512_SUPPORT\n \t\t{\n@@ -943,6 +949,14 @@ idpf_set_tx_function(struct rte_eth_dev *dev)\n \t\t\t\treturn;\n \t\t\t}\n #endif /* CC_AVX512_SUPPORT */\n+\t\t\tif (vport->tx_use_avx2) {\n+\t\t\t\tPMD_DRV_LOG(NOTICE,\n+\t\t\t\t\t \"Using Single AVX2 Vector Tx (port %d).\",\n+\t\t\t\t\t dev->data->port_id);\n+\t\t\t\tdev->tx_pkt_burst = idpf_dp_singleq_xmit_pkts_avx2;\n+\t\t\t\tdev->tx_pkt_prepare = idpf_dp_prep_pkts;\n+\t\t\t\treturn;\n+\t\t\t}\n \t\t}\n \t\tPMD_DRV_LOG(NOTICE,\n \t\t\t \"Using Single Scalar Tx (port %d).\",\n", "prefixes": [ "2/2" ] }{ "id": 134901, "url": "