From patchwork Mon Jun 13 05:51:36 2022
X-Patchwork-Submitter: Feifei Wang
X-Patchwork-Id: 112680
X-Patchwork-Delegate: qi.z.zhang@intel.com
From: Feifei Wang
To: Yuying Zhang, Beilei Xing, Ruifeng Wang
Cc: dev@dpdk.org, nd@arm.com, Feifei Wang, Konstantin Ananyev,
 Honnappa Nagarahalli
Subject: [RFC PATCH v1] net/i40e: put mempool cache out of API
Date: Mon, 13 Jun 2022 13:51:36 +0800
Message-Id: <20220613055136.1949784-1-feifei.wang2@arm.com>

Following "i40e_tx_free_bufs_avx512", this patch moves the mempool cache
handling out of the mempool API so that buffers are freed into the cache
directly. There are two changes compared with the previous version:
1. change txep from "i40e_tx_entry" to "i40e_vec_tx_entry"
2. move the cache handling out of the "mempool_bulk" API and copy buffers
   into the cache directly

Performance test with the l3fwd neon path, with this patch:
  n1sdp:        no performance change
  ampere-altra: +4.0%

Suggested-by: Konstantin Ananyev
Suggested-by: Honnappa Nagarahalli
Signed-off-by: Feifei Wang
Acked-by: Morten Brørup
---
 drivers/net/i40e/i40e_rxtx_vec_common.h | 36 ++++++++++++++++++++-----
 drivers/net/i40e/i40e_rxtx_vec_neon.c   | 10 ++++---
 2 files changed, 36 insertions(+), 10 deletions(-)
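
For reference, the fast-free branch added to i40e_tx_free_bufs() below
boils down to the standalone sketch that follows. It is illustrative only:
the helper name and its parameters are placeholders, but the mempool calls
and cache fields are the ones the patch uses. Mbuf pointers are copied
straight into the lcore's default mempool cache instead of going through
rte_mempool_put_bulk(), and the excess is flushed back to the mempool once
the cache crosses its flush threshold.

#include <rte_lcore.h>
#include <rte_mbuf.h>
#include <rte_memcpy.h>
#include <rte_mempool.h>

/* Illustrative sketch, not part of the patch: free a burst of mbufs that
 * satisfy the MBUF_FAST_FREE assumptions (single pool, refcnt == 1)
 * directly into the lcore's default mempool cache.
 */
static inline void
fast_free_to_cache(struct rte_mbuf **pkts, uint32_t n)
{
	struct rte_mempool *mp = pkts[0]->pool;
	struct rte_mempool_cache *cache =
		rte_mempool_default_cache(mp, rte_lcore_id());

	if (cache == NULL || n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
		/* No per-lcore cache, or burst too large for the cache:
		 * fall back to the regular mempool API.
		 */
		rte_mempool_put_bulk(mp, (void **)pkts, n);
		return;
	}

	/* Copy the mbuf pointers straight into the cache object array. */
	rte_memcpy(&cache->objs[cache->len], pkts, sizeof(void *) * n);
	cache->len += n;

	/* Flush the excess back to the mempool once over the threshold. */
	if (cache->len >= cache->flushthresh) {
		rte_mempool_ops_enqueue_bulk(mp, &cache->objs[cache->size],
					     cache->len - cache->size);
		cache->len = cache->size;
	}
}

The gain in the patch itself comes from txep already being a dense array
of mbuf pointers (see the note after the first file below), so no
intermediate free[] array has to be filled before handing the burst over.
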
diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h
index 959832ed6a..e418225b4e 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_common.h
+++ b/drivers/net/i40e/i40e_rxtx_vec_common.h
@@ -81,7 +81,7 @@ reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs,
 static __rte_always_inline int
 i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 {
-	struct i40e_tx_entry *txep;
+	struct i40e_vec_tx_entry *txep;
 	uint32_t n;
 	uint32_t i;
 	int nb_free = 0;
@@ -98,17 +98,39 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 	 /* first buffer to free from S/W ring is at index
 	  * tx_next_dd - (tx_rs_thresh-1)
 	  */
-	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	txep = (void *)txq->sw_ring;
+	txep += txq->tx_next_dd - (n - 1);
 
 	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
-		for (i = 0; i < n; i++) {
-			free[i] = txep[i].mbuf;
-			/* no need to reset txep[i].mbuf in vector path */
+		struct rte_mempool *mp = txep[0].mbuf->pool;
+		void **cache_objs;
+		struct rte_mempool_cache *cache = rte_mempool_default_cache(mp,
+				rte_lcore_id());
+
+		if (!cache || cache->len == 0)
+			goto normal;
+
+		cache_objs = &cache->objs[cache->len];
+
+		if (n > RTE_MEMPOOL_CACHE_MAX_SIZE) {
+			rte_mempool_ops_enqueue_bulk(mp, (void *)txep, n);
+			goto done;
+		}
+
+		rte_memcpy(cache_objs, txep, sizeof(void *) * n);
+		/* no need to reset txep[i].mbuf in vector path */
+		cache->len += n;
+
+		if (cache->len >= cache->flushthresh) {
+			rte_mempool_ops_enqueue_bulk
+				(mp, &cache->objs[cache->size],
+				cache->len - cache->size);
+			cache->len = cache->size;
 		}
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, n);
 		goto done;
 	}
 
+normal:
 	m = rte_pktmbuf_prefree_seg(txep[0].mbuf);
 	if (likely(m != NULL)) {
 		free[0] = m;
@@ -147,7 +169,7 @@ i40e_tx_free_bufs(struct i40e_tx_queue *txq)
 }
 
 static __rte_always_inline void
-tx_backlog_entry(struct i40e_tx_entry *txep,
+tx_backlog_entry(struct i40e_vec_tx_entry *txep,
 		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	int i;
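
The rte_memcpy() above relies on the txep type change in the first hunk:
i40e_vec_tx_entry carries only the mbuf pointer, so the software ring can
be indexed as a dense array of mbuf pointers. For reference, the two entry
layouts look roughly like this (as defined in drivers/net/i40e/i40e_rxtx.h;
quoted from memory, please check the tree the patch applies to):

/* Scalar-path software ring entry: mbuf pointer plus bookkeeping. */
struct i40e_tx_entry {
	struct rte_mbuf *mbuf;
	uint16_t next_id;
	uint16_t last_id;
};

/* Vector-path software ring entry: only the mbuf pointer, so
 * sizeof(struct i40e_vec_tx_entry) == sizeof(struct rte_mbuf *) and
 * &txep[0] can be handed directly to rte_memcpy()/enqueue_bulk().
 */
struct i40e_vec_tx_entry {
	struct rte_mbuf *mbuf;
};

With the scalar layout the mbuf pointers are interleaved with
next_id/last_id, so a per-element copy loop (as in the removed code)
would still be required.
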
diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c
index 12e6f1cbcb..d2d61e8ef4 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_neon.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c
@@ -680,12 +680,15 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 {
 	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
 	volatile struct i40e_tx_desc *txdp;
-	struct i40e_tx_entry *txep;
+	struct i40e_vec_tx_entry *txep;
 	uint16_t n, nb_commit, tx_id;
 	uint64_t flags = I40E_TD_CMD;
 	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
 	int i;
 
+	/* cross rx_thresh boundary is not allowed */
+	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
+
 	if (txq->nb_tx_free < txq->tx_free_thresh)
 		i40e_tx_free_bufs(txq);
 
@@ -695,7 +698,8 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 
 	tx_id = txq->tx_tail;
 	txdp = &txq->tx_ring[tx_id];
-	txep = &txq->sw_ring[tx_id];
+	txep = (void *)txq->sw_ring;
+	txep += tx_id;
 
 	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
 
@@ -715,7 +719,7 @@ i40e_xmit_fixed_burst_vec(void *__rte_restrict tx_queue,
 
 		/* avoid reach the end of ring */
 		txdp = &txq->tx_ring[tx_id];
-		txep = &txq->sw_ring[tx_id];
+		txep = (void *)txq->sw_ring;
 	}
 
 	tx_backlog_entry(txep, tx_pkts, nb_commit);
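
The new branch in i40e_tx_free_bufs() is only reached when the port/queue
is configured with RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE, i.e. all mbufs sent
on the queue come from a single pool and are not referenced elsewhere. A
minimal sketch of enabling it, following what l3fwd/testpmd do (port_id
and the queue counts are placeholders):

#include <rte_ethdev.h>

/* Illustrative only: request MBUF_FAST_FREE when the port supports it,
 * so the driver's fast-free TX completion path can be used.
 */
static int
enable_fast_free(uint16_t port_id)
{
	struct rte_eth_conf port_conf = { 0 };
	struct rte_eth_dev_info dev_info;

	rte_eth_dev_info_get(port_id, &dev_info);
	if (dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE)
		port_conf.txmode.offloads |=
			RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;

	/* 1 RX queue, 1 TX queue; queue setup and port start omitted. */
	return rte_eth_dev_configure(port_id, 1, 1, &port_conf);
}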