From patchwork Sun Dec 4 12:54:58 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?Morten_Br=C3=B8rup?= X-Patchwork-Id: 120445 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id D24D6A04FD; Sun, 4 Dec 2022 13:55:01 +0100 (CET) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 78CD54068E; Sun, 4 Dec 2022 13:55:01 +0100 (CET) Received: from smartserver.smartsharesystems.com (smartserver.smartsharesystems.com [77.243.40.215]) by mails.dpdk.org (Postfix) with ESMTP id 7007040042 for ; Sun, 4 Dec 2022 13:55:00 +0100 (CET) Received: from dkrd2.smartsharesys.local ([192.168.4.12]) by smartserver.smartsharesystems.com with Microsoft SMTPSVC(6.0.3790.4675); Sun, 4 Dec 2022 13:54:59 +0100 From: =?utf-8?q?Morten_Br=C3=B8rup?= To: dev@dpdk.org Cc: =?utf-8?q?Morten_Br=C3=B8rup?= Subject: [PATCH v5] mbuf perf test, please ignore Date: Sun, 4 Dec 2022 13:54:58 +0100 Message-Id: <20221204125458.32620-1-mb@smartsharesystems.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <98CBD80474FA8B44BF855DF32C47DC35D8754C@smartserver.smartshare.dk> References: <98CBD80474FA8B44BF855DF32C47DC35D8754C@smartserver.smartshare.dk> MIME-Version: 1.0 X-OriginalArrivalTime: 04 Dec 2022 12:54:59.0820 (UTC) FILETIME=[9E3F72C0:01D907DF] X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Playing around with the mbuf structure, trying to reduce the use of the second cache line in some common scenarios. v5: * Fix rte_pktmbuf_chain() for the case where the head is a single segment. v4: * Use tabs, not spaces. * Fix copy-paste bug in linearize. v3: * Make 'next' depend on 'nb_segs' > 1. * Implement new interpretation of 'nb_segs' in i40e PMD. v2: * Remove BUILD_BUG_ON in cnxk PMD. Signed-off-by: Morten Brørup --- drivers/net/cnxk/cn10k_ethdev.c | 2 ++ drivers/net/cnxk/cn9k_ethdev.c | 2 ++ drivers/net/i40e/i40e_rxtx.c | 9 ++++-- drivers/net/i40e/i40e_rxtx_vec_altivec.c | 4 +++ drivers/net/i40e/i40e_rxtx_vec_common.h | 2 ++ drivers/net/i40e/i40e_rxtx_vec_neon.c | 4 +++ lib/mbuf/rte_mbuf.c | 39 +++++++++++++----------- lib/mbuf/rte_mbuf.h | 24 ++++++++------- lib/mbuf/rte_mbuf_core.h | 38 +++++++++++------------ 9 files changed, 73 insertions(+), 51 deletions(-) diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c index 4658713591..9f6086efe6 100644 --- a/drivers/net/cnxk/cn10k_ethdev.c +++ b/drivers/net/cnxk/cn10k_ethdev.c @@ -72,8 +72,10 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev) offsetof(struct rte_mbuf, buf_addr) + 24); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, ol_flags) + 12); +/* RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) != offsetof(struct rte_mbuf, pool) + 2 * sizeof(void *)); +*/ if (conf & RTE_ETH_TX_OFFLOAD_VLAN_INSERT || conf & RTE_ETH_TX_OFFLOAD_QINQ_INSERT) diff --git a/drivers/net/cnxk/cn9k_ethdev.c b/drivers/net/cnxk/cn9k_ethdev.c index 3b702d9696..3e9161ca79 100644 --- a/drivers/net/cnxk/cn9k_ethdev.c +++ b/drivers/net/cnxk/cn9k_ethdev.c @@ -72,8 +72,10 @@ nix_tx_offload_flags(struct rte_eth_dev *eth_dev) offsetof(struct rte_mbuf, buf_addr) + 24); RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != offsetof(struct rte_mbuf, ol_flags) + 12); +/* RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, tx_offload) != offsetof(struct rte_mbuf, pool) + 2 * sizeof(void *)); +*/ if (conf & RTE_ETH_TX_OFFLOAD_VLAN_INSERT || conf & RTE_ETH_TX_OFFLOAD_QINQ_INSERT) diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 788ffb51c2..a08afe0a13 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -920,8 +920,9 @@ i40e_recv_scattered_pkts(void *rx_queue, first_seg->pkt_len = (uint16_t)(first_seg->pkt_len + rx_packet_len); - first_seg->nb_segs++; + last_seg->nb_segs = 2; last_seg->next = rxm; + first_seg->nb_segs++; } /** @@ -944,6 +945,7 @@ i40e_recv_scattered_pkts(void *rx_queue, * the length of that CRC part from the data length of the * previous mbuf. */ + rxm->nb_segs = 1; rxm->next = NULL; if (unlikely(rxq->crc_len > 0)) { first_seg->pkt_len -= RTE_ETHER_CRC_LEN; @@ -953,6 +955,7 @@ i40e_recv_scattered_pkts(void *rx_queue, last_seg->data_len = (uint16_t)(last_seg->data_len - (RTE_ETHER_CRC_LEN - rx_packet_len)); + last_seg->nb_segs = 1; last_seg->next = NULL; } else rxm->data_len = (uint16_t)(rx_packet_len - @@ -1065,7 +1068,7 @@ i40e_calc_pkt_desc(struct rte_mbuf *tx_pkt) while (txd != NULL) { count += DIV_ROUND_UP(txd->data_len, I40E_MAX_DATA_PER_TXD); - txd = txd->next; + txd = (txd->nb_segs == 1) ? NULL : txd->next; } return count; @@ -1282,7 +1285,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) txe->last_id = tx_last; tx_id = txe->next_id; txe = txn; - m_seg = m_seg->next; + m_seg = (m_seg->nb_segs == 1) ? NULL : m_seg->next; } while (m_seg != NULL); /* The last packet data descriptor needs End Of Packet (EOP) */ diff --git a/drivers/net/i40e/i40e_rxtx_vec_altivec.c b/drivers/net/i40e/i40e_rxtx_vec_altivec.c index 2dfa04599c..ad91b5cb60 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_altivec.c +++ b/drivers/net/i40e/i40e_rxtx_vec_altivec.c @@ -410,6 +410,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, split_packet += RTE_I40E_DESCS_PER_LOOP; /* zero-out next pointers */ + rx_pkts[pos]->nb_segs = 1; + rx_pkts[pos + 1]->nb_segs = 1; + rx_pkts[pos + 2]->nb_segs = 1; + rx_pkts[pos + 3]->nb_segs = 1; rx_pkts[pos]->next = NULL; rx_pkts[pos + 1]->next = NULL; rx_pkts[pos + 2]->next = NULL; diff --git a/drivers/net/i40e/i40e_rxtx_vec_common.h b/drivers/net/i40e/i40e_rxtx_vec_common.h index fe1a6ec75e..d5799f5242 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_common.h +++ b/drivers/net/i40e/i40e_rxtx_vec_common.h @@ -27,6 +27,7 @@ reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) { if (end != NULL) { /* processing a split packet */ + end->nb_segs = 2; end->next = rx_bufs[buf_idx]; rx_bufs[buf_idx]->data_len += rxq->crc_len; @@ -52,6 +53,7 @@ reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, secondlast = secondlast->next; secondlast->data_len -= (rxq->crc_len - end->data_len); + secondlast->nb_segs = 1; secondlast->next = NULL; rte_pktmbuf_free_seg(end); } diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c index 12e6f1cbcb..3199b0f8cf 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_neon.c +++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c @@ -541,6 +541,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *__rte_restrict rxq, split_packet += RTE_I40E_DESCS_PER_LOOP; /* zero-out next pointers */ + rx_pkts[pos]->nb_segs = 1; + rx_pkts[pos + 1]->nb_segs = 1; + rx_pkts[pos + 2]->nb_segs = 1; + rx_pkts[pos + 3]->nb_segs = 1; rx_pkts[pos]->next = NULL; rx_pkts[pos + 1]->next = NULL; rx_pkts[pos + 2]->next = NULL; diff --git a/lib/mbuf/rte_mbuf.c b/lib/mbuf/rte_mbuf.c index cfd8062f1e..6a8153b443 100644 --- a/lib/mbuf/rte_mbuf.c +++ b/lib/mbuf/rte_mbuf.c @@ -123,10 +123,10 @@ rte_pktmbuf_free_pinned_extmem(void *addr, void *opaque) rte_mbuf_ext_refcnt_set(m->shinfo, 1); m->ol_flags = RTE_MBUF_F_EXTERNAL; - if (m->next != NULL) - m->next = NULL; - if (m->nb_segs != 1) + if (m->nb_segs != 1) { m->nb_segs = 1; + m->next = NULL; + } rte_mbuf_raw_free(m); } @@ -427,7 +427,7 @@ int rte_mbuf_check(const struct rte_mbuf *m, int is_header, } nb_segs -= 1; pkt_len -= m->data_len; - } while ((m = m->next) != NULL); + } while ((m = ((m->nb_segs == 1) ? NULL : m->next)) != NULL); if (nb_segs) { *reason = "bad nb_segs"; @@ -495,7 +495,7 @@ void rte_pktmbuf_free_bulk(struct rte_mbuf **mbufs, unsigned int count) __rte_mbuf_sanity_check(m, 1); do { - m_next = m->next; + m_next = (m->nb_segs == 1) ? NULL : m->next; __rte_pktmbuf_free_seg_via_array(m, pending, &nb_pending, RTE_PKTMBUF_FREE_PENDING_SZ); @@ -511,7 +511,7 @@ void rte_pktmbuf_free_bulk(struct rte_mbuf **mbufs, unsigned int count) struct rte_mbuf * rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp) { - struct rte_mbuf *mc, *mi, **prev; + struct rte_mbuf *mc, *mi, *prev; uint32_t pktlen; uint16_t nseg; @@ -520,19 +520,21 @@ rte_pktmbuf_clone(struct rte_mbuf *md, struct rte_mempool *mp) return NULL; mi = mc; - prev = &mi->next; + prev = mi; pktlen = md->pkt_len; nseg = 0; do { nseg++; rte_pktmbuf_attach(mi, md); - *prev = mi; - prev = &mi->next; - } while ((md = md->next) != NULL && + prev->nb_segs = 2; + prev->next = mi; + prev = mi; + } while ((md = ((md->nb_segs == 1) ? NULL : md->next)) != NULL && (mi = rte_pktmbuf_alloc(mp)) != NULL); - *prev = NULL; + prev->nb_segs = 1; + prev->next = NULL; mc->nb_segs = nseg; mc->pkt_len = pktlen; @@ -565,9 +567,9 @@ __rte_pktmbuf_linearize(struct rte_mbuf *mbuf) mbuf->data_len = (uint16_t)(mbuf->pkt_len); /* Append data from next segments to the first one */ - m = mbuf->next; + m = (mbuf->nb_segs == 1) ? NULL : mbuf->next; while (m != NULL) { - m_next = m->next; + m_next = (m->nb_segs == 1) ? NULL : m->next; seg_len = rte_pktmbuf_data_len(m); rte_memcpy(buffer, rte_pktmbuf_mtod(m, char *), seg_len); @@ -589,7 +591,7 @@ rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp, uint32_t off, uint32_t len) { const struct rte_mbuf *seg = m; - struct rte_mbuf *mc, *m_last, **prev; + struct rte_mbuf *mc, *m_last, *prev; /* garbage in check */ __rte_mbuf_sanity_check(m, 1); @@ -611,7 +613,7 @@ rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp, /* copied mbuf is not indirect or external */ mc->ol_flags = m->ol_flags & ~(RTE_MBUF_F_INDIRECT|RTE_MBUF_F_EXTERNAL); - prev = &mc->next; + prev = mc; m_last = mc; while (len > 0) { uint32_t copy_len; @@ -629,9 +631,10 @@ rte_pktmbuf_copy(const struct rte_mbuf *m, struct rte_mempool *mp, rte_pktmbuf_free(mc); return NULL; } + prev->nb_segs = 2; + prev->next = m_last; ++mc->nb_segs; - *prev = m_last; - prev = &m_last->next; + prev = m_last; } /* @@ -697,7 +700,7 @@ rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) if (len != 0) rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len); dump_len -= len; - m = m->next; + m = (m->nb_segs == 1) ? NULL : m->next; nb_segs --; } } diff --git a/lib/mbuf/rte_mbuf.h b/lib/mbuf/rte_mbuf.h index 3a82eb136d..6d08c4ebfd 100644 --- a/lib/mbuf/rte_mbuf.h +++ b/lib/mbuf/rte_mbuf.h @@ -1353,10 +1353,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) return NULL; } - if (m->next != NULL) - m->next = NULL; - if (m->nb_segs != 1) + if (m->nb_segs != 1) { m->nb_segs = 1; + m->next = NULL; + } return m; @@ -1370,10 +1370,10 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m) return NULL; } - if (m->next != NULL) - m->next = NULL; - if (m->nb_segs != 1) + if (m->nb_segs != 1) { m->nb_segs = 1; + m->next = NULL; + } rte_mbuf_refcnt_set(m, 1); return m; @@ -1415,7 +1415,7 @@ static inline void rte_pktmbuf_free(struct rte_mbuf *m) __rte_mbuf_sanity_check(m, 1); while (m != NULL) { - m_next = m->next; + m_next = (m->nb_segs == 1) ? NULL : m->next; rte_pktmbuf_free_seg(m); m = m_next; } @@ -1497,7 +1497,7 @@ static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) do { rte_mbuf_refcnt_update(m, v); - } while ((m = m->next) != NULL); + } while ((m = ((m->nb_segs == 1) ? NULL : m->next)) != NULL); } /** @@ -1540,7 +1540,7 @@ static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m) static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m) { __rte_mbuf_sanity_check(m, 1); - while (m->next != NULL) + while (m->nb_segs != 1) m = m->next; return m; } @@ -1758,20 +1758,22 @@ static inline const void *rte_pktmbuf_read(const struct rte_mbuf *m, static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail) { struct rte_mbuf *cur_tail; + const unsigned int nb_segs = head->nb_segs + tail->nb_segs; /* Check for number-of-segments-overflow */ - if (head->nb_segs + tail->nb_segs > RTE_MBUF_MAX_NB_SEGS) + if (nb_segs > RTE_MBUF_MAX_NB_SEGS) return -EOVERFLOW; /* Chain 'tail' onto the old tail */ cur_tail = rte_pktmbuf_lastseg(head); + cur_tail->nb_segs = 2; cur_tail->next = tail; /* accumulate number of segments and total length. * NB: elaborating the addition like this instead of using * -= allows us to ensure the result type is uint16_t * avoiding compiler warnings on gcc 8.1 at least */ - head->nb_segs = (uint16_t)(head->nb_segs + tail->nb_segs); + head->nb_segs = (uint16_t)nb_segs; head->pkt_len += tail->pkt_len; /* pkt_len is only set in the head */ diff --git a/lib/mbuf/rte_mbuf_core.h b/lib/mbuf/rte_mbuf_core.h index a30e1e0eaf..c0c3b45024 100644 --- a/lib/mbuf/rte_mbuf_core.h +++ b/lib/mbuf/rte_mbuf_core.h @@ -594,25 +594,6 @@ struct rte_mbuf { uint16_t buf_len; /**< Length of segment buffer. */ - struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */ - - /* second cache line - fields only used in slow path or on TX */ - RTE_MARKER cacheline1 __rte_cache_min_aligned; - -#if RTE_IOVA_AS_PA - /** - * Next segment of scattered packet. Must be NULL in the last - * segment or in case of non-segmented packet. - */ - struct rte_mbuf *next; -#else - /** - * Reserved for dynamic fields - * when the next pointer is in first cache line (i.e. RTE_IOVA_AS_PA is 0). - */ - uint64_t dynfield2; -#endif - /* fields to support TX offloads */ RTE_STD_C11 union { @@ -651,6 +632,25 @@ struct rte_mbuf { }; }; + /* second cache line - fields only used in slow path or on TX */ + RTE_MARKER cacheline1 __rte_cache_min_aligned; + +#if RTE_IOVA_AS_PA + /** + * Next segment of scattered packet. Must be NULL in the last + * segment or in case of non-segmented packet. + */ + struct rte_mbuf *next; +#else + /** + * Reserved for dynamic fields + * when the next pointer is in first cache line (i.e. RTE_IOVA_AS_PA is 0). + */ + uint64_t dynfield2; +#endif + + struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */ + /** Shared data for external buffer attached to mbuf. See * rte_pktmbuf_attach_extbuf(). */