diff mbox series

net/bnxt: optimizations for Tx completion handling

Message ID 20210306151912.556988-1-lance.richardson@broadcom.com (mailing list archive)
State Accepted, archived
Delegated to: Ajit Khaparde
Headers show
Series net/bnxt: optimizations for Tx completion handling | expand

Checks

Context Check Description
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/github-robot success github build: passed
ci/travis-robot success travis build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Lance Richardson March 6, 2021, 3:19 p.m. UTC
Avoid copying mbuf pointers to a separate array for bulk
mbuf free when handling transmit completions in vector
mode transmit.

Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
---
 drivers/net/bnxt/bnxt_ethdev.c          |  4 +-
 drivers/net/bnxt/bnxt_ring.c            |  2 +-
 drivers/net/bnxt/bnxt_rxtx_vec_common.h | 89 +++++++++++++++----------
 drivers/net/bnxt/bnxt_rxtx_vec_neon.c   |  5 +-
 drivers/net/bnxt/bnxt_rxtx_vec_sse.c    |  7 +-
 drivers/net/bnxt/bnxt_txq.c             |  8 +--
 drivers/net/bnxt/bnxt_txr.c             | 68 ++++++++++---------
 drivers/net/bnxt/bnxt_txr.h             |  7 +-
 8 files changed, 106 insertions(+), 84 deletions(-)

Comments

Ajit Khaparde March 12, 2021, 3:23 p.m. UTC | #1
On Sat, Mar 6, 2021 at 7:19 AM Lance Richardson
<lance.richardson@broadcom.com> wrote:
>
> Avoid copying mbuf pointers to a separate array for bulk
> mbuf free when handling transmit completions in vector
> mode transmit.
>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Reviewed-by: Ajit Kumar Khaparde <ajit.khaparde@broadcom.com>
Patch applied to dpdk-next-net-brcm. Thanks

> ---
>  drivers/net/bnxt/bnxt_ethdev.c          |  4 +-
>  drivers/net/bnxt/bnxt_ring.c            |  2 +-
>  drivers/net/bnxt/bnxt_rxtx_vec_common.h | 89 +++++++++++++++----------
>  drivers/net/bnxt/bnxt_rxtx_vec_neon.c   |  5 +-
>  drivers/net/bnxt/bnxt_rxtx_vec_sse.c    |  7 +-
>  drivers/net/bnxt/bnxt_txq.c             |  8 +--
>  drivers/net/bnxt/bnxt_txr.c             | 68 ++++++++++---------
>  drivers/net/bnxt/bnxt_txr.h             |  7 +-
>  8 files changed, 106 insertions(+), 84 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
> index 88da345034..d4028e2bb2 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
> @@ -3186,7 +3186,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
>         struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
>         struct bnxt_tx_ring_info *txr;
>         struct bnxt_cp_ring_info *cpr;
> -       struct bnxt_sw_tx_bd *tx_buf;
> +       struct rte_mbuf **tx_buf;
>         struct tx_pkt_cmpl *txcmp;
>         uint32_t cons, cp_cons;
>         int rc;
> @@ -3216,7 +3216,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
>                         return RTE_ETH_TX_DESC_UNAVAIL;
>         }
>         tx_buf = &txr->tx_buf_ring[cons];
> -       if (tx_buf->mbuf == NULL)
> +       if (*tx_buf == NULL)
>                 return RTE_ETH_TX_DESC_DONE;
>
>         return RTE_ETH_TX_DESC_FULL;
> diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
> index 997dcdc28b..e4055fa49b 100644
> --- a/drivers/net/bnxt/bnxt_ring.c
> +++ b/drivers/net/bnxt/bnxt_ring.c
> @@ -230,7 +230,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
>                         tx_ring->vmem =
>                             (void **)((char *)mz->addr + tx_vmem_start);
>                         tx_ring_info->tx_buf_ring =
> -                           (struct bnxt_sw_tx_bd *)tx_ring->vmem;
> +                           (struct rte_mbuf **)tx_ring->vmem;
>                 }
>         }
>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> index 91ff6736b1..9b9489a695 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
> @@ -100,57 +100,78 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
>   * is enabled.
>   */
>  static inline void
> -bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
> +bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
>  {
>         struct bnxt_tx_ring_info *txr = txq->tx_ring;
> -       struct rte_mbuf **free = txq->free;
>         uint16_t cons, raw_cons = txr->tx_raw_cons;
> -       unsigned int blk = 0;
> -       uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
> -
> -       while (nr_pkts--) {
> -               struct bnxt_sw_tx_bd *tx_buf;
> -
> -               cons = raw_cons++ & ring_mask;
> -               tx_buf = &txr->tx_buf_ring[cons];
> -               free[blk++] = tx_buf->mbuf;
> -               tx_buf->mbuf = NULL;
> +       uint32_t ring_mask, ring_size, num;
> +       struct rte_mempool *pool;
> +
> +       ring_mask = txr->tx_ring_struct->ring_mask;
> +       ring_size = txr->tx_ring_struct->ring_size;
> +
> +       cons = raw_cons & ring_mask;
> +       num = RTE_MIN(nr_pkts, ring_size - cons);
> +       pool = txr->tx_buf_ring[cons]->pool;
> +
> +       rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
> +       memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
> +       raw_cons += num;
> +       num = nr_pkts - num;
> +       if (num) {
> +               cons = raw_cons & ring_mask;
> +               rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
> +                                    num);
> +               memset(&txr->tx_buf_ring[cons], 0,
> +                      num * sizeof(struct rte_mbuf *));
> +               raw_cons += num;
>         }
> -       if (blk)
> -               rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
>
>         txr->tx_raw_cons = raw_cons;
>  }
>
>  static inline void
> -bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
> +bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
>  {
>         struct bnxt_tx_ring_info *txr = txq->tx_ring;
> -       struct rte_mbuf **free = txq->free;
>         uint16_t cons, raw_cons = txr->tx_raw_cons;
> -       unsigned int blk = 0;
> -       uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
> +       uint32_t ring_mask, ring_size, num, blk;
> +       struct rte_mempool *pool;
>
> -       while (nr_pkts--) {
> -               struct bnxt_sw_tx_bd *tx_buf;
> -               struct rte_mbuf *mbuf;
> +       ring_mask = txr->tx_ring_struct->ring_mask;
> +       ring_size = txr->tx_ring_struct->ring_size;
>
> -               cons = raw_cons++ & ring_mask;
> -               tx_buf = &txr->tx_buf_ring[cons];
> -               mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
> -               if (unlikely(mbuf == NULL))
> -                       continue;
> -               tx_buf->mbuf = NULL;
> +       while (nr_pkts) {
> +               struct rte_mbuf *mbuf;
>
> -               if (blk && mbuf->pool != free[0]->pool) {
> -                       rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
> -                       blk = 0;
> +               cons = raw_cons & ring_mask;
> +               num = RTE_MIN(nr_pkts, ring_size - cons);
> +               pool = txr->tx_buf_ring[cons]->pool;
> +
> +               blk = 0;
> +               do {
> +                       mbuf = txr->tx_buf_ring[cons + blk];
> +                       mbuf = rte_pktmbuf_prefree_seg(mbuf);
> +                       if (!mbuf || mbuf->pool != pool)
> +                               break;
> +                       blk++;
> +               } while (blk < num);
> +
> +               if (blk) {
> +                       rte_mempool_put_bulk(pool,
> +                                            (void **)&txr->tx_buf_ring[cons],
> +                                            blk);
> +                       memset(&txr->tx_buf_ring[cons], 0,
> +                              blk * sizeof(struct rte_mbuf *));
> +                       raw_cons += blk;
> +                       nr_pkts -= blk;
> +               }
> +               if (!mbuf) {
> +                       /* Skip freeing mbufs with non-zero reference count. */
> +                       raw_cons++;
> +                       nr_pkts--;
>                 }
> -               free[blk++] = mbuf;
>         }
> -       if (blk)
> -               rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
> -
>         txr->tx_raw_cons = raw_cons;
>  }
>  #endif /* _BNXT_RXTX_VEC_COMMON_H_ */
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index 3d54d9d59d..bc2e96ec38 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -346,7 +346,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>         uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
>         struct rte_mbuf *tx_mbuf;
>         struct tx_bd_long *txbd = NULL;
> -       struct bnxt_sw_tx_bd *tx_buf;
> +       struct rte_mbuf **tx_buf;
>         uint16_t to_send;
>
>         nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
> @@ -362,8 +362,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>
>                 tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
>                 tx_buf = &txr->tx_buf_ring[tx_prod];
> -               tx_buf->mbuf = tx_mbuf;
> -               tx_buf->nr_bds = 1;
> +               *tx_buf = tx_mbuf;
>
>                 txbd = &txr->tx_desc_ring[tx_prod];
>                 txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index 7a58434ce9..7ec04797b7 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -321,12 +321,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
>
>  static inline void
>  bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
> -             struct bnxt_sw_tx_bd *tx_buf)
> +             struct rte_mbuf **tx_buf)
>  {
>         __m128i desc;
>
> -       tx_buf->mbuf = mbuf;
> -       tx_buf->nr_bds = 1;
> +       *tx_buf = mbuf;
>
>         desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
>                               bnxt_xmit_flags_len(mbuf->data_len,
> @@ -343,7 +342,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
>         struct bnxt_tx_ring_info *txr = txq->tx_ring;
>         uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
>         struct tx_bd_long *txbd;
> -       struct bnxt_sw_tx_bd *tx_buf;
> +       struct rte_mbuf **tx_buf;
>         uint16_t to_send;
>
>         tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
> diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
> index 8679ac91e7..d95e1f7526 100644
> --- a/drivers/net/bnxt/bnxt_txq.c
> +++ b/drivers/net/bnxt/bnxt_txq.c
> @@ -24,7 +24,7 @@ void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
>
>  static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
>  {
> -       struct bnxt_sw_tx_bd *sw_ring;
> +       struct rte_mbuf **sw_ring;
>         uint16_t i;
>
>         if (!txq || !txq->tx_ring)
> @@ -33,9 +33,9 @@ static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
>         sw_ring = txq->tx_ring->tx_buf_ring;
>         if (sw_ring) {
>                 for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
> -                       if (sw_ring[i].mbuf) {
> -                               rte_pktmbuf_free_seg(sw_ring[i].mbuf);
> -                               sw_ring[i].mbuf = NULL;
> +                       if (sw_ring[i]) {
> +                               rte_pktmbuf_free_seg(sw_ring[i]);
> +                               sw_ring[i] = NULL;
>                         }
>                 }
>         }
> diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
> index 2810906a3a..68fbd3f582 100644
> --- a/drivers/net/bnxt/bnxt_txr.c
> +++ b/drivers/net/bnxt/bnxt_txr.c
> @@ -76,7 +76,7 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
>         ring->ring_mask = ring->ring_size - 1;
>         ring->bd = (void *)txr->tx_desc_ring;
>         ring->bd_dma = txr->tx_desc_mapping;
> -       ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
> +       ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
>         ring->vmem = (void **)&txr->tx_buf_ring;
>         ring->fw_ring_id = INVALID_HW_RING_ID;
>
> @@ -104,6 +104,21 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
>         return 0;
>  }
>
> +static bool
> +bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
> +{
> +       if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
> +                               PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
> +                               PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
> +                               PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
> +                               PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
> +                               PKT_TX_QINQ_PKT) ||
> +            (BNXT_TRUFLOW_EN(txq->bp) &&
> +             (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
> +               return true;
> +       return false;
> +}
> +
>  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>                                 struct bnxt_tx_queue *txq,
>                                 uint16_t *coal_pkts,
> @@ -116,10 +131,10 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>         struct tx_bd_long_hi *txbd1 = NULL;
>         uint32_t vlan_tag_flags;
>         bool long_bd = false;
> -       unsigned short nr_bds = 0;
> +       unsigned short nr_bds;
>         uint16_t prod;
>         struct rte_mbuf *m_seg;
> -       struct bnxt_sw_tx_bd *tx_buf;
> +       struct rte_mbuf **tx_buf;
>         static const uint32_t lhint_arr[4] = {
>                 TX_BD_LONG_FLAGS_LHINT_LT512,
>                 TX_BD_LONG_FLAGS_LHINT_LT1K,
> @@ -130,17 +145,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>         if (unlikely(is_bnxt_in_error(txq->bp)))
>                 return -EIO;
>
> -       if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
> -                               PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
> -                               PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
> -                               PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
> -                               PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
> -                               PKT_TX_QINQ_PKT) ||
> -            (BNXT_TRUFLOW_EN(txq->bp) &&
> -             (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
> -               long_bd = true;
> -
> +       long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
>         nr_bds = long_bd + tx_pkt->nb_segs;
> +
>         if (unlikely(bnxt_tx_avail(txq) < nr_bds))
>                 return -ENOMEM;
>
> @@ -172,8 +179,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>
>         prod = RING_IDX(ring, txr->tx_raw_prod);
>         tx_buf = &txr->tx_buf_ring[prod];
> -       tx_buf->mbuf = tx_pkt;
> -       tx_buf->nr_bds = nr_bds;
> +       *tx_buf = tx_pkt;
>
>         txbd = &txr->tx_desc_ring[prod];
>         txbd->opaque = *coal_pkts;
> @@ -185,7 +191,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>                 txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
>         else
>                 txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
> -       txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
> +       txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
>         *last_txbd = txbd;
>
>         if (long_bd) {
> @@ -193,18 +199,18 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>                 vlan_tag_flags = 0;
>
>                 /* HW can accelerate only outer vlan in QinQ mode */
> -               if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
> +               if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
>                         vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
> -                               tx_buf->mbuf->vlan_tci_outer;
> +                               tx_pkt->vlan_tci_outer;
>                         outer_tpid_bd = txq->bp->outer_tpid_bd &
>                                 BNXT_OUTER_TPID_BD_MASK;
>                         vlan_tag_flags |= outer_tpid_bd;
> -               } else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
> +               } else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
>                         /* shurd: Should this mask at
>                          * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
>                          */
>                         vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
> -                               tx_buf->mbuf->vlan_tci;
> +                               tx_pkt->vlan_tci;
>                         /* Currently supports 8021Q, 8021AD vlan offloads
>                          * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
>                          */
> @@ -325,7 +331,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
>
>                 prod = RING_IDX(ring, txr->tx_raw_prod);
>                 tx_buf = &txr->tx_buf_ring[prod];
> -               tx_buf->mbuf = m_seg;
> +               *tx_buf = m_seg;
>
>                 txbd = &txr->tx_desc_ring[prod];
>                 txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
> @@ -356,16 +362,17 @@ static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
>         int i, j;
>
>         for (i = 0; i < nr_pkts; i++) {
> -               struct bnxt_sw_tx_bd *tx_buf;
> +               struct rte_mbuf **tx_buf;
>                 unsigned short nr_bds;
>
>                 tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> -               nr_bds = tx_buf->nr_bds;
> +               nr_bds = (*tx_buf)->nb_segs +
> +                        bnxt_xmit_need_long_bd(*tx_buf, txq);
>                 for (j = 0; j < nr_bds; j++) {
> -                       if (tx_buf->mbuf) {
> +                       if (*tx_buf) {
>                                 /* Add mbuf to the bulk free array */
> -                               free[blk++] = tx_buf->mbuf;
> -                               tx_buf->mbuf = NULL;
> +                               free[blk++] = *tx_buf;
> +                               *tx_buf = NULL;
>                         }
>                         raw_cons = RING_NEXT(raw_cons);
>                         tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> @@ -389,14 +396,15 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
>
>         for (i = 0; i < nr_pkts; i++) {
>                 struct rte_mbuf *mbuf;
> -               struct bnxt_sw_tx_bd *tx_buf;
> +               struct rte_mbuf **tx_buf;
>                 unsigned short nr_bds;
>
>                 tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
> -               nr_bds = tx_buf->nr_bds;
> +               nr_bds = (*tx_buf)->nb_segs +
> +                        bnxt_xmit_need_long_bd(*tx_buf, txq);
>                 for (j = 0; j < nr_bds; j++) {
> -                       mbuf = tx_buf->mbuf;
> -                       tx_buf->mbuf = NULL;
> +                       mbuf = *tx_buf;
> +                       *tx_buf = NULL;
>                         raw_cons = RING_NEXT(raw_cons);
>                         tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
>                         if (!mbuf)      /* long_bd's tx_buf ? */
> diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
> index 281a3e23c5..238be1d190 100644
> --- a/drivers/net/bnxt/bnxt_txr.h
> +++ b/drivers/net/bnxt/bnxt_txr.h
> @@ -17,18 +17,13 @@ struct bnxt_tx_ring_info {
>         struct bnxt_db_info     tx_db;
>
>         struct tx_bd_long       *tx_desc_ring;
> -       struct bnxt_sw_tx_bd    *tx_buf_ring;
> +       struct rte_mbuf         **tx_buf_ring;
>
>         rte_iova_t              tx_desc_mapping;
>
>         struct bnxt_ring        *tx_ring_struct;
>  };
>
> -struct bnxt_sw_tx_bd {
> -       struct rte_mbuf         *mbuf; /* mbuf associated with TX descriptor */
> -       unsigned short          nr_bds;
> -};
> -
>  static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
>  {
>         return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &
> --
> 2.25.1
>
diff mbox series

Patch

diff --git a/drivers/net/bnxt/bnxt_ethdev.c b/drivers/net/bnxt/bnxt_ethdev.c
index 88da345034..d4028e2bb2 100644
--- a/drivers/net/bnxt/bnxt_ethdev.c
+++ b/drivers/net/bnxt/bnxt_ethdev.c
@@ -3186,7 +3186,7 @@  bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 	struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
 	struct bnxt_tx_ring_info *txr;
 	struct bnxt_cp_ring_info *cpr;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	struct tx_pkt_cmpl *txcmp;
 	uint32_t cons, cp_cons;
 	int rc;
@@ -3216,7 +3216,7 @@  bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 			return RTE_ETH_TX_DESC_UNAVAIL;
 	}
 	tx_buf = &txr->tx_buf_ring[cons];
-	if (tx_buf->mbuf == NULL)
+	if (*tx_buf == NULL)
 		return RTE_ETH_TX_DESC_DONE;
 
 	return RTE_ETH_TX_DESC_FULL;
diff --git a/drivers/net/bnxt/bnxt_ring.c b/drivers/net/bnxt/bnxt_ring.c
index 997dcdc28b..e4055fa49b 100644
--- a/drivers/net/bnxt/bnxt_ring.c
+++ b/drivers/net/bnxt/bnxt_ring.c
@@ -230,7 +230,7 @@  int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			tx_ring->vmem =
 			    (void **)((char *)mz->addr + tx_vmem_start);
 			tx_ring_info->tx_buf_ring =
-			    (struct bnxt_sw_tx_bd *)tx_ring->vmem;
+			    (struct rte_mbuf **)tx_ring->vmem;
 		}
 	}
 
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_common.h b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
index 91ff6736b1..9b9489a695 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_common.h
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_common.h
@@ -100,57 +100,78 @@  bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
  * is enabled.
  */
 static inline void
-bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
 	uint16_t cons, raw_cons = txr->tx_raw_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
-
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
-
-		cons = raw_cons++ & ring_mask;
-		tx_buf = &txr->tx_buf_ring[cons];
-		free[blk++] = tx_buf->mbuf;
-		tx_buf->mbuf = NULL;
+	uint32_t ring_mask, ring_size, num;
+	struct rte_mempool *pool;
+
+	ring_mask = txr->tx_ring_struct->ring_mask;
+	ring_size = txr->tx_ring_struct->ring_size;
+
+	cons = raw_cons & ring_mask;
+	num = RTE_MIN(nr_pkts, ring_size - cons);
+	pool = txr->tx_buf_ring[cons]->pool;
+
+	rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
+	memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
+	raw_cons += num;
+	num = nr_pkts - num;
+	if (num) {
+		cons = raw_cons & ring_mask;
+		rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
+				     num);
+		memset(&txr->tx_buf_ring[cons], 0,
+		       num * sizeof(struct rte_mbuf *));
+		raw_cons += num;
 	}
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
 
 	txr->tx_raw_cons = raw_cons;
 }
 
 static inline void
-bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
 	uint16_t cons, raw_cons = txr->tx_raw_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
+	uint32_t ring_mask, ring_size, num, blk;
+	struct rte_mempool *pool;
 
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
-		struct rte_mbuf *mbuf;
+	ring_mask = txr->tx_ring_struct->ring_mask;
+	ring_size = txr->tx_ring_struct->ring_size;
 
-		cons = raw_cons++ & ring_mask;
-		tx_buf = &txr->tx_buf_ring[cons];
-		mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
-		if (unlikely(mbuf == NULL))
-			continue;
-		tx_buf->mbuf = NULL;
+	while (nr_pkts) {
+		struct rte_mbuf *mbuf;
 
-		if (blk && mbuf->pool != free[0]->pool) {
-			rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-			blk = 0;
+		cons = raw_cons & ring_mask;
+		num = RTE_MIN(nr_pkts, ring_size - cons);
+		pool = txr->tx_buf_ring[cons]->pool;
+
+		blk = 0;
+		do {
+			mbuf = txr->tx_buf_ring[cons + blk];
+			mbuf = rte_pktmbuf_prefree_seg(mbuf);
+			if (!mbuf || mbuf->pool != pool)
+				break;
+			blk++;
+		} while (blk < num);
+
+		if (blk) {
+			rte_mempool_put_bulk(pool,
+					     (void **)&txr->tx_buf_ring[cons],
+					     blk);
+			memset(&txr->tx_buf_ring[cons], 0,
+			       blk * sizeof(struct rte_mbuf *));
+			raw_cons += blk;
+			nr_pkts -= blk;
+		}
+		if (!mbuf) {
+			/* Skip freeing mbufs with non-zero reference count. */
+			raw_cons++;
+			nr_pkts--;
 		}
-		free[blk++] = mbuf;
 	}
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-
 	txr->tx_raw_cons = raw_cons;
 }
 #endif /* _BNXT_RXTX_VEC_COMMON_H_ */
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
index 3d54d9d59d..bc2e96ec38 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
@@ -346,7 +346,7 @@  bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct rte_mbuf *tx_mbuf;
 	struct tx_bd_long *txbd = NULL;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
@@ -362,8 +362,7 @@  bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 		tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
 		tx_buf = &txr->tx_buf_ring[tx_prod];
-		tx_buf->mbuf = tx_mbuf;
-		tx_buf->nr_bds = 1;
+		*tx_buf = tx_mbuf;
 
 		txbd = &txr->tx_desc_ring[tx_prod];
 		txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
index 7a58434ce9..7ec04797b7 100644
--- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
+++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
@@ -321,12 +321,11 @@  bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 
 static inline void
 bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
-	      struct bnxt_sw_tx_bd *tx_buf)
+	      struct rte_mbuf **tx_buf)
 {
 	__m128i desc;
 
-	tx_buf->mbuf = mbuf;
-	tx_buf->nr_bds = 1;
+	*tx_buf = mbuf;
 
 	desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
 			      bnxt_xmit_flags_len(mbuf->data_len,
@@ -343,7 +342,7 @@  bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct tx_bd_long *txbd;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
diff --git a/drivers/net/bnxt/bnxt_txq.c b/drivers/net/bnxt/bnxt_txq.c
index 8679ac91e7..d95e1f7526 100644
--- a/drivers/net/bnxt/bnxt_txq.c
+++ b/drivers/net/bnxt/bnxt_txq.c
@@ -24,7 +24,7 @@  void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
 
 static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 {
-	struct bnxt_sw_tx_bd *sw_ring;
+	struct rte_mbuf **sw_ring;
 	uint16_t i;
 
 	if (!txq || !txq->tx_ring)
@@ -33,9 +33,9 @@  static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 	sw_ring = txq->tx_ring->tx_buf_ring;
 	if (sw_ring) {
 		for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
-			if (sw_ring[i].mbuf) {
-				rte_pktmbuf_free_seg(sw_ring[i].mbuf);
-				sw_ring[i].mbuf = NULL;
+			if (sw_ring[i]) {
+				rte_pktmbuf_free_seg(sw_ring[i]);
+				sw_ring[i] = NULL;
 			}
 		}
 	}
diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
index 2810906a3a..68fbd3f582 100644
--- a/drivers/net/bnxt/bnxt_txr.c
+++ b/drivers/net/bnxt/bnxt_txr.c
@@ -76,7 +76,7 @@  int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
 	ring->ring_mask = ring->ring_size - 1;
 	ring->bd = (void *)txr->tx_desc_ring;
 	ring->bd_dma = txr->tx_desc_mapping;
-	ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
+	ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
 	ring->vmem = (void **)&txr->tx_buf_ring;
 	ring->fw_ring_id = INVALID_HW_RING_ID;
 
@@ -104,6 +104,21 @@  int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
 	return 0;
 }
 
+static bool
+bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
+{
+	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
+				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
+				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
+				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
+				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
+				PKT_TX_QINQ_PKT) ||
+	     (BNXT_TRUFLOW_EN(txq->bp) &&
+	      (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
+		return true;
+	return false;
+}
+
 static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 				struct bnxt_tx_queue *txq,
 				uint16_t *coal_pkts,
@@ -116,10 +131,10 @@  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	struct tx_bd_long_hi *txbd1 = NULL;
 	uint32_t vlan_tag_flags;
 	bool long_bd = false;
-	unsigned short nr_bds = 0;
+	unsigned short nr_bds;
 	uint16_t prod;
 	struct rte_mbuf *m_seg;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	static const uint32_t lhint_arr[4] = {
 		TX_BD_LONG_FLAGS_LHINT_LT512,
 		TX_BD_LONG_FLAGS_LHINT_LT1K,
@@ -130,17 +145,9 @@  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	if (unlikely(is_bnxt_in_error(txq->bp)))
 		return -EIO;
 
-	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
-				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
-				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
-				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
-				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
-				PKT_TX_QINQ_PKT) ||
-	     (BNXT_TRUFLOW_EN(txq->bp) &&
-	      (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
-		long_bd = true;
-
+	long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
 	nr_bds = long_bd + tx_pkt->nb_segs;
+
 	if (unlikely(bnxt_tx_avail(txq) < nr_bds))
 		return -ENOMEM;
 
@@ -172,8 +179,7 @@  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 
 	prod = RING_IDX(ring, txr->tx_raw_prod);
 	tx_buf = &txr->tx_buf_ring[prod];
-	tx_buf->mbuf = tx_pkt;
-	tx_buf->nr_bds = nr_bds;
+	*tx_buf = tx_pkt;
 
 	txbd = &txr->tx_desc_ring[prod];
 	txbd->opaque = *coal_pkts;
@@ -185,7 +191,7 @@  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
 	else
 		txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
-	txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
+	txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
 	*last_txbd = txbd;
 
 	if (long_bd) {
@@ -193,18 +199,18 @@  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		vlan_tag_flags = 0;
 
 		/* HW can accelerate only outer vlan in QinQ mode */
-		if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
+		if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
 			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-				tx_buf->mbuf->vlan_tci_outer;
+				tx_pkt->vlan_tci_outer;
 			outer_tpid_bd = txq->bp->outer_tpid_bd &
 				BNXT_OUTER_TPID_BD_MASK;
 			vlan_tag_flags |= outer_tpid_bd;
-		} else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+		} else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
 			/* shurd: Should this mask at
 			 * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
 			 */
 			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-				tx_buf->mbuf->vlan_tci;
+				tx_pkt->vlan_tci;
 			/* Currently supports 8021Q, 8021AD vlan offloads
 			 * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
 			 */
@@ -325,7 +331,7 @@  static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 
 		prod = RING_IDX(ring, txr->tx_raw_prod);
 		tx_buf = &txr->tx_buf_ring[prod];
-		tx_buf->mbuf = m_seg;
+		*tx_buf = m_seg;
 
 		txbd = &txr->tx_desc_ring[prod];
 		txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
@@ -356,16 +362,17 @@  static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
 	int i, j;
 
 	for (i = 0; i < nr_pkts; i++) {
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct rte_mbuf **tx_buf;
 		unsigned short nr_bds;
 
 		tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-		nr_bds = tx_buf->nr_bds;
+		nr_bds = (*tx_buf)->nb_segs +
+			 bnxt_xmit_need_long_bd(*tx_buf, txq);
 		for (j = 0; j < nr_bds; j++) {
-			if (tx_buf->mbuf) {
+			if (*tx_buf) {
 				/* Add mbuf to the bulk free array */
-				free[blk++] = tx_buf->mbuf;
-				tx_buf->mbuf = NULL;
+				free[blk++] = *tx_buf;
+				*tx_buf = NULL;
 			}
 			raw_cons = RING_NEXT(raw_cons);
 			tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
@@ -389,14 +396,15 @@  static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
 
 	for (i = 0; i < nr_pkts; i++) {
 		struct rte_mbuf *mbuf;
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct rte_mbuf **tx_buf;
 		unsigned short nr_bds;
 
 		tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-		nr_bds = tx_buf->nr_bds;
+		nr_bds = (*tx_buf)->nb_segs +
+			 bnxt_xmit_need_long_bd(*tx_buf, txq);
 		for (j = 0; j < nr_bds; j++) {
-			mbuf = tx_buf->mbuf;
-			tx_buf->mbuf = NULL;
+			mbuf = *tx_buf;
+			*tx_buf = NULL;
 			raw_cons = RING_NEXT(raw_cons);
 			tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
 			if (!mbuf)	/* long_bd's tx_buf ? */
diff --git a/drivers/net/bnxt/bnxt_txr.h b/drivers/net/bnxt/bnxt_txr.h
index 281a3e23c5..238be1d190 100644
--- a/drivers/net/bnxt/bnxt_txr.h
+++ b/drivers/net/bnxt/bnxt_txr.h
@@ -17,18 +17,13 @@  struct bnxt_tx_ring_info {
 	struct bnxt_db_info     tx_db;
 
 	struct tx_bd_long	*tx_desc_ring;
-	struct bnxt_sw_tx_bd	*tx_buf_ring;
+	struct rte_mbuf		**tx_buf_ring;
 
 	rte_iova_t		tx_desc_mapping;
 
 	struct bnxt_ring	*tx_ring_struct;
 };
 
-struct bnxt_sw_tx_bd {
-	struct rte_mbuf		*mbuf; /* mbuf associated with TX descriptor */
-	unsigned short		nr_bds;
-};
-
 static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
 {
 	return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &