net/cnxk: add atomic fc check in poll mode Tx path

Message ID 20230606061249.833290-1-rbhansali@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Series net/cnxk: add atomic fc check in poll mode Tx path

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional fail Functional issues
ci/iol-testing warning apply patch failure

Commit Message

Rahul Bhansali June 6, 2023, 6:12 a.m. UTC
  Add support for an atomic flow control (fc) check in the poll mode
Tx path. This atomic check is useful when multiple threads use the
same Tx queue. It is enabled when the Tx offload
RTE_ETH_TX_OFFLOAD_MT_LOCKFREE is set.

Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
---
 drivers/net/cnxk/cn10k_ethdev.c |  3 ++
 drivers/net/cnxk/cn10k_rxtx.h   |  1 +
 drivers/net/cnxk/cn10k_tx.h     | 61 ++++++++++++++++++++++++++-------
 3 files changed, 52 insertions(+), 13 deletions(-)
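
For context, this Tx path is selected only when the application requests
the RTE_ETH_TX_OFFLOAD_MT_LOCKFREE Tx offload at configure time. A minimal
sketch of doing so (the helper name, port_id and queue counts below are
placeholders, not part of this patch):

    #include <string.h>
    #include <rte_ethdev.h>

    static int
    configure_mt_lockfree(uint16_t port_id)
    {
            struct rte_eth_dev_info dev_info;
            struct rte_eth_conf conf;

            memset(&conf, 0, sizeof(conf));
            if (rte_eth_dev_info_get(port_id, &dev_info) != 0)
                    return -1;
            /* Request the offload only if the PMD advertises it. */
            if (!(dev_info.tx_offload_capa & RTE_ETH_TX_OFFLOAD_MT_LOCKFREE))
                    return -1;
            conf.txmode.offloads |= RTE_ETH_TX_OFFLOAD_MT_LOCKFREE;
            return rte_eth_dev_configure(port_id, 1, 1, &conf);
    }

With the offload accepted, multiple lcores may call rte_eth_tx_burst()
on the same Tx queue without external locking, which is the situation
the atomic check is written for.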
  

Comments

Jerin Jacob June 13, 2023, 6:56 a.m. UTC | #1
On Tue, Jun 6, 2023 at 11:43 AM Rahul Bhansali <rbhansali@marvell.com> wrote:
>
> Add support for an atomic flow control (fc) check in the poll mode
> Tx path. This atomic check is useful when multiple threads use the
> same Tx queue. It is enabled when the Tx offload
> RTE_ETH_TX_OFFLOAD_MT_LOCKFREE is set.
>
> Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>


Updated the git commit as follows and applied to
dpdk-next-net-mrvl/for-next-net. Thanks

    net/cnxk: improve Tx queue depth calculation

    Add support for an atomic flow control check in the poll mode Tx path.
    This atomic check is useful when multiple threads use the same Tx queue.
    It is enabled when the Tx offload RTE_ETH_TX_OFFLOAD_MT_LOCKFREE
    is set.

    Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>


> ---
>  drivers/net/cnxk/cn10k_ethdev.c |  3 ++
>  drivers/net/cnxk/cn10k_rxtx.h   |  1 +
>  drivers/net/cnxk/cn10k_tx.h     | 61 ++++++++++++++++++++++++++-------
>  3 files changed, 52 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
> index 792c1b1970..4c4acc7cf0 100644
> --- a/drivers/net/cnxk/cn10k_ethdev.c
> +++ b/drivers/net/cnxk/cn10k_ethdev.c
> @@ -241,6 +241,9 @@ cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
>                         return rc;
>         }
>
> +       /* Set Txq flag for MT_LOCKFREE */
> +       txq->flag = !!(dev->tx_offloads & RTE_ETH_TX_OFFLOAD_MT_LOCKFREE);
> +
>         /* Store lmt base in tx queue for easy access */
>         txq->lmt_base = nix->lmt_base;
>         txq->io_addr = sq->io_addr;
> diff --git a/drivers/net/cnxk/cn10k_rxtx.h b/drivers/net/cnxk/cn10k_rxtx.h
> index 65dd57494a..b4287e2864 100644
> --- a/drivers/net/cnxk/cn10k_rxtx.h
> +++ b/drivers/net/cnxk/cn10k_rxtx.h
> @@ -51,6 +51,7 @@ struct cn10k_eth_txq {
>         rte_iova_t io_addr;
>         uint16_t sqes_per_sqb_log2;
>         int16_t nb_sqb_bufs_adj;
> +       uint8_t flag;
>         rte_iova_t cpt_io_addr;
>         uint64_t sa_base;
>         uint64_t *cpt_fc;
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index 4f23a8dfc3..17793493cc 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -47,6 +47,47 @@
>                 }                                                              \
>         } while (0)
>
> +#define NIX_XMIT_FC_OR_RETURN_MTS(txq, pkts)                                                       \
> +       do {                                                                                       \
> +               int64_t *fc_cache = &(txq)->fc_cache_pkts;                                         \
> +               uint8_t retry_count = 8;                                                           \
> +               int64_t val, newval;                                                               \
> +       retry:                                                                                     \
> +               /* Reduce the cached count */                                                      \
> +               val = (int64_t)__atomic_fetch_sub(fc_cache, pkts, __ATOMIC_RELAXED);               \
> +               val -= pkts;                                                                       \
> +               /* Cached value is low, Update the fc_cache_pkts */                                \
> +               if (unlikely(val < 0)) {                                                           \
> +                       /* Multiply with sqe_per_sqb to express in pkts */                         \
> +                       newval = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem,               \
> +                                                                       __ATOMIC_RELAXED);         \
> +                       newval = (newval << (txq)->sqes_per_sqb_log2) - newval;                    \
> +                       newval -= pkts;                                                            \
> +                       if (!__atomic_compare_exchange_n(fc_cache, &val, newval, false,            \
> +                                                        __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {    \
> +                               if (retry_count) {                                                 \
> +                                       retry_count--;                                             \
> +                                       goto retry;                                                \
> +                               } else                                                             \
> +                                       return 0;                                                  \
> +                       }                                                                          \
> +                       /* Update and check it again for the room */                               \
> +                       if (unlikely(newval < 0))                                                  \
> +                               return 0;                                                          \
> +               }                                                                                  \
> +       } while (0)
> +
> +#define NIX_XMIT_FC_CHECK_RETURN(txq, pkts)                                                        \
> +       do {                                                                                       \
> +               if (unlikely((txq)->flag))                                                         \
> +                       NIX_XMIT_FC_OR_RETURN_MTS(txq, pkts);                                      \
> +               else {                                                                             \
> +                       NIX_XMIT_FC_OR_RETURN(txq, pkts);                                          \
> +                       /* Reduce the cached count */                                              \
> +                       txq->fc_cache_pkts -= pkts;                                                \
> +               }                                                                                  \
> +       } while (0)
> +
>  /* Encoded number of segments to number of dwords macro, each value of nb_segs
>   * is encoded as 4bits.
>   */
> @@ -1174,11 +1215,9 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
>         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
>                 handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
>
> -       if (!(flags & NIX_TX_VWQE_F)) {
> -               NIX_XMIT_FC_OR_RETURN(txq, pkts);
> -               /* Reduce the cached count */
> -               txq->fc_cache_pkts -= pkts;
> -       }
> +       if (!(flags & NIX_TX_VWQE_F))
> +               NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
> +
>         /* Get cmd skeleton */
>         cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
>
> @@ -1323,11 +1362,9 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
>         if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
>                 handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
>
> -       if (!(flags & NIX_TX_VWQE_F)) {
> -               NIX_XMIT_FC_OR_RETURN(txq, pkts);
> -               /* Reduce the cached count */
> -               txq->fc_cache_pkts -= pkts;
> -       }
> +       if (!(flags & NIX_TX_VWQE_F))
> +               NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
> +
>         /* Get cmd skeleton */
>         cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
>
> @@ -1879,11 +1916,9 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
>                 handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
>
>         if (!(flags & NIX_TX_VWQE_F)) {
> -               NIX_XMIT_FC_OR_RETURN(txq, pkts);
>                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
>                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> -               /* Reduce the cached count */
> -               txq->fc_cache_pkts -= pkts;
> +               NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
>         } else {
>                 scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
>                 pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
> --
> 2.25.1
>
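
The heart of the patch is NIX_XMIT_FC_OR_RETURN_MTS quoted above: consume
the cached credit with an atomic fetch-sub, and when it goes negative,
recompute the free depth from the hardware counter and publish it with a
compare-exchange, retrying up to 8 times if another thread races the
update. A simplified standalone model of that accounting (the struct and
function names are hypothetical stand-ins for the txq fields, not driver
code):

    #include <stdbool.h>
    #include <stdint.h>

    struct txq_model {
            int64_t fc_cache_pkts;      /* cached credit, in packets */
            uint64_t *fc_mem;           /* SQBs consumed, updated by HW */
            int64_t nb_sqb_bufs_adj;    /* adjusted SQB total for the SQ */
            uint16_t sqes_per_sqb_log2; /* log2 of SQEs per SQB */
    };

    /* Returns true if 'pkts' slots were reserved, false if the queue is
     * full (the burst function then returns 0 packets transmitted). */
    static bool
    reserve_tx_slots(struct txq_model *txq, uint16_t pkts)
    {
            uint8_t retry_count = 8;
            int64_t val, newval;

    retry:
            /* Optimistically consume from the cached credit. */
            val = __atomic_fetch_sub(&txq->fc_cache_pkts, pkts,
                                     __ATOMIC_RELAXED) - pkts;
            if (val >= 0)
                    return true;

            /* Cache exhausted: recompute free SQEs from the HW counter.
             * (n << log2) - n == n * (2^log2 - 1): each free SQB
             * contributes one fewer SQE than its nominal capacity. */
            newval = txq->nb_sqb_bufs_adj -
                     (int64_t)__atomic_load_n(txq->fc_mem, __ATOMIC_RELAXED);
            newval = (newval << txq->sqes_per_sqb_log2) - newval;
            newval -= pkts;

            /* Publish only if no other thread raced us meanwhile. */
            if (!__atomic_compare_exchange_n(&txq->fc_cache_pkts, &val,
                                             newval, false, __ATOMIC_RELAXED,
                                             __ATOMIC_RELAXED)) {
                    if (retry_count--)
                            goto retry;
                    return false;
            }
            return newval >= 0;
    }

For intuition, with hypothetical numbers nb_sqb_bufs_adj = 100,
*fc_mem = 40 and sqes_per_sqb_log2 = 5, a refill computes 60 free SQBs
and (60 << 5) - 60 = 1860 packets of room before subtracting the burst
size. The non-MT_LOCKFREE branch of NIX_XMIT_FC_CHECK_RETURN keeps the
original unsynchronized fast path, so single-writer queues pay no extra
cost.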
  

Patch

diff --git a/drivers/net/cnxk/cn10k_ethdev.c b/drivers/net/cnxk/cn10k_ethdev.c
index 792c1b1970..4c4acc7cf0 100644
--- a/drivers/net/cnxk/cn10k_ethdev.c
+++ b/drivers/net/cnxk/cn10k_ethdev.c
@@ -241,6 +241,9 @@  cn10k_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid,
 			return rc;
 	}
 
+	/* Set Txq flag for MT_LOCKFREE */
+	txq->flag = !!(dev->tx_offloads & RTE_ETH_TX_OFFLOAD_MT_LOCKFREE);
+
 	/* Store lmt base in tx queue for easy access */
 	txq->lmt_base = nix->lmt_base;
 	txq->io_addr = sq->io_addr;
diff --git a/drivers/net/cnxk/cn10k_rxtx.h b/drivers/net/cnxk/cn10k_rxtx.h
index 65dd57494a..b4287e2864 100644
--- a/drivers/net/cnxk/cn10k_rxtx.h
+++ b/drivers/net/cnxk/cn10k_rxtx.h
@@ -51,6 +51,7 @@  struct cn10k_eth_txq {
 	rte_iova_t io_addr;
 	uint16_t sqes_per_sqb_log2;
 	int16_t nb_sqb_bufs_adj;
+	uint8_t flag;
 	rte_iova_t cpt_io_addr;
 	uint64_t sa_base;
 	uint64_t *cpt_fc;
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 4f23a8dfc3..17793493cc 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -47,6 +47,47 @@ 
 		}                                                              \
 	} while (0)
 
+#define NIX_XMIT_FC_OR_RETURN_MTS(txq, pkts)                                                       \
+	do {                                                                                       \
+		int64_t *fc_cache = &(txq)->fc_cache_pkts;                                         \
+		uint8_t retry_count = 8;                                                           \
+		int64_t val, newval;                                                               \
+	retry:                                                                                     \
+		/* Reduce the cached count */                                                      \
+		val = (int64_t)__atomic_fetch_sub(fc_cache, pkts, __ATOMIC_RELAXED);               \
+		val -= pkts;                                                                       \
+		/* Cached value is low, Update the fc_cache_pkts */                                \
+		if (unlikely(val < 0)) {                                                           \
+			/* Multiply with sqe_per_sqb to express in pkts */                         \
+			newval = txq->nb_sqb_bufs_adj - __atomic_load_n(txq->fc_mem,               \
+									__ATOMIC_RELAXED);         \
+			newval = (newval << (txq)->sqes_per_sqb_log2) - newval;                    \
+			newval -= pkts;                                                            \
+			if (!__atomic_compare_exchange_n(fc_cache, &val, newval, false,            \
+							 __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {    \
+				if (retry_count) {                                                 \
+					retry_count--;                                             \
+					goto retry;                                                \
+				} else                                                             \
+					return 0;                                                  \
+			}                                                                          \
+			/* Update and check it again for the room */                               \
+			if (unlikely(newval < 0))                                                  \
+				return 0;                                                          \
+		}                                                                                  \
+	} while (0)
+
+#define NIX_XMIT_FC_CHECK_RETURN(txq, pkts)                                                        \
+	do {                                                                                       \
+		if (unlikely((txq)->flag))                                                         \
+			NIX_XMIT_FC_OR_RETURN_MTS(txq, pkts);                                      \
+		else {                                                                             \
+			NIX_XMIT_FC_OR_RETURN(txq, pkts);                                          \
+			/* Reduce the cached count */                                              \
+			txq->fc_cache_pkts -= pkts;                                                \
+		}                                                                                  \
+	} while (0)
+
 /* Encoded number of segments to number of dwords macro, each value of nb_segs
  * is encoded as 4bits.
  */
@@ -1174,11 +1215,9 @@  cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
 		handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
 
-	if (!(flags & NIX_TX_VWQE_F)) {
-		NIX_XMIT_FC_OR_RETURN(txq, pkts);
-		/* Reduce the cached count */
-		txq->fc_cache_pkts -= pkts;
-	}
+	if (!(flags & NIX_TX_VWQE_F))
+		NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
+
 	/* Get cmd skeleton */
 	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
 
@@ -1323,11 +1362,9 @@  cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
 	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F && txq->tx_compl.ena)
 		handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
 
-	if (!(flags & NIX_TX_VWQE_F)) {
-		NIX_XMIT_FC_OR_RETURN(txq, pkts);
-		/* Reduce the cached count */
-		txq->fc_cache_pkts -= pkts;
-	}
+	if (!(flags & NIX_TX_VWQE_F))
+		NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
+
 	/* Get cmd skeleton */
 	cn10k_nix_tx_skeleton(txq, cmd, flags, !(flags & NIX_TX_VWQE_F));
 
@@ -1879,11 +1916,9 @@  cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
 		handle_tx_completion_pkts(txq, flags & NIX_TX_VWQE_F);
 
 	if (!(flags & NIX_TX_VWQE_F)) {
-		NIX_XMIT_FC_OR_RETURN(txq, pkts);
 		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
 		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);
-		/* Reduce the cached count */
-		txq->fc_cache_pkts -= pkts;
+		NIX_XMIT_FC_CHECK_RETURN(txq, pkts);
 	} else {
 		scalar = pkts & (NIX_DESCS_PER_LOOP - 1);
 		pkts = RTE_ALIGN_FLOOR(pkts, NIX_DESCS_PER_LOOP);