[v4,1/3] event/cnxk: store and reuse workslot status
Checks
Commit Message
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Store and reuse the workslot status for TT, GRP and HEAD checks
instead of reading from the GWC, as reading from the GWC imposes
additional latency.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
Depends-on: 21590
v4 Changes:
- Update commit title for 3/3
v3 Changes:
- Split and rebase patches.
v2 Changes:
- Rebase.
- Fix incorrect use of RoC API
drivers/common/cnxk/roc_sso.h | 14 ++++++++------
drivers/event/cnxk/cn10k_worker.h | 16 +++++++++-------
drivers/event/cnxk/cn9k_worker.h | 6 +++---
drivers/event/cnxk/cnxk_eventdev.h | 2 ++
drivers/event/cnxk/cnxk_worker.h | 11 +++++++----
drivers/net/cnxk/cn10k_tx.h | 12 ++++++------
6 files changed, 35 insertions(+), 26 deletions(-)
--
2.17.1
Comments
On Thu, Feb 10, 2022 at 6:51 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Store and reuse workslot status for TT, GRP and HEAD status
> instead of reading from GWC as reading from GWC imposes
> additional latency.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Series Acked-by: Jerin Jacob <jerinj@marvell.com>
Series Applied to dpdk-next-net-eventdev/for-main. Thanks
> ---
> Depends-on: 21590
>
> v4 Changes:
> - Update commit title for 3/3
>
> v3 Changes:
> - Split and rebase patches.
>
> v2 Changes:
> - Rebase.
> - Fix incorrect use of RoC API
>
> drivers/common/cnxk/roc_sso.h | 14 ++++++++------
> drivers/event/cnxk/cn10k_worker.h | 16 +++++++++-------
> drivers/event/cnxk/cn9k_worker.h | 6 +++---
> drivers/event/cnxk/cnxk_eventdev.h | 2 ++
> drivers/event/cnxk/cnxk_worker.h | 11 +++++++----
> drivers/net/cnxk/cn10k_tx.h | 12 ++++++------
> 6 files changed, 35 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/common/cnxk/roc_sso.h b/drivers/common/cnxk/roc_sso.h
> index 27d49c6c68..ab7cee1c60 100644
> --- a/drivers/common/cnxk/roc_sso.h
> +++ b/drivers/common/cnxk/roc_sso.h
> @@ -54,12 +54,13 @@ struct roc_sso {
> uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
> } __plt_cache_aligned;
>
> -static __plt_always_inline void
> -roc_sso_hws_head_wait(uintptr_t tag_op)
> +static __plt_always_inline uint64_t
> +roc_sso_hws_head_wait(uintptr_t base)
> {
> -#ifdef RTE_ARCH_ARM64
> + uintptr_t tag_op = base + SSOW_LF_GWS_TAG;
> uint64_t tag;
>
> +#if defined(__aarch64__)
> asm volatile(PLT_CPU_FEATURE_PREAMBLE
> " ldr %[tag], [%[tag_op]] \n"
> " tbnz %[tag], 35, done%= \n"
> @@ -71,10 +72,11 @@ roc_sso_hws_head_wait(uintptr_t tag_op)
> : [tag] "=&r"(tag)
> : [tag_op] "r"(tag_op));
> #else
> - /* Wait for the SWTAG/SWTAG_FULL operation */
> - while (!(plt_read64(tag_op) & BIT_ULL(35)))
> - ;
> + do {
> + tag = plt_read64(tag_op);
> + } while (!(tag & BIT_ULL(35)));
> #endif
> + return tag;
> }
>
> /* SSO device initialization */
> diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
> index ff08b2d974..ada230ea1d 100644
> --- a/drivers/event/cnxk/cn10k_worker.h
> +++ b/drivers/event/cnxk/cn10k_worker.h
> @@ -40,8 +40,7 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
> {
> const uint32_t tag = (uint32_t)ev->event;
> const uint8_t new_tt = ev->sched_type;
> - const uint8_t cur_tt =
> - CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
> + const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
>
> /* CNXK model
> * cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
> @@ -81,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
> const uint8_t grp = ev->queue_id;
>
> /* Group hasn't changed, Use SWTAG to forward the event */
> - if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
> + if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
> cn10k_sso_hws_fwd_swtag(ws, ev);
> else
> /*
> @@ -211,6 +210,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
> } while (gw.u64[0] & BIT_ULL(63));
> mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
> #endif
> + ws->gw_rdata = gw.u64[0];
> gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
> (gw.u64[0] & (0x3FFull << 36)) << 4 |
> (gw.u64[0] & 0xffffffff);
> @@ -405,7 +405,8 @@ NIX_RX_FASTPATH_MODES
> RTE_SET_USED(timeout_ticks); \
> if (ws->swtag_req) { \
> ws->swtag_req = 0; \
> - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
> + ws->gw_rdata = cnxk_sso_hws_swtag_wait( \
> + ws->base + SSOW_LF_GWS_WQE0); \
> return 1; \
> } \
> return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
> @@ -424,7 +425,8 @@ NIX_RX_FASTPATH_MODES
> uint64_t iter; \
> if (ws->swtag_req) { \
> ws->swtag_req = 0; \
> - cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
> + ws->gw_rdata = cnxk_sso_hws_swtag_wait( \
> + ws->base + SSOW_LF_GWS_WQE0); \
> return ret; \
> } \
> ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
> @@ -507,8 +509,8 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
> else
> pa = txq->io_addr | ((segdw - 1) << 4);
>
> - if (!sched_type)
> - roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
> + if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
> + ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
>
> roc_lmt_submit_steorl(lmt_id, pa);
> }
> diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
> index 303b04c215..8455272005 100644
> --- a/drivers/event/cnxk/cn9k_worker.h
> +++ b/drivers/event/cnxk/cn9k_worker.h
> @@ -700,7 +700,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
>
> /* Head wait if needed */
> if (base)
> - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base);
>
> /* ESN */
> outb_priv = roc_nix_inl_onf_ipsec_outb_sa_sw_rsvd((void *)sa);
> @@ -793,7 +793,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> flags);
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
> - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base);
> cn9k_sso_txq_fc_wait(txq);
> if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
> cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
> @@ -806,7 +806,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
> cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags);
> if (!CNXK_TT_FROM_EVENT(ev->event)) {
> cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
> - roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
> + roc_sso_hws_head_wait(base);
> cn9k_sso_txq_fc_wait(txq);
> if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
> cn9k_nix_xmit_one(cmd, txq->lmt_addr,
> diff --git a/drivers/event/cnxk/cnxk_eventdev.h b/drivers/event/cnxk/cnxk_eventdev.h
> index b26df58588..ab58508590 100644
> --- a/drivers/event/cnxk/cnxk_eventdev.h
> +++ b/drivers/event/cnxk/cnxk_eventdev.h
> @@ -47,6 +47,7 @@
> #define CNXK_CLR_SUB_EVENT(x) (~(0xffu << 20) & x)
> #define CNXK_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff)
> #define CNXK_SWTAG_PEND(x) (BIT_ULL(62) & x)
> +#define CNXK_TAG_IS_HEAD(x) (BIT_ULL(35) & x)
>
> #define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
>
> @@ -123,6 +124,7 @@ struct cnxk_sso_evdev {
>
> struct cn10k_sso_hws {
> uint64_t base;
> + uint64_t gw_rdata;
> /* PTP timestamp */
> struct cnxk_timesync_info *tstamp;
> void *lookup_mem;
> diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
> index 9f9ceab8a1..7de03f3fbb 100644
> --- a/drivers/event/cnxk/cnxk_worker.h
> +++ b/drivers/event/cnxk/cnxk_worker.h
> @@ -52,11 +52,11 @@ cnxk_sso_hws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
> plt_write64(0, flush_op);
> }
>
> -static __rte_always_inline void
> +static __rte_always_inline uint64_t
> cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
> {
> -#ifdef RTE_ARCH_ARM64
> uint64_t swtp;
> +#ifdef RTE_ARCH_ARM64
>
> asm volatile(PLT_CPU_FEATURE_PREAMBLE
> " ldr %[swtb], [%[swtp_loc]] \n"
> @@ -70,9 +70,12 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
> : [swtp_loc] "r"(tag_op));
> #else
> /* Wait for the SWTAG/SWTAG_FULL operation */
> - while (plt_read64(tag_op) & BIT_ULL(62))
> - ;
> + do {
> + swtp = plt_read64(tag_op);
> + } while (swtp & BIT_ULL(62));
> #endif
> +
> + return swtp;
> }
>
> #endif
> diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
> index 4ae6bbf517..ec6366168c 100644
> --- a/drivers/net/cnxk/cn10k_tx.h
> +++ b/drivers/net/cnxk/cn10k_tx.h
> @@ -905,8 +905,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
> lnum++;
> }
>
> - if (flags & NIX_TX_VWQE_F)
> - roc_sso_hws_head_wait(ws[0]);
> + if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
> + ws[1] = roc_sso_hws_head_wait(ws[0]);
>
> left -= burst;
> tx_pkts += burst;
> @@ -1041,8 +1041,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
> }
> }
>
> - if (flags & NIX_TX_VWQE_F)
> - roc_sso_hws_head_wait(ws[0]);
> + if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
> + ws[1] = roc_sso_hws_head_wait(ws[0]);
>
> left -= burst;
> tx_pkts += burst;
> @@ -2582,8 +2582,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
> if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
> wd.data[0] >>= 16;
>
> - if (flags & NIX_TX_VWQE_F)
> - roc_sso_hws_head_wait(ws[0]);
> + if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
> + ws[1] = roc_sso_hws_head_wait(ws[0]);
>
> left -= burst;
>
> --
> 2.17.1
>
@@ -54,12 +54,13 @@ struct roc_sso {
uint8_t reserved[ROC_SSO_MEM_SZ] __plt_cache_aligned;
} __plt_cache_aligned;
-static __plt_always_inline void
-roc_sso_hws_head_wait(uintptr_t tag_op)
+static __plt_always_inline uint64_t
+roc_sso_hws_head_wait(uintptr_t base)
{
-#ifdef RTE_ARCH_ARM64
+ uintptr_t tag_op = base + SSOW_LF_GWS_TAG;
uint64_t tag;
+#if defined(__aarch64__)
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[tag], [%[tag_op]] \n"
" tbnz %[tag], 35, done%= \n"
@@ -71,10 +72,11 @@ roc_sso_hws_head_wait(uintptr_t tag_op)
: [tag] "=&r"(tag)
: [tag_op] "r"(tag_op));
#else
- /* Wait for the SWTAG/SWTAG_FULL operation */
- while (!(plt_read64(tag_op) & BIT_ULL(35)))
- ;
+ do {
+ tag = plt_read64(tag_op);
+ } while (!(tag & BIT_ULL(35)));
#endif
+ return tag;
}
/* SSO device initialization */
@@ -40,8 +40,7 @@ cn10k_sso_hws_fwd_swtag(struct cn10k_sso_hws *ws, const struct rte_event *ev)
{
const uint32_t tag = (uint32_t)ev->event;
const uint8_t new_tt = ev->sched_type;
- const uint8_t cur_tt =
- CNXK_TT_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0));
+ const uint8_t cur_tt = CNXK_TT_FROM_TAG(ws->gw_rdata);
/* CNXK model
* cur_tt/new_tt SSO_TT_ORDERED SSO_TT_ATOMIC SSO_TT_UNTAGGED
@@ -81,7 +80,7 @@ cn10k_sso_hws_forward_event(struct cn10k_sso_hws *ws,
const uint8_t grp = ev->queue_id;
/* Group hasn't changed, Use SWTAG to forward the event */
- if (CNXK_GRP_FROM_TAG(plt_read64(ws->base + SSOW_LF_GWS_WQE0)) == grp)
+ if (CNXK_GRP_FROM_TAG(ws->gw_rdata) == grp)
cn10k_sso_hws_fwd_swtag(ws, ev);
else
/*
@@ -211,6 +210,7 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
} while (gw.u64[0] & BIT_ULL(63));
mbuf = (uint64_t)((char *)gw.u64[1] - sizeof(struct rte_mbuf));
#endif
+ ws->gw_rdata = gw.u64[0];
gw.u64[0] = (gw.u64[0] & (0x3ull << 32)) << 6 |
(gw.u64[0] & (0x3FFull << 36)) << 4 |
(gw.u64[0] & 0xffffffff);
@@ -405,7 +405,8 @@ NIX_RX_FASTPATH_MODES
RTE_SET_USED(timeout_ticks); \
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ ws->gw_rdata = cnxk_sso_hws_swtag_wait( \
+ ws->base + SSOW_LF_GWS_WQE0); \
return 1; \
} \
return cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
@@ -424,7 +425,8 @@ NIX_RX_FASTPATH_MODES
uint64_t iter; \
if (ws->swtag_req) { \
ws->swtag_req = 0; \
- cnxk_sso_hws_swtag_wait(ws->base + SSOW_LF_GWS_WQE0); \
+ ws->gw_rdata = cnxk_sso_hws_swtag_wait( \
+ ws->base + SSOW_LF_GWS_WQE0); \
return ret; \
} \
ret = cn10k_sso_hws_get_work(ws, ev, flags, ws->lookup_mem); \
@@ -507,8 +509,8 @@ cn10k_sso_tx_one(struct cn10k_sso_hws *ws, struct rte_mbuf *m, uint64_t *cmd,
else
pa = txq->io_addr | ((segdw - 1) << 4);
- if (!sched_type)
- roc_sso_hws_head_wait(ws->base + SSOW_LF_GWS_TAG);
+ if (!CNXK_TAG_IS_HEAD(ws->gw_rdata) && !sched_type)
+ ws->gw_rdata = roc_sso_hws_head_wait(ws->base);
roc_lmt_submit_steorl(lmt_id, pa);
}
@@ -700,7 +700,7 @@ cn9k_sso_hws_xmit_sec_one(const struct cn9k_eth_txq *txq, uint64_t base,
/* Head wait if needed */
if (base)
- roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base);
/* ESN */
outb_priv = roc_nix_inl_onf_ipsec_outb_sa_sw_rsvd((void *)sa);
@@ -793,7 +793,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
- roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
@@ -806,7 +806,7 @@ cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
cn9k_nix_xmit_prepare_tstamp(txq, cmd, m->ol_flags, 4, flags);
if (!CNXK_TT_FROM_EVENT(ev->event)) {
cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
- roc_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+ roc_sso_hws_head_wait(base);
cn9k_sso_txq_fc_wait(txq);
if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
cn9k_nix_xmit_one(cmd, txq->lmt_addr,
@@ -47,6 +47,7 @@
#define CNXK_CLR_SUB_EVENT(x) (~(0xffu << 20) & x)
#define CNXK_GRP_FROM_TAG(x) (((x) >> 36) & 0x3ff)
#define CNXK_SWTAG_PEND(x) (BIT_ULL(62) & x)
+#define CNXK_TAG_IS_HEAD(x) (BIT_ULL(35) & x)
#define CN9K_SSOW_GET_BASE_ADDR(_GW) ((_GW)-SSOW_LF_GWS_OP_GET_WORK0)
@@ -123,6 +124,7 @@ struct cnxk_sso_evdev {
struct cn10k_sso_hws {
uint64_t base;
+ uint64_t gw_rdata;
/* PTP timestamp */
struct cnxk_timesync_info *tstamp;
void *lookup_mem;
@@ -52,11 +52,11 @@ cnxk_sso_hws_swtag_flush(uint64_t tag_op, uint64_t flush_op)
plt_write64(0, flush_op);
}
-static __rte_always_inline void
+static __rte_always_inline uint64_t
cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
{
-#ifdef RTE_ARCH_ARM64
uint64_t swtp;
+#ifdef RTE_ARCH_ARM64
asm volatile(PLT_CPU_FEATURE_PREAMBLE
" ldr %[swtb], [%[swtp_loc]] \n"
@@ -70,9 +70,12 @@ cnxk_sso_hws_swtag_wait(uintptr_t tag_op)
: [swtp_loc] "r"(tag_op));
#else
/* Wait for the SWTAG/SWTAG_FULL operation */
- while (plt_read64(tag_op) & BIT_ULL(62))
- ;
+ do {
+ swtp = plt_read64(tag_op);
+ } while (swtp & BIT_ULL(62));
#endif
+
+ return swtp;
}
#endif
@@ -905,8 +905,8 @@ cn10k_nix_xmit_pkts(void *tx_queue, uint64_t *ws, struct rte_mbuf **tx_pkts,
lnum++;
}
- if (flags & NIX_TX_VWQE_F)
- roc_sso_hws_head_wait(ws[0]);
+ if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+ ws[1] = roc_sso_hws_head_wait(ws[0]);
left -= burst;
tx_pkts += burst;
@@ -1041,8 +1041,8 @@ cn10k_nix_xmit_pkts_mseg(void *tx_queue, uint64_t *ws,
}
}
- if (flags & NIX_TX_VWQE_F)
- roc_sso_hws_head_wait(ws[0]);
+ if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+ ws[1] = roc_sso_hws_head_wait(ws[0]);
left -= burst;
tx_pkts += burst;
@@ -2582,8 +2582,8 @@ cn10k_nix_xmit_pkts_vector(void *tx_queue, uint64_t *ws,
if (flags & (NIX_TX_MULTI_SEG_F | NIX_TX_OFFLOAD_SECURITY_F))
wd.data[0] >>= 16;
- if (flags & NIX_TX_VWQE_F)
- roc_sso_hws_head_wait(ws[0]);
+ if ((flags & NIX_TX_VWQE_F) && !(ws[1] & BIT_ULL(35)))
+ ws[1] = roc_sso_hws_head_wait(ws[0]);
left -= burst;