> -----Original Message-----
> From: Lance Richardson <lance.richardson@broadcom.com>
> Sent: Friday, July 9, 2021 3:15 AM
> To: Ajit Khaparde (ajit.khaparde@broadcom.com)
> <ajit.khaparde@broadcom.com>; Somnath Kotur
> <somnath.kotur@broadcom.com>; Bruce Richardson
> <bruce.richardson@intel.com>; Konstantin Ananyev
> <konstantin.ananyev@intel.com>; jerinj@marvell.com; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Stephen Hurd <stephen.hurd@broadcom.com>;
> David Christensen <david.christensen@broadcom.com>
> Cc: dev@dpdk.org; stable@dpdk.org
> Subject: [PATCH] net/bnxt: fix missing barriers in completion handling
>
> Ensure that Rx/Tx/Async completion entry fields are accessed
> only after the completion's valid flag has been loaded and
> verified. This is needed for correct operation on systems that
> use relaxed memory consistency models.
>
> Fixes: 2eb53b134aae ("net/bnxt: add initial Rx code")
> Fixes: 6eb3cc2294fd ("net/bnxt: add initial Tx code")
> Cc: stable@dpdk.org
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
> ---
> drivers/net/bnxt/bnxt_cpr.h | 36 ++++++++++++++++++++++++---
> drivers/net/bnxt/bnxt_ethdev.c | 16 ++++++------
> drivers/net/bnxt/bnxt_irq.c | 7 +++---
> drivers/net/bnxt/bnxt_rxr.c | 9 ++++---
> drivers/net/bnxt/bnxt_rxtx_vec_avx2.c | 2 +-
> drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 2 +-
> drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 2 +-
> drivers/net/bnxt/bnxt_txr.c | 2 +-
> 8 files changed, 54 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_cpr.h b/drivers/net/bnxt/bnxt_cpr.h
> index 2a56ec52c..3ee6b74bc 100644
> --- a/drivers/net/bnxt/bnxt_cpr.h
> +++ b/drivers/net/bnxt/bnxt_cpr.h
> @@ -8,13 +8,10 @@
> #include <stdbool.h>
>
> #include <rte_io.h>
> +#include "hsi_struct_def_dpdk.h"
>
> struct bnxt_db_info;
>
> -#define CMP_VALID(cmp, raw_cons, ring)					\
> -	(!!(rte_le_to_cpu_32(((struct cmpl_base *)(cmp))->info3_v) &	\
> -	    CMPL_BASE_V) == !((raw_cons) & ((ring)->ring_size)))
> -
> #define CMP_TYPE(cmp) \
> (((struct cmpl_base *)cmp)->type & CMPL_BASE_TYPE_MASK)
>
> @@ -121,4 +118,35 @@ bool bnxt_is_recovery_enabled(struct bnxt *bp);
> bool bnxt_is_master_func(struct bnxt *bp);
>
> void bnxt_stop_rxtx(struct bnxt *bp);
> +
> +/**
> + * Check validity of a completion ring entry. If the entry is valid, include a
> + * C11 __ATOMIC_ACQUIRE fence to ensure that subsequent loads of fields in the
> + * completion are not hoisted by the compiler or by the CPU to come before the
> + * loading of the "valid" field.
> + *
> + * Note: the caller must not access any fields in the specified completion
> + * entry prior to calling this function.
> + *
> + * @param cmp
Nit, cmpl
> + * Pointer to an entry in the completion ring.
> + * @param raw_cons
> + * Raw consumer index of entry in completion ring.
> + * @param ring_size
> + * Size of completion ring.
> + */
> +static __rte_always_inline bool
> +bnxt_cpr_cmp_valid(const void *cmpl, uint32_t raw_cons, uint32_t ring_size)
> +{
> + const struct cmpl_base *c = cmpl;
> + bool expected, valid;
> +
> + expected = !(raw_cons & ring_size);
> + valid = !!(rte_le_to_cpu_32(c->info3_v) & CMPL_BASE_V);
> + if (valid == expected) {
> + rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
> + return true;
> + }
> + return false;
> +}
> #endif
> diff --git a/drivers/net/bnxt/bnxt_ethdev.c
> b/drivers/net/bnxt/bnxt_ethdev.c
> index ed09f1bf5..ee6929692 100644
> --- a/drivers/net/bnxt/bnxt_ethdev.c
> +++ b/drivers/net/bnxt/bnxt_ethdev.c
<snip>
>
> /* Check to see if hw has posted a completion for the descriptor. */
> @@ -3327,7 +3327,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue,
> uint16_t offset)
> cons = RING_CMPL(ring_mask, raw_cons);
> txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
>
> - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
> + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
cpr->cp_ring_struct->ring_size can be used instead of 'ring_mask + 1'?
> break;
>
> if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)
<snip>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index 263e6ec3c..13211060c 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -339,7 +339,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
> cons = RING_CMPL(ring_mask, raw_cons);
> txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
>
> - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
> + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
Same here. I think cpr->cp_ring_struct->ring_size can be used and it avoids calculation.
Also some places in other vector files.
> break;
>
> if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2))
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index 9a53d1fba..6e5630532 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -320,7 +320,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
> cons = RING_CMPL(ring_mask, raw_cons);
> txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
>
> - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
> + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
> break;
>
> if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2))
> diff --git a/drivers/net/bnxt/bnxt_txr.c b/drivers/net/bnxt/bnxt_txr.c
> index 9a6b96e04..47824334a 100644
> --- a/drivers/net/bnxt/bnxt_txr.c
> +++ b/drivers/net/bnxt/bnxt_txr.c
> @@ -461,7 +461,7 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue
> *txq)
> cons = RING_CMPL(ring_mask, raw_cons);
> txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons];
>
> - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
> + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
> break;
>
> opaque = rte_le_to_cpu_32(txcmp->opaque);
> --
> 2.25.1
On Fri, Jul 9, 2021 at 2:00 AM Ruifeng Wang <Ruifeng.Wang@arm.com> wrote:
>
<snip>
> > +/**
> > + * Check validity of a completion ring entry. If the entry is valid, include a
> > + * C11 __ATOMIC_ACQUIRE fence to ensure that subsequent loads of fields in the
> > + * completion are not hoisted by the compiler or by the CPU to come before the
> > + * loading of the "valid" field.
> > + *
> > + * Note: the caller must not access any fields in the specified completion
> > + * entry prior to calling this function.
> > + *
> > + * @param cmp
> Nit, cmpl
Thanks, good catch. I'll fix this in v2.
<snip>
>
> >
> > /* Check to see if hw has posted a completion for the descriptor. */
> > @@ -3327,7 +3327,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue,
> > uint16_t offset)
> > cons = RING_CMPL(ring_mask, raw_cons);
> > txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
> >
> > - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
> > + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
> cpr->cp_ring_struct->ring_size can be used instead of 'ring_mask + 1'?
>
> > break;
> >
> > if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)
>
> <snip>
>
> > diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> > b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> > index 263e6ec3c..13211060c 100644
> > --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> > +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> > @@ -339,7 +339,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
> > cons = RING_CMPL(ring_mask, raw_cons);
> > txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
> >
> > - if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
> > + if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
> Same here. I think cpr->cp_ring_struct->ring_size can be used and it avoids calculation.
> Also some places in other vector files.
It's true that cpr->cp_ring_struct->ring_size and ring_mask + 1 are
equivalent, but there doesn't seem to be a meaningful difference
between the two in the generated code.
Based on disassembly of x86 and Arm code for this function, the compiler
correctly determines that the value of ring_mask + 1 doesn't change within
the loop, so it is only computed once. The only difference would be in
whether an add instruction or a load instruction is used to put the value
in the register.
@@ -8,13 +8,10 @@
#include <stdbool.h>
#include <rte_io.h>
+#include "hsi_struct_def_dpdk.h"
struct bnxt_db_info;
-#define CMP_VALID(cmp, raw_cons, ring) \
- (!!(rte_le_to_cpu_32(((struct cmpl_base *)(cmp))->info3_v) & \
- CMPL_BASE_V) == !((raw_cons) & ((ring)->ring_size)))
-
#define CMP_TYPE(cmp) \
(((struct cmpl_base *)cmp)->type & CMPL_BASE_TYPE_MASK)
@@ -121,4 +118,35 @@ bool bnxt_is_recovery_enabled(struct bnxt *bp);
bool bnxt_is_master_func(struct bnxt *bp);
void bnxt_stop_rxtx(struct bnxt *bp);
+
+/**
+ * Check validity of a completion ring entry. If the entry is valid, include a
+ * C11 __ATOMIC_ACQUIRE fence to ensure that subsequent loads of fields in the
+ * completion are not hoisted by the compiler or by the CPU to come before the
+ * loading of the "valid" field.
+ *
+ * Note: the caller must not access any fields in the specified completion
+ * entry prior to calling this function.
+ *
+ * @param cmp
+ * Pointer to an entry in the completion ring.
+ * @param raw_cons
+ * Raw consumer index of entry in completion ring.
+ * @param ring_size
+ * Size of completion ring.
+ */
+static __rte_always_inline bool
+bnxt_cpr_cmp_valid(const void *cmpl, uint32_t raw_cons, uint32_t ring_size)
+{
+ const struct cmpl_base *c = cmpl;
+ bool expected, valid;
+
+ expected = !(raw_cons & ring_size);
+ valid = !!(rte_le_to_cpu_32(c->info3_v) & CMPL_BASE_V);
+ if (valid == expected) {
+ rte_atomic_thread_fence(__ATOMIC_ACQUIRE);
+ return true;
+ }
+ return false;
+}
#endif
@@ -3126,7 +3126,7 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct bnxt *bp = (struct bnxt *)dev->data->dev_private;
struct bnxt_cp_ring_info *cpr;
- uint32_t desc = 0, raw_cons;
+ uint32_t desc = 0, raw_cons, cp_ring_size;
struct bnxt_rx_queue *rxq;
struct rx_pkt_cmpl *rxcmp;
int rc;
@@ -3138,6 +3138,7 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
rxq = dev->data->rx_queues[rx_queue_id];
cpr = rxq->cp_ring;
raw_cons = cpr->cp_raw_cons;
+ cp_ring_size = cpr->cp_ring_struct->ring_size;
while (1) {
uint32_t agg_cnt, cons, cmpl_type;
@@ -3145,7 +3146,7 @@ bnxt_rx_queue_count_op(struct rte_eth_dev *dev, uint16_t rx_queue_id)
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
- if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(rxcmp, raw_cons, cp_ring_size))
break;
cmpl_type = CMP_TYPE(rxcmp);
@@ -3189,7 +3190,7 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
struct bnxt_rx_queue *rxq = rx_queue;
struct bnxt_cp_ring_info *cpr;
struct bnxt_rx_ring_info *rxr;
- uint32_t desc, raw_cons;
+ uint32_t desc, raw_cons, cp_ring_size;
struct bnxt *bp = rxq->bp;
struct rx_pkt_cmpl *rxcmp;
int rc;
@@ -3203,6 +3204,7 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
rxr = rxq->rx_ring;
cpr = rxq->cp_ring;
+ cp_ring_size = cpr->cp_ring_struct->ring_size;
/*
* For the vector receive case, the completion at the requested
@@ -3219,7 +3221,7 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
- if (CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
+ if (bnxt_cpr_cmp_valid(rxcmp, raw_cons, cp_ring_size))
return RTE_ETH_RX_DESC_DONE;
/* Check whether rx desc has an mbuf attached. */
@@ -3245,7 +3247,7 @@ bnxt_rx_descriptor_status_op(void *rx_queue, uint16_t offset)
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
- if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(rxcmp, raw_cons, cp_ring_size))
break;
cmpl_type = CMP_TYPE(rxcmp);
@@ -3299,7 +3301,6 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
struct bnxt_cp_ring_info *cpr = txq->cp_ring;
uint32_t ring_mask, raw_cons, nb_tx_pkts = 0;
- struct bnxt_ring *cp_ring_struct;
struct cmpl_base *cp_desc_ring;
int rc;
@@ -3316,7 +3317,6 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
raw_cons = cpr->cp_raw_cons;
cp_desc_ring = cpr->cp_desc_ring;
- cp_ring_struct = cpr->cp_ring_struct;
ring_mask = cpr->cp_ring_struct->ring_mask;
/* Check to see if hw has posted a completion for the descriptor. */
@@ -3327,7 +3327,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
cons = RING_CMPL(ring_mask, raw_cons);
txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
break;
if (CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2)
@@ -21,10 +21,10 @@ void bnxt_int_handler(void *param)
{
struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)param;
struct bnxt *bp = eth_dev->data->dev_private;
+ uint32_t cons, raw_cons, cp_ring_size;
struct bnxt_cp_ring_info *cpr;
struct cmpl_base *cmp;
- uint32_t raw_cons;
- uint32_t cons;
+
if (bp == NULL)
return;
@@ -33,6 +33,7 @@ void bnxt_int_handler(void *param)
return;
raw_cons = cpr->cp_raw_cons;
+ cp_ring_size = cpr->cp_ring_struct->ring_size;
pthread_mutex_lock(&bp->def_cp_lock);
while (1) {
if (!cpr || !cpr->cp_ring_struct || !cpr->cp_db.doorbell) {
@@ -48,7 +49,7 @@ void bnxt_int_handler(void *param)
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
cmp = &cpr->cp_desc_ring[cons];
- if (!CMP_VALID(cmp, raw_cons, cpr->cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(cmp, raw_cons, cp_ring_size))
break;
bnxt_event_hwrm_resp_handler(bp, cmp);
@@ -297,7 +297,8 @@ static int bnxt_agg_bufs_valid(struct bnxt_cp_ring_info *cpr,
raw_cp_cons = ADV_RAW_CMP(raw_cp_cons, agg_bufs);
last_cp_cons = RING_CMP(cpr->cp_ring_struct, raw_cp_cons);
agg_cmpl = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[last_cp_cons];
- return CMP_VALID(agg_cmpl, raw_cp_cons, cpr->cp_ring_struct);
+ return bnxt_cpr_cmp_valid(agg_cmpl, raw_cp_cons,
+ cpr->cp_ring_struct->ring_size);
}
/* TPA consume agg buffer out of order, allocate connected data only */
@@ -892,7 +893,8 @@ static int bnxt_rx_pkt(struct rte_mbuf **rx_pkt,
cp_cons = RING_CMP(cpr->cp_ring_struct, tmp_raw_cons);
rxcmp1 = (struct rx_pkt_cmpl_hi *)&cpr->cp_desc_ring[cp_cons];
- if (!CMP_VALID(rxcmp1, tmp_raw_cons, cpr->cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(rxcmp1, tmp_raw_cons,
+ cpr->cp_ring_struct->ring_size))
return -EBUSY;
if (cmp_type == RX_TPA_START_CMPL_TYPE_RX_TPA_START ||
@@ -1077,7 +1079,8 @@ uint16_t bnxt_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
cons = RING_CMP(cpr->cp_ring_struct, raw_cons);
rxcmp = (struct rx_pkt_cmpl *)&cpr->cp_desc_ring[cons];
- if (!CMP_VALID(rxcmp, raw_cons, cpr->cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(rxcmp, raw_cons,
+ cpr->cp_ring_struct->ring_size))
break;
if (CMP_TYPE(rxcmp) == CMPL_BASE_TYPE_HWRM_DONE) {
PMD_DRV_LOG(ERR, "Rx flush done\n");
@@ -408,7 +408,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
cons = RING_CMPL(ring_mask, raw_cons);
txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
break;
nb_tx_pkts += txcmp->opaque;
@@ -339,7 +339,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
cons = RING_CMPL(ring_mask, raw_cons);
txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
break;
if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2))
@@ -320,7 +320,7 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
cons = RING_CMPL(ring_mask, raw_cons);
txcmp = (struct tx_cmpl *)&cp_desc_ring[cons];
- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
break;
if (likely(CMP_TYPE(txcmp) == TX_CMPL_TYPE_TX_L2))
@@ -461,7 +461,7 @@ static int bnxt_handle_tx_cp(struct bnxt_tx_queue *txq)
cons = RING_CMPL(ring_mask, raw_cons);
txcmp = (struct tx_cmpl *)&cpr->cp_desc_ring[cons];
- if (!CMP_VALID(txcmp, raw_cons, cp_ring_struct))
+ if (!bnxt_cpr_cmp_valid(txcmp, raw_cons, ring_mask + 1))
break;
opaque = rte_le_to_cpu_32(txcmp->opaque);