net/mlx5: fix concurrent use of Tx offloads
Checks
Commit Message
The original patch implemented the match_metadata offload in the
different burst functions, but it did not handle the concurrent use
of the match_metadata and multi_segs offloads.
This patch updates txq_scatter_v() to pass the metadata value from
the mbuf to the WQE when indicated by the offload flags.
Fixes: 6bd7fbd03c62 ("net/mlx5: support metadata as flow rule criteria")
Cc: stable@dpdk.org
Signed-off-by: Dekel Peled <dekelp@mellanox.com>
---
drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 10 +++++++---
drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 10 ++++++----
2 files changed, 13 insertions(+), 7 deletions(-)
Comments
> On Jan 28, 2019, at 10:49 PM, Dekel Peled <dekelp@mellanox.com> wrote:
>
> Original patch implemented the use of match_metadata offload in the
> different burst functions.
> The concurrent use of match_metadata and multi_segs offloads was
> not handled.
>
> This patch updates function txq_scatter_v(), to pass metadata value
> from mbuf to wqe, when indicated by offload flags.
>
> Fixes: 6bd7fbd03c62 ("net/mlx5: support metadata as flow rule criteria")
> Cc: stable@dpdk.org
>
> Signed-off-by: Dekel Peled <dekelp@mellanox.com>
> ---
> drivers/net/mlx5/mlx5_rxtx_vec_neon.h | 10 +++++++---
> drivers/net/mlx5/mlx5_rxtx_vec_sse.h | 10 ++++++----
> 2 files changed, 13 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> index 883fe1b..cfcb923 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
> @@ -104,6 +104,7 @@
> sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
> unsigned int n;
> volatile struct mlx5_wqe *wqe = NULL;
> + uint32_t metadata_ol = txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA;
Shouldn't it be uint64_t?
Or you can just inline it, as there's only one occurrence below.
>
> assert(elts_n > pkts_n);
> mlx5_tx_complete(txq);
> @@ -127,6 +128,9 @@
> uint8x16_t *t_wqe;
> uint8_t *dseg;
> uint8x16_t ctrl;
> + rte_be32_t metadata = (metadata_ol &&
> + (pkts[n]->ol_flags & PKT_TX_METADATA)) ?
> + pkts[n]->tx_metadata : 0;
Please fix the indentation.
There are a few more occurrences below; please fix them all.
Thanks,
Yongseok
>
> assert(segs_n);
> max_elts = elts_n - (elts_head - txq->elts_tail);
> @@ -164,9 +168,9 @@
> ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
> vst1q_u8((void *)t_wqe, ctrl);
> /* Fill ESEG in the header. */
> - vst1q_u16((void *)(t_wqe + 1),
> - ((uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len),
> - 0, 0, 0, 0 }));
> + vst1q_u32((void *)(t_wqe + 1), ((uint32x4_t) { 0,
> + cs_flags << 16 | rte_cpu_to_be_16(len),
> + metadata, 0 }));
> txq->wqe_ci = wqe_ci;
> }
> if (!n)
> diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> index 14117c4..dbc7d6f 100644
> --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
> @@ -104,6 +104,7 @@
> sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
> unsigned int n;
> volatile struct mlx5_wqe *wqe = NULL;
> + uint32_t metadata_ol = txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA;
>
> assert(elts_n > pkts_n);
> mlx5_tx_complete(txq);
> @@ -125,6 +126,9 @@
> uint16_t max_wqe;
> __m128i *t_wqe, *dseg;
> __m128i ctrl;
> + rte_be32_t metadata = (metadata_ol &&
> + (pkts[n]->ol_flags & PKT_TX_METADATA)) ?
> + pkts[n]->tx_metadata : 0;
>
> assert(segs_n);
> max_elts = elts_n - (elts_head - txq->elts_tail);
> @@ -164,10 +168,8 @@
> ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
> _mm_store_si128(t_wqe, ctrl);
> /* Fill ESEG in the header. */
> - _mm_store_si128(t_wqe + 1,
> - _mm_set_epi16(0, 0, 0, 0,
> - rte_cpu_to_be_16(len), cs_flags,
> - 0, 0));
> + _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata,
> + (rte_cpu_to_be_16(len) << 16) | cs_flags, 0));
> txq->wqe_ci = wqe_ci;
> }
> if (!n)
> --
> 1.8.3.1
>
@@ -104,6 +104,7 @@
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n;
volatile struct mlx5_wqe *wqe = NULL;
+ uint32_t metadata_ol = txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA;
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
@@ -127,6 +128,9 @@
uint8x16_t *t_wqe;
uint8_t *dseg;
uint8x16_t ctrl;
+ rte_be32_t metadata = (metadata_ol &&
+ (pkts[n]->ol_flags & PKT_TX_METADATA)) ?
+ pkts[n]->tx_metadata : 0;
assert(segs_n);
max_elts = elts_n - (elts_head - txq->elts_tail);
@@ -164,9 +168,9 @@
ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
vst1q_u8((void *)t_wqe, ctrl);
/* Fill ESEG in the header. */
- vst1q_u16((void *)(t_wqe + 1),
- ((uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len),
- 0, 0, 0, 0 }));
+ vst1q_u32((void *)(t_wqe + 1), ((uint32x4_t) { 0,
+ cs_flags << 16 | rte_cpu_to_be_16(len),
+ metadata, 0 }));
txq->wqe_ci = wqe_ci;
}
if (!n)
@@ -104,6 +104,7 @@
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n;
volatile struct mlx5_wqe *wqe = NULL;
+ uint32_t metadata_ol = txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA;
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
@@ -125,6 +126,9 @@
uint16_t max_wqe;
__m128i *t_wqe, *dseg;
__m128i ctrl;
+ rte_be32_t metadata = (metadata_ol &&
+ (pkts[n]->ol_flags & PKT_TX_METADATA)) ?
+ pkts[n]->tx_metadata : 0;
assert(segs_n);
max_elts = elts_n - (elts_head - txq->elts_tail);
@@ -164,10 +168,8 @@
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
- _mm_store_si128(t_wqe + 1,
- _mm_set_epi16(0, 0, 0, 0,
- rte_cpu_to_be_16(len), cs_flags,
- 0, 0));
+ _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata,
+ (rte_cpu_to_be_16(len) << 16) | cs_flags, 0));
txq->wqe_ci = wqe_ci;
}
if (!n)