Thursday, October 25, 2018 9:24 AM, Yongseok Koh:
> Subject: [PATCH] net/mlx5: add 128B padding of Rx completion entry
>
> A PMD parameter (rxq_cqe_pad_en) is added to enable 128B padding of
> CQE on RX side. The size of CQE is aligned with the size of a cacheline of the
> core. If cacheline size is 128B, the CQE size is configured to be 128B even
> though the device writes only 64B data on the cacheline. This is to avoid
> unnecessary cache invalidation by device's two consecutive writes on to one
> cacheline. However in some architecture, it is more beneficial to update
> entire cacheline with padding the rest 64B rather than striding because read-
> modify-write could drop performance a lot. On the other hand, writing extra
> data will consume more PCIe bandwidth and could also drop the maximum
> throughput. It is recommended to empirically set this parameter. Disabled by
> default.
>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Applied to next-net-mlx, thanks.
@@ -246,6 +246,24 @@ Run-time configuration
- x86_64 with ConnectX-4, ConnectX-4 LX, ConnectX-5 and Bluefield.
- POWER8 and ARMv8 with ConnectX-4 LX, ConnectX-5 and Bluefield.
+- ``rxq_cqe_pad_en`` parameter [int]
+
+ A nonzero value enables 128B padding of CQE on RX side. The size of CQE
+ is aligned with the size of a cacheline of the core. If cacheline size is
+ 128B, the CQE size is configured to be 128B even though the device writes
+ only 64B data on the cacheline. This is to avoid unnecessary cache
+ invalidation by device's two consecutive writes on to one cacheline.
+ However in some architecture, it is more beneficial to update entire
+ cacheline with padding the rest 64B rather than striding because
+ read-modify-write could drop performance a lot. On the other hand,
+ writing extra data will consume more PCIe bandwidth and could also drop
+ the maximum throughput. It is recommended to empirically set this
+ parameter. Disabled by default.
+
+ Supported on:
+
+ - CPU having 128B cacheline with ConnectX-5 and Bluefield.
+
- ``mprq_en`` parameter [int]
A nonzero value enables configuring Multi-Packet Rx queues. Rx queue is
@@ -137,6 +137,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_MLX5_MOD_CQE_128B_PAD \
+ infiniband/mlx5dv.h \
+ enum MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_IBV_FLOW_DV_SUPPORT \
infiniband/mlx5dv.h \
enum MLX5DV_FLOW_ACTION_TAG \
@@ -96,6 +96,8 @@ if build
'MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED' ],
[ 'HAVE_IBV_MLX5_MOD_CQE_128B_COMP', 'infiniband/mlx5dv.h',
'MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP' ],
+ [ 'HAVE_IBV_MLX5_MOD_CQE_128B_PAD', 'infiniband/mlx5dv.h',
+ 'MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD' ],
[ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
'MLX5DV_FLOW_ACTION_TAG' ],
[ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
@@ -51,6 +51,9 @@
/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
+/* Device parameter to enable RX completion entry padding to 128B. */
+#define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en"
+
/* Device parameter to enable Multi-Packet Rx queue. */
#define MLX5_RX_MPRQ_EN "mprq_en"
@@ -479,6 +482,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
}
if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) {
config->cqe_comp = !!tmp;
+ } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) {
+ config->cqe_pad = !!tmp;
} else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) {
config->mprq.enabled = !!tmp;
} else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) {
@@ -531,6 +536,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
{
const char **params = (const char *[]){
MLX5_RXQ_CQE_COMP_EN,
+ MLX5_RXQ_CQE_PAD_EN,
MLX5_RX_MPRQ_EN,
MLX5_RX_MPRQ_LOG_STRIDE_NUM,
MLX5_RX_MPRQ_MAX_MEMCPY_LEN,
@@ -723,6 +729,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
struct mlx5dv_context dv_attr = { .comp_mask = 0 };
struct mlx5_dev_config config = {
.vf = !!vf,
+ .cqe_pad = 0,
.mps = MLX5_ARG_UNSET,
.tx_vec_en = 1,
.rx_vec_en = 1,
@@ -743,6 +750,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
int err = 0;
unsigned int mps;
unsigned int cqe_comp;
+ unsigned int cqe_pad = 0;
unsigned int tunnel_en = 0;
unsigned int mpls_en = 0;
unsigned int swp = 0;
@@ -863,6 +871,11 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
else
cqe_comp = 1;
config.cqe_comp = cqe_comp;
+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
+ /* Whether device supports 128B Rx CQE padding. */
+ cqe_pad = RTE_CACHE_LINE_SIZE == 128 &&
+ (dv_attr.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_PAD);
+#endif
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS) {
tunnel_en = ((dv_attr.tunnel_offloads_caps &
@@ -1079,6 +1092,12 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
DRV_LOG(WARNING, "Rx CQE compression isn't supported");
config.cqe_comp = 0;
}
+ if (config.cqe_pad && !cqe_pad) {
+ DRV_LOG(WARNING, "Rx CQE padding isn't supported");
+ config.cqe_pad = 0;
+ } else if (config.cqe_pad) {
+ DRV_LOG(INFO, "Rx CQE padding is enabled");
+ }
if (config.mprq.enabled && mprq) {
if (config.mprq.stride_num_n > mprq_max_stride_num_n ||
config.mprq.stride_num_n < mprq_min_stride_num_n) {
@@ -115,6 +115,7 @@ struct mlx5_dev_config {
/* Whether tunnel stateless offloads are supported. */
unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
unsigned int cqe_comp:1; /* CQE compression is enabled. */
+ unsigned int cqe_pad:1; /* CQE padding is enabled. */
unsigned int tso:1; /* Whether TSO is supported. */
unsigned int tx_vec_en:1; /* Tx vector is enabled. */
unsigned int rx_vec_en:1; /* Rx vector is enabled. */
@@ -841,6 +841,12 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
" timestamp",
dev->data->port_id);
}
+#ifdef HAVE_IBV_MLX5_MOD_CQE_128B_PAD
+ if (config->cqe_pad) {
+ attr.cq.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_FLAGS;
+ attr.cq.mlx5.flags |= MLX5DV_CQ_INIT_ATTR_FLAGS_CQE_PAD;
+ }
+#endif
tmpl->cq = mlx5_glue->cq_ex_to_cq
(mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv,
&attr.cq.mlx5));