@@ -94,6 +94,7 @@ Features
- Sub-Function representors.
- Sub-Function.
- Rx queue available descriptor threshold configuration.
+- Host shaper support.
Limitations
@@ -525,6 +526,12 @@ Limitations
- Doesn't support shared Rx queue and Hairpin Rx queue.
+- Host shaper:
+
+  - Supported on BlueField series NICs, starting from BlueField 2.
+  - When configuring the host shaper with the MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED flag set,
+    only rates 0 and 100Mbps are supported.
+
Statistics
----------
@@ -1692,3 +1699,31 @@ Available descriptor threshold is a per Rx queue attribute, it should be configu
a percentage of the Rx queue size.
When Rx queue available descriptors for hardware are below the threshold, an event is sent to PMD.
+Host shaper introduction
+------------------------
+
+The host shaper register is a per host port register which sets a shaper
+on the host port.
+All VF/hostPF representors belonging to one host port share one host shaper.
+For example, if representor 0 and representor 1 belong to the same host port,
+and a host shaper rate of 1Gbps is configured, the shaper throttles both
+representors' traffic from the host.
+The host shaper has two modes for setting the shaper: immediate, and deferred to
+the available descriptor threshold event trigger. In immediate mode, the rate limit is applied
+immediately to the host shaper. In deferred mode, the shaper
+is not set until an available descriptor threshold event is received by any Rx queue in a VF
+representor belonging to the host port. The only rate supported for deferred
+mode is 100Mbps (there is no limit on the supported rates for immediate mode).
+In deferred mode, the shaper is set on the host port by the firmware upon
+receiving the available descriptor threshold event, which allows throttling host traffic on
+available descriptor threshold events at minimum latency, preventing excess drops in the
+Rx queue.
+
+Host shaper dependency for mstflint package
+-------------------------------------------
+
+In order to configure the host shaper register, ``librte_net_mlx5`` depends on ``libmtcr_ul``,
+which can be installed from the OFED mstflint package.
+Meson detects the presence of ``libmtcr_ul`` at the configure stage.
+If the library is detected, the application must link with ``-lmtcr_ul``,
+as done by the pkg-config file libdpdk.pc.
@@ -90,6 +90,7 @@ New Features
* Added support for MTU on Windows.
* Added matching and RSS on IPsec ESP.
* Added Rx queue available descriptor threshold support.
+ * Added host shaper support.
* **Updated Marvell cnxk crypto driver.**
@@ -45,6 +45,13 @@ if static_ibverbs
ext_deps += declare_dependency(link_args:ibv_ldflags.split())
endif
+# Optional libmtcr_ul: when found, it is added to the external dependencies
+# and libmtcr_ul_found gates the HAVE_MLX5_MSTFLINT header probe.
+# The Meson version is taken from the running interpreter via meson.version()
+# instead of spawning "meson --version", which needs a "meson" executable
+# on PATH and costs an extra process at configure time.
+libmtcr_ul_found = false
+lib = cc.find_library('mtcr_ul', required:false)
+if lib.found() and meson.version().version_compare('>= 0.49.2')
+    libmtcr_ul_found = true
+    ext_deps += lib
+endif
+
sources += files('mlx5_nl.c')
sources += files('mlx5_common_auxiliary.c')
sources += files('mlx5_common_os.c')
@@ -207,6 +214,12 @@ has_sym_args = [
[ 'HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR', 'infiniband/verbs.h',
'ibv_import_device' ],
]
+# Probe mstflint/mtcr.h for mopen() only when libmtcr_ul was found.
+# Each entry is [config define, header, symbol].
+if libmtcr_ul_found
+    has_sym_args += [['HAVE_MLX5_MSTFLINT', 'mstflint/mtcr.h', 'mopen']]
+endif
config = configuration_data()
foreach arg:has_sym_args
config.set(arg[0], cc.has_header_symbol(arg[1], arg[2], dependencies: libs))
@@ -3771,6 +3771,7 @@ enum {
MLX5_CRYPTO_COMMISSIONING_REGISTER_ID = 0xC003,
MLX5_IMPORT_KEK_HANDLE_REGISTER_ID = 0xC004,
MLX5_CREDENTIAL_HANDLE_REGISTER_ID = 0xC005,
+ MLX5_QSHR_REGISTER_ID = 0x4030,
};
struct mlx5_ifc_register_mtutc_bits {
@@ -3785,6 +3786,30 @@ struct mlx5_ifc_register_mtutc_bits {
u8 time_adjustment[0x20];
};
+/*
+ * Bit layout of the ETS global configuration block embedded in QSHR.
+ * Each array length is the field width in bits; the widths add up to
+ * 0x40 bits. The reserved_at_<N> suffixes give the starting bit offset
+ * (written in decimal for offset 48).
+ */
+struct mlx5_ifc_ets_global_config_register_bits {
+ u8 reserved_at_0[0x2];
+ u8 rate_limit_update[0x1]; /* Set to 1 when writing a new rate limit. */
+ u8 reserved_at_3[0x29];
+ u8 max_bw_units[0x4]; /* One of ETS_GLOBAL_CONFIG_BW_UNIT_*. */
+ u8 reserved_at_48[0x8];
+ u8 max_bw_value[0x8]; /* Rate value, expressed in max_bw_units. */
+};
+
+/* Values for the max_bw_units field of the ETS global configuration block. */
+#define ETS_GLOBAL_CONFIG_BW_UNIT_DISABLED 0x0
+#define ETS_GLOBAL_CONFIG_BW_UNIT_HUNDREDS_MBPS 0x3
+#define ETS_GLOBAL_CONFIG_BW_UNIT_GBPS 0x4
+
+/*
+ * Bit layout of QSHR (QoS Shaper Host Register), accessed with
+ * MLX5_QSHR_REGISTER_ID. Each array length is the field width in bits;
+ * the reserved_at_<N> suffixes give the starting bit offset (written in
+ * decimal for offset 16).
+ */
+struct mlx5_ifc_register_qshr_bits {
+ u8 reserved_at_0[0x4];
+ u8 connected_host[0x1]; /* Set to 1 by the host shaper configuration. */
+ u8 vqos[0x1];
+ u8 fast_response[0x1]; /* Set to 1 to defer the shaper to the LWM event. */
+ u8 reserved_at_7[0x1];
+ u8 local_port[0x8]; /* Set to 1 by the host shaper configuration. */
+ u8 reserved_at_16[0x230];
+ struct mlx5_ifc_ets_global_config_register_bits global_config;
+};
+
#define MLX5_MTUTC_TIMESTAMP_MODE_INTERNAL_TIMER 0
#define MLX5_MTUTC_TIMESTAMP_MODE_REAL_TIME 1
@@ -1271,6 +1271,8 @@ struct mlx5_dev_ctx_shared {
void *devx_channel_lwm;
struct rte_intr_handle *intr_handle_lwm;
pthread_mutex_t lwm_config_lock;
+ uint32_t host_shaper_rate:8;
+ uint32_t lwm_triggered:1;
/* Availability of mreg_c's. */
struct mlx5_dev_shared_port port[]; /* per device port data array. */
};
@@ -19,6 +19,7 @@
#include <mlx5_prm.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
+#include <rte_pmd_mlx5.h>
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
@@ -27,6 +28,9 @@
#include "mlx5_rxtx.h"
#include "mlx5_devx.h"
#include "mlx5_rx.h"
+#ifdef HAVE_MLX5_MSTFLINT
+#include <mstflint/mtcr.h>
+#endif
static __rte_always_inline uint32_t
@@ -1371,3 +1375,103 @@ int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
return ret;
}
+/**
+ * Mlx5 access register function to configure host shaper.
+ * It calls the libmtcr_ul API to write QSHR (QoS Shaper Host Register)
+ * in firmware.
+ *
+ * @param dev
+ *   Pointer to rte_eth_dev.
+ * @param lwm_triggered
+ *   Value for the fast_response bit in QSHR: true defers the shaper to the
+ *   available descriptor threshold (LWM) event.
+ * @param rate
+ *   Host shaper rate, unit is 100Mbps, set to 0 means disable the shaper.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set:
+ *   - ENOENT - no ibdev interface.
+ *   - EBUSY - the register access unit is busy.
+ *   - EIO - the register access command meets IO error.
+ *   - ENOTSUP - the PMD was built without mstflint (libmtcr_ul) support.
+ */
+static int
+mlxreg_host_shaper_config(struct rte_eth_dev *dev,
+			  bool lwm_triggered, uint8_t rate)
+{
+#ifdef HAVE_MLX5_MSTFLINT
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint32_t data[MLX5_ST_SZ_DW(register_qshr)] = {0};
+	int rc, retry_count = 3;
+	mfile *mf = NULL;
+	/* Initialized so the retry test never reads an indeterminate value. */
+	int status = 0;
+	void *ptr;
+
+	mf = mopen(priv->sh->ibdev_name);
+	if (!mf) {
+		DRV_LOG(WARNING, "mopen failed");
+		rte_errno = ENOENT;
+		return -rte_errno;
+	}
+	MLX5_SET(register_qshr, data, connected_host, 1);
+	MLX5_SET(register_qshr, data, fast_response, lwm_triggered ? 1 : 0);
+	MLX5_SET(register_qshr, data, local_port, 1);
+	ptr = MLX5_ADDR_OF(register_qshr, data, global_config);
+	MLX5_SET(ets_global_config_register, ptr, rate_limit_update, 1);
+	/* A zero rate is expressed by disabling the bandwidth unit. */
+	MLX5_SET(ets_global_config_register, ptr, max_bw_units,
+		 rate ? ETS_GLOBAL_CONFIG_BW_UNIT_HUNDREDS_MBPS :
+		 ETS_GLOBAL_CONFIG_BW_UNIT_DISABLED);
+	MLX5_SET(ets_global_config_register, ptr, max_bw_value, rate);
+	/*
+	 * One attempt plus up to three retries, 10 ms apart. A retry happens
+	 * only when the device is flagged MDEVS_REM and the access reported
+	 * interface-busy or bad-parameter.
+	 */
+	do {
+		rc = maccess_reg(mf,
+				 MLX5_QSHR_REGISTER_ID,
+				 MACCESS_REG_METHOD_SET,
+				 (u_int32_t *)&data[0],
+				 sizeof(data),
+				 sizeof(data),
+				 sizeof(data),
+				 &status);
+		if ((rc != ME_ICMD_STATUS_IFC_BUSY &&
+		     status != ME_REG_ACCESS_BAD_PARAM) ||
+		    !(mf->flags & MDEVS_REM)) {
+			break;
+		}
+		DRV_LOG(WARNING, "%s retry.", __func__);
+		usleep(10000);
+	} while (retry_count-- > 0);
+	mclose(mf);
+	/* Leave rte_errno untouched on success. */
+	if (rc == 0)
+		return 0;
+	rte_errno = (rc == ME_REG_ACCESS_DEV_BUSY) ? EBUSY : EIO;
+	return -rte_errno;
+#else
+	(void)dev;
+	(void)lwm_triggered;
+	(void)rate;
+	/* Register access needs libmtcr_ul, which was not found at build time. */
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+#endif
+}
+
+/*
+ * Configure the host shaper of the host port that the given ethdev port
+ * belongs to. The shaper state is kept in the shared device context, so it
+ * is common to every port sharing that context.
+ *
+ * Without MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED in flags, rate is
+ * stored as the shaper rate (unit is 100Mbps, 0 disables the shaper).
+ * With the flag set, rate only toggles the deferred (LWM-triggered) mode:
+ * 0 disables it, 1 enables it, any other value is rejected.
+ *
+ * Returns 0 on success, a negative errno value otherwise with rte_errno set:
+ * ENODEV for an invalid port_id, ENOTSUP for an unsupported rate in
+ * deferred mode, or the error reported by mlxreg_host_shaper_config().
+ *
+ * NOTE(review): the port is only checked to be a valid ethdev port, not to
+ * be driven by mlx5 - confirm whether callers can pass foreign ports.
+ */
+int rte_pmd_mlx5_host_shaper_config(int port_id, uint8_t rate,
+				    uint32_t flags)
+{
+	struct rte_eth_dev *dev;
+	struct mlx5_priv *priv;
+	bool lwm_triggered =
+	     !!(flags & RTE_BIT32(MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED));
+
+	/* Validate before indexing rte_eth_devices[] with a signed value. */
+	if (port_id < 0 || port_id > UINT16_MAX ||
+	    !rte_eth_dev_is_valid_port((uint16_t)port_id)) {
+		rte_errno = ENODEV;
+		return -rte_errno;
+	}
+	dev = &rte_eth_devices[port_id];
+	priv = dev->data->dev_private;
+	if (!lwm_triggered) {
+		priv->sh->host_shaper_rate = rate;
+	} else {
+		switch (rate) {
+		case 0:
+			/* Rate 0 means disable lwm_triggered. */
+			priv->sh->lwm_triggered = 0;
+			break;
+		case 1:
+			/* Rate 1 means enable lwm_triggered. */
+			priv->sh->lwm_triggered = 1;
+			break;
+		default:
+			rte_errno = ENOTSUP;
+			return -rte_errno;
+		}
+	}
+	return mlxreg_host_shaper_config(dev, priv->sh->lwm_triggered,
+					 priv->sh->host_shaper_rate);
+}
@@ -109,6 +109,36 @@ int rte_pmd_mlx5_external_rx_queue_id_map(uint16_t port_id, uint16_t dpdk_idx,
int rte_pmd_mlx5_external_rx_queue_id_unmap(uint16_t port_id,
uint16_t dpdk_idx);
+/**
+ * Bit position (not a mask) of the "available descriptor threshold
+ * triggered" flag: pass RTE_BIT32(MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED)
+ * in the flags argument of rte_pmd_mlx5_host_shaper_config().
+ *
+ * With this flag set, the rate of the host port shaper is not applied by the
+ * API call itself but at the next available descriptor threshold event;
+ * rate 1 enables this deferred update, rate 0 disables it, and any other
+ * rate is rejected.
+ * With this flag unset, the rate of the host port shaper is updated directly
+ * in the API call; use rate 0 to disable the current shaper.
+ */
+#define MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED 0
+
+/**
+ * Configure a HW shaper to limit Tx rate for a host port.
+ * The configuration will affect all the ethdev ports belonging to
+ * the same rte_device.
+ *
+ * @param[in] port_id
+ *   The port identifier of the Ethernet device.
+ * @param[in] rate
+ *   Unit is 100Mbps, setting the rate to 0 disables the shaper.
+ *   When the MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED flag is set,
+ *   only 0 and 1 are accepted.
+ * @param[in] flags
+ *   Host shaper flags, a bit mask built with
+ *   RTE_BIT32(MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED).
+ * @return
+ *   0 : operation success.
+ *   Otherwise a negative value:
+ *   - -ENOENT - no ibdev interface.
+ *   - -EBUSY - the register access unit is busy.
+ *   - -EIO - the register access command meets IO error.
+ *   - -ENOTSUP - the flag is set and the rate is neither 0 nor 1.
+ */
+__rte_experimental
+int rte_pmd_mlx5_host_shaper_config(int port_id, uint8_t rate, uint32_t flags);
+
#ifdef __cplusplus
}
#endif
@@ -12,4 +12,6 @@ EXPERIMENTAL {
# added in 22.03
rte_pmd_mlx5_external_rx_queue_id_map;
rte_pmd_mlx5_external_rx_queue_id_unmap;
+ # added in 22.07
+ rte_pmd_mlx5_host_shaper_config;
};