@@ -81,6 +81,12 @@ Limitations
- On Windows, only ``eth`` and ``crypto`` are supported.
+Features
+--------
+
+- Remote PD and CTX - Linux only.
+
+
.. _mlx5_common_compilation:
Compilation Prerequisites
@@ -638,4 +644,33 @@ and below are the arguments supported by the common mlx5 layer.
If ``sq_db_nc`` is omitted, the preset (if any) environment variable
"MLX5_SHUT_UP_BF" value is used. If there is no "MLX5_SHUT_UP_BF", the
- default ``sq_db_nc`` value is zero for ARM64 hosts and one for others.
\ No newline at end of file
+ default ``sq_db_nc`` value is zero for ARM64 hosts and one for others.
+
+- ``cmd_fd`` parameter [int]
+
+ File descriptor of ``ibv_context`` created outside the PMD.
+ PMD will use this FD to import remote CTX. The ``cmd_fd`` is obtained from
+ the ``ibv_context->cmd_fd`` member, which must be dup'd before being passed.
+ This parameter is valid only if ``pd_handle`` parameter is specified.
+
+ By default, the PMD will create a new ``ibv_context``.
+
+ .. note::
+
+ When the FD comes from another process, it is the user's responsibility to
+ share the FD between the processes (e.g. by SCM_RIGHTS).
+
+- ``pd_handle`` parameter [int]
+
+ Protection domain handle of ``ibv_pd`` created outside the PMD.
+ PMD will use this handle to import remote PD. The ``pd_handle`` can be
+ achieved from the original PD by getting its ``ibv_pd->handle`` member value.
+ This parameter is valid only if ``cmd_fd`` parameter is specified, and its
+ value must be a valid kernel handle for a PD object in the context represented
+ by given ``cmd_fd``.
+
+ By default, the PMD will allocate a new PD.
+
+ .. note::
+
+ The ``ibv_pd->handle`` member is different from the ``mlx5dv_pd->pdn`` member.
\ No newline at end of file
@@ -408,27 +408,128 @@ mlx5_glue_constructor(void)
}
/**
- * Allocate Protection Domain object and extract its pdn using DV API.
+ * Validate user arguments for remote PD and CTX.
+ *
+ * @param config
+ * Pointer to device configuration structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config)
+{
+	const int fd_given = config->device_fd != MLX5_ARG_UNSET;
+	const int pd_given = config->pd_handle != MLX5_ARG_UNSET;
+
+#ifdef HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR
+	if (pd_given && !fd_given) {
+		DRV_LOG(ERR, "Remote PD without CTX is not supported.");
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	if (fd_given && !pd_given) {
+		DRV_LOG(ERR, "Remote CTX without PD is not supported.");
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "Remote PD and CTX is supported: (cmd_fd=%d, "
+		"pd_handle=%d).", config->device_fd, config->pd_handle);
+#else /* !HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR */
+	if (pd_given || fd_given) {
+		DRV_LOG(ERR,
+			"Remote PD and CTX is not supported - maybe old rdma-core version?");
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+#endif /* HAVE_MLX5_IBV_IMPORT_CTX_PD_AND_MR */
+	return 0;
+}
+
+/**
+ * Release Protection Domain object.
*
* @param[out] cdev
* Pointer to the mlx5 device.
*
* @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
+ * 0 on success, a negative errno value otherwise.
*/
int
+mlx5_os_pd_release(struct mlx5_common_device *cdev)
+{
+	/* Undo the PD through the glue call matching the way it was obtained. */
+	if (cdev->config.pd_handle != MLX5_ARG_UNSET)
+		return mlx5_glue->unimport_pd(cdev->pd);
+	return mlx5_glue->dealloc_pd(cdev->pd);
+}
+
+/**
+ * Allocate a new Protection Domain object on the device context.
+ *
+ * @param[out] cdev
+ *   Pointer to the mlx5 device, its pd field is filled.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
mlx5_os_pd_create(struct mlx5_common_device *cdev)
+{
+	cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
+	if (cdev->pd != NULL)
+		return 0;
+	rte_errno = errno ? errno : ENOMEM; /* Logging may clobber errno. */
+	DRV_LOG(ERR, "Failed to allocate PD: %s", rte_strerror(rte_errno));
+	return -rte_errno;
+}
+
+/**
+ * Import the Protection Domain object designated by the given PD handle.
+ *
+ * @param[out] cdev
+ *   Pointer to the mlx5 device, its pd field is filled.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_os_pd_import(struct mlx5_common_device *cdev)
+{
+	cdev->pd = mlx5_glue->import_pd(cdev->ctx, cdev->config.pd_handle);
+	if (cdev->pd != NULL)
+		return 0;
+	rte_errno = errno ? errno : ENOMEM; /* Logging may clobber errno. */
+	DRV_LOG(ERR, "Failed to import PD using handle=%d: %s",
+		cdev->config.pd_handle, rte_strerror(rte_errno));
+	return -rte_errno;
+}
+
+/**
+ * Prepare Protection Domain object and extract its pdn using DV API.
+ *
+ * @param[out] cdev
+ * Pointer to the mlx5 device.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_pd_prepare(struct mlx5_common_device *cdev)
{
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
struct mlx5dv_obj obj;
struct mlx5dv_pd pd_info;
- int ret;
#endif
+ int ret;
- cdev->pd = mlx5_glue->alloc_pd(cdev->ctx);
- if (cdev->pd == NULL) {
- DRV_LOG(ERR, "Failed to allocate PD.");
- return errno ? -errno : -ENOMEM;
+ if (cdev->config.pd_handle == MLX5_ARG_UNSET)
+ ret = mlx5_os_pd_create(cdev);
+ else
+ ret = mlx5_os_pd_import(cdev);
+ if (ret) {
+ rte_errno = -ret;
+ return ret;
}
if (cdev->config.devx == 0)
return 0;
@@ -438,15 +539,17 @@ mlx5_os_pd_create(struct mlx5_common_device *cdev)
ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_PD);
if (ret != 0) {
DRV_LOG(ERR, "Fail to get PD object info.");
- mlx5_glue->dealloc_pd(cdev->pd);
+ rte_errno = errno;
+ claim_zero(mlx5_os_pd_release(cdev));
cdev->pd = NULL;
- return -errno;
+ return -rte_errno;
}
cdev->pdn = pd_info.pdn;
return 0;
#else
DRV_LOG(ERR, "Cannot get pdn - no DV support.");
- return -ENOTSUP;
+ rte_errno = ENOTSUP;
+ return -rte_errno;
#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
}
@@ -648,28 +751,28 @@ mlx5_restore_doorbell_mapping_env(int value)
/**
* Function API to open IB device.
*
- *
* @param cdev
* Pointer to the mlx5 device.
* @param classes
* Chosen classes come from device arguments.
*
* @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
+ * Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
*/
-int
-mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
+static struct ibv_context *
+mlx5_open_device(struct mlx5_common_device *cdev, uint32_t classes)
{
struct ibv_device *ibv;
struct ibv_context *ctx = NULL;
int dbmap_env;
+ MLX5_ASSERT(cdev->config.device_fd == MLX5_ARG_UNSET);
if (classes & MLX5_CLASS_VDPA)
ibv = mlx5_vdpa_get_ibv_dev(cdev->dev);
else
ibv = mlx5_os_get_ibv_dev(cdev->dev);
if (!ibv)
- return -rte_errno;
+ return NULL;
DRV_LOG(INFO, "Dev information matches for device \"%s\".", ibv->name);
/*
* Configure environment variable "MLX5_BF_SHUT_UP" before the device
@@ -682,29 +785,78 @@ mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
ctx = mlx5_glue->dv_open_device(ibv);
if (ctx) {
cdev->config.devx = 1;
- DRV_LOG(DEBUG, "DevX is supported.");
} else if (classes == MLX5_CLASS_ETH) {
/* The environment variable is still configured. */
ctx = mlx5_glue->open_device(ibv);
if (ctx == NULL)
goto error;
- DRV_LOG(DEBUG, "DevX is NOT supported.");
} else {
goto error;
}
/* The device is created, no need for environment. */
mlx5_restore_doorbell_mapping_env(dbmap_env);
- /* Hint libmlx5 to use PMD allocator for data plane resources */
- mlx5_set_context_attr(cdev->dev, ctx);
- cdev->ctx = ctx;
- return 0;
+ return ctx;
error:
rte_errno = errno ? errno : ENODEV;
/* The device creation is failed, no need for environment. */
mlx5_restore_doorbell_mapping_env(dbmap_env);
DRV_LOG(ERR, "Failed to open IB device \"%s\".", ibv->name);
- return -rte_errno;
+ return NULL;
+}
+
+/**
+ * Import the IB device context behind the "cmd_fd" device argument.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device, its config.device_fd must be set.
+ *
+ * @return
+ *   Pointer to ibv_context on success, NULL otherwise and rte_errno is set.
+ */
+static struct ibv_context *
+mlx5_import_device(struct mlx5_common_device *cdev)
+{
+	struct ibv_context *ctx;
+
+	MLX5_ASSERT(cdev->config.device_fd != MLX5_ARG_UNSET);
+	ctx = mlx5_glue->import_device(cdev->config.device_fd);
+	if (ctx == NULL) {
+		rte_errno = errno ? errno : ENODEV; /* Never 0, set before log. */
+		DRV_LOG(ERR, "Failed to import device for fd=%d: %s",
+			cdev->config.device_fd, rte_strerror(rte_errno));
+	}
+	return ctx;
+}
+
+/**
+ * Obtain the IB device context, either by import or by regular open.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device.
+ * @param classes
+ *   Chosen classes come from device arguments.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes)
+{
+	struct ibv_context *ctx;
+
+	/* A given device FD selects the import path over a fresh open. */
+	if (cdev->config.device_fd != MLX5_ARG_UNSET)
+		ctx = mlx5_import_device(cdev);
+	else
+		ctx = mlx5_open_device(cdev, classes);
+	if (ctx == NULL)
+		return -rte_errno;
+	/* Hint libmlx5 to use PMD allocator for data plane resources */
+	mlx5_set_context_attr(cdev->dev, ctx);
+	cdev->ctx = ctx;
+	return 0;
}
+
int
mlx5_get_device_guid(const struct rte_pci_addr *dev, uint8_t *guid, size_t len)
{
@@ -203,12 +203,6 @@ mlx5_os_get_devx_uar_page_id(void *uar)
#endif
}
-static inline int
-mlx5_os_dealloc_pd(void *pd)
-{
- return mlx5_glue->dealloc_pd(pd);
-}
-
__rte_internal
static inline void *
mlx5_os_umem_reg(void *ctx, void *addr, size_t size, uint32_t access)
@@ -24,6 +24,12 @@ uint8_t haswell_broadwell_cpu;
/* Driver type key for new device global syntax. */
#define MLX5_DRIVER_KEY "driver"
+/* Device parameter to get file descriptor for import device. */
+#define MLX5_DEVICE_FD "cmd_fd"
+
+/* Device parameter to get PD handle for importing Protection Domain. */
+#define MLX5_PD_HANDLE "pd_handle"
+
/* Enable extending memsegs when creating a MR. */
#define MLX5_MR_EXT_MEMSEG_EN "mr_ext_memseg_en"
@@ -283,6 +289,10 @@ mlx5_common_args_check_handler(const char *key, const char *val, void *opaque)
config->mr_mempool_reg_en = !!tmp;
} else if (strcmp(key, MLX5_SYS_MEM_EN) == 0) {
config->sys_mem_en = !!tmp;
+ } else if (strcmp(key, MLX5_DEVICE_FD) == 0) {
+ config->device_fd = tmp;
+ } else if (strcmp(key, MLX5_PD_HANDLE) == 0) {
+ config->pd_handle = tmp;
}
return 0;
}
@@ -310,6 +320,8 @@ mlx5_common_config_get(struct mlx5_kvargs_ctrl *mkvlist,
MLX5_MR_EXT_MEMSEG_EN,
MLX5_SYS_MEM_EN,
MLX5_MR_MEMPOOL_REG_EN,
+ MLX5_DEVICE_FD,
+ MLX5_PD_HANDLE,
NULL,
};
int ret = 0;
@@ -321,13 +333,19 @@ mlx5_common_config_get(struct mlx5_kvargs_ctrl *mkvlist,
config->mr_mempool_reg_en = 1;
config->sys_mem_en = 0;
config->dbnc = MLX5_ARG_UNSET;
+ config->device_fd = MLX5_ARG_UNSET;
+ config->pd_handle = MLX5_ARG_UNSET;
/* Process common parameters. */
ret = mlx5_kvargs_process(mkvlist, params,
mlx5_common_args_check_handler, config);
if (ret) {
rte_errno = EINVAL;
- ret = -rte_errno;
+ return -rte_errno;
}
+ /* Validate user arguments for remote PD and CTX if it is given. */
+ ret = mlx5_os_remote_pd_and_ctx_validate(config);
+ if (ret)
+ return ret;
DRV_LOG(DEBUG, "mr_ext_memseg_en is %u.", config->mr_ext_memseg_en);
DRV_LOG(DEBUG, "mr_mempool_reg_en is %u.", config->mr_mempool_reg_en);
DRV_LOG(DEBUG, "sys_mem_en is %u.", config->sys_mem_en);
@@ -645,7 +663,7 @@ static void
mlx5_dev_hw_global_release(struct mlx5_common_device *cdev)
{
if (cdev->pd != NULL) {
- claim_zero(mlx5_os_dealloc_pd(cdev->pd));
+ claim_zero(mlx5_os_pd_release(cdev));
cdev->pd = NULL;
}
if (cdev->ctx != NULL) {
@@ -674,20 +692,27 @@ mlx5_dev_hw_global_prepare(struct mlx5_common_device *cdev, uint32_t classes)
ret = mlx5_os_open_device(cdev, classes);
if (ret < 0)
return ret;
- /* Allocate Protection Domain object and extract its pdn. */
- ret = mlx5_os_pd_create(cdev);
+ /*
+ * A CTX opened through Verbs does not support the HCA attributes query.
+ * For an imported CTX it is unknown whether DevX or Verbs created it,
+ * so the query is always issued and its failure aborts the preparation.
+ */
+ if (cdev->config.devx || cdev->config.device_fd != MLX5_ARG_UNSET) {
+ /* Query HCA attributes. */
+ ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx,
+ &cdev->config.hca_attr);
+ if (ret) {
+ DRV_LOG(ERR, "Unable to read HCA caps in DevX mode.");
+ rte_errno = ENOTSUP;
+ goto error;
+ }
+ cdev->config.devx = 1;
+ }
+ DRV_LOG(DEBUG, "DevX is %ssupported.", cdev->config.devx ? "" : "NOT ");
+ /* Prepare Protection Domain object and extract its pdn. */
+ ret = mlx5_os_pd_prepare(cdev);
if (ret)
goto error;
- /* All actions taken below are relevant only when DevX is supported */
- if (cdev->config.devx == 0)
- return 0;
- /* Query HCA attributes. */
- ret = mlx5_devx_cmd_query_hca_attr(cdev->ctx, &cdev->config.hca_attr);
- if (ret) {
- DRV_LOG(ERR, "Unable to read HCA capabilities.");
- rte_errno = ENOTSUP;
- goto error;
- }
return 0;
error:
mlx5_dev_hw_global_release(cdev);
@@ -814,26 +839,39 @@ mlx5_common_probe_again_args_validate(struct mlx5_common_device *cdev,
* Checks the match between the temporary structure and the existing
* common device structure.
*/
- if (cdev->config.mr_ext_memseg_en ^ config->mr_ext_memseg_en) {
- DRV_LOG(ERR, "\"mr_ext_memseg_en\" "
+ if (cdev->config.mr_ext_memseg_en != config->mr_ext_memseg_en) {
+ DRV_LOG(ERR, "\"" MLX5_MR_EXT_MEMSEG_EN "\" "
"configuration mismatch for device %s.",
cdev->dev->name);
goto error;
}
- if (cdev->config.mr_mempool_reg_en ^ config->mr_mempool_reg_en) {
- DRV_LOG(ERR, "\"mr_mempool_reg_en\" "
+ if (cdev->config.mr_mempool_reg_en != config->mr_mempool_reg_en) {
+ DRV_LOG(ERR, "\"" MLX5_MR_MEMPOOL_REG_EN "\" "
"configuration mismatch for device %s.",
cdev->dev->name);
goto error;
}
- if (cdev->config.sys_mem_en ^ config->sys_mem_en) {
- DRV_LOG(ERR,
- "\"sys_mem_en\" configuration mismatch for device %s.",
+ if (cdev->config.device_fd != config->device_fd) {
+ DRV_LOG(ERR, "\"" MLX5_DEVICE_FD "\" "
+ "configuration mismatch for device %s.",
+ cdev->dev->name);
+ goto error;
+ }
+ if (cdev->config.pd_handle != config->pd_handle) {
+ DRV_LOG(ERR, "\"" MLX5_PD_HANDLE "\" "
+ "configuration mismatch for device %s.",
+ cdev->dev->name);
+ goto error;
+ }
+ if (cdev->config.sys_mem_en != config->sys_mem_en) {
+ DRV_LOG(ERR, "\"" MLX5_SYS_MEM_EN "\" "
+ "configuration mismatch for device %s.",
cdev->dev->name);
goto error;
}
- if (cdev->config.dbnc ^ config->dbnc) {
- DRV_LOG(ERR, "\"dbnc\" configuration mismatch for device %s.",
+ if (cdev->config.dbnc != config->dbnc) {
+ DRV_LOG(ERR, "\"" MLX5_SQ_DB_NC "\" "
+ "configuration mismatch for device %s.",
cdev->dev->name);
goto error;
}
@@ -446,6 +446,8 @@ void mlx5_common_init(void);
struct mlx5_common_dev_config {
struct mlx5_hca_attr hca_attr; /* HCA attributes. */
int dbnc; /* Skip doorbell register write barrier. */
+ int device_fd; /* Device file descriptor for importation. */
+ int pd_handle; /* Protection Domain handle for importation. */
unsigned int devx:1; /* Whether devx interface is available or not. */
unsigned int sys_mem_en:1; /* The default memory allocator. */
unsigned int mr_mempool_reg_en:1;
@@ -465,6 +467,23 @@ struct mlx5_common_device {
struct mlx5_common_dev_config config; /* Device configuration. */
};
+/**
+ * Tell whether the device uses an imported PD and CTX, meaning both the
+ * device FD and the PD handle were supplied instead of left unset.
+ *
+ * @param cdev
+ *   Pointer to common device.
+ *
+ * @return
+ *   True when both the device FD and the PD handle are set, false otherwise.
+ */
+static inline bool
+mlx5_imported_pd_and_ctx(struct mlx5_common_device *cdev)
+{
+	return !(cdev->config.device_fd == MLX5_ARG_UNSET ||
+		 cdev->config.pd_handle == MLX5_ARG_UNSET);
+}
+
/**
* Initialization function for the driver called during device probing.
*/
@@ -554,7 +573,9 @@ mlx5_devx_uar_release(struct mlx5_uar *uar);
/* mlx5_common_os.c */
int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
-int mlx5_os_pd_create(struct mlx5_common_device *cdev);
+int mlx5_os_pd_prepare(struct mlx5_common_device *cdev);
+int mlx5_os_pd_release(struct mlx5_common_device *cdev);
+int mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config);
/* mlx5 PMD wrapped MR struct. */
struct mlx5_pmd_wrapped_mr {
@@ -25,21 +25,46 @@ mlx5_glue_constructor(void)
{
}
+/**
+ * Reject the remote PD and CTX device arguments on Windows.
+ *
+ * @param config
+ *   Pointer to device configuration structure.
+ *
+ * @return
+ *   0 when both are unset, -ENOTSUP otherwise and rte_errno is set.
+ */
+int
+mlx5_os_remote_pd_and_ctx_validate(struct mlx5_common_dev_config *config)
+{
+	const int fd_given = config->device_fd != MLX5_ARG_UNSET;
+	const int pd_given = config->pd_handle != MLX5_ARG_UNSET;
+
+	/* Only a configuration with neither argument given is accepted. */
+	if (!fd_given && !pd_given)
+		return 0;
+	DRV_LOG(ERR, "Remote PD and CTX is not supported on Windows.");
+	rte_errno = ENOTSUP;
+	return -rte_errno;
+}
+
/**
* Release PD. Releases a given mlx5_pd object
*
- * @param[in] pd
- * Pointer to mlx5_pd.
+ * @param[in] cdev
+ * Pointer to the mlx5 device.
*
* @return
* Zero if pd is released successfully, negative number otherwise.
*/
int
-mlx5_os_dealloc_pd(void *pd)
+mlx5_os_pd_release(struct mlx5_common_device *cdev)
{
+ struct mlx5_pd *pd = cdev->pd;
+
if (!pd)
return -EINVAL;
- mlx5_devx_cmd_destroy(((struct mlx5_pd *)pd)->obj);
+ mlx5_devx_cmd_destroy(pd->obj);
mlx5_free(pd);
return 0;
}
@@ -47,14 +72,14 @@ mlx5_os_dealloc_pd(void *pd)
/**
* Allocate Protection Domain object and extract its pdn using DV API.
*
- * @param[out] dev
+ * @param[out] cdev
* Pointer to the mlx5 device.
*
* @return
* 0 on success, a negative value otherwise.
*/
int
-mlx5_os_pd_create(struct mlx5_common_device *cdev)
+mlx5_os_pd_prepare(struct mlx5_common_device *cdev)
{
struct mlx5_pd *pd;
@@ -248,7 +248,6 @@ mlx5_os_devx_subscribe_devx_event(void *eventc,
return -ENOTSUP;
}
-int mlx5_os_dealloc_pd(void *pd);
__rte_internal
void *mlx5_os_umem_reg(void *ctx, void *addr, size_t size, uint32_t access);
__rte_internal