[RFC,4/5] crypto/mlx5: add queue pair setup

Message ID 20230418092325.2578712-5-suanmingm@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Series: crypto/mlx5: support AES-GCM

Checks

Context        Check     Description
ci/checkpatch  warning   coding style issues

Commit Message

Suanming Mou April 18, 2023, 9:23 a.m. UTC
  The crypto queue pair handles the encryption/decryption operations.

As the AES-GCM AEAD API provides the AAD, mbuf and digest separately,
while the low-level FW accepts the data only in a single contiguous
memory region, two internal QPs are created for each AES-GCM queue
pair: one (a UMR QP) organizes the memory to be contiguous when it is
not, and the other performs the crypto operation.

If the buffers are found to be implicitly contiguous, the buffer is
sent to the crypto QP directly for encryption/decryption. If not, the
buffers are first handled by the UMR QP, which converts them into one
contiguous buffer. The well-organized "new" buffer can then be
handled by the crypto QP.
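
For illustration, a minimal sketch of such a contiguity check. The
helper below is hypothetical (not part of this patch), and it assumes
the AAD sits immediately before the payload and the digest
immediately after it:

#include <stdbool.h>
#include <rte_crypto.h>
#include <rte_mbuf.h>

/* Hypothetical helper: true when AAD, payload and digest already form
 * one contiguous region, so the op can skip the UMR QP.
 */
static inline bool
gcm_op_is_contiguous(const struct rte_crypto_op *op, uint32_t aad_len)
{
	const struct rte_crypto_sym_op *sop = op->sym;
	const struct rte_mbuf *m = sop->m_src;
	uintptr_t aad = (uintptr_t)sop->aead.aad.data;
	uintptr_t data = (uintptr_t)rte_pktmbuf_mtod_offset(m, void *,
						sop->aead.data.offset);
	uintptr_t digest = (uintptr_t)sop->aead.digest.data;

	/* One segment, AAD right before the payload, digest right after. */
	return m->nb_segs == 1 &&
	       aad + aad_len == data &&
	       data + sop->aead.data.length == digest;
}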

The crypto QP is initialized as the follower and the UMR QP as the
leader. When a crypto operation's input buffer requires address space
conversion by the UMR QP, the crypto QP processing is triggered by
the UMR QP. Otherwise, the crypto QP doorbell is rung directly.
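
A minimal sketch of that dispatch, assuming the hypothetical helper
above plus build_*/ring_doorbell placeholders (none of these names
come from this patch). The cd_master/cd_slave_send QP attributes
added below are what allow a SEND_EN WQE on the UMR QP to release the
crypto QP's WQEs:

/* Hypothetical dispatch sketch, not part of this patch. */
static void
gcm_enqueue_one(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op,
		uint32_t aad_len)
{
	/* The GGA crypto WQE is always built on the crypto QP. */
	build_crypto_wqe(qp, op);
	if (gcm_op_is_contiguous(op, aad_len)) {
		/* Contiguous input: ring the crypto QP doorbell directly. */
		ring_doorbell(&qp->qp_obj);
	} else {
		/* UMR WQE presents the segments as one contiguous mkey. */
		build_umr_wqe(qp, op);
		/* SEND_EN WQE triggers the follower (crypto) QP. */
		build_send_en_wqe(qp);
		/* Ringing the leader (UMR) QP drives both. */
		ring_doorbell(&qp->umr_qp_obj);
	}
}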

The existing max_segs_num devarg is used to define how many segments
a chained mbuf may contain, the same as for AES-XTS before.
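
As a worked example of the per-op KLM sizing this implies, using the
formula from mlx5_crypto_gcm_init() in this patch (reading the "+ 2"
as headroom for the AAD and digest regions is an interpretation, not
something the patch states):

	priv->max_segs_num = rte_align32pow2((priv->max_segs_num + 2) * 2);

	/* E.g. a max_segs_num devarg of 8 gives (8 + 2) * 2 = 20,
	 * rounded up to the next power of two: 32 KLM entries per
	 * queue entry.
	 */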

Signed-off-by: Suanming Mou <suanmingm@nvidia.com>
---
 drivers/common/mlx5/mlx5_devx_cmds.c  |   6 +
 drivers/common/mlx5/mlx5_devx_cmds.h  |   3 +
 drivers/common/mlx5/mlx5_prm.h        |  24 +++
 drivers/crypto/mlx5/mlx5_crypto.c     |  17 ++
 drivers/crypto/mlx5/mlx5_crypto.h     |  12 ++
 drivers/crypto/mlx5/mlx5_crypto_gcm.c | 254 ++++++++++++++++++++++++++
 6 files changed, 316 insertions(+)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_devx_cmds.c b/drivers/common/mlx5/mlx5_devx_cmds.c
index 8b51a75cc8..6be02c0a65 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.c
+++ b/drivers/common/mlx5/mlx5_devx_cmds.c
@@ -2563,6 +2563,12 @@  mlx5_devx_cmd_create_qp(void *ctx,
 				 attr->dbr_umem_valid);
 			MLX5_SET(qpc, qpc, dbr_umem_id, attr->dbr_umem_id);
 		}
+		if (attr->cd_master)
+			MLX5_SET(qpc, qpc, cd_master, attr->cd_master);
+		if (attr->cd_slave_send)
+			MLX5_SET(qpc, qpc, cd_slave_send, attr->cd_slave_send);
+		if (attr->cd_slave_recv)
+			MLX5_SET(qpc, qpc, cd_slave_receive, attr->cd_slave_recv);
 		MLX5_SET64(qpc, qpc, dbr_addr, attr->dbr_address);
 		MLX5_SET64(create_qp_in, in, wq_umem_offset,
 			   attr->wq_umem_offset);
diff --git a/drivers/common/mlx5/mlx5_devx_cmds.h b/drivers/common/mlx5/mlx5_devx_cmds.h
index 79502cda08..e68aa077d7 100644
--- a/drivers/common/mlx5/mlx5_devx_cmds.h
+++ b/drivers/common/mlx5/mlx5_devx_cmds.h
@@ -590,6 +590,9 @@  struct mlx5_devx_qp_attr {
 	uint64_t wq_umem_offset;
 	uint32_t user_index:24;
 	uint32_t mmo:1;
+	uint32_t cd_master:1;
+	uint32_t cd_slave_send:1;
+	uint32_t cd_slave_recv:1;
 };
 
 struct mlx5_devx_virtio_q_couners_attr {
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 25ff66ee7e..c8d73a8456 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -594,6 +594,17 @@  struct mlx5_rdma_write_wqe {
 	struct mlx5_wqe_dseg dseg[];
 } __rte_packed;
 
+struct mlx5_wqe_send_en_seg {
+	uint32_t reserve[2];
+	uint32_t sqnpc;
+	uint32_t qpn;
+} __rte_packed;
+
+struct mlx5_wqe_send_en_wqe {
+	struct mlx5_wqe_cseg ctr;
+	struct mlx5_wqe_send_en_seg sseg;
+} __rte_packed;
+
 #ifdef PEDANTIC
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
@@ -668,6 +679,19 @@  union mlx5_gga_compress_opaque {
 	uint32_t data[64];
 };
 
+union mlx5_gga_crypto_opaque {
+	struct {
+		uint32_t syndrome;
+		uint32_t reserved0[2];
+		struct {
+			uint32_t iv[3];
+			uint32_t tag_size;
+			uint32_t aad_size;
+		} cp __rte_packed;
+	} __rte_packed;
+	uint8_t data[64];
+};
+
 struct mlx5_ifc_regexp_mmo_control_bits {
 	uint8_t reserved_at_31[0x2];
 	uint8_t le[0x1];
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index 8946f13e5e..f2e5b25c15 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -849,12 +849,27 @@  mlx5_crypto_max_segs_num(uint16_t max_wqe_size)
 	return max_segs_cap;
 }
 
+static __rte_always_inline int
+mlx5_crypto_configure_gcm_wqe_size(struct mlx5_crypto_priv *priv)
+{
+	uint32_t send_en_wqe_size;
+
+	priv->umr_wqe_size = RTE_ALIGN(sizeof(struct mlx5_umr_wqe) + sizeof(struct mlx5_wqe_dseg),
+		MLX5_SEND_WQE_BB);
+	send_en_wqe_size = RTE_ALIGN(sizeof(struct mlx5_wqe_send_en_wqe), MLX5_SEND_WQE_BB);
+	priv->umr_wqe_stride = priv->umr_wqe_size / MLX5_SEND_WQE_BB;
+	priv->wqe_set_size = priv->umr_wqe_size + send_en_wqe_size;
+	return 0;
+}
+
 static int
 mlx5_crypto_configure_wqe_size(struct mlx5_crypto_priv *priv,
 				uint16_t max_wqe_size, uint32_t max_segs_num)
 {
 	uint32_t rdmw_wqe_size, umr_wqe_size;
 
+	if (priv->is_gcm_dek_wrap)
+		return mlx5_crypto_configure_gcm_wqe_size(priv);
 	mlx5_crypto_get_wqe_sizes(max_segs_num, &umr_wqe_size,
 					&rdmw_wqe_size);
 	priv->wqe_set_size = rdmw_wqe_size + umr_wqe_size;
@@ -927,12 +942,14 @@  mlx5_crypto_dev_probe(struct mlx5_common_device *cdev,
 	priv->cdev = cdev;
 	priv->crypto_dev = crypto_dev;
 	priv->is_wrapped_mode = wrapped_mode;
+	priv->max_segs_num = devarg_prms.max_segs_num;
 	priv->caps = mlx5_crypto_caps;
 	/* Init and override AES-GCM configuration. */
 	if (devarg_prms.is_aes_gcm) {
 		ret = mlx5_crypto_gcm_init(priv);
 		if (ret) {
 			DRV_LOG(ERR, "Failed to init AES-GCM crypto.");
+			return -ENOTSUP;
 		}
 	}
 	if (mlx5_devx_uar_prepare(cdev, &priv->uar) != 0) {
diff --git a/drivers/crypto/mlx5/mlx5_crypto.h b/drivers/crypto/mlx5/mlx5_crypto.h
index c34a860404..9945891ea8 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.h
+++ b/drivers/crypto/mlx5/mlx5_crypto.h
@@ -47,15 +47,27 @@  struct mlx5_crypto_qp {
 	struct mlx5_crypto_priv *priv;
 	struct mlx5_devx_cq cq_obj;
 	struct mlx5_devx_qp qp_obj;
+	struct mlx5_devx_cq umr_cq_obj;
+	struct mlx5_devx_qp umr_qp_obj;
 	struct rte_cryptodev_stats stats;
 	struct rte_crypto_op **ops;
 	struct mlx5_devx_obj **mkey; /* WQE's indirect mkeys. */
+	struct mlx5_klm *klm_array;
 	struct mlx5_mr_ctrl mr_ctrl;
+	struct mlx5_pmd_mr opaque_mr;
+	struct mlx5_pmd_mr klm_mr;
+	/* Crypto QP. */
 	uint8_t *wqe;
 	uint16_t entries_n;
 	uint16_t pi;
 	uint16_t ci;
 	uint16_t db_pi;
+	/* UMR QP. */
+	uint8_t *umr_wqe;
+	uint16_t umr_wqbbs;
+	uint16_t umr_pi;
+	uint16_t umr_ci;
+	uint32_t umr_errors;
 };
 
 struct mlx5_crypto_dek {
diff --git a/drivers/crypto/mlx5/mlx5_crypto_gcm.c b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
index 6c2c759fba..b67f22c591 100644
--- a/drivers/crypto/mlx5/mlx5_crypto_gcm.c
+++ b/drivers/crypto/mlx5/mlx5_crypto_gcm.c
@@ -123,6 +123,257 @@  mlx5_crypto_sym_gcm_session_configure(struct rte_cryptodev *dev,
 	return 0;
 }
 
+static void
+mlx5_crypto_gcm_indirect_mkeys_release(struct mlx5_crypto_qp *qp, uint16_t n)
+{
+	uint16_t i;
+
+	for (i = 0; i < n; i++)
+		if (qp->mkey[i])
+			claim_zero(mlx5_devx_cmd_destroy(qp->mkey[i]));
+}
+
+static int
+mlx5_crypto_gcm_indirect_mkeys_prepare(struct mlx5_crypto_priv *priv,
+				  struct mlx5_crypto_qp *qp)
+{
+	uint32_t i;
+	struct mlx5_devx_mkey_attr attr = {
+		.pd = priv->cdev->pdn,
+		.umr_en = 1,
+		.set_remote_rw = 1,
+		.klm_num = priv->max_segs_num,
+	};
+
+	for (i = 0; i < qp->entries_n; i++) {
+		attr.klm_array = (struct mlx5_klm *)&qp->klm_array[i * priv->max_segs_num];
+		qp->mkey[i] = mlx5_devx_cmd_mkey_create(priv->cdev->ctx, &attr);
+		if (!qp->mkey[i])
+			goto error;
+	}
+	return 0;
+error:
+	DRV_LOG(ERR, "Failed to allocate gcm indirect mkey.");
+	mlx5_crypto_gcm_indirect_mkeys_release(qp, i);
+	return -1;
+}
+
+static int
+mlx5_crypto_gcm_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
+{
+	struct mlx5_crypto_qp *qp = dev->data->queue_pairs[qp_id];
+
+	if (qp->umr_qp_obj.qp != NULL)
+		mlx5_devx_qp_destroy(&qp->umr_qp_obj);
+	if (qp->umr_cq_obj.cq != NULL)
+		mlx5_devx_cq_destroy(&qp->umr_cq_obj);
+	if (qp->qp_obj.qp != NULL)
+		mlx5_devx_qp_destroy(&qp->qp_obj);
+	if (qp->cq_obj.cq != NULL)
+		mlx5_devx_cq_destroy(&qp->cq_obj);
+	if (qp->opaque_mr.obj != NULL) {
+		void *opaq = qp->opaque_mr.addr;
+
+		mlx5_common_verbs_dereg_mr(&qp->opaque_mr);
+		rte_free(opaq);
+	}
+	mlx5_crypto_gcm_indirect_mkeys_release(qp, qp->entries_n);
+	if (qp->klm_mr.obj != NULL) {
+		void *klm = qp->klm_mr.addr;
+
+		mlx5_common_verbs_dereg_mr(&qp->klm_mr);
+		rte_free(klm);
+	}
+	mlx5_mr_btree_free(&qp->mr_ctrl.cache_bh);
+	rte_free(qp);
+	dev->data->queue_pairs[qp_id] = NULL;
+	return 0;
+}
+
+static void
+mlx5_crypto_gcm_init_qp(struct mlx5_crypto_qp *qp)
+{
+	volatile struct mlx5_gga_wqe *restrict wqe =
+				    (volatile struct mlx5_gga_wqe *)qp->qp_obj.wqes;
+	volatile union mlx5_gga_crypto_opaque *opaq = qp->opaque_mr.addr;
+	const uint32_t sq_ds = rte_cpu_to_be_32((qp->qp_obj.qp->id << 8) | 4u);
+	const uint32_t flags = RTE_BE32(MLX5_COMP_ALWAYS <<
+					MLX5_COMP_MODE_OFFSET);
+	const uint32_t opaq_lkey = rte_cpu_to_be_32(qp->opaque_mr.lkey);
+	int i;
+
+	/* All the following fields should stay constant. */
+	for (i = 0; i < qp->entries_n; ++i, ++wqe) {
+		wqe->sq_ds = sq_ds;
+		wqe->flags = flags;
+		wqe->opaque_lkey = opaq_lkey;
+		wqe->opaque_vaddr = rte_cpu_to_be_64((uint64_t)(uintptr_t)&opaq[i]);
+	}
+}
+
+static inline int
+mlx5_crypto_gcm_umr_qp_setup(struct rte_cryptodev *dev, struct mlx5_crypto_qp *qp,
+			     uint16_t log_nb_desc, int socket_id)
+{
+	struct mlx5_crypto_priv *priv = dev->data->dev_private;
+	struct mlx5_devx_qp_attr attr = {0};
+	uint32_t ret;
+	uint32_t log_wqbb_n;
+	struct mlx5_devx_cq_attr cq_attr = {
+		.use_first_only = 1,
+		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
+	};
+	size_t klm_size = priv->max_segs_num * sizeof(struct mlx5_klm);
+	void *klm_array;
+
+	klm_array = rte_calloc(__func__, (size_t)qp->entries_n, klm_size, 64);
+	if (klm_array == NULL) {
+		DRV_LOG(ERR, "Failed to allocate opaque memory.");
+		rte_errno = ENOMEM;
+		return -1;
+	}
+	if (mlx5_common_verbs_reg_mr(priv->cdev->pd, klm_array,
+				     qp->entries_n * klm_size,
+				     &qp->klm_mr) != 0) {
+		rte_free(klm_array);
+		DRV_LOG(ERR, "Failed to register klm MR.");
+		rte_errno = ENOMEM;
+		return -1;
+	}
+	qp->klm_array = (struct mlx5_klm *)qp->klm_mr.addr;
+	if (mlx5_devx_cq_create(priv->cdev->ctx, &qp->umr_cq_obj, log_nb_desc,
+				&cq_attr, socket_id) != 0) {
+		DRV_LOG(ERR, "Failed to create UMR CQ.");
+		return -1;
+	}
+	/* Size the SQ for UMR + SEND_EN WQE sets, one per crypto QP entry. */
+	log_wqbb_n = rte_log2_u32(qp->entries_n *
+			(priv->wqe_set_size / MLX5_SEND_WQE_BB));
+	attr.pd = priv->cdev->pdn;
+	attr.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj);
+	attr.cqn = qp->umr_cq_obj.cq->id;
+	attr.num_of_receive_wqes = 0;
+	attr.num_of_send_wqbbs = RTE_BIT32(log_wqbb_n);
+	attr.ts_format =
+		mlx5_ts_format_conv(priv->cdev->config.hca_attr.qp_ts_format);
+	attr.cd_master = 1;
+	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->umr_qp_obj,
+				  attr.num_of_send_wqbbs * MLX5_SEND_WQE_BB,
+				  &attr, socket_id);
+	if (ret) {
+		DRV_LOG(ERR, "Failed to create UMR QP.");
+		return -1;
+	}
+	if (mlx5_devx_qp2rts(&qp->umr_qp_obj, qp->umr_qp_obj.qp->id)) {
+		DRV_LOG(ERR, "Failed to change UMR QP state to RTS.");
+		return -1;
+	}
+	/* Save the UMR WQEBBS for checking the WQE boundary. */
+	qp->umr_wqbbs = attr.num_of_send_wqbbs;
+	return 0;
+}
+
+static int
+mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
+			 const struct rte_cryptodev_qp_conf *qp_conf,
+			 int socket_id)
+{
+	struct mlx5_crypto_priv *priv = dev->data->dev_private;
+	struct mlx5_hca_attr *attr = &priv->cdev->config.hca_attr;
+	struct mlx5_crypto_qp *qp;
+	struct mlx5_devx_cq_attr cq_attr = {
+		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
+	};
+	struct mlx5_devx_qp_attr qp_attr = {
+		.pd = priv->cdev->pdn,
+		.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
+		.user_index = qp_id,
+	};
+	uint32_t log_ops_n = rte_log2_u32(qp_conf->nb_descriptors);
+	uint32_t entries = RTE_BIT32(log_ops_n);
+	uint32_t alloc_size = sizeof(*qp);
+	void *opaq_buf;
+	int ret;
+
+	alloc_size = RTE_ALIGN(alloc_size, RTE_CACHE_LINE_SIZE);
+	alloc_size += (sizeof(struct rte_crypto_op *) +
+		       sizeof(struct mlx5_devx_obj *)) * entries;
+	qp = rte_zmalloc_socket(__func__, alloc_size, RTE_CACHE_LINE_SIZE,
+				socket_id);
+	if (qp == NULL) {
+		DRV_LOG(ERR, "Failed to allocate qp memory.");
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+	qp->priv = priv;
+	qp->entries_n = entries;
+	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
+				  priv->dev_config.socket_id)) {
+		DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
+			(uint32_t)qp_id);
+		rte_errno = ENOMEM;
+		goto err;
+	}
+	opaq_buf = rte_calloc(__func__, (size_t)entries,
+			      sizeof(union mlx5_gga_crypto_opaque),
+			      sizeof(union mlx5_gga_crypto_opaque));
+	if (opaq_buf == NULL) {
+		DRV_LOG(ERR, "Failed to allocate opaque memory.");
+		rte_errno = ENOMEM;
+		goto err;
+	}
+	if (mlx5_common_verbs_reg_mr(priv->cdev->pd, opaq_buf, entries *
+				     sizeof(union mlx5_gga_crypto_opaque),
+				     &qp->opaque_mr) != 0) {
+		rte_free(opaq_buf);
+		DRV_LOG(ERR, "Failed to register opaque MR.");
+		rte_errno = ENOMEM;
+		goto err;
+	}
+	ret = mlx5_devx_cq_create(priv->cdev->ctx, &qp->cq_obj, log_ops_n,
+				  &cq_attr, socket_id);
+	if (ret != 0) {
+		DRV_LOG(ERR, "Failed to create CQ.");
+		goto err;
+	}
+	qp_attr.cqn = qp->cq_obj.cq->id;
+	qp_attr.ts_format = mlx5_ts_format_conv(attr->qp_ts_format);
+	qp_attr.num_of_receive_wqes = 0;
+	qp_attr.num_of_send_wqbbs = entries;
+	qp_attr.mmo = attr->crypto_mmo.crypto_mmo_qp;
+	/* Set MMO QP as follower as the input data may depend on UMR. */
+	qp_attr.cd_slave_send = 1;
+	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj,
+				  qp_attr.num_of_send_wqbbs * MLX5_WQE_SIZE,
+				  &qp_attr, socket_id);
+	if (ret != 0) {
+		DRV_LOG(ERR, "Failed to create QP.");
+		goto err;
+	}
+	mlx5_crypto_gcm_init_qp(qp);
+	ret = mlx5_devx_qp2rts(&qp->qp_obj, 0);
+	if (ret)
+		goto err;
+	qp->ops = (struct rte_crypto_op **)(qp + 1);
+	qp->mkey = (struct mlx5_devx_obj **)(qp->ops + entries);
+	if (mlx5_crypto_gcm_umr_qp_setup(dev, qp, log_ops_n, socket_id)) {
+		DRV_LOG(ERR, "Failed to setup UMR QP.");
+		goto err;
+	}
+	DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
+		(uint32_t)qp_id, qp->qp_obj.qp->id, qp->cq_obj.cq->id, entries);
+	if (mlx5_crypto_gcm_indirect_mkeys_prepare(priv, qp)) {
+		DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
+		rte_errno = ENOMEM;
+		goto err;
+	}
+	dev->data->queue_pairs[qp_id] = qp;
+	return 0;
+err:
+	mlx5_crypto_gcm_qp_release(dev, qp_id);
+	return -1;
+}
+
 int
 mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
 {
@@ -133,6 +384,8 @@  mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
 
 	/* Override AES-GCM specified ops. */
 	dev_ops->sym_session_configure = mlx5_crypto_sym_gcm_session_configure;
+	dev_ops->queue_pair_setup = mlx5_crypto_gcm_qp_setup;
+	dev_ops->queue_pair_release = mlx5_crypto_gcm_qp_release;
 	/* Generate GCM capability. */
 	ret = mlx5_crypto_generate_gcm_cap(&cdev->config.hca_attr.crypto_mmo,
 					   mlx5_crypto_gcm_caps);
@@ -140,6 +393,7 @@  mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
 		DRV_LOG(ERR, "No enough AES-GCM cap.");
 		return -1;
 	}
+	priv->max_segs_num = rte_align32pow2((priv->max_segs_num + 2) * 2);
 	priv->caps = mlx5_crypto_gcm_caps;
 	priv->is_gcm_dek_wrap = !!(cdev->config.hca_attr.sw_wrapped_dek &
 				(1 << MLX5_CRYPTO_KEY_PURPOSE_GCM));