@@ -185,6 +185,25 @@ for an additional list of options shared with other mlx5 drivers.
Maximum number of mbuf chain segments (src or dest), default value is 8.
+- ``crypto_mode`` parameter [string]
+
+  Only valid in AES-GCM mode; ignored in AES-XTS mode.
+
+  - ``full_capable``
+    Use UMR WQEs to handle inputs whose AAD/Payload/Digest are not contiguous.
+
+  - ``ipsec_opt``
+    Shrink the AAD in software for inputs laid out as contiguous
+    AAD/IV/Payload/Digest (see the layout example at the end of this
+    parameter description).
+    The PMD relies on the IPsec layout and expects the AAD, IV, payload and
+    digest of each OP to be contiguous in memory, all within a single mbuf.
+    During the enqueue OP, the PMD extracts the ESP.IV bytes from the input
+    memory and binds the AAD (ESP SPI and SN) to the payload; during the
+    dequeue OP, it restores the original memory layout.
+    The supported ESP.IV size range is [0, 16] bytes.
+
+  Set to ``full_capable`` by default.
+
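+  For illustration only, the sketch below shows one way an application could
+  lay out a single operation for ``ipsec_opt``. The helper name and the
+  8-byte ESP SPI/SN AAD are examples, not a PMD API; the GCM nonce is still
+  provided through the usual OP IV area.
+
+  .. code-block:: c
+
+     #include <rte_crypto.h>
+     #include <rte_mbuf.h>
+
+     /*
+      * Illustrative sketch: AAD (ESP SPI + SN), ESP IV, payload and digest
+      * are contiguous in one mbuf and processed in place. The mbuf data is
+      * assumed to start at the ESP header, and iv_len must stay within the
+      * documented [0, 16] byte range.
+      */
+     static void
+     build_ipsec_opt_op(struct rte_crypto_op *op, struct rte_mbuf *mbuf,
+                        void *sess, uint16_t iv_len, uint32_t payload_len)
+     {
+             const uint16_t aad_len = 8; /* ESP SPI + SN */
+             uint8_t *aad = rte_pktmbuf_mtod(mbuf, uint8_t *);
+
+             op->sym->m_src = mbuf;
+             op->sym->m_dst = NULL; /* in-place, OOP is not supported */
+             rte_crypto_op_attach_sym_session(op, sess);
+             op->sym->aead.aad.data = aad;
+             op->sym->aead.aad.phys_addr = rte_pktmbuf_iova(mbuf);
+             /* The payload starts right after the AAD and the ESP IV. */
+             op->sym->aead.data.offset = aad_len + iv_len;
+             op->sym->aead.data.length = payload_len;
+             /* The digest must immediately follow the payload. */
+             op->sym->aead.digest.data = aad + aad_len + iv_len + payload_len;
+             op->sym->aead.digest.phys_addr =
+                     rte_pktmbuf_iova_offset(mbuf, aad_len + iv_len + payload_len);
+     }
+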
Supported NICs
--------------
@@ -205,6 +224,8 @@ Limitations
values.
- AES-GCM is supported only on BlueField-3.
- AES-GCM supports only key import plaintext mode.
+- AES-GCM ``ipsec_opt`` mode does not support non-contiguous AAD/Payload/Digest
+  or multi-segment mbufs.
Prerequisites
@@ -144,6 +144,10 @@ New Features
Added an API that allows the user to reclaim the defer queue with RCU.
+* **Updated NVIDIA mlx5 crypto driver.**
+
+ * Added AES-GCM IPsec operation optimization.
+
Removed Items
-------------
@@ -25,10 +25,6 @@
#define MLX5_CRYPTO_FEATURE_FLAGS(wrapped_mode) \
(RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO | RTE_CRYPTODEV_FF_HW_ACCELERATED | \
- RTE_CRYPTODEV_FF_IN_PLACE_SGL | RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT | \
- RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT | \
- RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT | \
- RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT | \
(wrapped_mode ? RTE_CRYPTODEV_FF_CIPHER_WRAPPED_KEY : 0) | \
RTE_CRYPTODEV_FF_CIPHER_MULTIPLE_DATA_UNITS)
@@ -60,6 +56,14 @@ mlx5_crypto_dev_infos_get(struct rte_cryptodev *dev,
dev_info->driver_id = mlx5_crypto_driver_id;
dev_info->feature_flags =
MLX5_CRYPTO_FEATURE_FLAGS(priv->is_wrapped_mode);
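+ /*
+ * SGL handling relies on UMR WQEs, which are not used in ipsec_opt mode,
+ * so the SGL feature flags are exposed only in full_capable mode.
+ */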
+ if (!mlx5_crypto_is_ipsec_opt(priv))
+ dev_info->feature_flags |=
+ RTE_CRYPTODEV_FF_IN_PLACE_SGL |
+ RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT |
+ RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT |
+ RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT |
+ RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT;
+
dev_info->capabilities = priv->caps;
dev_info->max_nb_queue_pairs = MLX5_CRYPTO_MAX_QPS;
if (priv->caps->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) {
@@ -249,6 +253,16 @@ mlx5_crypto_args_check_handler(const char *key, const char *val, void *opaque)
fclose(file);
devarg_prms->login_devarg = true;
return 0;
+ } else if (strcmp(key, "crypto_mode") == 0) {
+ if (strcmp(val, "full_capable") == 0) {
+ devarg_prms->crypto_mode = MLX5_CRYPTO_FULL_CAPABLE;
+ } else if (strcmp(val, "ipsec_opt") == 0) {
+ devarg_prms->crypto_mode = MLX5_CRYPTO_IPSEC_OPT;
+ } else {
+ DRV_LOG(ERR, "Invalid crypto mode: %s", val);
+ rte_errno = EINVAL;
+ return -rte_errno;
+ }
}
errno = 0;
tmp = strtoul(val, NULL, 0);
@@ -294,6 +308,7 @@ mlx5_crypto_parse_devargs(struct mlx5_kvargs_ctrl *mkvlist,
"max_segs_num",
"wcs_file",
"algo",
+ "crypto_mode",
NULL,
};
@@ -379,6 +394,7 @@ mlx5_crypto_dev_probe(struct mlx5_common_device *cdev,
priv->crypto_dev = crypto_dev;
priv->is_wrapped_mode = wrapped_mode;
priv->max_segs_num = devarg_prms.max_segs_num;
+ priv->crypto_mode = devarg_prms.crypto_mode;
/* Init and override AES-GCM configuration. */
if (devarg_prms.is_aes_gcm) {
ret = mlx5_crypto_gcm_init(priv);
@@ -25,6 +25,16 @@
MLX5_WSEG_SIZE)
#define MLX5_CRYPTO_GCM_MAX_AAD 64
#define MLX5_CRYPTO_GCM_MAX_DIGEST 16
+#define MLX5_CRYPTO_GCM_IPSEC_IV_SIZE 16
+
+enum mlx5_crypto_mode {
+ MLX5_CRYPTO_FULL_CAPABLE,
+ MLX5_CRYPTO_IPSEC_OPT,
+};
+
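+/* Backup area for the bytes overwritten by the enqueue-time AAD shrink. */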
+struct mlx5_crypto_ipsec_mem {
+ uint8_t mem[MLX5_CRYPTO_GCM_IPSEC_IV_SIZE];
+} __rte_packed;
struct mlx5_crypto_priv {
TAILQ_ENTRY(mlx5_crypto_priv) next;
@@ -45,6 +55,7 @@ struct mlx5_crypto_priv {
uint16_t umr_wqe_stride;
uint16_t max_rdmar_ds;
uint32_t is_wrapped_mode:1;
+ enum mlx5_crypto_mode crypto_mode;
};
struct mlx5_crypto_qp {
@@ -57,6 +68,7 @@ struct mlx5_crypto_qp {
struct mlx5_devx_obj **mkey; /* WQE's indirect mkeys. */
struct mlx5_klm *klm_array;
union mlx5_gga_crypto_opaque *opaque_addr;
+ struct mlx5_crypto_ipsec_mem *ipsec_mem;
struct mlx5_mr_ctrl mr_ctrl;
struct mlx5_pmd_mr mr;
/* Crypto QP. */
@@ -93,6 +105,7 @@ struct mlx5_crypto_devarg_params {
uint64_t keytag;
uint32_t max_segs_num;
uint32_t is_aes_gcm:1;
+ enum mlx5_crypto_mode crypto_mode;
};
struct mlx5_crypto_session {
@@ -139,6 +152,12 @@ struct mlx5_crypto_dek_ctx {
struct mlx5_crypto_priv *priv;
};
+static __rte_always_inline bool
+mlx5_crypto_is_ipsec_opt(struct mlx5_crypto_priv *priv)
+{
+ return priv->crypto_mode == MLX5_CRYPTO_IPSEC_OPT;
+}
+
typedef void *(*mlx5_crypto_mkey_update_t)(struct mlx5_crypto_priv *priv,
struct mlx5_crypto_qp *qp,
uint32_t idx);
@@ -181,6 +181,7 @@ mlx5_crypto_sym_gcm_session_configure(struct rte_cryptodev *dev,
DRV_LOG(ERR, "Only AES-GCM algorithm is supported.");
return -ENOTSUP;
}
+
if (aead->op == RTE_CRYPTO_AEAD_OP_ENCRYPT)
op_type = MLX5_CRYPTO_OP_TYPE_ENCRYPTION;
else
@@ -235,6 +236,7 @@ mlx5_crypto_gcm_qp_release(struct rte_cryptodev *dev, uint16_t qp_id)
}
mlx5_crypto_indirect_mkeys_release(qp, qp->entries_n);
mlx5_mr_btree_free(&qp->mr_ctrl.cache_bh);
+ rte_free(qp->ipsec_mem);
rte_free(qp);
dev->data->queue_pairs[qp_id] = NULL;
return 0;
@@ -321,13 +323,16 @@ mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
uint32_t log_ops_n = rte_log2_u32(qp_conf->nb_descriptors);
uint32_t entries = RTE_BIT32(log_ops_n);
uint32_t alloc_size = sizeof(*qp);
+ uint32_t extra_obj_size = 0;
size_t mr_size, opaq_size;
void *mr_buf;
int ret;
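+ /*
+ * Per-descriptor indirect mkey pointers are needed only when UMR is
+ * used, i.e. in full_capable mode.
+ */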
+ if (!mlx5_crypto_is_ipsec_opt(priv))
+ extra_obj_size = sizeof(struct mlx5_devx_obj *);
alloc_size = RTE_ALIGN(alloc_size, RTE_CACHE_LINE_SIZE);
alloc_size += (sizeof(struct rte_crypto_op *) +
- sizeof(struct mlx5_devx_obj *)) * entries;
+ extra_obj_size) * entries;
qp = rte_zmalloc_socket(__func__, alloc_size, RTE_CACHE_LINE_SIZE,
socket_id);
if (qp == NULL) {
@@ -370,7 +375,7 @@ mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
* Triple the CQ size as UMR QP which contains UMR and SEND_EN WQE
* will share this CQ .
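+ * In ipsec_opt mode no UMR QP is created, so the CQ keeps its original size.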
*/
- qp->cq_entries_n = rte_align32pow2(entries * 3);
+ qp->cq_entries_n = rte_align32pow2(entries * (mlx5_crypto_is_ipsec_opt(priv) ? 1 : 3));
ret = mlx5_devx_cq_create(priv->cdev->ctx, &qp->cq_obj,
rte_log2_u32(qp->cq_entries_n),
&cq_attr, socket_id);
@@ -384,7 +389,7 @@ mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
qp_attr.num_of_send_wqbbs = entries;
qp_attr.mmo = attr->crypto_mmo.crypto_mmo_qp;
/* Set MMO QP as follower as the input data may depend on UMR. */
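+ /*
+ * In ipsec_opt mode there is no UMR dependency, so the MMO QP does not
+ * need to be a follower.
+ */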
- qp_attr.cd_slave_send = 1;
+ qp_attr.cd_slave_send = !mlx5_crypto_is_ipsec_opt(priv);
ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp_obj,
qp_attr.num_of_send_wqbbs * MLX5_WQE_SIZE,
&qp_attr, socket_id);
@@ -397,18 +402,28 @@ mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
if (ret)
goto err;
qp->ops = (struct rte_crypto_op **)(qp + 1);
- qp->mkey = (struct mlx5_devx_obj **)(qp->ops + entries);
- if (mlx5_crypto_gcm_umr_qp_setup(dev, qp, socket_id)) {
- DRV_LOG(ERR, "Failed to setup UMR QP.");
- goto err;
- }
- DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
- (uint32_t)qp_id, qp->qp_obj.qp->id, qp->cq_obj.cq->id, entries);
- if (mlx5_crypto_indirect_mkeys_prepare(priv, qp, &mkey_attr,
- mlx5_crypto_gcm_mkey_klm_update)) {
- DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
- rte_errno = ENOMEM;
- goto err;
+ if (!mlx5_crypto_is_ipsec_opt(priv)) {
+ qp->mkey = (struct mlx5_devx_obj **)(qp->ops + entries);
+ if (mlx5_crypto_gcm_umr_qp_setup(dev, qp, socket_id)) {
+ DRV_LOG(ERR, "Failed to setup UMR QP.");
+ goto err;
+ }
+ DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
+ (uint32_t)qp_id, qp->qp_obj.qp->id, qp->cq_obj.cq->id, entries);
+ if (mlx5_crypto_indirect_mkeys_prepare(priv, qp, &mkey_attr,
+ mlx5_crypto_gcm_mkey_klm_update)) {
+ DRV_LOG(ERR, "Cannot allocate indirect memory regions.");
+ rte_errno = ENOMEM;
+ goto err;
+ }
+ } else {
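+ /* ipsec_opt mode keeps a per-descriptor backup area instead of indirect mkeys. */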
+ extra_obj_size = sizeof(struct mlx5_crypto_ipsec_mem) * entries;
+ qp->ipsec_mem = rte_calloc(__func__, (size_t)1, extra_obj_size,
+ RTE_CACHE_LINE_SIZE);
+ if (!qp->ipsec_mem) {
+ DRV_LOG(ERR, "Failed to allocate ipsec_mem.");
+ goto err;
+ }
}
dev->data->queue_pairs[qp_id] = qp;
return 0;
@@ -974,6 +989,168 @@ mlx5_crypto_gcm_dequeue_burst(void *queue_pair,
return op_num;
}
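+/*
+ * Enqueue burst for ipsec_opt mode. Each OP is expected to reside in a
+ * single mbuf as contiguous AAD/IV/Payload/Digest, so no UMR WQE is needed.
+ * The AAD is shrunk in software to sit directly before the payload and the
+ * overwritten bytes are backed up in qp->ipsec_mem for dequeue to restore.
+ */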
+static uint16_t
+mlx5_crypto_gcm_ipsec_enqueue_burst(void *queue_pair,
+ struct rte_crypto_op **ops,
+ uint16_t nb_ops)
+{
+ struct mlx5_crypto_qp *qp = queue_pair;
+ struct mlx5_crypto_session *sess;
+ struct mlx5_crypto_priv *priv = qp->priv;
+ struct mlx5_crypto_gcm_data gcm_data;
+ struct rte_crypto_op *op;
+ struct rte_mbuf *m_src;
+ uint16_t mask = qp->entries_n - 1;
+ uint16_t remain = qp->entries_n - (qp->pi - qp->qp_ci);
+ uint32_t idx;
+ uint32_t pkt_iv_len;
+ uint8_t *payload;
+
+ if (remain < nb_ops)
+ nb_ops = remain;
+ else
+ remain = nb_ops;
+ if (unlikely(remain == 0))
+ return 0;
+ do {
+ op = *ops++;
+ sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ idx = qp->pi & mask;
+ m_src = op->sym->m_src;
+ MLX5_ASSERT(m_src->nb_segs == 1);
+ payload = rte_pktmbuf_mtod_offset(m_src, void *, op->sym->aead.data.offset);
+ gcm_data.src_addr = RTE_PTR_SUB(payload, sess->aad_len);
+ /*
+ * The IPsec IV, located between the AAD and the payload, must be equal
+ * to or less than MLX5_CRYPTO_GCM_IPSEC_IV_SIZE bytes.
+ */
+ pkt_iv_len = RTE_PTR_DIFF(payload,
+ RTE_PTR_ADD(op->sym->aead.aad.data, sess->aad_len));
+ MLX5_ASSERT(pkt_iv_len <= MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
+ gcm_data.src_bytes = op->sym->aead.data.length + sess->aad_len;
+ gcm_data.src_mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->sym->m_src);
+ /* OOP mode is not supported. */
+ MLX5_ASSERT(!op->sym->m_dst || op->sym->m_dst == m_src);
+ gcm_data.dst_addr = gcm_data.src_addr;
+ gcm_data.dst_mkey = gcm_data.src_mkey;
+ gcm_data.dst_bytes = gcm_data.src_bytes;
+ /* The digest must immediately follow the payload. */
+ MLX5_ASSERT(RTE_PTR_ADD(gcm_data.src_addr,
+ sess->aad_len + op->sym->aead.data.length) ==
+ op->sym->aead.digest.data);
+ if (sess->op_type == MLX5_CRYPTO_OP_TYPE_ENCRYPTION)
+ gcm_data.dst_bytes += sess->tag_len;
+ else
+ gcm_data.src_bytes += sess->tag_len;
+ mlx5_crypto_gcm_wqe_set(qp, op, idx, &gcm_data);
+ /*
+ * All the data, such as the IV, has already been consumed by the WQE
+ * build above, so the AAD can now be shrunk to sit directly before the
+ * payload. Back up the original memory first, then do the shrink.
+ */
+ rte_memcpy(&qp->ipsec_mem[idx],
+ RTE_PTR_SUB(payload, MLX5_CRYPTO_GCM_IPSEC_IV_SIZE),
+ MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
+ /* Copy directly when source and destination cannot overlap, otherwise use memmove(). */
+ if (likely(pkt_iv_len >= sess->aad_len))
+ rte_memcpy(gcm_data.src_addr, op->sym->aead.aad.data, sess->aad_len);
+ else
+ memmove(gcm_data.src_addr, op->sym->aead.aad.data, sess->aad_len);
+ op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+ qp->ops[idx] = op;
+ qp->pi++;
+ } while (--remain);
+ qp->stats.enqueued_count += nb_ops;
+ /* Update the last GGA cseg with COMP. */
+ ((struct mlx5_wqe_cseg *)qp->wqe)->flags =
+ RTE_BE32(MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET);
+ mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
+ qp->pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
+ !priv->uar.dbnc);
+ return nb_ops;
+}
+
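+/*
+ * Undo the enqueue-time AAD shrink: move the AAD back to its original
+ * location when it is larger than the backup area, then restore the bytes
+ * saved in qp->ipsec_mem.
+ */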
+static __rte_always_inline void
+mlx5_crypto_gcm_restore_ipsec_mem(struct mlx5_crypto_qp *qp,
+ uint16_t orci,
+ uint16_t rci,
+ uint16_t op_mask)
+{
+ uint32_t idx;
+ struct mlx5_crypto_session *sess;
+ struct rte_crypto_op *op;
+ struct rte_mbuf *m_src;
+ uint8_t *payload;
+
+ while (orci != rci) {
+ idx = orci & op_mask;
+ op = qp->ops[idx];
+ sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ m_src = op->sym->m_src;
+ payload = rte_pktmbuf_mtod_offset(m_src, void *,
+ op->sym->aead.data.offset);
+ /* Restore the IPsec memory. */
+ if (unlikely(sess->aad_len > MLX5_CRYPTO_GCM_IPSEC_IV_SIZE))
+ memmove(op->sym->aead.aad.data,
+ RTE_PTR_SUB(payload, sess->aad_len), sess->aad_len);
+ rte_memcpy(RTE_PTR_SUB(payload, MLX5_CRYPTO_GCM_IPSEC_IV_SIZE),
+ &qp->ipsec_mem[idx], MLX5_CRYPTO_GCM_IPSEC_IV_SIZE);
+ orci++;
+ }
+}
+
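+/*
+ * Dequeue burst for ipsec_opt mode: same flow as the generic GCM dequeue,
+ * with the original memory layout restored before the OPs are returned.
+ */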
+static uint16_t
+mlx5_crypto_gcm_ipsec_dequeue_burst(void *queue_pair,
+ struct rte_crypto_op **ops,
+ uint16_t nb_ops)
+{
+ struct mlx5_crypto_qp *qp = queue_pair;
+ volatile struct mlx5_cqe *restrict cqe;
+ const unsigned int cq_size = qp->cq_entries_n;
+ const unsigned int mask = cq_size - 1;
+ const unsigned int op_mask = qp->entries_n - 1;
+ uint32_t idx;
+ uint32_t next_idx = qp->cq_ci & mask;
+ uint16_t reported_ci = qp->reported_ci;
+ uint16_t qp_ci = qp->qp_ci;
+ const uint16_t max = RTE_MIN((uint16_t)(qp->pi - reported_ci), nb_ops);
+ uint16_t op_num = 0;
+ int ret;
+
+ if (unlikely(max == 0))
+ return 0;
+ while (qp_ci - reported_ci < max) {
+ idx = next_idx;
+ next_idx = (qp->cq_ci + 1) & mask;
+ cqe = &qp->cq_obj.cqes[idx];
+ ret = check_cqe(cqe, cq_size, qp->cq_ci);
+ if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+ if (unlikely(ret != MLX5_CQE_STATUS_HW_OWN))
+ mlx5_crypto_gcm_cqe_err_handle(qp,
+ qp->ops[reported_ci & op_mask]);
+ break;
+ }
+ qp_ci = rte_be_to_cpu_16(cqe->wqe_counter) + 1;
+ qp->cq_ci++;
+ }
+ /* If wqe_counter changed, it means CQEs were handled. */
+ if (likely(qp->qp_ci != qp_ci)) {
+ qp->qp_ci = qp_ci;
+ rte_io_wmb();
+ qp->cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->cq_ci);
+ }
+ /* If reported_ci differs from qp_ci, completed ops can be returned. */
+ if (qp_ci != reported_ci) {
+ op_num = RTE_MIN((uint16_t)(qp_ci - reported_ci), max);
+ reported_ci += op_num;
+ mlx5_crypto_gcm_restore_ipsec_mem(qp, qp->reported_ci, reported_ci, op_mask);
+ mlx5_crypto_gcm_fill_op(qp, ops, qp->reported_ci, reported_ci, op_mask);
+ qp->stats.dequeued_count += op_num;
+ qp->reported_ci = reported_ci;
+ }
+ return op_num;
+}
+
int
mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
{
@@ -987,9 +1164,16 @@ mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
mlx5_os_set_reg_mr_cb(&priv->reg_mr_cb, &priv->dereg_mr_cb);
dev_ops->queue_pair_setup = mlx5_crypto_gcm_qp_setup;
dev_ops->queue_pair_release = mlx5_crypto_gcm_qp_release;
- crypto_dev->dequeue_burst = mlx5_crypto_gcm_dequeue_burst;
- crypto_dev->enqueue_burst = mlx5_crypto_gcm_enqueue_burst;
- priv->max_klm_num = RTE_ALIGN((priv->max_segs_num + 1) * 2 + 1, MLX5_UMR_KLM_NUM_ALIGN);
+ if (mlx5_crypto_is_ipsec_opt(priv)) {
+ crypto_dev->dequeue_burst = mlx5_crypto_gcm_ipsec_dequeue_burst;
+ crypto_dev->enqueue_burst = mlx5_crypto_gcm_ipsec_enqueue_burst;
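+ /* No UMR WQEs are built in ipsec_opt mode, so no KLM entries are needed. */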
+ priv->max_klm_num = 0;
+ } else {
+ crypto_dev->dequeue_burst = mlx5_crypto_gcm_dequeue_burst;
+ crypto_dev->enqueue_burst = mlx5_crypto_gcm_enqueue_burst;
+ priv->max_klm_num = RTE_ALIGN((priv->max_segs_num + 1) * 2 + 1,
+ MLX5_UMR_KLM_NUM_ALIGN);
+ }
/* Generate GCM capability. */
ret = mlx5_crypto_generate_gcm_cap(&cdev->config.hca_attr.crypto_mmo,
mlx5_crypto_gcm_caps);