@@ -613,6 +613,7 @@ struct mlx5_wqe_send_en_wqe {
/* MMO metadata segment */
#define MLX5_OPCODE_MMO 0x2fu
+#define MLX5_OPC_MOD_MMO_CRYPTO 0x6u
#define MLX5_OPC_MOD_MMO_REGEX 0x4u
#define MLX5_OPC_MOD_MMO_COMP 0x2u
#define MLX5_OPC_MOD_MMO_DECOMP 0x3u
@@ -66,8 +66,10 @@ struct mlx5_crypto_qp {
uint8_t *umr_wqe;
uint16_t umr_wqbbs;
uint16_t umr_pi;
+ uint16_t umr_last_pi; /* wqe_counter of the last consumed UMR CQE. */
uint16_t umr_ci;
uint32_t umr_errors;
+ bool has_umr; /* UMR WQEs were posted by the latest enqueue burst. */
};
struct mlx5_crypto_dek {
@@ -9,6 +9,7 @@
#include <rte_log.h>
#include <bus_pci_driver.h>
#include <rte_memory.h>
+#include <rte_io.h>
#include <mlx5_glue.h>
#include <mlx5_common.h>
@@ -18,6 +19,17 @@
#include "mlx5_crypto_utils.h"
#include "mlx5_crypto.h"
+#define MLX5_MMO_CRYPTO_OPC (MLX5_OPCODE_MMO | \
+ (MLX5_OPC_MOD_MMO_CRYPTO << WQE_CSEG_OPC_MOD_OFFSET))
+
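+/*
+ * Resolved input/output layout for one GCM WQE. For a contiguous op
+ * the addresses point directly into the mbuf data and mkey is the
+ * buffer lkey; for a scattered op they are byte offsets into the
+ * indirect UMR mkey built for this op.
+ */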
+struct mlx5_crypto_gcm_data {
+ void *src_addr;
+ uint32_t src_bytes;
+ void *dst_addr;
+ uint32_t dst_bytes;
+ uint32_t mkey;
+};
+
static struct rte_cryptodev_capabilities mlx5_crypto_gcm_caps[] = {
{
.op = RTE_CRYPTO_OP_TYPE_UNDEFINED,
@@ -246,6 +258,10 @@ mlx5_crypto_gcm_umr_qp_setup(struct rte_cryptodev *dev, struct mlx5_crypto_qp *q
DRV_LOG(ERR, "Failed to create UMR CQ.");
return -1;
}
+ /* Init the CQE as HW-owned so the first poll does not see a stale completion. */
+ qp->umr_cq_obj.cqes[0].op_own = MLX5_CQE_OWNER_MASK;
+ qp->umr_cq_obj.cqes[0].wqe_counter = rte_cpu_to_be_16(UINT16_MAX);
+ qp->umr_last_pi = UINT16_MAX;
+ /* Size the UMR QP for one UMR + SEND_EN WQE set per crypto QP entry at maximum. */
log_wqbb_n = rte_log2_u32(qp->entries_n *
(priv->wqe_set_size / MLX5_SEND_WQE_BB));
@@ -374,6 +390,389 @@ mlx5_crypto_gcm_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
return -1;
}
+static __rte_always_inline bool
+mlx5_crypto_is_gcm_input_continuous(struct rte_crypto_op *op)
+{
+ struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ struct rte_mbuf *m_src = op->sym->m_src;
+ void *aad_addr = op->sym->aead.aad.data;
+ void *tag_addr = op->sym->aead.digest.data;
+ void *pkt_addr = rte_pktmbuf_mtod_offset(m_src, void *, op->sym->aead.data.offset);
+
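+ /*
+ * The op can be passed to the hardware as one contiguous buffer
+ * only when AAD, packet data and digest sit back to back in a
+ * single mbuf segment: | AAD | payload | digest |.
+ */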
+ /*
+ * Out of place ops, multi-segment mbufs and non-adjacent AAD or
+ * digest buffers can never form one contiguous input.
+ */
+ if ((op->sym->m_dst && op->sym->m_dst != m_src) ||
+ (m_src->nb_segs > 1) ||
+ (RTE_PTR_ADD(aad_addr, sess->aad_len) != pkt_addr) ||
+ (RTE_PTR_ADD(pkt_addr, op->sym->aead.data.length) != tag_addr))
+ return false;
+ return true;
+}
+
+static __rte_always_inline uint32_t
+mlx5_crypto_gcm_umr_klm_set(struct mlx5_crypto_qp *qp, struct rte_mbuf *mbuf,
+ struct mlx5_klm *klm, uint32_t offset,
+ uint32_t *remain)
+{
+ uint32_t data_len = (rte_pktmbuf_data_len(mbuf) - offset);
+ uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
+
+ if (data_len > *remain)
+ data_len = *remain;
+ *remain -= data_len;
+ klm->byte_count = rte_cpu_to_be_32(data_len);
+ klm->address = rte_cpu_to_be_64(addr);
+ klm->mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf);
+ return klm->mkey;
+}
+
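+/*
+ * Build the KLM list for one direction of the op: the first entry
+ * covers the AAD, the following entries cover the payload mbuf
+ * segments and the last entry covers the digest, so the resulting
+ * indirect mkey exposes | AAD | payload | tag | as one virtually
+ * contiguous region. Returns the number of KLM entries, or 0 on
+ * error with op->status set.
+ */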
+static __rte_always_inline int
+mlx5_crypto_gcm_build_klm(struct mlx5_crypto_qp *qp,
+ struct rte_crypto_op *op,
+ struct rte_mbuf *mbuf,
+ struct mlx5_klm *klm)
+{
+ struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ uint32_t remain_len = op->sym->aead.data.length;
+ uint32_t nb_segs = mbuf->nb_segs;
+ uint32_t klm_n = 0;
+
+ /* Set AAD. */
+ klm->byte_count = rte_cpu_to_be_32(sess->aad_len);
+ klm->address = rte_cpu_to_be_64((uintptr_t)op->sym->aead.aad.data);
+ klm->mkey = mlx5_mr_addr2mr_bh(&qp->mr_ctrl, (uintptr_t)op->sym->aead.aad.data);
+ klm_n++;
+ /* The first mbuf must account for the data offset. */
+ if (unlikely(mlx5_crypto_gcm_umr_klm_set(qp, mbuf, ++klm,
+ op->sym->aead.data.offset, &remain_len) == UINT32_MAX)) {
+ op->status = RTE_CRYPTO_OP_STATUS_ERROR;
+ return 0;
+ }
+ klm_n++;
+ while (remain_len) {
+ nb_segs--;
+ mbuf = mbuf->next;
+ if (unlikely(mbuf == NULL || nb_segs == 0)) {
+ op->status = RTE_CRYPTO_OP_STATUS_INVALID_ARGS;
+ return 0;
+ }
+ if (unlikely(mlx5_crypto_gcm_umr_klm_set(qp, mbuf, ++klm, 0,
+ &remain_len) == UINT32_MAX)) {
+ op->status = RTE_CRYPTO_OP_STATUS_ERROR;
+ return 0;
+ }
+ klm_n++;
+ }
+ /* Set TAG. */
+ klm++;
+ klm->byte_count = rte_cpu_to_be_32((uint32_t)sess->tag_len);
+ klm->address = rte_cpu_to_be_64((uintptr_t)op->sym->aead.digest.data);
+ klm->mkey = mlx5_mr_addr2mr_bh(&qp->mr_ctrl, (uintptr_t)op->sym->aead.digest.data);
+ klm_n++;
+ return klm_n;
+}
+
+static __rte_always_inline void *
+mlx5_crypto_gcm_get_umr_wqe(struct mlx5_crypto_qp *qp)
+{
+ struct mlx5_crypto_priv *priv = qp->priv;
+ uint32_t wqe_offset = qp->umr_pi & (qp->umr_wqbbs - 1);
+ uint32_t left_wqbbs = qp->umr_wqbbs - wqe_offset;
+ struct mlx5_wqe_cseg *wqe;
+
+ /* If the UMR WQE would cross the SQ wrap-around boundary. */
+ if (left_wqbbs < priv->umr_wqe_stride) {
+ /* Append a NOP WQE as the remaining WQEBBs cannot hold a whole UMR. */
+ wqe = (struct mlx5_wqe_cseg *)RTE_PTR_ADD(qp->umr_qp_obj.umem_buf,
+ wqe_offset * MLX5_SEND_WQE_BB);
+ wqe->opcode = rte_cpu_to_be_32(MLX5_OPCODE_NOP | ((uint32_t)qp->umr_pi << 8));
+ wqe->sq_ds = rte_cpu_to_be_32((qp->umr_qp_obj.qp->id << 8) | (left_wqbbs << 2));
+ wqe->flags = RTE_BE32(0);
+ wqe->misc = RTE_BE32(0);
+ qp->umr_pi += left_wqbbs;
+ wqe_offset = qp->umr_pi & (qp->umr_wqbbs - 1);
+ }
+ wqe_offset *= MLX5_SEND_WQE_BB;
+ return RTE_PTR_ADD(qp->umr_qp_obj.umem_buf, wqe_offset);
+}
+
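+/*
+ * Build a UMR WQE which registers an indirect mkey over the scattered
+ * op buffers. The WQE consists of a control seg (1 WQE_DS), a UMR
+ * control seg (3), a mkey context seg (4) and one pointer KLM (1)
+ * referencing the KLM array in host memory, hence the fixed 9 WQE_DS
+ * size below.
+ */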
+static __rte_always_inline int
+mlx5_crypto_gcm_build_umr(struct mlx5_crypto_qp *qp,
+ struct rte_crypto_op *op,
+ uint32_t idx,
+ struct mlx5_crypto_gcm_data *data)
+{
+ struct mlx5_crypto_priv *priv = qp->priv;
+ struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ struct mlx5_wqe_cseg *wqe;
+ struct mlx5_wqe_umr_ctrl_seg *ucseg;
+ struct mlx5_wqe_mkey_context_seg *mkc;
+ struct mlx5_klm *iklm;
+ struct mlx5_klm *klm = &qp->klm_array[idx * priv->max_segs_num];
+ uint16_t klm_size, klm_align;
+ uint16_t klm_src = 0, klm_dst = 0;
+ uint32_t total_len = op->sym->aead.data.length + sess->aad_len + sess->tag_len;
+ uint32_t i;
+
+ /* Build the KLM list based on the op. */
+ klm_src = mlx5_crypto_gcm_build_klm(qp, op, op->sym->m_src, klm);
+ if (!klm_src)
+ return -EINVAL;
+ if (op->sym->m_dst && op->sym->m_dst != op->sym->m_src) {
+ klm_dst = mlx5_crypto_gcm_build_klm(qp, op, op->sym->m_dst, klm + klm_src);
+ if (!klm_dst)
+ return -EINVAL;
+ total_len *= 2;
+ }
+ klm_size = klm_src + klm_dst;
+ klm_align = RTE_ALIGN(klm_size, 4);
+ /* Get UMR WQE memory. */
+ wqe = (struct mlx5_wqe_cseg *)mlx5_crypto_gcm_get_umr_wqe(qp);
+ memset(wqe, 0, priv->umr_wqe_size);
+ /* Set WQE control seg. Non-inline KLM UMR WQE size must be 9 WQE_DS. */
+ wqe->opcode = rte_cpu_to_be_32(MLX5_OPCODE_UMR | ((uint32_t)qp->umr_pi << 8));
+ wqe->sq_ds = rte_cpu_to_be_32((qp->umr_qp_obj.qp->id << 8) | 9);
+ wqe->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET);
+ wqe->misc = rte_cpu_to_be_32(qp->mkey[idx]->id);
+ /* Set UMR WQE control seg. */
+ ucseg = (struct mlx5_wqe_umr_ctrl_seg *)(wqe + 1);
+ ucseg->mkey_mask |= rte_cpu_to_be_64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN);
+ ucseg->klm_octowords = rte_cpu_to_be_16(klm_align);
+ /* Set mkey context seg. */
+ mkc = (struct mlx5_wqe_mkey_context_seg *)(ucseg + 1);
+ mkc->len = rte_cpu_to_be_64(total_len);
+ mkc->qpn_mkey = rte_cpu_to_be_32(0xffffff00 | (qp->mkey[idx]->id & 0xff));
+ /* Set UMR pointer to data seg. */
+ iklm = (struct mlx5_klm *)(mkc + 1);
+ iklm->address = rte_cpu_to_be_64((uintptr_t)klm);
+ iklm->mkey = rte_cpu_to_be_32(qp->klm_mr.lkey);
+ iklm->byte_count = rte_cpu_to_be_32(klm_align);
+ data->mkey = rte_cpu_to_be_32(qp->mkey[idx]->id);
+ data->src_addr = 0;
+ data->src_bytes = sess->aad_len + op->sym->aead.data.length;
+ data->dst_bytes = data->src_bytes;
+ if (klm_dst)
+ data->dst_addr = (void *)(uintptr_t)(data->src_bytes + sess->tag_len);
+ else
+ data->dst_addr = 0;
+ if (sess->op_type == MLX5_CRYPTO_OP_TYPE_ENCRYPTION)
+ data->dst_bytes += sess->tag_len;
+ else
+ data->src_bytes += sess->tag_len;
+ /* Clear the padding memory. */
+ for (i = klm_size; i < klm_align; i++) {
+ klm[i].mkey = UINT32_MAX;
+ klm[i].address = 0;
+ klm[i].byte_count = 0;
+ }
+ /* Update the producer index and the last UMR WQE pointer. */
+ qp->umr_pi += priv->umr_wqe_stride;
+ qp->umr_wqe = (uint8_t *)wqe;
+ return 0;
+}
+
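+/*
+ * Build a SEND_EN WQE on the UMR QP to enable the crypto QP WQEs up
+ * to qp->pi. The fence flag makes sure all preceding UMR WQEs have
+ * completed before the crypto WQEs start consuming the new mkeys.
+ */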
+static __rte_always_inline void
+mlx5_crypto_gcm_build_send_en(struct mlx5_crypto_qp *qp)
+{
+ uint32_t wqe_offset = (qp->umr_pi & (qp->umr_wqbbs - 1)) * MLX5_SEND_WQE_BB;
+ struct mlx5_wqe_cseg *cs = RTE_PTR_ADD(qp->umr_qp_obj.wqes, wqe_offset);
+ struct mlx5_wqe_qseg *qs = RTE_PTR_ADD(cs, sizeof(struct mlx5_wqe_cseg));
+
+ cs->opcode = rte_cpu_to_be_32(MLX5_OPCODE_SEND_EN | ((uint32_t)qp->umr_pi << 8));
+ cs->sq_ds = rte_cpu_to_be_32((qp->umr_qp_obj.qp->id << 8) | 2);
+ cs->flags = RTE_BE32((MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET) |
+ MLX5_WQE_CTRL_FENCE);
+ cs->misc = RTE_BE32(0);
+ qs->max_index = rte_cpu_to_be_32(qp->pi);
+ qs->qpn_cqn = rte_cpu_to_be_32(qp->qp_obj.qp->id);
+ qp->umr_wqe = (uint8_t *)cs;
+ qp->umr_pi += 1;
+}
+
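+/*
+ * Fill one GGA crypto WQE. The IV, tag size and AAD size are passed
+ * to the hardware through the per-index opaque buffer, while the
+ * gather/scatter segments describe the input and output data.
+ */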
+static __rte_always_inline void
+mlx5_crypto_gcm_wqe_set(struct mlx5_crypto_qp *qp,
+ struct rte_crypto_op *op,
+ uint32_t idx,
+ struct mlx5_crypto_gcm_data *data)
+{
+ struct mlx5_crypto_session *sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ struct mlx5_gga_wqe *wqe = &((struct mlx5_gga_wqe *)qp->qp_obj.wqes)[idx];
+ union mlx5_gga_crypto_opaque *opaq = qp->opaque_mr.addr;
+
+ memcpy(opaq[idx].cp.iv,
+ rte_crypto_op_ctod_offset(op, uint8_t *, sess->iv_offset), sess->iv_len);
+ opaq[idx].cp.tag_size = rte_cpu_to_be_32((uint32_t)sess->tag_len);
+ opaq[idx].cp.aad_size = rte_cpu_to_be_32((uint32_t)sess->aad_len);
+ /* Update control seg. */
+ wqe->opcode = rte_cpu_to_be_32(MLX5_MMO_CRYPTO_OPC + (qp->pi << 8));
+ wqe->gga_ctrl1 = sess->mmo_ctrl;
+ wqe->gga_ctrl2 = sess->dek_id;
+ /* Update input seg. */
+ wqe->gather.bcount = rte_cpu_to_be_32(data->src_bytes);
+ wqe->gather.lkey = data->mkey;
+ wqe->gather.pbuf = rte_cpu_to_be_64((uintptr_t)data->src_addr);
+ /* Update output seg. */
+ wqe->scatter.bcount = rte_cpu_to_be_32(data->dst_bytes);
+ wqe->scatter.lkey = data->mkey;
+ wqe->scatter.pbuf = rte_cpu_to_be_64((uintptr_t)data->dst_addr);
+ qp->wqe = (uint8_t *)wqe;
+}
+
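+/*
+ * Ops with a contiguous AAD/payload/digest layout are posted directly
+ * with the mbuf mkey. Scattered ops get a UMR WQE first to build an
+ * indirect mkey. When any UMR WQE was posted, only the UMR QP
+ * doorbell is rung and the crypto WQEs are enabled by the trailing
+ * SEND_EN WQE.
+ */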
+static uint16_t
+mlx5_crypto_gcm_enqueue_burst(void *queue_pair,
+ struct rte_crypto_op **ops,
+ uint16_t nb_ops)
+{
+ struct mlx5_crypto_qp *qp = queue_pair;
+ struct mlx5_crypto_session *sess;
+ struct mlx5_crypto_priv *priv = qp->priv;
+ struct mlx5_crypto_gcm_data gcm_data;
+ struct rte_crypto_op *op;
+ uint16_t mask = qp->entries_n - 1;
+ uint16_t remain = qp->entries_n - (qp->pi - qp->ci);
+ uint32_t idx;
+ uint16_t umr_cnt = 0;
+
+ if (remain < nb_ops)
+ nb_ops = remain;
+ else
+ remain = nb_ops;
+ if (unlikely(remain == 0))
+ return 0;
+ do {
+ op = *ops++;
+ sess = CRYPTODEV_GET_SYM_SESS_PRIV(op->sym->session);
+ idx = qp->pi & mask;
+ if (mlx5_crypto_is_gcm_input_continuous(op)) {
+ gcm_data.src_addr = op->sym->aead.aad.data;
+ gcm_data.src_bytes = op->sym->aead.data.length + sess->aad_len;
+ gcm_data.dst_addr = gcm_data.src_addr;
+ gcm_data.dst_bytes = gcm_data.src_bytes;
+ if (sess->op_type == MLX5_CRYPTO_OP_TYPE_ENCRYPTION)
+ gcm_data.dst_bytes += sess->tag_len;
+ else
+ gcm_data.src_bytes += sess->tag_len;
+ gcm_data.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->sym->m_src);
+ } else {
+ if (unlikely(mlx5_crypto_gcm_build_umr(qp, op, idx, &gcm_data))) {
+ qp->stats.enqueue_err_count++;
+ if (remain != nb_ops) {
+ /* Update nb_ops to return the actually enqueued count. */
+ nb_ops -= remain;
+ break;
+ }
+ return 0;
+ }
+ umr_cnt++;
+ }
+ mlx5_crypto_gcm_wqe_set(qp, op, idx, &gcm_data);
+ qp->ops[idx] = op;
+ qp->pi++;
+ } while (--remain);
+ qp->stats.enqueued_count += nb_ops;
+ if (!umr_cnt) {
+ mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->wqe,
+ qp->pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
+ !priv->uar.dbnc);
+ } else {
+ mlx5_crypto_gcm_build_send_en(qp);
+ mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)qp->umr_wqe,
+ qp->umr_pi, &qp->umr_qp_obj.db_rec[MLX5_SND_DBR],
+ !priv->uar.dbnc);
+ }
+ qp->has_umr = !!umr_cnt;
+ return nb_ops;
+}
+
+static __rte_noinline void
+mlx5_crypto_gcm_cqe_err_handle(struct mlx5_crypto_qp *qp, struct rte_crypto_op *op)
+{
+ const uint32_t idx = qp->ci & (qp->entries_n - 1);
+ volatile struct mlx5_err_cqe *cqe = (volatile struct mlx5_err_cqe *)
+ &qp->cq_obj.cqes[idx];
+
+ if (op)
+ op->status = RTE_CRYPTO_OP_STATUS_ERROR;
+ qp->stats.dequeue_err_count++;
+ DRV_LOG(ERR, "CQE ERR:%x.", rte_be_to_cpu_32(cqe->syndrome));
+}
+
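+/*
+ * Poll the UMR CQ with a single 32-bit read covering wqe_counter and
+ * op_own. This relies on the UMR CQ using only its first CQE, which
+ * is initialized once in the QP setup above, so no CQE iteration is
+ * needed.
+ */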
+static __rte_always_inline void
+mlx5_crypto_gcm_umr_cq_poll(struct mlx5_crypto_qp *qp)
+{
+ union {
+ struct {
+ uint16_t wqe_counter;
+ uint8_t rsvd5;
+ uint8_t op_own;
+ };
+ uint32_t word;
+ } last_word;
+ uint16_t cur_wqe_counter;
+
+ if (!qp->has_umr)
+ return;
+ last_word.word = rte_read32(&qp->umr_cq_obj.cqes[0].wqe_counter);
+ cur_wqe_counter = rte_be_to_cpu_16(last_word.wqe_counter);
+ if (cur_wqe_counter == qp->umr_last_pi)
+ return;
+ MLX5_ASSERT(MLX5_CQE_OPCODE(last_word.op_own) !=
+ MLX5_CQE_INVALID);
+ if (unlikely((MLX5_CQE_OPCODE(last_word.op_own) ==
+ MLX5_CQE_RESP_ERR ||
+ MLX5_CQE_OPCODE(last_word.op_own) ==
+ MLX5_CQE_REQ_ERR)))
+ qp->umr_errors++;
+ qp->umr_last_pi = cur_wqe_counter;
+ qp->umr_ci++;
+ rte_io_wmb();
+ /* Ring CQ doorbell record. */
+ qp->umr_cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->umr_ci);
+ qp->has_umr = false;
+}
+
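+/*
+ * Dequeue burst: complete any pending UMR CQE first, then harvest
+ * crypto CQEs up to nb_ops and update the CQ doorbell record once at
+ * the end.
+ */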
+static uint16_t
+mlx5_crypto_gcm_dequeue_burst(void *queue_pair,
+ struct rte_crypto_op **ops,
+ uint16_t nb_ops)
+{
+ struct mlx5_crypto_qp *qp = queue_pair;
+ volatile struct mlx5_cqe *restrict cqe;
+ struct rte_crypto_op *restrict op;
+ const unsigned int cq_size = qp->entries_n;
+ const unsigned int mask = cq_size - 1;
+ uint32_t idx;
+ uint32_t next_idx = qp->ci & mask;
+ const uint16_t max = RTE_MIN((uint16_t)(qp->pi - qp->ci), nb_ops);
+ uint16_t i = 0;
+ int ret;
+
+ if (unlikely(max == 0))
+ return 0;
+ /* Handle the UMR CQE first. */
+ mlx5_crypto_gcm_umr_cq_poll(qp);
+ do {
+ idx = next_idx;
+ next_idx = (qp->ci + 1) & mask;
+ op = qp->ops[idx];
+ cqe = &qp->cq_obj.cqes[idx];
+ ret = check_cqe(cqe, cq_size, qp->ci);
+ rte_io_rmb();
+ if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
+ if (unlikely(ret != MLX5_CQE_STATUS_HW_OWN))
+ mlx5_crypto_gcm_cqe_err_handle(qp, op);
+ break;
+ }
+ op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+ ops[i++] = op;
+ qp->ci++;
+ } while (i < max);
+ if (likely(i != 0)) {
+ rte_io_wmb();
+ qp->cq_obj.db_rec[0] = rte_cpu_to_be_32(qp->ci);
+ qp->stats.dequeued_count += i;
+ }
+ return i;
+}
+
int
mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
{
@@ -386,6 +785,8 @@ mlx5_crypto_gcm_init(struct mlx5_crypto_priv *priv)
dev_ops->sym_session_configure = mlx5_crypto_sym_gcm_session_configure;
dev_ops->queue_pair_setup = mlx5_crypto_gcm_qp_setup;
dev_ops->queue_pair_release = mlx5_crypto_gcm_qp_release;
+ crypto_dev->dequeue_burst = mlx5_crypto_gcm_dequeue_burst;
+ crypto_dev->enqueue_burst = mlx5_crypto_gcm_enqueue_burst;
/* Generate GCM capability. */
ret = mlx5_crypto_generate_gcm_cap(&cdev->config.hca_attr.crypto_mmo,
mlx5_crypto_gcm_caps);