[v2,3/5] crypto/openssl: per-qp cipher context clones

Message ID 20240603185939.1312680-4-jack.bond-preston@foss.arm.com (mailing list archive)
State Superseded
Delegated to: akhil goyal
Headers
Series OpenSSL PMD Optimisations |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Jack Bond-Preston June 3, 2024, 6:59 p.m. UTC
Currently, an EVP_CIPHER_CTX is allocated, populated by copying from the
context held in openssl_session, and then freed for every cipher operation
(i.e. per packet). This is very inefficient and avoidable.

Make each openssl_session hold an array of pointers to per-queue-pair
cipher context copies. These are populated on first use by allocating a
new context and copying from the main context. These copies can then be
used in a thread-safe manner by different worker lcores simultaneously.
Consequently the cipher context allocation and copy only has to happen
once - the first time a given qp uses an openssl_session. This brings
about a large performance boost.

Throughput performance uplift measurements for AES-CBC-128 encrypt on
Ampere Altra Max platform:
1 worker lcore
|   buffer sz (B) |   prev (Gbps) |   optimised (Gbps) |   uplift |
|-----------------+---------------+--------------------+----------|
|              64 |          1.51 |               2.94 |    94.4% |
|             256 |          4.90 |               8.05 |    64.3% |
|            1024 |         11.07 |              14.21 |    28.3% |
|            2048 |         14.03 |              16.28 |    16.0% |
|            4096 |         16.20 |              17.59 |     8.6% |

8 worker lcores
|   buffer sz (B) |   prev (Gbps) |   optimised (Gbps) |   uplift |
|-----------------+---------------+--------------------+----------|
|              64 |          3.05 |              23.74 |   678.8% |
|             256 |         10.46 |              64.86 |   520.3% |
|            1024 |         40.97 |             113.80 |   177.7% |
|            2048 |         73.25 |             130.21 |    77.8% |
|            4096 |        103.89 |             140.62 |    35.4% |

Signed-off-by: Jack Bond-Preston <jack.bond-preston@foss.arm.com>
Reviewed-by: Wathsala Vithanage <wathsala.vithanage@arm.com>
---
 drivers/crypto/openssl/openssl_pmd_private.h | 11 ++-
 drivers/crypto/openssl/rte_openssl_pmd.c     | 78 ++++++++++++++------
 drivers/crypto/openssl/rte_openssl_pmd_ops.c | 34 ++++++++-
 3 files changed, 94 insertions(+), 29 deletions(-)
  

Patch

diff --git a/drivers/crypto/openssl/openssl_pmd_private.h b/drivers/crypto/openssl/openssl_pmd_private.h
index 0f038b218c..bad7dcf2f5 100644
--- a/drivers/crypto/openssl/openssl_pmd_private.h
+++ b/drivers/crypto/openssl/openssl_pmd_private.h
@@ -166,6 +166,14 @@  struct __rte_cache_aligned openssl_session {
 		/**< digest length */
 	} auth;
 
+	uint16_t ctx_copies_len;
+	/**< number of entries in qp_ctx */
+	EVP_CIPHER_CTX *qp_ctx[];
+	/**< Flexible array member of per-queue-pair pointers to copies of EVP
+	 * context structure. Cipher contexts are not safe to use from multiple
+	 * cores simultaneously, so maintaining these copies allows avoiding
+	 * per-buffer copying into a temporary context.
+	 */
 };
 
 /** OPENSSL crypto private asymmetric session structure */
@@ -217,7 +225,8 @@  struct __rte_cache_aligned openssl_asym_session {
 /** Set and validate OPENSSL crypto session parameters */
 extern int
 openssl_set_session_parameters(struct openssl_session *sess,
-		const struct rte_crypto_sym_xform *xform);
+		const struct rte_crypto_sym_xform *xform,
+		uint16_t nb_queue_pairs);
 
 /** Reset OPENSSL crypto session parameters */
 extern void
diff --git a/drivers/crypto/openssl/rte_openssl_pmd.c b/drivers/crypto/openssl/rte_openssl_pmd.c
index 175ffda2b9..ebd1cab667 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd.c
@@ -788,7 +788,8 @@  openssl_set_session_aead_parameters(struct openssl_session *sess,
 /** Parse crypto xform chain and set private session parameters */
 int
 openssl_set_session_parameters(struct openssl_session *sess,
-		const struct rte_crypto_sym_xform *xform)
+		const struct rte_crypto_sym_xform *xform,
+		uint16_t nb_queue_pairs)
 {
 	const struct rte_crypto_sym_xform *cipher_xform = NULL;
 	const struct rte_crypto_sym_xform *auth_xform = NULL;
@@ -850,6 +851,12 @@  openssl_set_session_parameters(struct openssl_session *sess,
 		}
 	}
 
+	/*
+	 * With only one queue pair, the array of copies is not needed.
+	 * Otherwise, one entry per queue pair is required.
+	 */
+	sess->ctx_copies_len = nb_queue_pairs > 1 ? nb_queue_pairs : 0;
+
 	return 0;
 }
 
@@ -857,6 +864,13 @@  openssl_set_session_parameters(struct openssl_session *sess,
 void
 openssl_reset_session(struct openssl_session *sess)
 {
+	for (uint16_t i = 0; i < sess->ctx_copies_len; i++) {
+		if (sess->qp_ctx[i] != NULL) {
+			EVP_CIPHER_CTX_free(sess->qp_ctx[i]);
+			sess->qp_ctx[i] = NULL;
+		}
+	}
+
 	EVP_CIPHER_CTX_free(sess->cipher.ctx);
 
 	if (sess->chain_order == OPENSSL_CHAIN_CIPHER_BPI)
@@ -923,7 +937,7 @@  get_session(struct openssl_qp *qp, struct rte_crypto_op *op)
 		sess = (struct openssl_session *)_sess->driver_priv_data;
 
 		if (unlikely(openssl_set_session_parameters(sess,
-				op->sym->xform) != 0)) {
+				op->sym->xform, 1) != 0)) {
 			rte_mempool_put(qp->sess_mp, _sess);
 			sess = NULL;
 		}
@@ -1571,11 +1585,33 @@  process_openssl_auth_cmac(struct rte_mbuf *mbuf_src, uint8_t *dst, int offset,
 # endif
 /*----------------------------------------------------------------------------*/
 
+static inline EVP_CIPHER_CTX *
+get_local_cipher_ctx(struct openssl_session *sess, struct openssl_qp *qp)
+{
+	/* If the array is not being used, just return the main context. */
+	if (sess->ctx_copies_len == 0)
+		return sess->cipher.ctx;
+
+	EVP_CIPHER_CTX **lctx = &sess->qp_ctx[qp->id];
+
+	if (unlikely(*lctx == NULL)) {
+#if OPENSSL_VERSION_NUMBER >= 0x30200000L
+		/* EVP_CIPHER_CTX_dup() added in OSSL 3.2 */
+		*lctx = EVP_CIPHER_CTX_dup(sess->cipher.ctx);
+#else
+		*lctx = EVP_CIPHER_CTX_new();
+		EVP_CIPHER_CTX_copy(*lctx, sess->cipher.ctx);
+#endif
+	}
+
+	return *lctx;
+}
+
 /** Process auth/cipher combined operation */
 static void
-process_openssl_combined_op
-		(struct rte_crypto_op *op, struct openssl_session *sess,
-		struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)
+process_openssl_combined_op(struct openssl_qp *qp, struct rte_crypto_op *op,
+		struct openssl_session *sess, struct rte_mbuf *mbuf_src,
+		struct rte_mbuf *mbuf_dst)
 {
 	/* cipher */
 	uint8_t *dst = NULL, *iv, *tag, *aad;
@@ -1592,8 +1628,7 @@  process_openssl_combined_op
 		return;
 	}
 
-	EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
-	EVP_CIPHER_CTX_copy(ctx, sess->cipher.ctx);
+	EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);
 
 	iv = rte_crypto_op_ctod_offset(op, uint8_t *,
 			sess->iv.offset);
@@ -1649,8 +1684,6 @@  process_openssl_combined_op
 					dst, tag, taglen, ctx);
 	}
 
-	EVP_CIPHER_CTX_free(ctx);
-
 	if (status != 0) {
 		if (status == (-EFAULT) &&
 				sess->auth.operation ==
@@ -1663,14 +1696,13 @@  process_openssl_combined_op
 
 /** Process cipher operation */
 static void
-process_openssl_cipher_op
-		(struct rte_crypto_op *op, struct openssl_session *sess,
-		struct rte_mbuf *mbuf_src, struct rte_mbuf *mbuf_dst)
+process_openssl_cipher_op(struct openssl_qp *qp, struct rte_crypto_op *op,
+		struct openssl_session *sess, struct rte_mbuf *mbuf_src,
+		struct rte_mbuf *mbuf_dst)
 {
 	uint8_t *dst, *iv;
 	int srclen, status;
 	uint8_t inplace = (mbuf_src == mbuf_dst) ? 1 : 0;
-	EVP_CIPHER_CTX *ctx_copy;
 
 	/*
 	 * Segmented OOP destination buffer is not supported for encryption/
@@ -1689,24 +1721,22 @@  process_openssl_cipher_op
 
 	iv = rte_crypto_op_ctod_offset(op, uint8_t *,
 			sess->iv.offset);
-	ctx_copy = EVP_CIPHER_CTX_new();
-	EVP_CIPHER_CTX_copy(ctx_copy, sess->cipher.ctx);
+
+	EVP_CIPHER_CTX *ctx = get_local_cipher_ctx(sess, qp);
 
 	if (sess->cipher.mode == OPENSSL_CIPHER_LIB)
 		if (sess->cipher.direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
 			status = process_openssl_cipher_encrypt(mbuf_src, dst,
 					op->sym->cipher.data.offset, iv,
-					srclen, ctx_copy, inplace);
+					srclen, ctx, inplace);
 		else
 			status = process_openssl_cipher_decrypt(mbuf_src, dst,
 					op->sym->cipher.data.offset, iv,
-					srclen, ctx_copy, inplace);
+					srclen, ctx, inplace);
 	else
 		status = process_openssl_cipher_des3ctr(mbuf_src, dst,
-				op->sym->cipher.data.offset, iv, srclen,
-				ctx_copy);
+				op->sym->cipher.data.offset, iv, srclen, ctx);
 
-	EVP_CIPHER_CTX_free(ctx_copy);
 	if (status != 0)
 		op->status = RTE_CRYPTO_OP_STATUS_ERROR;
 }
@@ -3111,13 +3141,13 @@  process_op(struct openssl_qp *qp, struct rte_crypto_op *op,
 
 	switch (sess->chain_order) {
 	case OPENSSL_CHAIN_ONLY_CIPHER:
-		process_openssl_cipher_op(op, sess, msrc, mdst);
+		process_openssl_cipher_op(qp, op, sess, msrc, mdst);
 		break;
 	case OPENSSL_CHAIN_ONLY_AUTH:
 		process_openssl_auth_op(qp, op, sess, msrc, mdst);
 		break;
 	case OPENSSL_CHAIN_CIPHER_AUTH:
-		process_openssl_cipher_op(op, sess, msrc, mdst);
+		process_openssl_cipher_op(qp, op, sess, msrc, mdst);
 		/* OOP */
 		if (msrc != mdst)
 			copy_plaintext(msrc, mdst, op);
@@ -3125,10 +3155,10 @@  process_op(struct openssl_qp *qp, struct rte_crypto_op *op,
 		break;
 	case OPENSSL_CHAIN_AUTH_CIPHER:
 		process_openssl_auth_op(qp, op, sess, msrc, mdst);
-		process_openssl_cipher_op(op, sess, msrc, mdst);
+		process_openssl_cipher_op(qp, op, sess, msrc, mdst);
 		break;
 	case OPENSSL_CHAIN_COMBINED:
-		process_openssl_combined_op(op, sess, msrc, mdst);
+		process_openssl_combined_op(qp, op, sess, msrc, mdst);
 		break;
 	case OPENSSL_CHAIN_CIPHER_BPI:
 		process_openssl_docsis_bpi_op(op, sess, msrc, mdst);
diff --git a/drivers/crypto/openssl/rte_openssl_pmd_ops.c b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
index b16baaa08f..4209c6ab6f 100644
--- a/drivers/crypto/openssl/rte_openssl_pmd_ops.c
+++ b/drivers/crypto/openssl/rte_openssl_pmd_ops.c
@@ -794,9 +794,34 @@  openssl_pmd_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
 
 /** Returns the size of the symmetric session structure */
 static unsigned
-openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev __rte_unused)
+openssl_pmd_sym_session_get_size(struct rte_cryptodev *dev)
 {
-	return sizeof(struct openssl_session);
+	/*
+	 * For 0 qps, return the max size of the session - this is necessary if
+	 * the user calls into this function to create the session mempool,
+	 * without first configuring the number of qps for the cryptodev.
+	 */
+	if (dev->data->nb_queue_pairs == 0) {
+		unsigned int max_nb_qps = ((struct openssl_private *)
+				dev->data->dev_private)->max_nb_qpairs;
+		return sizeof(struct openssl_session) +
+				(sizeof(void *) * max_nb_qps);
+	}
+
+	/*
+	 * With only one queue pair, per-qp context copies are not needed for
+	 * thread safety, so don't allocate extra memory for the array of
+	 * pointers.
+	 */
+	if (dev->data->nb_queue_pairs == 1)
+		return sizeof(struct openssl_session);
+
+	/*
+	 * Otherwise, the size of the flexible array member should be enough to
+	 * fit pointers to per-qp contexts.
+	 */
+	return sizeof(struct openssl_session) +
+		(sizeof(void *) * dev->data->nb_queue_pairs);
 }
 
 /** Returns the size of the asymmetric session structure */
@@ -808,7 +833,7 @@  openssl_pmd_asym_session_get_size(struct rte_cryptodev *dev __rte_unused)
 
 /** Configure the session from a crypto xform chain */
 static int
-openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,
+openssl_pmd_sym_session_configure(struct rte_cryptodev *dev,
 		struct rte_crypto_sym_xform *xform,
 		struct rte_cryptodev_sym_session *sess)
 {
@@ -820,7 +845,8 @@  openssl_pmd_sym_session_configure(struct rte_cryptodev *dev __rte_unused,
 		return -EINVAL;
 	}
 
-	ret = openssl_set_session_parameters(sess_private_data, xform);
+	ret = openssl_set_session_parameters(sess_private_data, xform,
+			dev->data->nb_queue_pairs);
 	if (ret != 0) {
 		OPENSSL_LOG(ERR, "failed configure session parameters");