[v4,06/10] ipsec: implement SA data-path API

Message ID 1544804589-10338-6-git-send-email-konstantin.ananyev@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series [v3,1/9] cryptodev: add opaque userdata pointer into crypto sym session

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Ananyev, Konstantin Dec. 14, 2018, 4:23 p.m. UTC
  Provide implementation for rte_ipsec_pkt_crypto_prepare() and
rte_ipsec_pkt_process().
Current implementation:
 - supports ESP protocol tunnel mode.
 - supports ESP protocol transport mode.
 - supports ESN and replay window.
 - supports algorithms: AES-CBC, AES-GCM, HMAC-SHA1, NULL.
 - covers all currently defined security session types:
        - RTE_SECURITY_ACTION_TYPE_NONE
        - RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO
        - RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL
        - RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL

For the first two types, the SQN check/update is done in SW (inside the library).
For the last two types, it is the HW/PMD's responsibility.

Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
Acked-by: Declan Doherty <declan.doherty@intel.com>
---
 lib/librte_ipsec/crypto.h    |  123 ++++
 lib/librte_ipsec/iph.h       |   84 +++
 lib/librte_ipsec/ipsec_sqn.h |  186 ++++++
 lib/librte_ipsec/pad.h       |   45 ++
 lib/librte_ipsec/sa.c        | 1044 +++++++++++++++++++++++++++++++++-
 5 files changed, 1480 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_ipsec/crypto.h
 create mode 100644 lib/librte_ipsec/iph.h
 create mode 100644 lib/librte_ipsec/pad.h
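
For context, a minimal sketch of how the two entry points implemented by this
patch are meant to be used on a lookaside-none data path. The session/device
setup, the dev_id/qp/op_pool identifiers and the synchronous enqueue/dequeue
flow are illustrative assumptions, not part of the patch:

#include <rte_cryptodev.h>
#include <rte_ipsec.h>
#include <rte_mbuf.h>

/* sketch: prepare -> enqueue to cryptodev -> dequeue -> process */
static uint16_t
ipsec_lksd_none_burst(const struct rte_ipsec_session *ss, uint8_t dev_id,
	uint16_t qp, struct rte_mempool *op_pool,
	struct rte_mbuf *mb[], uint16_t num)
{
	struct rte_crypto_op *cop[num];
	uint16_t k, n;

	/* allocate crypto ops to be filled from SA and packet contents */
	if (rte_crypto_op_bulk_alloc(op_pool, RTE_CRYPTO_OP_TYPE_SYMMETRIC,
			cop, num) != num)
		return 0;

	/* fill crypto ops; failed packets are moved beyond the first k */
	k = rte_ipsec_pkt_crypto_prepare(ss, mb, cop, num);

	/* hand the prepared ops to the crypto PMD */
	n = rte_cryptodev_enqueue_burst(dev_id, qp, cop, k);

	/* simplified: a real application dequeues asynchronously, regroups
	 * mbufs from the dequeued ops (cop[i]->sym->m_src) per session and
	 * frees the crypto ops afterwards */
	n = rte_cryptodev_dequeue_burst(dev_id, qp, cop, n);

	/* final ESP-related changes: strip headers/trailer, check status,
	 * update SQN/replay window where applicable */
	return rte_ipsec_pkt_process(ss, mb, n);
}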
  

Comments

Akhil Goyal Dec. 19, 2018, 3:32 p.m. UTC | #1
On 12/14/2018 9:53 PM, Konstantin Ananyev wrote:
> Provide implementation for rte_ipsec_pkt_crypto_prepare() and
> rte_ipsec_pkt_process().
> Current implementation:
>   - supports ESP protocol tunnel mode.
>   - supports ESP protocol transport mode.
>   - supports ESN and replay window.
>   - supports algorithms: AES-CBC, AES-GCM, HMAC-SHA1, NULL.
>   - covers all currently defined security session types:
>          - RTE_SECURITY_ACTION_TYPE_NONE
>          - RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO
>          - RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL
>          - RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL
>
> For first two types SQN check/update is done by SW (inside the library).
> For last two type it is HW/PMD responsibility.
>
> Signed-off-by: Mohammad Abdul Awal <mohammad.abdul.awal@intel.com>
> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
> Acked-by: Declan Doherty <declan.doherty@intel.com>
> ---
>   lib/librte_ipsec/crypto.h    |  123 ++++
>   lib/librte_ipsec/iph.h       |   84 +++
>   lib/librte_ipsec/ipsec_sqn.h |  186 ++++++
>   lib/librte_ipsec/pad.h       |   45 ++
>   lib/librte_ipsec/sa.c        | 1044 +++++++++++++++++++++++++++++++++-
>   5 files changed, 1480 insertions(+), 2 deletions(-)
>   create mode 100644 lib/librte_ipsec/crypto.h
>   create mode 100644 lib/librte_ipsec/iph.h
>   create mode 100644 lib/librte_ipsec/pad.h
>
> diff --git a/lib/librte_ipsec/crypto.h b/lib/librte_ipsec/crypto.h
> new file mode 100644
> index 000000000..61f5c1433
> --- /dev/null
> +++ b/lib/librte_ipsec/crypto.h
> @@ -0,0 +1,123 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#ifndef _CRYPTO_H_
> +#define _CRYPTO_H_
> +
> +/**
> + * @file crypto.h
> + * Contains crypto specific functions/structures/macros used internally
> + * by ipsec library.
> + */
> +
> + /*
> +  * AES-GCM devices have some specific requirements for IV and AAD formats.
> +  * Ideally that to be done by the driver itself.
> +  */
I believe these can be moved to rte_crypto_sym.h. All crypto-related
stuff should be in the same place.
> +
> +struct aead_gcm_iv {
> +	uint32_t salt;
> +	uint64_t iv;
> +	uint32_t cnt;
> +} __attribute__((packed));
> +
> +struct aead_gcm_aad {
> +	uint32_t spi;
> +	/*
> +	 * RFC 4106, section 5:
> +	 * Two formats of the AAD are defined:
> +	 * one for 32-bit sequence numbers, and one for 64-bit ESN.
> +	 */
> +	union {
> +		uint32_t u32[2];
> +		uint64_t u64;
> +	} sqn;
> +	uint32_t align0; /* align to 16B boundary */
> +} __attribute__((packed));
> +
> +struct gcm_esph_iv {
> +	struct esp_hdr esph;
> +	uint64_t iv;
> +} __attribute__((packed));
> +
> +
> +static inline void
> +aead_gcm_iv_fill(struct aead_gcm_iv *gcm, uint64_t iv, uint32_t salt)
> +{
> +	gcm->salt = salt;
> +	gcm->iv = iv;
> +	gcm->cnt = rte_cpu_to_be_32(1);
> +}
> +
> +/*
> + * RFC 4106, 5 AAD Construction
> + * spi and sqn should already be converted into network byte order.
> + * Make sure that not used bytes are zeroed.
> + */
> +static inline void
> +aead_gcm_aad_fill(struct aead_gcm_aad *aad, rte_be32_t spi, rte_be64_t sqn,
> +	int esn)
> +{
> +	aad->spi = spi;
> +	if (esn)
> +		aad->sqn.u64 = sqn;
> +	else {
> +		aad->sqn.u32[0] = sqn_low32(sqn);
> +		aad->sqn.u32[1] = 0;
> +	}
> +	aad->align0 = 0;
> +}
> +
> +static inline void
> +gen_iv(uint64_t iv[IPSEC_MAX_IV_QWORD], rte_be64_t sqn)
> +{
> +	iv[0] = sqn;
> +	iv[1] = 0;
> +}
> +
> +/*
> + * from RFC 4303 3.3.2.1.4:
> + * If the ESN option is enabled for the SA, the high-order 32
> + * bits of the sequence number are appended after the Next Header field
> + * for purposes of this computation, but are not transmitted.
> + */
> +
> +/*
> + * Helper function that moves ICV by 4B below, and inserts SQN.hibits.
> + * icv parameter points to the new start of ICV.
> + */
> +static inline void
> +insert_sqh(uint32_t sqh, void *picv, uint32_t icv_len)
> +{
> +	uint32_t *icv;
> +	int32_t i;
> +
> +	RTE_ASSERT(icv_len % sizeof(uint32_t) == 0);
> +
> +	icv = picv;
> +	icv_len = icv_len / sizeof(uint32_t);
> +	for (i = icv_len; i-- != 0; icv[i] = icv[i - 1])
> +		;
> +
> +	icv[i] = sqh;
> +}
> +
> +/*
> + * Helper function that moves ICV by 4B up, and removes SQN.hibits.
> + * icv parameter points to the new start of ICV.
> + */
> +static inline void
> +remove_sqh(void *picv, uint32_t icv_len)
> +{
> +	uint32_t i, *icv;
> +
> +	RTE_ASSERT(icv_len % sizeof(uint32_t) == 0);
> +
> +	icv = picv;
> +	icv_len = icv_len / sizeof(uint32_t);
> +	for (i = 0; i != icv_len; i++)
> +		icv[i] = icv[i + 1];
> +}
> +
> +#endif /* _CRYPTO_H_ */
> diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h
> new file mode 100644
> index 000000000..3fd93016d
> --- /dev/null
> +++ b/lib/librte_ipsec/iph.h
> @@ -0,0 +1,84 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#ifndef _IPH_H_
> +#define _IPH_H_
> +
> +/**
> + * @file iph.h
> + * Contains functions/structures/macros to manipulate IPv/IPv6 headers
IPv4
> + * used internally by ipsec library.
> + */
> +
> +/*
> + * Move preceding (L3) headers down to remove ESP header and IV.
> + */
Why can't we use the rte_mbuf APIs to append/prepend/trim/adjust lengths?
I believe these adjustments are happening in the mbuf itself.
Moreover, these APIs are not specific to ESP headers.
> +static inline void
> +remove_esph(char *np, char *op, uint32_t hlen)
> +{
> +	uint32_t i;
> +
> +	for (i = hlen; i-- != 0; np[i] = op[i])
> +		;
> +}
> +
> +/*
> + * Move preceding (L3) headers up to free space for ESP header and IV.
> + */
> +static inline void
> +insert_esph(char *np, char *op, uint32_t hlen)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i != hlen; i++)
> +		np[i] = op[i];
> +}
> +
> +/* update original ip header fields for trasnport case */
spell check
> +static inline int
> +update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> +		uint32_t l2len, uint32_t l3len, uint8_t proto)
> +{
> +	struct ipv4_hdr *v4h;
> +	struct ipv6_hdr *v6h;
> +	int32_t rc;
> +
> +	if ((sa->type & RTE_IPSEC_SATP_IPV_MASK) == RTE_IPSEC_SATP_IPV4) {
> +		v4h = p;
> +		rc = v4h->next_proto_id;
> +		v4h->next_proto_id = proto;
> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> +	} else if (l3len == sizeof(*v6h)) {
> +		v6h = p;
> +		rc = v6h->proto;
> +		v6h->proto = proto;
> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
> +				sizeof(*v6h));
> +	/* need to add support for IPv6 with options */
> +	} else
> +		rc = -ENOTSUP;
> +
> +	return rc;
> +}
> +
> +/* update original and new ip header fields for tunnel case */
> +static inline void
> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> +		uint32_t l2len, rte_be16_t pid)
> +{
> +	struct ipv4_hdr *v4h;
> +	struct ipv6_hdr *v6h;
> +
> +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
> +		v4h = p;
> +		v4h->packet_id = pid;
> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
Where are we updating the rest of the fields, like TTL, checksum, IP
addresses, etc.?
> +	} else {
> +		v6h = p;
> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
> +				sizeof(*v6h));
> +	}
> +}
> +
> +#endif /* _IPH_H_ */
> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
> index 1935f6e30..6e18c34eb 100644
> --- a/lib/librte_ipsec/ipsec_sqn.h
> +++ b/lib/librte_ipsec/ipsec_sqn.h
> @@ -15,6 +15,45 @@
>   
>   #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
>   
> +/*
> + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
> + */
> +static inline rte_be32_t
> +sqn_hi32(rte_be64_t sqn)
> +{
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +	return (sqn >> 32);
> +#else
> +	return sqn;
> +#endif
> +}
> +
> +/*
> + * gets SQN.low32 bits, SQN supposed to be in network byte order.
> + */
> +static inline rte_be32_t
> +sqn_low32(rte_be64_t sqn)
> +{
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +	return sqn;
> +#else
> +	return (sqn >> 32);
> +#endif
> +}
> +
> +/*
> + * gets SQN.low16 bits, SQN supposed to be in network byte order.
> + */
> +static inline rte_be16_t
> +sqn_low16(rte_be64_t sqn)
> +{
> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> +	return sqn;
> +#else
> +	return (sqn >> 48);
> +#endif
> +}
> +
Shouldn't we move these sequence number APIs into rte_esp.h and make them generic?
>   /*
>    * for given size, calculate required number of buckets.
>    */
> @@ -30,6 +69,153 @@ replay_num_bucket(uint32_t wsz)
>   	return nb;
>   }
>   
> +/*
> + * According to RFC4303 A2.1, determine the high-order bit of sequence number.
> + * use 32bit arithmetic inside, return uint64_t.
> + */
> +static inline uint64_t
> +reconstruct_esn(uint64_t t, uint32_t sqn, uint32_t w)
> +{
> +	uint32_t th, tl, bl;
> +
> +	tl = t;
> +	th = t >> 32;
> +	bl = tl - w + 1;
> +
> +	/* case A: window is within one sequence number subspace */
> +	if (tl >= (w - 1))
> +		th += (sqn < bl);
> +	/* case B: window spans two sequence number subspaces */
> +	else if (th != 0)
> +		th -= (sqn >= bl);
> +
> +	/* return constructed sequence with proper high-order bits */
> +	return (uint64_t)th << 32 | sqn;
> +}
> +
> +/**
> + * Perform the replay checking.
> + *
> + * struct rte_ipsec_sa contains the window and window related parameters,
> + * such as the window size, bitmask, and the last acknowledged sequence number.
> + *
> + * Based on RFC 6479.
> + * Blocks are 64 bits unsigned integers
> + */
> +static inline int32_t
> +esn_inb_check_sqn(const struct replay_sqn *rsn, const struct rte_ipsec_sa *sa,
> +	uint64_t sqn)
> +{
> +	uint32_t bit, bucket;
> +
> +	/* replay not enabled */
> +	if (sa->replay.win_sz == 0)
> +		return 0;
> +
> +	/* seq is larger than lastseq */
> +	if (sqn > rsn->sqn)
> +		return 0;
> +
> +	/* seq is outside window */
> +	if (sqn == 0 || sqn + sa->replay.win_sz < rsn->sqn)
> +		return -EINVAL;
> +
> +	/* seq is inside the window */
> +	bit = sqn & WINDOW_BIT_LOC_MASK;
> +	bucket = (sqn >> WINDOW_BUCKET_BITS) & sa->replay.bucket_index_mask;
> +
> +	/* already seen packet */
> +	if (rsn->window[bucket] & ((uint64_t)1 << bit))
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +/**
> + * For outbound SA perform the sequence number update.
> + */
> +static inline uint64_t
> +esn_outb_update_sqn(struct rte_ipsec_sa *sa, uint32_t *num)
> +{
> +	uint64_t n, s, sqn;
> +
> +	n = *num;
> +	sqn = sa->sqn.outb + n;
> +	sa->sqn.outb = sqn;
> +
> +	/* overflow */
> +	if (sqn > sa->sqn_mask) {
> +		s = sqn - sa->sqn_mask;
> +		*num = (s < n) ?  n - s : 0;
> +	}
> +
> +	return sqn - n;
> +}
> +
> +/**
> + * For inbound SA perform the sequence number and replay window update.
> + */
> +static inline int32_t
> +esn_inb_update_sqn(struct replay_sqn *rsn, const struct rte_ipsec_sa *sa,
> +	uint64_t sqn)
> +{
> +	uint32_t bit, bucket, last_bucket, new_bucket, diff, i;
> +
> +	/* replay not enabled */
> +	if (sa->replay.win_sz == 0)
> +		return 0;
> +
> +	/* handle ESN */
> +	if (IS_ESN(sa))
> +		sqn = reconstruct_esn(rsn->sqn, sqn, sa->replay.win_sz);
> +
> +	/* seq is outside window*/
> +	if (sqn == 0 || sqn + sa->replay.win_sz < rsn->sqn)
> +		return -EINVAL;
> +
> +	/* update the bit */
> +	bucket = (sqn >> WINDOW_BUCKET_BITS);
> +
> +	/* check if the seq is within the range */
> +	if (sqn > rsn->sqn) {
> +		last_bucket = rsn->sqn >> WINDOW_BUCKET_BITS;
> +		diff = bucket - last_bucket;
> +		/* seq is way after the range of WINDOW_SIZE */
> +		if (diff > sa->replay.nb_bucket)
> +			diff = sa->replay.nb_bucket;
> +
> +		for (i = 0; i != diff; i++) {
> +			new_bucket = (i + last_bucket + 1) &
> +				sa->replay.bucket_index_mask;
> +			rsn->window[new_bucket] = 0;
> +		}
> +		rsn->sqn = sqn;
> +	}
> +
> +	bucket &= sa->replay.bucket_index_mask;
> +	bit = (uint64_t)1 << (sqn & WINDOW_BIT_LOC_MASK);
> +
> +	/* already seen packet */
> +	if (rsn->window[bucket] & bit)
> +		return -EINVAL;
> +
> +	rsn->window[bucket] |= bit;
> +	return 0;
> +}
> +
> +/**
> + * To achieve ability to do multiple readers single writer for
> + * SA replay window information and sequence number (RSN)
> + * basic RCU schema is used:
> + * SA have 2 copies of RSN (one for readers, another for writers).
> + * Each RSN contains a rwlock that has to be grabbed (for read/write)
> + * to avoid races between readers and writer.
> + * Writer is responsible to make a copy or reader RSN, update it
> + * and mark newly updated RSN as readers one.
> + * That approach is intended to minimize contention and cache sharing
> + * between writer and readers.
> + */
> +
>   /**
>    * Based on number of buckets calculated required size for the
>    * structure that holds replay window and sequence number (RSN) information.
> diff --git a/lib/librte_ipsec/pad.h b/lib/librte_ipsec/pad.h
> new file mode 100644
> index 000000000..2f5ccd00e
> --- /dev/null
> +++ b/lib/librte_ipsec/pad.h
> @@ -0,0 +1,45 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2018 Intel Corporation
> + */
> +
> +#ifndef _PAD_H_
> +#define _PAD_H_
> +
> +#define IPSEC_MAX_PAD_SIZE	UINT8_MAX
> +
> +static const uint8_t esp_pad_bytes[IPSEC_MAX_PAD_SIZE] = {
> +	1, 2, 3, 4, 5, 6, 7, 8,
> +	9, 10, 11, 12, 13, 14, 15, 16,
> +	17, 18, 19, 20, 21, 22, 23, 24,
> +	25, 26, 27, 28, 29, 30, 31, 32,
> +	33, 34, 35, 36, 37, 38, 39, 40,
> +	41, 42, 43, 44, 45, 46, 47, 48,
> +	49, 50, 51, 52, 53, 54, 55, 56,
> +	57, 58, 59, 60, 61, 62, 63, 64,
> +	65, 66, 67, 68, 69, 70, 71, 72,
> +	73, 74, 75, 76, 77, 78, 79, 80,
> +	81, 82, 83, 84, 85, 86, 87, 88,
> +	89, 90, 91, 92, 93, 94, 95, 96,
> +	97, 98, 99, 100, 101, 102, 103, 104,
> +	105, 106, 107, 108, 109, 110, 111, 112,
> +	113, 114, 115, 116, 117, 118, 119, 120,
> +	121, 122, 123, 124, 125, 126, 127, 128,
> +	129, 130, 131, 132, 133, 134, 135, 136,
> +	137, 138, 139, 140, 141, 142, 143, 144,
> +	145, 146, 147, 148, 149, 150, 151, 152,
> +	153, 154, 155, 156, 157, 158, 159, 160,
> +	161, 162, 163, 164, 165, 166, 167, 168,
> +	169, 170, 171, 172, 173, 174, 175, 176,
> +	177, 178, 179, 180, 181, 182, 183, 184,
> +	185, 186, 187, 188, 189, 190, 191, 192,
> +	193, 194, 195, 196, 197, 198, 199, 200,
> +	201, 202, 203, 204, 205, 206, 207, 208,
> +	209, 210, 211, 212, 213, 214, 215, 216,
> +	217, 218, 219, 220, 221, 222, 223, 224,
> +	225, 226, 227, 228, 229, 230, 231, 232,
> +	233, 234, 235, 236, 237, 238, 239, 240,
> +	241, 242, 243, 244, 245, 246, 247, 248,
> +	249, 250, 251, 252, 253, 254, 255,
> +};
> +
> +#endif /* _PAD_H_ */
> diff --git a/lib/librte_ipsec/sa.c b/lib/librte_ipsec/sa.c
> index e4c5361e7..bb56f42eb 100644
> --- a/lib/librte_ipsec/sa.c
> +++ b/lib/librte_ipsec/sa.c
> @@ -6,9 +6,13 @@
>   #include <rte_esp.h>
>   #include <rte_ip.h>
>   #include <rte_errno.h>
> +#include <rte_cryptodev.h>
>   
>   #include "sa.h"
>   #include "ipsec_sqn.h"
> +#include "crypto.h"
> +#include "iph.h"
> +#include "pad.h"
>   
>   /* some helper structures */
>   struct crypto_xform {
> @@ -207,6 +211,7 @@ esp_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
>   		/* RFC 4106 */
>   		if (cxf->aead->algo != RTE_CRYPTO_AEAD_AES_GCM)
>   			return -EINVAL;
> +		sa->aad_len = sizeof(struct aead_gcm_aad);
>   		sa->icv_len = cxf->aead->digest_length;
>   		sa->iv_ofs = cxf->aead->iv.offset;
>   		sa->iv_len = sizeof(uint64_t);
> @@ -326,18 +331,1053 @@ rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
>   	return sz;
>   }
>   
> +static inline void
> +mbuf_bulk_copy(struct rte_mbuf *dst[], struct rte_mbuf * const src[],
> +	uint32_t num)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i != num; i++)
> +		dst[i] = src[i];
> +}
> +
> +static inline void
> +lksd_none_cop_prepare(const struct rte_ipsec_session *ss,
> +	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
> +{
> +	uint32_t i;
> +	struct rte_crypto_sym_op *sop;
> +
> +	for (i = 0; i != num; i++) {
> +		sop = cop[i]->sym;
> +		cop[i]->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
> +		cop[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
> +		cop[i]->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
> +		sop->m_src = mb[i];
> +		__rte_crypto_sym_op_attach_sym_session(sop, ss->crypto.ses);
> +	}
> +}
> +
> +static inline void
> +esp_outb_cop_prepare(struct rte_crypto_op *cop,
> +	const struct rte_ipsec_sa *sa, const uint64_t ivp[IPSEC_MAX_IV_QWORD],
> +	const union sym_op_data *icv, uint32_t hlen, uint32_t plen)
> +{
> +	struct rte_crypto_sym_op *sop;
> +	struct aead_gcm_iv *gcm;
> +
> +	/* fill sym op fields */
> +	sop = cop->sym;
> +
> +	/* AEAD (AES_GCM) case */
> +	if (sa->aad_len != 0) {
> +		sop->aead.data.offset = sa->ctp.cipher.offset + hlen;
> +		sop->aead.data.length = sa->ctp.cipher.length + plen;
> +		sop->aead.digest.data = icv->va;
> +		sop->aead.digest.phys_addr = icv->pa;
> +		sop->aead.aad.data = icv->va + sa->icv_len;
> +		sop->aead.aad.phys_addr = icv->pa + sa->icv_len;
> +
> +		/* fill AAD IV (located inside crypto op) */
> +		gcm = rte_crypto_op_ctod_offset(cop, struct aead_gcm_iv *,
> +			sa->iv_ofs);
> +		aead_gcm_iv_fill(gcm, ivp[0], sa->salt);
> +	/* CRYPT+AUTH case */
> +	} else {
> +		sop->cipher.data.offset = sa->ctp.cipher.offset + hlen;
> +		sop->cipher.data.length = sa->ctp.cipher.length + plen;
> +		sop->auth.data.offset = sa->ctp.auth.offset + hlen;
> +		sop->auth.data.length = sa->ctp.auth.length + plen;
> +		sop->auth.digest.data = icv->va;
> +		sop->auth.digest.phys_addr = icv->pa;
Please ignore my previous comment on ctp in the previous patch.
You are also building the sym_op in this library. It would be better to
use sym_op instead of sop, to align with the rest of the DPDK code.
> +	}
> +}
> +
> +static inline int32_t
> +esp_outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> +	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> +	union sym_op_data *icv)
> +{
> +	uint32_t clen, hlen, l2len, pdlen, pdofs, plen, tlen;
> +	struct rte_mbuf *ml;
> +	struct esp_hdr *esph;
> +	struct esp_tail *espt;
> +	char *ph, *pt;
> +	uint64_t *iv;
> +
> +	/* calculate extra header space required */
> +	hlen = sa->hdr_len + sa->iv_len + sizeof(*esph);
> +
> +	/* size of ipsec protected data */
> +	l2len = mb->l2_len;
> +	plen = mb->pkt_len - mb->l2_len;
> +
> +	/* number of bytes to encrypt */
> +	clen = plen + sizeof(*espt);
> +	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* pad length + esp tail */
> +	pdlen = clen - plen;
> +	tlen = pdlen + sa->icv_len;
> +
> +	/* do append and prepend */
> +	ml = rte_pktmbuf_lastseg(mb);
> +	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
> +		return -ENOSPC;
> +
> +	/* prepend header */
> +	ph = rte_pktmbuf_prepend(mb, hlen - l2len);
> +	if (ph == NULL)
> +		return -ENOSPC;
> +
> +	/* append tail */
> +	pdofs = ml->data_len;
> +	ml->data_len += tlen;
> +	mb->pkt_len += tlen;
> +	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
> +
> +	/* update pkt l2/l3 len */
> +	mb->l2_len = sa->hdr_l3_off;
> +	mb->l3_len = sa->hdr_len - sa->hdr_l3_off;
> +
> +	/* copy tunnel pkt header */
> +	rte_memcpy(ph, sa->hdr, sa->hdr_len);
> +
> +	/* update original and new ip header fields */
> +	update_tun_l3hdr(sa, ph + sa->hdr_l3_off, mb->pkt_len, sa->hdr_l3_off,
> +			sqn_low16(sqc));
> +
> +	/* update spi, seqn and iv */
> +	esph = (struct esp_hdr *)(ph + sa->hdr_len);
> +	iv = (uint64_t *)(esph + 1);
> +	rte_memcpy(iv, ivp, sa->iv_len);
> +
> +	esph->spi = sa->spi;
> +	esph->seq = sqn_low32(sqc);
> +
> +	/* offset for ICV */
> +	pdofs += pdlen + sa->sqh_len;
> +
> +	/* pad length */
> +	pdlen -= sizeof(*espt);
> +
> +	/* copy padding data */
> +	rte_memcpy(pt, esp_pad_bytes, pdlen);
> +
> +	/* update esp trailer */
> +	espt = (struct esp_tail *)(pt + pdlen);
> +	espt->pad_len = pdlen;
> +	espt->next_proto = sa->proto;
> +
> +	icv->va = rte_pktmbuf_mtod_offset(ml, void *, pdofs);
> +	icv->pa = rte_pktmbuf_iova_offset(ml, pdofs);
> +
> +	return clen;
> +}
> +
> +/*
> + * for pure cryptodev (lookaside none) depending on SA settings,
> + * we might have to write some extra data to the packet.
> + */
> +static inline void
> +outb_pkt_xprepare(const struct rte_ipsec_sa *sa, rte_be64_t sqc,
> +	const union sym_op_data *icv)
> +{
> +	uint32_t *psqh;
> +	struct aead_gcm_aad *aad;
> +
> +	/* insert SQN.hi between ESP trailer and ICV */
> +	if (sa->sqh_len != 0) {
> +		psqh = (uint32_t *)(icv->va - sa->sqh_len);
> +		psqh[0] = sqn_hi32(sqc);
> +	}
> +
> +	/*
> +	 * fill IV and AAD fields, if any (aad fields are placed after icv),
> +	 * right now we support only one AEAD algorithm: AES-GCM .
> +	 */
> +	if (sa->aad_len != 0) {
> +		aad = (struct aead_gcm_aad *)(icv->va + sa->icv_len);
> +		aead_gcm_aad_fill(aad, sa->spi, sqc, IS_ESN(sa));
> +	}
> +}
> +
A comment before every function would probably be better in library code.
> +static uint16_t
> +outb_tun_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	struct rte_crypto_op *cop[], uint16_t num)
> +{
> +	int32_t rc;
> +	uint32_t i, k, n;
> +	uint64_t sqn;
> +	rte_be64_t sqc;
> +	struct rte_ipsec_sa *sa;
> +	union sym_op_data icv;
> +	uint64_t iv[IPSEC_MAX_IV_QWORD];
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	n = num;
> +	sqn = esn_outb_update_sqn(sa, &n);
> +	if (n != num)
> +		rte_errno = EOVERFLOW;
> +
> +	k = 0;
> +	for (i = 0; i != n; i++) {
> +
> +		sqc = rte_cpu_to_be_64(sqn + i);
> +		gen_iv(iv, sqc);
> +
> +		/* try to update the packet itself */
> +		rc = esp_outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv);
> +
> +		/* success, setup crypto op */
> +		if (rc >= 0) {
> +			mb[k] = mb[i];
> +			outb_pkt_xprepare(sa, sqc, &icv);
> +			esp_outb_cop_prepare(cop[k], sa, iv, &icv, 0, rc);
> +			k++;
> +		/* failure, put packet into the death-row */
> +		} else {
> +			dr[i - k] = mb[i];
> +			rte_errno = -rc;
> +		}
> +	}
> +
> +	/* update cops */
> +	lksd_none_cop_prepare(ss, mb, cop, k);
> +
> +	 /* copy not prepared mbufs beyond good ones */
> +	if (k != num && k != 0)
> +		mbuf_bulk_copy(mb + k, dr, num - k);
> +
> +	return k;
> +}
> +
> +static inline int32_t
> +esp_outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
> +	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
> +	uint32_t l2len, uint32_t l3len, union sym_op_data *icv)
> +{
> +	uint8_t np;
> +	uint32_t clen, hlen, pdlen, pdofs, plen, tlen, uhlen;
> +	struct rte_mbuf *ml;
> +	struct esp_hdr *esph;
> +	struct esp_tail *espt;
> +	char *ph, *pt;
> +	uint64_t *iv;
> +
> +	uhlen = l2len + l3len;
> +	plen = mb->pkt_len - uhlen;
> +
> +	/* calculate extra header space required */
> +	hlen = sa->iv_len + sizeof(*esph);
> +
> +	/* number of bytes to encrypt */
> +	clen = plen + sizeof(*espt);
> +	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
> +
> +	/* pad length + esp tail */
> +	pdlen = clen - plen;
> +	tlen = pdlen + sa->icv_len;
> +
> +	/* do append and insert */
> +	ml = rte_pktmbuf_lastseg(mb);
> +	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
> +		return -ENOSPC;
> +
> +	/* prepend space for ESP header */
> +	ph = rte_pktmbuf_prepend(mb, hlen);
> +	if (ph == NULL)
> +		return -ENOSPC;
> +
> +	/* append tail */
> +	pdofs = ml->data_len;
> +	ml->data_len += tlen;
> +	mb->pkt_len += tlen;
> +	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
> +
> +	/* shift L2/L3 headers */
> +	insert_esph(ph, ph + hlen, uhlen);
> +
> +	/* update ip  header fields */
> +	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len, l2len, l3len,
> +			IPPROTO_ESP);
> +
> +	/* update spi, seqn and iv */
> +	esph = (struct esp_hdr *)(ph + uhlen);
> +	iv = (uint64_t *)(esph + 1);
> +	rte_memcpy(iv, ivp, sa->iv_len);
> +
> +	esph->spi = sa->spi;
> +	esph->seq = sqn_low32(sqc);
> +
> +	/* offset for ICV */
> +	pdofs += pdlen + sa->sqh_len;
> +
> +	/* pad length */
> +	pdlen -= sizeof(*espt);
> +
> +	/* copy padding data */
> +	rte_memcpy(pt, esp_pad_bytes, pdlen);
> +
> +	/* update esp trailer */
> +	espt = (struct esp_tail *)(pt + pdlen);
> +	espt->pad_len = pdlen;
> +	espt->next_proto = np;
> +
> +	icv->va = rte_pktmbuf_mtod_offset(ml, void *, pdofs);
> +	icv->pa = rte_pktmbuf_iova_offset(ml, pdofs);
> +
> +	return clen;
> +}
> +
> +static uint16_t
> +outb_trs_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	struct rte_crypto_op *cop[], uint16_t num)
> +{
> +	int32_t rc;
> +	uint32_t i, k, n, l2, l3;
> +	uint64_t sqn;
> +	rte_be64_t sqc;
> +	struct rte_ipsec_sa *sa;
> +	union sym_op_data icv;
> +	uint64_t iv[IPSEC_MAX_IV_QWORD];
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	n = num;
> +	sqn = esn_outb_update_sqn(sa, &n);
> +	if (n != num)
> +		rte_errno = EOVERFLOW;
> +
> +	k = 0;
> +	for (i = 0; i != n; i++) {
> +
> +		l2 = mb[i]->l2_len;
> +		l3 = mb[i]->l3_len;
> +
> +		sqc = rte_cpu_to_be_64(sqn + i);
> +		gen_iv(iv, sqc);
> +
> +		/* try to update the packet itself */
> +		rc = esp_outb_trs_pkt_prepare(sa, sqc, iv, mb[i],
> +				l2, l3, &icv);
> +
> +		/* success, setup crypto op */
> +		if (rc >= 0) {
> +			mb[k] = mb[i];
> +			outb_pkt_xprepare(sa, sqc, &icv);
> +			esp_outb_cop_prepare(cop[k], sa, iv, &icv, l2 + l3, rc);
> +			k++;
> +		/* failure, put packet into the death-row */
> +		} else {
> +			dr[i - k] = mb[i];
> +			rte_errno = -rc;
> +		}
> +	}
> +
> +	/* update cops */
> +	lksd_none_cop_prepare(ss, mb, cop, k);
> +
> +	/* copy not prepared mbufs beyond good ones */
> +	if (k != num && k != 0)
> +		mbuf_bulk_copy(mb + k, dr, num - k);
> +
> +	return k;
> +}
> +
> +static inline int32_t
> +esp_inb_tun_cop_prepare(struct rte_crypto_op *cop,
> +	const struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
> +	const union sym_op_data *icv, uint32_t pofs, uint32_t plen)
> +{
> +	struct rte_crypto_sym_op *sop;
> +	struct aead_gcm_iv *gcm;
> +	uint64_t *ivc, *ivp;
> +	uint32_t clen;
> +
> +	clen = plen - sa->ctp.cipher.length;
> +	if ((int32_t)clen < 0 || (clen & (sa->pad_align - 1)) != 0)
> +		return -EINVAL;
> +
> +	/* fill sym op fields */
> +	sop = cop->sym;
> +
> +	/* AEAD (AES_GCM) case */
> +	if (sa->aad_len != 0) {
> +		sop->aead.data.offset = pofs + sa->ctp.cipher.offset;
> +		sop->aead.data.length = clen;
> +		sop->aead.digest.data = icv->va;
> +		sop->aead.digest.phys_addr = icv->pa;
> +		sop->aead.aad.data = icv->va + sa->icv_len;
> +		sop->aead.aad.phys_addr = icv->pa + sa->icv_len;
> +
> +		/* fill AAD IV (located inside crypto op) */
> +		gcm = rte_crypto_op_ctod_offset(cop, struct aead_gcm_iv *,
> +			sa->iv_ofs);
> +		ivp = rte_pktmbuf_mtod_offset(mb, uint64_t *,
> +			pofs + sizeof(struct esp_hdr));
> +		aead_gcm_iv_fill(gcm, ivp[0], sa->salt);
> +	/* CRYPT+AUTH case */
> +	} else {
> +		sop->cipher.data.offset = pofs + sa->ctp.cipher.offset;
> +		sop->cipher.data.length = clen;
> +		sop->auth.data.offset = pofs + sa->ctp.auth.offset;
> +		sop->auth.data.length = plen - sa->ctp.auth.length;
> +		sop->auth.digest.data = icv->va;
> +		sop->auth.digest.phys_addr = icv->pa;
> +
> +		/* copy iv from the input packet to the cop */
> +		ivc = rte_crypto_op_ctod_offset(cop, uint64_t *, sa->iv_ofs);
> +		ivp = rte_pktmbuf_mtod_offset(mb, uint64_t *,
> +			pofs + sizeof(struct esp_hdr));
> +		rte_memcpy(ivc, ivp, sa->iv_len);
> +	}
> +	return 0;
> +}
> +
> +/*
> + * for pure cryptodev (lookaside none) depending on SA settings,
> + * we might have to write some extra data to the packet.
> + */
> +static inline void
> +inb_pkt_xprepare(const struct rte_ipsec_sa *sa, rte_be64_t sqc,
> +	const union sym_op_data *icv)
> +{
> +	struct aead_gcm_aad *aad;
> +
> +	/* insert SQN.hi between ESP trailer and ICV */
> +	if (sa->sqh_len != 0)
> +		insert_sqh(sqn_hi32(sqc), icv->va, sa->icv_len);
> +
> +	/*
> +	 * fill AAD fields, if any (aad fields are placed after icv),
> +	 * right now we support only one AEAD algorithm: AES-GCM.
> +	 */
> +	if (sa->aad_len != 0) {
> +		aad = (struct aead_gcm_aad *)(icv->va + sa->icv_len);
> +		aead_gcm_aad_fill(aad, sa->spi, sqc, IS_ESN(sa));
> +	}
> +}
> +
> +static inline int32_t
> +esp_inb_tun_pkt_prepare(const struct rte_ipsec_sa *sa,
> +	const struct replay_sqn *rsn, struct rte_mbuf *mb,
> +	uint32_t hlen, union sym_op_data *icv)
> +{
> +	int32_t rc;
> +	uint64_t sqn;
> +	uint32_t icv_ofs, plen;
> +	struct rte_mbuf *ml;
> +	struct esp_hdr *esph;
> +
> +	esph = rte_pktmbuf_mtod_offset(mb, struct esp_hdr *, hlen);
> +
> +	/*
> +	 * retrieve and reconstruct SQN, then check it, then
> +	 * convert it back into network byte order.
> +	 */
> +	sqn = rte_be_to_cpu_32(esph->seq);
> +	if (IS_ESN(sa))
> +		sqn = reconstruct_esn(rsn->sqn, sqn, sa->replay.win_sz);
> +
> +	rc = esn_inb_check_sqn(rsn, sa, sqn);
> +	if (rc != 0)
> +		return rc;
> +
> +	sqn = rte_cpu_to_be_64(sqn);
> +
> +	/* start packet manipulation */
> +	plen = mb->pkt_len;
> +	plen = plen - hlen;
> +
> +	ml = rte_pktmbuf_lastseg(mb);
> +	icv_ofs = ml->data_len - sa->icv_len + sa->sqh_len;
> +
> +	/* we have to allocate space for AAD somewhere,
> +	 * right now - just use free trailing space at the last segment.
> +	 * Would probably be more convenient to reserve space for AAD
> +	 * inside rte_crypto_op itself
> +	 * (again for IV space is already reserved inside cop).
> +	 */
> +	if (sa->aad_len + sa->sqh_len > rte_pktmbuf_tailroom(ml))
> +		return -ENOSPC;
> +
> +	icv->va = rte_pktmbuf_mtod_offset(ml, void *, icv_ofs);
> +	icv->pa = rte_pktmbuf_iova_offset(ml, icv_ofs);
> +
> +	inb_pkt_xprepare(sa, sqn, icv);
> +	return plen;
> +}
> +
> +static uint16_t
> +inb_pkt_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	struct rte_crypto_op *cop[], uint16_t num)
> +{
> +	int32_t rc;
> +	uint32_t i, k, hl;
> +	struct rte_ipsec_sa *sa;
> +	struct replay_sqn *rsn;
> +	union sym_op_data icv;
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +	rsn = sa->sqn.inb;
> +
> +	k = 0;
> +	for (i = 0; i != num; i++) {
> +
> +		hl = mb[i]->l2_len + mb[i]->l3_len;
> +		rc = esp_inb_tun_pkt_prepare(sa, rsn, mb[i], hl, &icv);
> +		if (rc >= 0)
> +			rc = esp_inb_tun_cop_prepare(cop[k], sa, mb[i], &icv,
> +				hl, rc);
> +
> +		if (rc == 0)
> +			mb[k++] = mb[i];
> +		else {
> +			dr[i - k] = mb[i];
> +			rte_errno = -rc;
> +		}
> +	}
> +
> +	/* update cops */
> +	lksd_none_cop_prepare(ss, mb, cop, k);
> +
> +	/* copy not prepared mbufs beyond good ones */
> +	if (k != num && k != 0)
> +		mbuf_bulk_copy(mb + k, dr, num - k);
> +
> +	return k;
> +}
> +
The naming convention used is cryptic. A comment would be appreciated.
> +static inline void
> +lksd_proto_cop_prepare(const struct rte_ipsec_session *ss,
> +	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
> +{
> +	uint32_t i;
> +	struct rte_crypto_sym_op *sop;
> +
> +	for (i = 0; i != num; i++) {
> +		sop = cop[i]->sym;
> +		cop[i]->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
> +		cop[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
> +		cop[i]->sess_type = RTE_CRYPTO_OP_SECURITY_SESSION;
> +		sop->m_src = mb[i];
> +		__rte_security_attach_session(sop, ss->security.ses);
> +	}
> +}
> +
> +static uint16_t
> +lksd_proto_prepare(const struct rte_ipsec_session *ss,
> +	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
> +{
> +	lksd_proto_cop_prepare(ss, mb, cop, num);
> +	return num;
> +}
> +
> +static inline int
> +esp_inb_tun_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
> +	uint32_t *sqn)
> +{
> +	uint32_t hlen, icv_len, tlen;
> +	struct esp_hdr *esph;
> +	struct esp_tail *espt;
> +	struct rte_mbuf *ml;
> +	char *pd;
> +
> +	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
> +		return -EBADMSG;
> +
> +	icv_len = sa->icv_len;
> +
> +	ml = rte_pktmbuf_lastseg(mb);
> +	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
> +		ml->data_len - icv_len - sizeof(*espt));
> +
> +	/*
> +	 * check padding and next proto.
> +	 * return an error if something is wrong.
> +	 */
> +	pd = (char *)espt - espt->pad_len;
> +	if (espt->next_proto != sa->proto ||
> +			memcmp(pd, esp_pad_bytes, espt->pad_len))
> +		return -EINVAL;
> +
> +	/* cut of ICV, ESP tail and padding bytes */
> +	tlen = icv_len + sizeof(*espt) + espt->pad_len;
> +	ml->data_len -= tlen;
> +	mb->pkt_len -= tlen;
> +
> +	/* cut of L2/L3 headers, ESP header and IV */
> +	hlen = mb->l2_len + mb->l3_len;
> +	esph = rte_pktmbuf_mtod_offset(mb, struct esp_hdr *, hlen);
> +	rte_pktmbuf_adj(mb, hlen + sa->ctp.cipher.offset);
> +
> +	/* retrieve SQN for later check */
> +	*sqn = rte_be_to_cpu_32(esph->seq);
> +
> +	/* reset mbuf metatdata: L2/L3 len, packet type */
> +	mb->packet_type = RTE_PTYPE_UNKNOWN;
> +	mb->l2_len = 0;
> +	mb->l3_len = 0;
> +
> +	/* clear the PKT_RX_SEC_OFFLOAD flag if set */
> +	mb->ol_flags &= ~(mb->ol_flags & PKT_RX_SEC_OFFLOAD);
> +	return 0;
> +}
> +
> +static inline int
> +esp_inb_trs_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
> +	uint32_t *sqn)
> +{
> +	uint32_t hlen, icv_len, l2len, l3len, tlen;
> +	struct esp_hdr *esph;
> +	struct esp_tail *espt;
> +	struct rte_mbuf *ml;
> +	char *np, *op, *pd;
> +
> +	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
> +		return -EBADMSG;
> +
> +	icv_len = sa->icv_len;
> +
> +	ml = rte_pktmbuf_lastseg(mb);
> +	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
> +		ml->data_len - icv_len - sizeof(*espt));
> +
> +	/* check padding, return an error if something is wrong. */
> +	pd = (char *)espt - espt->pad_len;
> +	if (memcmp(pd, esp_pad_bytes, espt->pad_len))
> +		return -EINVAL;
> +
> +	/* cut of ICV, ESP tail and padding bytes */
> +	tlen = icv_len + sizeof(*espt) + espt->pad_len;
> +	ml->data_len -= tlen;
> +	mb->pkt_len -= tlen;
> +
> +	/* retrieve SQN for later check */
> +	l2len = mb->l2_len;
> +	l3len = mb->l3_len;
> +	hlen = l2len + l3len;
> +	op = rte_pktmbuf_mtod(mb, char *);
> +	esph = (struct esp_hdr *)(op + hlen);
> +	*sqn = rte_be_to_cpu_32(esph->seq);
> +
> +	/* cut off ESP header and IV, update L3 header */
> +	np = rte_pktmbuf_adj(mb, sa->ctp.cipher.offset);
> +	remove_esph(np, op, hlen);
> +	update_trs_l3hdr(sa, np + l2len, mb->pkt_len, l2len, l3len,
> +			espt->next_proto);
> +
> +	/* reset mbuf packet type */
> +	mb->packet_type &= (RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK);
> +
> +	/* clear the PKT_RX_SEC_OFFLOAD flag if set */
> +	mb->ol_flags &= ~(mb->ol_flags & PKT_RX_SEC_OFFLOAD);
> +	return 0;
> +}
> +
> +static inline uint16_t
> +esp_inb_rsn_update(struct rte_ipsec_sa *sa, const uint32_t sqn[],
> +	struct rte_mbuf *mb[], struct rte_mbuf *dr[], uint16_t num)
> +{
> +	uint32_t i, k;
> +	struct replay_sqn *rsn;
> +
> +	rsn = sa->sqn.inb;
> +
> +	k = 0;
> +	for (i = 0; i != num; i++) {
> +		if (esn_inb_update_sqn(rsn, sa, sqn[i]) == 0)
> +			mb[k++] = mb[i];
> +		else
> +			dr[i - k] = mb[i];
> +	}
> +
> +	return k;
> +}
> +
> +static uint16_t
> +inb_tun_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	uint16_t num)
> +{
> +	uint32_t i, k;
> +	struct rte_ipsec_sa *sa;
> +	uint32_t sqn[num];
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	/* process packets, extract seq numbers */
> +
> +	k = 0;
> +	for (i = 0; i != num; i++) {
> +		/* good packet */
> +		if (esp_inb_tun_single_pkt_process(sa, mb[i], sqn + k) == 0)
> +			mb[k++] = mb[i];
> +		/* bad packet, will drop from furhter processing */
> +		else
> +			dr[i - k] = mb[i];
> +	}
> +
> +	/* update seq # and replay winow */
> +	k = esp_inb_rsn_update(sa, sqn, mb, dr + i - k, k);
> +
> +	/* handle unprocessed mbufs */
> +	if (k != num) {
> +		rte_errno = EBADMSG;
> +		if (k != 0)
> +			mbuf_bulk_copy(mb + k, dr, num - k);
> +	}
> +
> +	return k;
> +}
> +
> +static uint16_t
> +inb_trs_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	uint16_t num)
> +{
> +	uint32_t i, k;
> +	uint32_t sqn[num];
> +	struct rte_ipsec_sa *sa;
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	/* process packets, extract seq numbers */
> +
> +	k = 0;
> +	for (i = 0; i != num; i++) {
> +		/* good packet */
> +		if (esp_inb_trs_single_pkt_process(sa, mb[i], sqn + k) == 0)
> +			mb[k++] = mb[i];
> +		/* bad packet, will drop from furhter processing */
> +		else
> +			dr[i - k] = mb[i];
> +	}
> +
> +	/* update seq # and replay winow */
> +	k = esp_inb_rsn_update(sa, sqn, mb, dr + i - k, k);
> +
> +	/* handle unprocessed mbufs */
> +	if (k != num) {
> +		rte_errno = EBADMSG;
> +		if (k != 0)
> +			mbuf_bulk_copy(mb + k, dr, num - k);
> +	}
> +
> +	return k;
> +}
> +
> +/*
> + * process outbound packets for SA with ESN support,
> + * for algorithms that require SQN.hibits to be implictly included
> + * into digest computation.
> + * In that case we have to move ICV bytes back to their proper place.
> + */
> +static uint16_t
> +outb_sqh_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	uint16_t num)
> +{
> +	uint32_t i, k, icv_len, *icv;
> +	struct rte_mbuf *ml;
> +	struct rte_ipsec_sa *sa;
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	k = 0;
> +	icv_len = sa->icv_len;
> +
> +	for (i = 0; i != num; i++) {
> +		if ((mb[i]->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED) == 0) {
> +			ml = rte_pktmbuf_lastseg(mb[i]);
> +			icv = rte_pktmbuf_mtod_offset(ml, void *,
> +				ml->data_len - icv_len);
> +			remove_sqh(icv, icv_len);
> +			mb[k++] = mb[i];
> +		} else
> +			dr[i - k] = mb[i];
> +	}
> +
> +	/* handle unprocessed mbufs */
> +	if (k != num) {
> +		rte_errno = EBADMSG;
> +		if (k != 0)
> +			mbuf_bulk_copy(mb + k, dr, num - k);
> +	}
> +
> +	return k;
> +}
> +
> +/*
> + * simplest pkt process routine:
> + * all actual processing is done already doneby HW/PMD,

all actual processing is already done by HW/PMD

> + * just check mbuf ol_flags.
> + * used for:
> + * - inbound for RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL
> + * - inbound/outbound for RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL
> + * - outbound for RTE_SECURITY_ACTION_TYPE_NONE when ESN is disabled
> + */
> +static uint16_t
> +pkt_flag_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
> +	uint16_t num)
> +{
> +	uint32_t i, k;
> +	struct rte_mbuf *dr[num];
> +
> +	RTE_SET_USED(ss);
> +
> +	k = 0;
> +	for (i = 0; i != num; i++) {
> +		if ((mb[i]->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED) == 0)
> +			mb[k++] = mb[i];
> +		else
> +			dr[i - k] = mb[i];
> +	}
> +
> +	/* handle unprocessed mbufs */
> +	if (k != num) {
> +		rte_errno = EBADMSG;
> +		if (k != 0)
> +			mbuf_bulk_copy(mb + k, dr, num - k);
> +	}
> +
> +	return k;
> +}
> +
> +/*
> + * prepare packets for inline ipsec processing:
> + * set ol_flags and attach metadata.
> + */
> +static inline void
> +inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
> +	struct rte_mbuf *mb[], uint16_t num)
> +{
> +	uint32_t i, ol_flags;
> +
> +	ol_flags = ss->security.ol_flags & RTE_SECURITY_TX_OLOAD_NEED_MDATA;
> +	for (i = 0; i != num; i++) {
> +
> +		mb[i]->ol_flags |= PKT_TX_SEC_OFFLOAD;
> +		if (ol_flags != 0)
> +			rte_security_set_pkt_metadata(ss->security.ctx,
> +				ss->security.ses, mb[i], NULL);
> +	}
> +}
> +
> +static uint16_t
> +inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
> +	struct rte_mbuf *mb[], uint16_t num)
> +{
> +	int32_t rc;
> +	uint32_t i, k, n;
> +	uint64_t sqn;
> +	rte_be64_t sqc;
> +	struct rte_ipsec_sa *sa;
> +	union sym_op_data icv;
> +	uint64_t iv[IPSEC_MAX_IV_QWORD];
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	n = num;
> +	sqn = esn_outb_update_sqn(sa, &n);
> +	if (n != num)
> +		rte_errno = EOVERFLOW;
> +
> +	k = 0;
> +	for (i = 0; i != n; i++) {
> +
> +		sqc = rte_cpu_to_be_64(sqn + i);
> +		gen_iv(iv, sqc);
> +
> +		/* try to update the packet itself */
> +		rc = esp_outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv);
> +
> +		/* success, update mbuf fields */
> +		if (rc >= 0)
> +			mb[k++] = mb[i];
> +		/* failure, put packet into the death-row */
> +		else {
> +			dr[i - k] = mb[i];
> +			rte_errno = -rc;
> +		}
> +	}
> +
> +	inline_outb_mbuf_prepare(ss, mb, k);
> +
> +	/* copy not processed mbufs beyond good ones */
> +	if (k != num && k != 0)
> +		mbuf_bulk_copy(mb + k, dr, num - k);
> +
> +	return k;
> +}
> +
> +static uint16_t
> +inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
> +	struct rte_mbuf *mb[], uint16_t num)
> +{
> +	int32_t rc;
> +	uint32_t i, k, n, l2, l3;
> +	uint64_t sqn;
> +	rte_be64_t sqc;
> +	struct rte_ipsec_sa *sa;
> +	union sym_op_data icv;
> +	uint64_t iv[IPSEC_MAX_IV_QWORD];
> +	struct rte_mbuf *dr[num];
> +
> +	sa = ss->sa;
> +
> +	n = num;
> +	sqn = esn_outb_update_sqn(sa, &n);
> +	if (n != num)
> +		rte_errno = EOVERFLOW;
> +
> +	k = 0;
> +	for (i = 0; i != n; i++) {
> +
> +		l2 = mb[i]->l2_len;
> +		l3 = mb[i]->l3_len;
> +
> +		sqc = rte_cpu_to_be_64(sqn + i);
> +		gen_iv(iv, sqc);
> +
> +		/* try to update the packet itself */
> +		rc = esp_outb_trs_pkt_prepare(sa, sqc, iv, mb[i],
> +				l2, l3, &icv);
> +
> +		/* success, update mbuf fields */
> +		if (rc >= 0)
> +			mb[k++] = mb[i];
> +		/* failure, put packet into the death-row */
> +		else {
> +			dr[i - k] = mb[i];
> +			rte_errno = -rc;
> +		}
> +	}
> +
> +	inline_outb_mbuf_prepare(ss, mb, k);
> +
> +	/* copy not processed mbufs beyond good ones */
> +	if (k != num && k != 0)
> +		mbuf_bulk_copy(mb + k, dr, num - k);
> +
> +	return k;
> +}
> +
> +/*
> + * outbound for RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL:
> + * actual processing is done by HW/PMD, just set flags and metadata.
> + */
> +static uint16_t
> +outb_inline_proto_process(const struct rte_ipsec_session *ss,
> +		struct rte_mbuf *mb[], uint16_t num)
> +{
> +	inline_outb_mbuf_prepare(ss, mb, num);
> +	return num;
> +}
> +
> +static int
> +lksd_none_pkt_func_select(const struct rte_ipsec_sa *sa,
> +		struct rte_ipsec_sa_pkt_func *pf)
> +{
> +	int32_t rc;
> +
> +	static const uint64_t msk = RTE_IPSEC_SATP_DIR_MASK |
> +			RTE_IPSEC_SATP_MODE_MASK;
> +
> +	rc = 0;
> +	switch (sa->type & msk) {
> +	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV4):
> +	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV6):
> +		pf->prepare = inb_pkt_prepare;
> +		pf->process = inb_tun_pkt_process;
> +		break;
> +	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TRANS):
> +		pf->prepare = inb_pkt_prepare;
> +		pf->process = inb_trs_pkt_process;
> +		break;
> +	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV4):
> +	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV6):
> +		pf->prepare = outb_tun_prepare;
> +		pf->process = (sa->sqh_len != 0) ?
> +			outb_sqh_process : pkt_flag_process;
> +		break;
> +	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TRANS):
> +		pf->prepare = outb_trs_prepare;
> +		pf->process = (sa->sqh_len != 0) ?
> +			outb_sqh_process : pkt_flag_process;
> +		break;
> +	default:
> +		rc = -ENOTSUP;
> +	}
> +
> +	return rc;
> +}
> +
> +static int
> +inline_crypto_pkt_func_select(const struct rte_ipsec_sa *sa,
> +		struct rte_ipsec_sa_pkt_func *pf)
> +{
> +	int32_t rc;
> +
> +	static const uint64_t msk = RTE_IPSEC_SATP_DIR_MASK |
> +			RTE_IPSEC_SATP_MODE_MASK;
> +
> +	rc = 0;
> +	switch (sa->type & msk) {
> +	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV4):
> +	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV6):
> +		pf->process = inb_tun_pkt_process;
> +		break;
> +	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TRANS):
> +		pf->process = inb_trs_pkt_process;
> +		break;
> +	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV4):
> +	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV6):
> +		pf->process = inline_outb_tun_pkt_process;
> +		break;
> +	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TRANS):
> +		pf->process = inline_outb_trs_pkt_process;
> +		break;
> +	default:
> +		rc = -ENOTSUP;
> +	}
> +
> +	return rc;
> +}
> +
>   int
>   ipsec_sa_pkt_func_select(const struct rte_ipsec_session *ss,
>   	const struct rte_ipsec_sa *sa, struct rte_ipsec_sa_pkt_func *pf)
>   {
>   	int32_t rc;
>   
> -	RTE_SET_USED(sa);
> -
>   	rc = 0;
>   	pf[0] = (struct rte_ipsec_sa_pkt_func) { 0 };
>   
>   	switch (ss->type) {
> +	case RTE_SECURITY_ACTION_TYPE_NONE:
> +		rc = lksd_none_pkt_func_select(sa, pf);
> +		break;
> +	case RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO:
> +		rc = inline_crypto_pkt_func_select(sa, pf);
> +		break;
> +	case RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL:
> +		if ((sa->type & RTE_IPSEC_SATP_DIR_MASK) ==
> +				RTE_IPSEC_SATP_DIR_IB)
> +			pf->process = pkt_flag_process;
> +		else
> +			pf->process = outb_inline_proto_process;
> +		break;
> +	case RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL:
> +		pf->prepare = lksd_proto_prepare;
better to use lookaside instead of lksd
> +		pf->process = pkt_flag_process;
> +		break;
>   	default:
>   		rc = -ENOTSUP;
>   	}
  
Ananyev, Konstantin Dec. 20, 2018, 12:56 p.m. UTC | #2
> >
> > diff --git a/lib/librte_ipsec/crypto.h b/lib/librte_ipsec/crypto.h
> > new file mode 100644
> > index 000000000..61f5c1433
> > --- /dev/null
> > +++ b/lib/librte_ipsec/crypto.h
> > @@ -0,0 +1,123 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Intel Corporation
> > + */
> > +
> > +#ifndef _CRYPTO_H_
> > +#define _CRYPTO_H_
> > +
> > +/**
> > + * @file crypto.h
> > + * Contains crypto specific functions/structures/macros used internally
> > + * by ipsec library.
> > + */
> > +
> > + /*
> > +  * AES-GCM devices have some specific requirements for IV and AAD formats.
> > +  * Ideally that to be done by the driver itself.
> > +  */
> I believe these can be moved to rte_crypto_sym.h. All crypto related
> stuff should be at same place.

Not sure what exactly you suggest putting into rte_crypto_sym.h?
struct aead_gcm_iv? Something else?
From my perspective it would be good if the user just filled the salt and IV
fields in crypto_sym_op, and then the PMD set things up in the needed
format internally.
Again, it would be really good if crypto_sym_op had reserved space
for AAD...
But all that implies quite a big change in cryptodev and the PMDs,
so I think it should be the subject of a separate patch.

> > +
> > +struct aead_gcm_iv {
> > +	uint32_t salt;
> > +	uint64_t iv;
> > +	uint32_t cnt;
> > +} __attribute__((packed));
> > +
> > +struct aead_gcm_aad {
> > +	uint32_t spi;
> > +	/*
> > +	 * RFC 4106, section 5:
> > +	 * Two formats of the AAD are defined:
> > +	 * one for 32-bit sequence numbers, and one for 64-bit ESN.
> > +	 */
> > +	union {
> > +		uint32_t u32[2];
> > +		uint64_t u64;
> > +	} sqn;
> > +	uint32_t align0; /* align to 16B boundary */
> > +} __attribute__((packed));
> > +
> > +struct gcm_esph_iv {
> > +	struct esp_hdr esph;
> > +	uint64_t iv;
> > +} __attribute__((packed));
> > +
> > +
> > +static inline void
> > +aead_gcm_iv_fill(struct aead_gcm_iv *gcm, uint64_t iv, uint32_t salt)
> > +{
> > +	gcm->salt = salt;
> > +	gcm->iv = iv;
> > +	gcm->cnt = rte_cpu_to_be_32(1);
> > +}
> > +
> > +/*


> > diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h
> > new file mode 100644
> > index 000000000..3fd93016d
> > --- /dev/null
> > +++ b/lib/librte_ipsec/iph.h
> > @@ -0,0 +1,84 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright(c) 2018 Intel Corporation
> > + */
> > +
> > +#ifndef _IPH_H_
> > +#define _IPH_H_
> > +
> > +/**
> > + * @file iph.h
> > + * Contains functions/structures/macros to manipulate IPv/IPv6 headers
> IPv4
> > + * used internally by ipsec library.
> > + */
> > +
> > +/*
> > + * Move preceding (L3) headers down to remove ESP header and IV.
> > + */
> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths.

We do use rte_mbuf append/trim, etc. to adjust the mbuf's data_off and data_len.
But apart from that, for transport mode we have to move the actual packet headers.
Let's say for inbound we have to get rid of the ESP header (which is after the IP
header) but preserve the IP header, so we move the L2/L3 headers down, overwriting
the ESP header.

> I believe these adjustments are happening in the mbuf itself.
> Moreover these APIs are not specific to esp headers.

I didn't get your last sentence: that function is used to remove the ESP header
(see above) - that's why I named it that way.

> > +static inline void
> > +remove_esph(char *np, char *op, uint32_t hlen)
> > +{
> > +	uint32_t i;
> > +
> > +	for (i = hlen; i-- != 0; np[i] = op[i])
> > +		;
> > +}
> > +
> > +/*


> > +
> > +/* update original and new ip header fields for tunnel case */
> > +static inline void
> > +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> > +		uint32_t l2len, rte_be16_t pid)
> > +{
> > +	struct ipv4_hdr *v4h;
> > +	struct ipv6_hdr *v6h;
> > +
> > +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
> > +		v4h = p;
> > +		v4h->packet_id = pid;
> > +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> where are we updating the rest of the fields, like ttl, checksum, ip
> addresses, etc

TTL, IP addresses and other fields are supposed to be set up by the user
and provided via rte_ipsec_sa_init():
struct rte_ipsec_sa_prm.tun.hdr should contain a prepared template
for the L3 (and L2, if the user wants to) header.
Checksum calculation is not done inside the lib right now -
it is the user's responsibility to calculate/set it after librte_ipsec
finishes processing the packet.
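
For illustration only (not part of the patch): with that split of responsibilities,
an outbound tunnel-mode caller would finalize the outer IPv4 checksum itself once
librte_ipsec is done, e.g. along these lines (assuming the outer IPv4 header starts
right after l2_len bytes):

#include <rte_ip.h>
#include <rte_mbuf.h>

/* sketch: recompute the outer IPv4 header checksum after processing */
static inline void
fix_outer_ipv4_cksum(struct rte_mbuf *mb)
{
	struct ipv4_hdr *iph;

	iph = rte_pktmbuf_mtod_offset(mb, struct ipv4_hdr *, mb->l2_len);
	iph->hdr_checksum = 0;
	iph->hdr_checksum = rte_ipv4_cksum(iph);
}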

> > +	} else {
> > +		v6h = p;
> > +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
> > +				sizeof(*v6h));
> > +	}
> > +}
> > +
> > +#endif /* _IPH_H_ */
> > diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
> > index 1935f6e30..6e18c34eb 100644
> > --- a/lib/librte_ipsec/ipsec_sqn.h
> > +++ b/lib/librte_ipsec/ipsec_sqn.h
> > @@ -15,6 +15,45 @@
> >
> >   #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
> >
> > +/*
> > + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
> > + */
> > +static inline rte_be32_t
> > +sqn_hi32(rte_be64_t sqn)
> > +{
> > +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> > +	return (sqn >> 32);
> > +#else
> > +	return sqn;
> > +#endif
> > +}
> > +
> > +/*
> > + * gets SQN.low32 bits, SQN supposed to be in network byte order.
> > + */
> > +static inline rte_be32_t
> > +sqn_low32(rte_be64_t sqn)
> > +{
> > +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> > +	return sqn;
> > +#else
> > +	return (sqn >> 32);
> > +#endif
> > +}
> > +
> > +/*
> > + * gets SQN.low16 bits, SQN supposed to be in network byte order.
> > + */
> > +static inline rte_be16_t
> > +sqn_low16(rte_be64_t sqn)
> > +{
> > +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> > +	return sqn;
> > +#else
> > +	return (sqn >> 48);
> > +#endif
> > +}
> > +
> shouldn't we move these seq number APIs in rte_esp.h and make them generic

It could be done, but who will use them except librte_ipsec?
  
Akhil Goyal Dec. 21, 2018, 12:36 p.m. UTC | #3
On 12/20/2018 6:26 PM, Ananyev, Konstantin wrote:
>
>>> diff --git a/lib/librte_ipsec/crypto.h b/lib/librte_ipsec/crypto.h
>>> new file mode 100644
>>> index 000000000..61f5c1433
>>> --- /dev/null
>>> +++ b/lib/librte_ipsec/crypto.h
>>> @@ -0,0 +1,123 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2018 Intel Corporation
>>> + */
>>> +
>>> +#ifndef _CRYPTO_H_
>>> +#define _CRYPTO_H_
>>> +
>>> +/**
>>> + * @file crypto.h
>>> + * Contains crypto specific functions/structures/macros used internally
>>> + * by ipsec library.
>>> + */
>>> +
>>> + /*
>>> +  * AES-GCM devices have some specific requirements for IV and AAD formats.
>>> +  * Ideally that to be done by the driver itself.
>>> +  */
>> I believe these can be moved to rte_crypto_sym.h. All crypto related
>> stuff should be at same place.
> Not sure what exactly you suggest to put into rte_crypto_sym.h?
> struct aead_gcm_iv? Something else?
>  From my perspective it would be good if user in ctypto_sym_op
> just fill salt and IV fields, and then PMD setup things in needed
> format internally.
> Again it would be really good if crypto_sym_op has reserved space
> for AAD...
> But  all that implies quite a big change in cryptodev and PMDs,
> so I think should be subject of a separate patch.
>
>>> +
>>> +struct aead_gcm_iv {
>>> +	uint32_t salt;
>>> +	uint64_t iv;
>>> +	uint32_t cnt;
>>> +} __attribute__((packed));
>>> +
>>> +struct aead_gcm_aad {
>>> +	uint32_t spi;
>>> +	/*
>>> +	 * RFC 4106, section 5:
>>> +	 * Two formats of the AAD are defined:
>>> +	 * one for 32-bit sequence numbers, and one for 64-bit ESN.
>>> +	 */
>>> +	union {
>>> +		uint32_t u32[2];
>>> +		uint64_t u64;
>>> +	} sqn;
>>> +	uint32_t align0; /* align to 16B boundary */
>>> +} __attribute__((packed));
>>> +
>>> +struct gcm_esph_iv {
>>> +	struct esp_hdr esph;
>>> +	uint64_t iv;
>>> +} __attribute__((packed));
>>> +
>>> +
>>> +static inline void
>>> +aead_gcm_iv_fill(struct aead_gcm_iv *gcm, uint64_t iv, uint32_t salt)
>>> +{
>>> +	gcm->salt = salt;
>>> +	gcm->iv = iv;
>>> +	gcm->cnt = rte_cpu_to_be_32(1);
>>> +}
>>> +
>>> +/*
>
>>> diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h
>>> new file mode 100644
>>> index 000000000..3fd93016d
>>> --- /dev/null
>>> +++ b/lib/librte_ipsec/iph.h
>>> @@ -0,0 +1,84 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2018 Intel Corporation
>>> + */
>>> +
>>> +#ifndef _IPH_H_
>>> +#define _IPH_H_
>>> +
>>> +/**
>>> + * @file iph.h
>>> + * Contains functions/structures/macros to manipulate IPv/IPv6 headers
>> IPv4
>>> + * used internally by ipsec library.
>>> + */
>>> +
>>> +/*
>>> + * Move preceding (L3) headers down to remove ESP header and IV.
>>> + */
>> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths.
> We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len.
> But apart from that for transport mode we have to move actual packet headers.
> Let say for inbound we have to get rid of ESP header (which is after IP header),
> but preserve IP header, so we moving L2/L3 headers down, overwriting ESP header.
ok got your point
>> I believe these adjustments are happening in the mbuf itself.
>> Moreover these APIs are not specific to esp headers.
> I didn't get your last sentence: that function is used to remove esp header
> (see above) - that's why I named it that way.
These can be used to remove any header, not specifically ESP. So this
API could be a generic one in rte_mbuf.
>
>>> +static inline void
>>> +remove_esph(char *np, char *op, uint32_t hlen)
>>> +{
>>> +	uint32_t i;
>>> +
>>> +	for (i = hlen; i-- != 0; np[i] = op[i])
>>> +		;
>>> +}
>>> +
>>> +/*
>
>>> +
>>> +/* update original and new ip header fields for tunnel case */
>>> +static inline void
>>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>>> +		uint32_t l2len, rte_be16_t pid)
>>> +{
>>> +	struct ipv4_hdr *v4h;
>>> +	struct ipv6_hdr *v6h;
>>> +
>>> +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
>>> +		v4h = p;
>>> +		v4h->packet_id = pid;
>>> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
>> where are we updating the rest of the fields, like ttl, checksum, ip
>> addresses, etc
> TTL, ip addresses and other fileds supposed to be setuped by user
> and provided via rte_ipsec_sa_init():
> struct rte_ipsec_sa_prm.tun.hdr  should contain prepared template
> for L3(and L2 if user wants to) header.
> Checksum calculation is not done inside the lib right now -
> it is a user responsibility to caclucate/set it after librte_ipsec
> finishes processing the packet.
I believe the static fields are updated during SA init, but some fields like
TTL and checksum, which change for every packet, can be updated in the
library itself (https://tools.ietf.org/html/rfc1624).
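
For reference, the per-packet update being suggested is small; a sketch of the
RFC 1624 incremental checksum rule (HC' = ~(~HC + ~m + m')), given here only to
illustrate the point, not as code from the patch:

#include <stdint.h>

/* RFC 1624, eqn. 3: update a one's-complement checksum when one 16-bit
 * word of the header changes from 'old_val' to 'new_val' */
static inline uint16_t
cksum_incr_update(uint16_t cksum, uint16_t old_val, uint16_t new_val)
{
	uint32_t sum;

	sum = (uint16_t)~cksum + (uint16_t)~old_val + new_val;
	sum = (sum >> 16) + (sum & 0xffff);
	sum += sum >> 16;
	return (uint16_t)~sum;
}
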
>
>>> +	} else {
>>> +		v6h = p;
>>> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
>>> +				sizeof(*v6h));
>>> +	}
>>> +}
>>> +
>>> +#endif /* _IPH_H_ */
>>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
>>> index 1935f6e30..6e18c34eb 100644
>>> --- a/lib/librte_ipsec/ipsec_sqn.h
>>> +++ b/lib/librte_ipsec/ipsec_sqn.h
>>> @@ -15,6 +15,45 @@
>>>
>>>    #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
>>>
>>> +/*
>>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
>>> + */
>>> +static inline rte_be32_t
>>> +sqn_hi32(rte_be64_t sqn)
>>> +{
>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>>> +	return (sqn >> 32);
>>> +#else
>>> +	return sqn;
>>> +#endif
>>> +}
>>> +
>>> +/*
>>> + * gets SQN.low32 bits, SQN supposed to be in network byte order.
>>> + */
>>> +static inline rte_be32_t
>>> +sqn_low32(rte_be64_t sqn)
>>> +{
>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>>> +	return sqn;
>>> +#else
>>> +	return (sqn >> 32);
>>> +#endif
>>> +}
>>> +
>>> +/*
>>> + * gets SQN.low16 bits, SQN supposed to be in network byte order.
>>> + */
>>> +static inline rte_be16_t
>>> +sqn_low16(rte_be64_t sqn)
>>> +{
>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>>> +	return sqn;
>>> +#else
>>> +	return (sqn >> 48);
>>> +#endif
>>> +}
>>> +
>> shouldn't we move these seq number APIs in rte_esp.h and make them generic
> It could be done, but who will use them except librte_ipsec?
Whoever uses rte_esp.h but does not use the ipsec lib. That is the whole 
intent of rte_esp.h; otherwise we don't need rte_esp.h at all and can keep 
its content inside the ipsec lib itself.
  
Ananyev, Konstantin Dec. 21, 2018, 2:27 p.m. UTC | #4
> >>> + */
> >>> +
> >>> +/*
> >>> + * Move preceding (L3) headers down to remove ESP header and IV.
> >>> + */
> >> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths.
> > We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len.
> > But apart from that for transport mode we have to move actual packet headers.
> > Let say for inbound we have to get rid of ESP header (which is after IP header),
> > but preserve IP header, so we moving L2/L3 headers down, overwriting ESP header.
> ok got your point
> >> I believe these adjustments are happening in the mbuf itself.
> >> Moreover these APIs are not specific to esp headers.
> > I didn't get your last sentence: that function is used to remove esp header
> > (see above) - that's why I named it that way.
> These can be used to remove any header and not specifically esp. So this
> API could be generic in rte_mbuf.

That function has nothing to do with mbuf in general.
It just copies bytes between buffers that overlap in a certain way
(src.start < dst.start < src.end < dst.end).
Right now it is very primitive - it copies one byte at a time in
descending order.
Wrote it just to avoid using memmove(). 
I don't think there is any point in having such a dummy function in lib/eal.
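
(For reference only, a minimal sketch of the equivalent operation expressed
with memmove(), under the overlap relation described above; the name is
hypothetical and this is not part of the patch:)

#include <string.h>
#include <stdint.h>

/*
 * illustration only: the descending byte loop performs the same work as
 * a single overlapping-region copy, where np (the new header start)
 * lies above op within the same buffer.
 */
static inline void
remove_esph_memmove(char *np, const char *op, uint32_t hlen)
{
	memmove(np, op, hlen);
}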

> >
> >>> +static inline void
> >>> +remove_esph(char *np, char *op, uint32_t hlen)
> >>> +{
> >>> +	uint32_t i;
> >>> +
> >>> +	for (i = hlen; i-- != 0; np[i] = op[i])
> >>> +		;
> >>> +}
> >>> +
> >>> +/*
> >
> >>> +
> >>> +/* update original and new ip header fields for tunnel case */
> >>> +static inline void
> >>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >>> +		uint32_t l2len, rte_be16_t pid)
> >>> +{
> >>> +	struct ipv4_hdr *v4h;
> >>> +	struct ipv6_hdr *v6h;
> >>> +
> >>> +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
> >>> +		v4h = p;
> >>> +		v4h->packet_id = pid;
> >>> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> >> where are we updating the rest of the fields, like ttl, checksum, ip
> >> addresses, etc
> > TTL, ip addresses and other fileds supposed to be setuped by user
> > and provided via rte_ipsec_sa_init():
> > struct rte_ipsec_sa_prm.tun.hdr  should contain prepared template
> > for L3(and L2 if user wants to) header.
> > Checksum calculation is not done inside the lib right now -
> > it is a user responsibility to caclucate/set it after librte_ipsec
> > finishes processing the packet.
> I believe static fields are updated during sa init but some fields like
> ttl and checksum,
> can be updated in the library itself which is updated for every packet.
> (https://tools.ietf.org/html/rfc1624)

About the checksum - there is no point in calculating it in the lib,
as the user may choose to use HW checksum offload.
All other libraries ip_frag, GSO, etc. leave it to the user,
I don't see why ipsec should be different here.
About TTL and other fields - I suppose you refer to:
https://tools.ietf.org/html/rfc4301#section-5.1.2
Header Construction for Tunnel Mode
right?
Surely that has to be supported, one way or the other,
but we don't plan to implement it in 19.02.
The current plan is to add it in 19.05, if time permits.
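
(For illustration only, a minimal caller-side sketch of the HW checksum
offload path mentioned above; the helper name is hypothetical, and it
assumes the TX port was configured with DEV_TX_OFFLOAD_IPV4_CKSUM:)

#include <rte_mbuf.h>
#include <rte_ip.h>

/*
 * hypothetical caller-side fragment (not part of the patch): after the
 * lib has built the outbound tunnel packet (and set l2_len/l3_len),
 * ask the NIC to fill the outer IPv4 header checksum.
 */
static inline void
req_outer_ipv4_cksum(struct rte_mbuf *mb)
{
	struct ipv4_hdr *v4h;

	v4h = rte_pktmbuf_mtod_offset(mb, struct ipv4_hdr *, mb->l2_len);
	v4h->hdr_checksum = 0;
	mb->ol_flags |= PKT_TX_IPV4 | PKT_TX_IP_CKSUM;
}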

> >
> >>> +	} else {
> >>> +		v6h = p;
> >>> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
> >>> +				sizeof(*v6h));
> >>> +	}
> >>> +}
> >>> +
> >>> +#endif /* _IPH_H_ */
> >>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
> >>> index 1935f6e30..6e18c34eb 100644
> >>> --- a/lib/librte_ipsec/ipsec_sqn.h
> >>> +++ b/lib/librte_ipsec/ipsec_sqn.h
> >>> @@ -15,6 +15,45 @@
> >>>
> >>>    #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
> >>>
> >>> +/*
> >>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
> >>> + */
> >>> +static inline rte_be32_t
> >>> +sqn_hi32(rte_be64_t sqn)
> >>> +{
> >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> >>> +	return (sqn >> 32);
> >>> +#else
> >>> +	return sqn;
> >>> +#endif
> >>> +}
> >>> +
> >>> +/*
> >>> + * gets SQN.low32 bits, SQN supposed to be in network byte order.
> >>> + */
> >>> +static inline rte_be32_t
> >>> +sqn_low32(rte_be64_t sqn)
> >>> +{
> >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> >>> +	return sqn;
> >>> +#else
> >>> +	return (sqn >> 32);
> >>> +#endif
> >>> +}
> >>> +
> >>> +/*
> >>> + * gets SQN.low16 bits, SQN supposed to be in network byte order.
> >>> + */
> >>> +static inline rte_be16_t
> >>> +sqn_low16(rte_be64_t sqn)
> >>> +{
> >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> >>> +	return sqn;
> >>> +#else
> >>> +	return (sqn >> 48);
> >>> +#endif
> >>> +}
> >>> +
> >> shouldn't we move these seq number APIs in rte_esp.h and make them generic
> > It could be done, but who will use them except librte_ipsec?
> Whoever uses rte_esp.h and not use ipsec lib. The intent of rte_esp.h is
> just for that only, otherwise we don't need rte_esp.h, we can have the
> content of rte_esp.h in ipsec itself.

Again these functions are used just inside the lib to help avoid
extra byteswapping during crypto-data/packet header constructions.
I don't see how they would be useful in general.
Sure, if there is demand from users in the future we can move them,
but right now I don't think that will happen. 
Konstantin
  
Thomas Monjalon Dec. 21, 2018, 2:39 p.m. UTC | #5
21/12/2018 15:27, Ananyev, Konstantin:
> 
> > >>> + */
> > >>> +
> > >>> +/*
> > >>> + * Move preceding (L3) headers down to remove ESP header and IV.
> > >>> + */
> > >> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths.
> > > We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len.
> > > But apart from that for transport mode we have to move actual packet headers.
> > > Let say for inbound we have to get rid of ESP header (which is after IP header),
> > > but preserve IP header, so we moving L2/L3 headers down, overwriting ESP header.
> > ok got your point
> > >> I believe these adjustments are happening in the mbuf itself.
> > >> Moreover these APIs are not specific to esp headers.
> > > I didn't get your last sentence: that function is used to remove esp header
> > > (see above) - that's why I named it that way.
> > These can be used to remove any header and not specifically esp. So this
> > API could be generic in rte_mbuf.
> 
> That function has nothing to do with mbuf in general.
> It just copies bytes between overlapping in certain way buffers
> (src.start < dst.start < src.end < dst.end).
> Right now it is very primitive - copies on byte at a time in
> descending order.
> Wrote it just to avoid using memmove(). 
> I don't think there is any point to have such dummy function in the lib/eal.
> 
> > >
> > >>> +static inline void
> > >>> +remove_esph(char *np, char *op, uint32_t hlen)
> > >>> +{
> > >>> +	uint32_t i;
> > >>> +
> > >>> +	for (i = hlen; i-- != 0; np[i] = op[i])
> > >>> +		;
> > >>> +}
> > >>> +
> > >>> +/*
> > >
> > >>> +
> > >>> +/* update original and new ip header fields for tunnel case */
> > >>> +static inline void
> > >>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> > >>> +		uint32_t l2len, rte_be16_t pid)
> > >>> +{
> > >>> +	struct ipv4_hdr *v4h;
> > >>> +	struct ipv6_hdr *v6h;
> > >>> +
> > >>> +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
> > >>> +		v4h = p;
> > >>> +		v4h->packet_id = pid;
> > >>> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> > >> where are we updating the rest of the fields, like ttl, checksum, ip
> > >> addresses, etc
> > > TTL, ip addresses and other fileds supposed to be setuped by user
> > > and provided via rte_ipsec_sa_init():
> > > struct rte_ipsec_sa_prm.tun.hdr  should contain prepared template
> > > for L3(and L2 if user wants to) header.
> > > Checksum calculation is not done inside the lib right now -
> > > it is a user responsibility to caclucate/set it after librte_ipsec
> > > finishes processing the packet.
> > I believe static fields are updated during sa init but some fields like
> > ttl and checksum,
> > can be updated in the library itself which is updated for every packet.
> > (https://tools.ietf.org/html/rfc1624)
> 
> About checksum - there is no point to calculate cksum it in the lib,
> as user may choose to use HW chksum offload.
> All other libraries ip_frag, GSO, etc. leave it to the user,
> I don't see why ipsec should be different here.
> About TTL and other fields - I suppose you refer to:
> https://tools.ietf.org/html/rfc4301#section-5.1.2
> Header Construction for Tunnel Mode
> right?
> Surely that has to be supported, one way or the other,
> but we don't plan to implement it in 19.02.
> Current plan to add it in 19.05, if time permits.
> 
> > >
> > >>> +	} else {
> > >>> +		v6h = p;
> > >>> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
> > >>> +				sizeof(*v6h));
> > >>> +	}
> > >>> +}
> > >>> +
> > >>> +#endif /* _IPH_H_ */
> > >>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
> > >>> index 1935f6e30..6e18c34eb 100644
> > >>> --- a/lib/librte_ipsec/ipsec_sqn.h
> > >>> +++ b/lib/librte_ipsec/ipsec_sqn.h
> > >>> @@ -15,6 +15,45 @@
> > >>>
> > >>>    #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
> > >>>
> > >>> +/*
> > >>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
> > >>> + */
> > >>> +static inline rte_be32_t
> > >>> +sqn_hi32(rte_be64_t sqn)
> > >>> +{
> > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> > >>> +	return (sqn >> 32);
> > >>> +#else
> > >>> +	return sqn;
> > >>> +#endif
> > >>> +}
> > >>> +
> > >>> +/*
> > >>> + * gets SQN.low32 bits, SQN supposed to be in network byte order.
> > >>> + */
> > >>> +static inline rte_be32_t
> > >>> +sqn_low32(rte_be64_t sqn)
> > >>> +{
> > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> > >>> +	return sqn;
> > >>> +#else
> > >>> +	return (sqn >> 32);
> > >>> +#endif
> > >>> +}
> > >>> +
> > >>> +/*
> > >>> + * gets SQN.low16 bits, SQN supposed to be in network byte order.
> > >>> + */
> > >>> +static inline rte_be16_t
> > >>> +sqn_low16(rte_be64_t sqn)
> > >>> +{
> > >>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> > >>> +	return sqn;
> > >>> +#else
> > >>> +	return (sqn >> 48);
> > >>> +#endif
> > >>> +}
> > >>> +
> > >> shouldn't we move these seq number APIs in rte_esp.h and make them generic
> > > It could be done, but who will use them except librte_ipsec?
> > Whoever uses rte_esp.h and not use ipsec lib. The intent of rte_esp.h is
> > just for that only, otherwise we don't need rte_esp.h, we can have the
> > content of rte_esp.h in ipsec itself.
> 
> Again these functions are used just inside the lib to help avoid
> extra byteswapping during crypto-data/packet header constructions.
> I don't see how they will be useful in general. 
> Sure, if there will be demand from users in future - we can move them,
> but right now I don't think that would happen. 

I am not an expert in IPsec, but in general it is better to offer modular
code, so that the very basic pieces can be reused and an alternative can be
implemented at a higher level.
That's why I would be in favor of keeping protocol definitions and checksum
helpers in rte_net, as is done for TCP.
How modular we want to be is a difficult question, a matter of tradeoff.
  
Akhil Goyal Dec. 21, 2018, 2:51 p.m. UTC | #6
On 12/21/2018 7:57 PM, Ananyev, Konstantin wrote:
>>>>> + */
>>>>> +
>>>>> +/*
>>>>> + * Move preceding (L3) headers down to remove ESP header and IV.
>>>>> + */
>>>> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths.
>>> We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len.
>>> But apart from that for transport mode we have to move actual packet headers.
>>> Let say for inbound we have to get rid of ESP header (which is after IP header),
>>> but preserve IP header, so we moving L2/L3 headers down, overwriting ESP header.
>> ok got your point
>>>> I believe these adjustments are happening in the mbuf itself.
>>>> Moreover these APIs are not specific to esp headers.
>>> I didn't get your last sentence: that function is used to remove esp header
>>> (see above) - that's why I named it that way.
>> These can be used to remove any header and not specifically esp. So this
>> API could be generic in rte_mbuf.
> That function has nothing to do with mbuf in general.
> It just copies bytes between overlapping in certain way buffers
> (src.start < dst.start < src.end < dst.end).
> Right now it is very primitive - copies on byte at a time in
> descending order.
> Wrote it just to avoid using memmove().
> I don't think there is any point to have such dummy function in the lib/eal.
If this is better than memmove(), then it is probably a candidate for a
function in the lib.
I think Thomas/Olivier can comment better on this
>
>>>>> +static inline void
>>>>> +remove_esph(char *np, char *op, uint32_t hlen)
>>>>> +{
>>>>> +	uint32_t i;
>>>>> +
>>>>> +	for (i = hlen; i-- != 0; np[i] = op[i])
>>>>> +		;
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> +
>>>>> +/* update original and new ip header fields for tunnel case */
>>>>> +static inline void
>>>>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
>>>>> +		uint32_t l2len, rte_be16_t pid)
>>>>> +{
>>>>> +	struct ipv4_hdr *v4h;
>>>>> +	struct ipv6_hdr *v6h;
>>>>> +
>>>>> +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
>>>>> +		v4h = p;
>>>>> +		v4h->packet_id = pid;
>>>>> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
>>>> where are we updating the rest of the fields, like ttl, checksum, ip
>>>> addresses, etc
>>> TTL, ip addresses and other fileds supposed to be setuped by user
>>> and provided via rte_ipsec_sa_init():
>>> struct rte_ipsec_sa_prm.tun.hdr  should contain prepared template
>>> for L3(and L2 if user wants to) header.
>>> Checksum calculation is not done inside the lib right now -
>>> it is a user responsibility to caclucate/set it after librte_ipsec
>>> finishes processing the packet.
>> I believe static fields are updated during sa init but some fields like
>> ttl and checksum,
>> can be updated in the library itself which is updated for every packet.
>> (https://tools.ietf.org/html/rfc1624)
> About checksum - there is no point to calculate cksum it in the lib,
> as user may choose to use HW chksum offload.
> All other libraries ip_frag, GSO, etc. leave it to the user,
> I don't see why ipsec should be different here.
> About TTL and other fields - I suppose you refer to:
> https://tools.ietf.org/html/rfc4301#section-5.1.2
> Header Construction for Tunnel Mode
> right?
> Surely that has to be supported, one way or the other,
> but we don't plan to implement it in 19.02.
> Current plan to add it in 19.05, if time permits.
I am not talking about the outer IP checksum. Sorry, the placement of the 
comment was not quite right. But I do not see that happening.
My question is: will the function ipip_outbound in ipsec-secgw be called 
from the application, or will it be moved inside the library?
I believe this should be inside the lib.


>>>>> +	} else {
>>>>> +		v6h = p;
>>>>> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
>>>>> +				sizeof(*v6h));
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +#endif /* _IPH_H_ */
>>>>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
>>>>> index 1935f6e30..6e18c34eb 100644
>>>>> --- a/lib/librte_ipsec/ipsec_sqn.h
>>>>> +++ b/lib/librte_ipsec/ipsec_sqn.h
>>>>> @@ -15,6 +15,45 @@
>>>>>
>>>>>     #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
>>>>>
>>>>> +/*
>>>>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
>>>>> + */
>>>>> +static inline rte_be32_t
>>>>> +sqn_hi32(rte_be64_t sqn)
>>>>> +{
>>>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>>>>> +	return (sqn >> 32);
>>>>> +#else
>>>>> +	return sqn;
>>>>> +#endif
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * gets SQN.low32 bits, SQN supposed to be in network byte order.
>>>>> + */
>>>>> +static inline rte_be32_t
>>>>> +sqn_low32(rte_be64_t sqn)
>>>>> +{
>>>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>>>>> +	return sqn;
>>>>> +#else
>>>>> +	return (sqn >> 32);
>>>>> +#endif
>>>>> +}
>>>>> +
>>>>> +/*
>>>>> + * gets SQN.low16 bits, SQN supposed to be in network byte order.
>>>>> + */
>>>>> +static inline rte_be16_t
>>>>> +sqn_low16(rte_be64_t sqn)
>>>>> +{
>>>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
>>>>> +	return sqn;
>>>>> +#else
>>>>> +	return (sqn >> 48);
>>>>> +#endif
>>>>> +}
>>>>> +
>>>> shouldn't we move these seq number APIs in rte_esp.h and make them generic
>>> It could be done, but who will use them except librte_ipsec?
>> Whoever uses rte_esp.h and not use ipsec lib. The intent of rte_esp.h is
>> just for that only, otherwise we don't need rte_esp.h, we can have the
>> content of rte_esp.h in ipsec itself.
> Again these functions are used just inside the lib to help avoid
> extra byteswapping during crypto-data/packet header constructions.
Agreed, my point is: why add a new file for managing seq numbering in 
ESP headers, when this can easily be moved to rte_esp.h?

> I don't see how they will be useful in general.
> Sure, if there will be demand from users in future - we can move them,
> but right now I don't think that would happen.
In that case we can do away with esp.h as well and move its content into 
this new file, and if users later need it separately, then we move it.
> Konstantin
  
Ananyev, Konstantin Dec. 21, 2018, 3:16 p.m. UTC | #7
> -----Original Message-----
> From: Akhil Goyal [mailto:akhil.goyal@nxp.com]
> Sent: Friday, December 21, 2018 2:51 PM
> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; dev@dpdk.org
> Cc: Thomas Monjalon <thomas@monjalon.net>; Awal, Mohammad Abdul <mohammad.abdul.awal@intel.com>; olivier.matz@6wind.com
> Subject: Re: [dpdk-dev] [PATCH v4 06/10] ipsec: implement SA data-path API
> 
> 
> 
> On 12/21/2018 7:57 PM, Ananyev, Konstantin wrote:
> >>>>> + */
> >>>>> +
> >>>>> +/*
> >>>>> + * Move preceding (L3) headers down to remove ESP header and IV.
> >>>>> + */
> >>>> why cant we use rte_mbuf APIs to append/prepend/trim/adjust lengths.
> >>> We do use rte_mbuf append/trim, etc. adjust mbuf's data_ofs and data_len.
> >>> But apart from that for transport mode we have to move actual packet headers.
> >>> Let say for inbound we have to get rid of ESP header (which is after IP header),
> >>> but preserve IP header, so we moving L2/L3 headers down, overwriting ESP header.
> >> ok got your point
> >>>> I believe these adjustments are happening in the mbuf itself.
> >>>> Moreover these APIs are not specific to esp headers.
> >>> I didn't get your last sentence: that function is used to remove esp header
> >>> (see above) - that's why I named it that way.
> >> These can be used to remove any header and not specifically esp. So this
> >> API could be generic in rte_mbuf.
> > That function has nothing to do with mbuf in general.
> > It just copies bytes between overlapping in certain way buffers
> > (src.start < dst.start < src.end < dst.end).
> > Right now it is very primitive - copies on byte at a time in
> > descending order.
> > Wrote it just to avoid using memmove().
> > I don't think there is any point to have such dummy function in the lib/eal.
> If this is better than memmove, then probably it is a candidate to a
> function in lib.

If it were something really smart, I would try to push it into the EAL myself.
But it is a dumb for() loop, nothing more.

> I think Thomas/ Olivier can better comment on this
> >
> >>>>> +static inline void
> >>>>> +remove_esph(char *np, char *op, uint32_t hlen)
> >>>>> +{
> >>>>> +	uint32_t i;
> >>>>> +
> >>>>> +	for (i = hlen; i-- != 0; np[i] = op[i])
> >>>>> +		;
> >>>>> +}
> >>>>> +
> >>>>> +/*
> >>>>> +
> >>>>> +/* update original and new ip header fields for tunnel case */
> >>>>> +static inline void
> >>>>> +update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
> >>>>> +		uint32_t l2len, rte_be16_t pid)
> >>>>> +{
> >>>>> +	struct ipv4_hdr *v4h;
> >>>>> +	struct ipv6_hdr *v6h;
> >>>>> +
> >>>>> +	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
> >>>>> +		v4h = p;
> >>>>> +		v4h->packet_id = pid;
> >>>>> +		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
> >>>> where are we updating the rest of the fields, like ttl, checksum, ip
> >>>> addresses, etc
> >>> TTL, ip addresses and other fileds supposed to be setuped by user
> >>> and provided via rte_ipsec_sa_init():
> >>> struct rte_ipsec_sa_prm.tun.hdr  should contain prepared template
> >>> for L3(and L2 if user wants to) header.
> >>> Checksum calculation is not done inside the lib right now -
> >>> it is a user responsibility to caclucate/set it after librte_ipsec
> >>> finishes processing the packet.
> >> I believe static fields are updated during sa init but some fields like
> >> ttl and checksum,
> >> can be updated in the library itself which is updated for every packet.
> >> (https://tools.ietf.org/html/rfc1624)
> > About checksum - there is no point to calculate cksum it in the lib,
> > as user may choose to use HW chksum offload.
> > All other libraries ip_frag, GSO, etc. leave it to the user,
> > I don't see why ipsec should be different here.
> > About TTL and other fields - I suppose you refer to:
> > https://tools.ietf.org/html/rfc4301#section-5.1.2
> > Header Construction for Tunnel Mode
> > right?
> > Surely that has to be supported, one way or the other,
> > but we don't plan to implement it in 19.02.
> > Current plan to add it in 19.05, if time permits.
> I am not talking about the outer ip checksum.
> Sorry the placement of the
> comment was not quite right. But I do not see that happening.
> My question is will the function ipip_outbound in ipsec-secgw called
> from the application or will it be moved inside the library.
> I believe this should be inside the lib.

I think the same - we probably need to support all the header updates
described in the RFC inside the process/prepare lib functions,
or at least provide a separate function for the user to perform them.
Though, as I said above, it is definitely not in the 19.02 scope.
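
(For illustration only, a minimal sketch of the kind of RFC 4301 5.1.2
outer-header fixup being discussed, for the IPv4-in-IPv4 case; the helper
is hypothetical, assumes plain IPv4 headers, and deliberately leaves out
the DF-bit and RFC 6040 ECN rules:)

#include <rte_ip.h>

/*
 * hypothetical future helper (not part of the patch): copy DSCP from the
 * inner IPv4 header into the already prepared outer IPv4 header and set
 * a fresh outer TTL, per the tunnel-mode construction rules.
 */
static inline void
tun_outer_hdr_fixup(struct ipv4_hdr *outer, const struct ipv4_hdr *inner,
	uint8_t ttl)
{
	outer->type_of_service = (outer->type_of_service & 0x3) |
		(inner->type_of_service & 0xfc);
	outer->time_to_live = ttl;
}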

> 
> 
> >>>>> +	} else {
> >>>>> +		v6h = p;
> >>>>> +		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
> >>>>> +				sizeof(*v6h));
> >>>>> +	}
> >>>>> +}
> >>>>> +
> >>>>> +#endif /* _IPH_H_ */
> >>>>> diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
> >>>>> index 1935f6e30..6e18c34eb 100644
> >>>>> --- a/lib/librte_ipsec/ipsec_sqn.h
> >>>>> +++ b/lib/librte_ipsec/ipsec_sqn.h
> >>>>> @@ -15,6 +15,45 @@
> >>>>>
> >>>>>     #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
> >>>>>
> >>>>> +/*
> >>>>> + * gets SQN.hi32 bits, SQN supposed to be in network byte order.
> >>>>> + */
> >>>>> +static inline rte_be32_t
> >>>>> +sqn_hi32(rte_be64_t sqn)
> >>>>> +{
> >>>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> >>>>> +	return (sqn >> 32);
> >>>>> +#else
> >>>>> +	return sqn;
> >>>>> +#endif
> >>>>> +}
> >>>>> +
> >>>>> +/*
> >>>>> + * gets SQN.low32 bits, SQN supposed to be in network byte order.
> >>>>> + */
> >>>>> +static inline rte_be32_t
> >>>>> +sqn_low32(rte_be64_t sqn)
> >>>>> +{
> >>>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> >>>>> +	return sqn;
> >>>>> +#else
> >>>>> +	return (sqn >> 32);
> >>>>> +#endif
> >>>>> +}
> >>>>> +
> >>>>> +/*
> >>>>> + * gets SQN.low16 bits, SQN supposed to be in network byte order.
> >>>>> + */
> >>>>> +static inline rte_be16_t
> >>>>> +sqn_low16(rte_be64_t sqn)
> >>>>> +{
> >>>>> +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
> >>>>> +	return sqn;
> >>>>> +#else
> >>>>> +	return (sqn >> 48);
> >>>>> +#endif
> >>>>> +}
> >>>>> +
> >>>> shouldn't we move these seq number APIs in rte_esp.h and make them generic
> >>> It could be done, but who will use them except librte_ipsec?
> >> Whoever uses rte_esp.h and not use ipsec lib. The intent of rte_esp.h is
> >> just for that only, otherwise we don't need rte_esp.h, we can have the
> >> content of rte_esp.h in ipsec itself.
> > Again these functions are used just inside the lib to help avoid
> > extra byteswapping during crypto-data/packet header constructions.
> Agreed, my point is why adding a new file for managing seq numbering in
> esp headers, when this can be easily moved to rte_esp.h.
> 
> > I don't see how they will be useful in general.
> > Sure, if there will be demand from users in future - we can move them,
> > but right now I don't think that would happen.
> In that case we can get away with esp.h as well and move that in this
> new file and see if users need it separately, then we move it.

esp.h already exists and is used in several other places: 
find lib drivers -type f | xargs grep '<rte_esp.h>' | grep include
lib/librte_pipeline/rte_table_action.c:#include <rte_esp.h>
lib/librte_ethdev/rte_flow.h:#include <rte_esp.h>

Konstantin
  

Patch

diff --git a/lib/librte_ipsec/crypto.h b/lib/librte_ipsec/crypto.h
new file mode 100644
index 000000000..61f5c1433
--- /dev/null
+++ b/lib/librte_ipsec/crypto.h
@@ -0,0 +1,123 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _CRYPTO_H_
+#define _CRYPTO_H_
+
+/**
+ * @file crypto.h
+ * Contains crypto specific functions/structures/macros used internally
+ * by ipsec library.
+ */
+
+ /*
+  * AES-GCM devices have some specific requirements for IV and AAD formats.
+  * Ideally that should be done by the driver itself.
+  */
+
+struct aead_gcm_iv {
+	uint32_t salt;
+	uint64_t iv;
+	uint32_t cnt;
+} __attribute__((packed));
+
+struct aead_gcm_aad {
+	uint32_t spi;
+	/*
+	 * RFC 4106, section 5:
+	 * Two formats of the AAD are defined:
+	 * one for 32-bit sequence numbers, and one for 64-bit ESN.
+	 */
+	union {
+		uint32_t u32[2];
+		uint64_t u64;
+	} sqn;
+	uint32_t align0; /* align to 16B boundary */
+} __attribute__((packed));
+
+struct gcm_esph_iv {
+	struct esp_hdr esph;
+	uint64_t iv;
+} __attribute__((packed));
+
+
+static inline void
+aead_gcm_iv_fill(struct aead_gcm_iv *gcm, uint64_t iv, uint32_t salt)
+{
+	gcm->salt = salt;
+	gcm->iv = iv;
+	gcm->cnt = rte_cpu_to_be_32(1);
+}
+
+/*
+ * RFC 4106, 5 AAD Construction
+ * spi and sqn should already be converted into network byte order.
+ * Make sure that not used bytes are zeroed.
+ */
+static inline void
+aead_gcm_aad_fill(struct aead_gcm_aad *aad, rte_be32_t spi, rte_be64_t sqn,
+	int esn)
+{
+	aad->spi = spi;
+	if (esn)
+		aad->sqn.u64 = sqn;
+	else {
+		aad->sqn.u32[0] = sqn_low32(sqn);
+		aad->sqn.u32[1] = 0;
+	}
+	aad->align0 = 0;
+}
+
+static inline void
+gen_iv(uint64_t iv[IPSEC_MAX_IV_QWORD], rte_be64_t sqn)
+{
+	iv[0] = sqn;
+	iv[1] = 0;
+}
+
+/*
+ * from RFC 4303 3.3.2.1.4:
+ * If the ESN option is enabled for the SA, the high-order 32
+ * bits of the sequence number are appended after the Next Header field
+ * for purposes of this computation, but are not transmitted.
+ */
+
+/*
+ * Helper function that moves ICV by 4B below, and inserts SQN.hibits.
+ * icv parameter points to the new start of ICV.
+ */
+static inline void
+insert_sqh(uint32_t sqh, void *picv, uint32_t icv_len)
+{
+	uint32_t *icv;
+	int32_t i;
+
+	RTE_ASSERT(icv_len % sizeof(uint32_t) == 0);
+
+	icv = picv;
+	icv_len = icv_len / sizeof(uint32_t);
+	for (i = icv_len; i-- != 0; icv[i] = icv[i - 1])
+		;
+
+	icv[i] = sqh;
+}
+
+/*
+ * Helper function that moves ICV by 4B up, and removes SQN.hibits.
+ * icv parameter points to the new start of ICV.
+ */
+static inline void
+remove_sqh(void *picv, uint32_t icv_len)
+{
+	uint32_t i, *icv;
+
+	RTE_ASSERT(icv_len % sizeof(uint32_t) == 0);
+
+	icv = picv;
+	icv_len = icv_len / sizeof(uint32_t);
+	for (i = 0; i != icv_len; i++)
+		icv[i] = icv[i + 1];
+}
+
+#endif /* _CRYPTO_H_ */
diff --git a/lib/librte_ipsec/iph.h b/lib/librte_ipsec/iph.h
new file mode 100644
index 000000000..3fd93016d
--- /dev/null
+++ b/lib/librte_ipsec/iph.h
@@ -0,0 +1,84 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _IPH_H_
+#define _IPH_H_
+
+/**
+ * @file iph.h
+ * Contains functions/structures/macros to manipulate IPv4/IPv6 headers
+ * used internally by ipsec library.
+ */
+
+/*
+ * Move preceding (L3) headers down to remove ESP header and IV.
+ */
+static inline void
+remove_esph(char *np, char *op, uint32_t hlen)
+{
+	uint32_t i;
+
+	for (i = hlen; i-- != 0; np[i] = op[i])
+		;
+}
+
+/*
+ * Move preceding (L3) headers up to free space for ESP header and IV.
+ */
+static inline void
+insert_esph(char *np, char *op, uint32_t hlen)
+{
+	uint32_t i;
+
+	for (i = 0; i != hlen; i++)
+		np[i] = op[i];
+}
+
+/* update original ip header fields for transport case */
+static inline int
+update_trs_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
+		uint32_t l2len, uint32_t l3len, uint8_t proto)
+{
+	struct ipv4_hdr *v4h;
+	struct ipv6_hdr *v6h;
+	int32_t rc;
+
+	if ((sa->type & RTE_IPSEC_SATP_IPV_MASK) == RTE_IPSEC_SATP_IPV4) {
+		v4h = p;
+		rc = v4h->next_proto_id;
+		v4h->next_proto_id = proto;
+		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
+	} else if (l3len == sizeof(*v6h)) {
+		v6h = p;
+		rc = v6h->proto;
+		v6h->proto = proto;
+		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
+				sizeof(*v6h));
+	/* need to add support for IPv6 with options */
+	} else
+		rc = -ENOTSUP;
+
+	return rc;
+}
+
+/* update original and new ip header fields for tunnel case */
+static inline void
+update_tun_l3hdr(const struct rte_ipsec_sa *sa, void *p, uint32_t plen,
+		uint32_t l2len, rte_be16_t pid)
+{
+	struct ipv4_hdr *v4h;
+	struct ipv6_hdr *v6h;
+
+	if (sa->type & RTE_IPSEC_SATP_MODE_TUNLV4) {
+		v4h = p;
+		v4h->packet_id = pid;
+		v4h->total_length = rte_cpu_to_be_16(plen - l2len);
+	} else {
+		v6h = p;
+		v6h->payload_len = rte_cpu_to_be_16(plen - l2len -
+				sizeof(*v6h));
+	}
+}
+
+#endif /* _IPH_H_ */
diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h
index 1935f6e30..6e18c34eb 100644
--- a/lib/librte_ipsec/ipsec_sqn.h
+++ b/lib/librte_ipsec/ipsec_sqn.h
@@ -15,6 +15,45 @@ 
 
 #define IS_ESN(sa)	((sa)->sqn_mask == UINT64_MAX)
 
+/*
+ * gets SQN.hi32 bits, SQN supposed to be in network byte order.
+ */
+static inline rte_be32_t
+sqn_hi32(rte_be64_t sqn)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	return (sqn >> 32);
+#else
+	return sqn;
+#endif
+}
+
+/*
+ * gets SQN.low32 bits, SQN supposed to be in network byte order.
+ */
+static inline rte_be32_t
+sqn_low32(rte_be64_t sqn)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	return sqn;
+#else
+	return (sqn >> 32);
+#endif
+}
+
+/*
+ * gets SQN.low16 bits, SQN supposed to be in network byte order.
+ */
+static inline rte_be16_t
+sqn_low16(rte_be64_t sqn)
+{
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+	return sqn;
+#else
+	return (sqn >> 48);
+#endif
+}
+
 /*
  * for given size, calculate required number of buckets.
  */
@@ -30,6 +69,153 @@  replay_num_bucket(uint32_t wsz)
 	return nb;
 }
 
+/*
+ * According to RFC4303 A2.1, determine the high-order bits of the sequence number.
+ * use 32bit arithmetic inside, return uint64_t.
+ */
+static inline uint64_t
+reconstruct_esn(uint64_t t, uint32_t sqn, uint32_t w)
+{
+	uint32_t th, tl, bl;
+
+	tl = t;
+	th = t >> 32;
+	bl = tl - w + 1;
+
+	/* case A: window is within one sequence number subspace */
+	if (tl >= (w - 1))
+		th += (sqn < bl);
+	/* case B: window spans two sequence number subspaces */
+	else if (th != 0)
+		th -= (sqn >= bl);
+
+	/* return constructed sequence with proper high-order bits */
+	return (uint64_t)th << 32 | sqn;
+}
+
+/**
+ * Perform the replay checking.
+ *
+ * struct rte_ipsec_sa contains the window and window related parameters,
+ * such as the window size, bitmask, and the last acknowledged sequence number.
+ *
+ * Based on RFC 6479.
+ * Blocks are 64 bits unsigned integers
+ */
+static inline int32_t
+esn_inb_check_sqn(const struct replay_sqn *rsn, const struct rte_ipsec_sa *sa,
+	uint64_t sqn)
+{
+	uint32_t bit, bucket;
+
+	/* replay not enabled */
+	if (sa->replay.win_sz == 0)
+		return 0;
+
+	/* seq is larger than lastseq */
+	if (sqn > rsn->sqn)
+		return 0;
+
+	/* seq is outside window */
+	if (sqn == 0 || sqn + sa->replay.win_sz < rsn->sqn)
+		return -EINVAL;
+
+	/* seq is inside the window */
+	bit = sqn & WINDOW_BIT_LOC_MASK;
+	bucket = (sqn >> WINDOW_BUCKET_BITS) & sa->replay.bucket_index_mask;
+
+	/* already seen packet */
+	if (rsn->window[bucket] & ((uint64_t)1 << bit))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * For outbound SA perform the sequence number update.
+ */
+static inline uint64_t
+esn_outb_update_sqn(struct rte_ipsec_sa *sa, uint32_t *num)
+{
+	uint64_t n, s, sqn;
+
+	n = *num;
+	sqn = sa->sqn.outb + n;
+	sa->sqn.outb = sqn;
+
+	/* overflow */
+	if (sqn > sa->sqn_mask) {
+		s = sqn - sa->sqn_mask;
+		*num = (s < n) ?  n - s : 0;
+	}
+
+	return sqn - n;
+}
+
+/**
+ * For inbound SA perform the sequence number and replay window update.
+ */
+static inline int32_t
+esn_inb_update_sqn(struct replay_sqn *rsn, const struct rte_ipsec_sa *sa,
+	uint64_t sqn)
+{
+	uint32_t bit, bucket, last_bucket, new_bucket, diff, i;
+
+	/* replay not enabled */
+	if (sa->replay.win_sz == 0)
+		return 0;
+
+	/* handle ESN */
+	if (IS_ESN(sa))
+		sqn = reconstruct_esn(rsn->sqn, sqn, sa->replay.win_sz);
+
+	/* seq is outside window */
+	if (sqn == 0 || sqn + sa->replay.win_sz < rsn->sqn)
+		return -EINVAL;
+
+	/* update the bit */
+	bucket = (sqn >> WINDOW_BUCKET_BITS);
+
+	/* check if the seq is within the range */
+	if (sqn > rsn->sqn) {
+		last_bucket = rsn->sqn >> WINDOW_BUCKET_BITS;
+		diff = bucket - last_bucket;
+		/* seq is way after the range of WINDOW_SIZE */
+		if (diff > sa->replay.nb_bucket)
+			diff = sa->replay.nb_bucket;
+
+		for (i = 0; i != diff; i++) {
+			new_bucket = (i + last_bucket + 1) &
+				sa->replay.bucket_index_mask;
+			rsn->window[new_bucket] = 0;
+		}
+		rsn->sqn = sqn;
+	}
+
+	bucket &= sa->replay.bucket_index_mask;
+	bit = (uint64_t)1 << (sqn & WINDOW_BIT_LOC_MASK);
+
+	/* already seen packet */
+	if (rsn->window[bucket] & bit)
+		return -EINVAL;
+
+	rsn->window[bucket] |= bit;
+	return 0;
+}
+
+/**
+ * To achieve ability to do multiple readers single writer for
+ * SA replay window information and sequence number (RSN)
+ * basic RCU schema is used:
+ * SA have 2 copies of RSN (one for readers, another for writers).
+ * Each RSN contains a rwlock that has to be grabbed (for read/write)
+ * to avoid races between readers and writer.
+ * Writer is responsible to make a copy or reader RSN, update it
+ * and mark newly updated RSN as readers one.
+ * That approach is intended to minimize contention and cache sharing
+ * between writer and readers.
+ */
+
 /**
  * Based on number of buckets calculated required size for the
  * structure that holds replay window and sequence number (RSN) information.
diff --git a/lib/librte_ipsec/pad.h b/lib/librte_ipsec/pad.h
new file mode 100644
index 000000000..2f5ccd00e
--- /dev/null
+++ b/lib/librte_ipsec/pad.h
@@ -0,0 +1,45 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _PAD_H_
+#define _PAD_H_
+
+#define IPSEC_MAX_PAD_SIZE	UINT8_MAX
+
+static const uint8_t esp_pad_bytes[IPSEC_MAX_PAD_SIZE] = {
+	1, 2, 3, 4, 5, 6, 7, 8,
+	9, 10, 11, 12, 13, 14, 15, 16,
+	17, 18, 19, 20, 21, 22, 23, 24,
+	25, 26, 27, 28, 29, 30, 31, 32,
+	33, 34, 35, 36, 37, 38, 39, 40,
+	41, 42, 43, 44, 45, 46, 47, 48,
+	49, 50, 51, 52, 53, 54, 55, 56,
+	57, 58, 59, 60, 61, 62, 63, 64,
+	65, 66, 67, 68, 69, 70, 71, 72,
+	73, 74, 75, 76, 77, 78, 79, 80,
+	81, 82, 83, 84, 85, 86, 87, 88,
+	89, 90, 91, 92, 93, 94, 95, 96,
+	97, 98, 99, 100, 101, 102, 103, 104,
+	105, 106, 107, 108, 109, 110, 111, 112,
+	113, 114, 115, 116, 117, 118, 119, 120,
+	121, 122, 123, 124, 125, 126, 127, 128,
+	129, 130, 131, 132, 133, 134, 135, 136,
+	137, 138, 139, 140, 141, 142, 143, 144,
+	145, 146, 147, 148, 149, 150, 151, 152,
+	153, 154, 155, 156, 157, 158, 159, 160,
+	161, 162, 163, 164, 165, 166, 167, 168,
+	169, 170, 171, 172, 173, 174, 175, 176,
+	177, 178, 179, 180, 181, 182, 183, 184,
+	185, 186, 187, 188, 189, 190, 191, 192,
+	193, 194, 195, 196, 197, 198, 199, 200,
+	201, 202, 203, 204, 205, 206, 207, 208,
+	209, 210, 211, 212, 213, 214, 215, 216,
+	217, 218, 219, 220, 221, 222, 223, 224,
+	225, 226, 227, 228, 229, 230, 231, 232,
+	233, 234, 235, 236, 237, 238, 239, 240,
+	241, 242, 243, 244, 245, 246, 247, 248,
+	249, 250, 251, 252, 253, 254, 255,
+};
+
+#endif /* _PAD_H_ */
diff --git a/lib/librte_ipsec/sa.c b/lib/librte_ipsec/sa.c
index e4c5361e7..bb56f42eb 100644
--- a/lib/librte_ipsec/sa.c
+++ b/lib/librte_ipsec/sa.c
@@ -6,9 +6,13 @@ 
 #include <rte_esp.h>
 #include <rte_ip.h>
 #include <rte_errno.h>
+#include <rte_cryptodev.h>
 
 #include "sa.h"
 #include "ipsec_sqn.h"
+#include "crypto.h"
+#include "iph.h"
+#include "pad.h"
 
 /* some helper structures */
 struct crypto_xform {
@@ -207,6 +211,7 @@  esp_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
 		/* RFC 4106 */
 		if (cxf->aead->algo != RTE_CRYPTO_AEAD_AES_GCM)
 			return -EINVAL;
+		sa->aad_len = sizeof(struct aead_gcm_aad);
 		sa->icv_len = cxf->aead->digest_length;
 		sa->iv_ofs = cxf->aead->iv.offset;
 		sa->iv_len = sizeof(uint64_t);
@@ -326,18 +331,1053 @@  rte_ipsec_sa_init(struct rte_ipsec_sa *sa, const struct rte_ipsec_sa_prm *prm,
 	return sz;
 }
 
+static inline void
+mbuf_bulk_copy(struct rte_mbuf *dst[], struct rte_mbuf * const src[],
+	uint32_t num)
+{
+	uint32_t i;
+
+	for (i = 0; i != num; i++)
+		dst[i] = src[i];
+}
+
+static inline void
+lksd_none_cop_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
+{
+	uint32_t i;
+	struct rte_crypto_sym_op *sop;
+
+	for (i = 0; i != num; i++) {
+		sop = cop[i]->sym;
+		cop[i]->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+		cop[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+		cop[i]->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+		sop->m_src = mb[i];
+		__rte_crypto_sym_op_attach_sym_session(sop, ss->crypto.ses);
+	}
+}
+
+static inline void
+esp_outb_cop_prepare(struct rte_crypto_op *cop,
+	const struct rte_ipsec_sa *sa, const uint64_t ivp[IPSEC_MAX_IV_QWORD],
+	const union sym_op_data *icv, uint32_t hlen, uint32_t plen)
+{
+	struct rte_crypto_sym_op *sop;
+	struct aead_gcm_iv *gcm;
+
+	/* fill sym op fields */
+	sop = cop->sym;
+
+	/* AEAD (AES_GCM) case */
+	if (sa->aad_len != 0) {
+		sop->aead.data.offset = sa->ctp.cipher.offset + hlen;
+		sop->aead.data.length = sa->ctp.cipher.length + plen;
+		sop->aead.digest.data = icv->va;
+		sop->aead.digest.phys_addr = icv->pa;
+		sop->aead.aad.data = icv->va + sa->icv_len;
+		sop->aead.aad.phys_addr = icv->pa + sa->icv_len;
+
+		/* fill AAD IV (located inside crypto op) */
+		gcm = rte_crypto_op_ctod_offset(cop, struct aead_gcm_iv *,
+			sa->iv_ofs);
+		aead_gcm_iv_fill(gcm, ivp[0], sa->salt);
+	/* CRYPT+AUTH case */
+	} else {
+		sop->cipher.data.offset = sa->ctp.cipher.offset + hlen;
+		sop->cipher.data.length = sa->ctp.cipher.length + plen;
+		sop->auth.data.offset = sa->ctp.auth.offset + hlen;
+		sop->auth.data.length = sa->ctp.auth.length + plen;
+		sop->auth.digest.data = icv->va;
+		sop->auth.digest.phys_addr = icv->pa;
+	}
+}
+
+static inline int32_t
+esp_outb_tun_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
+	union sym_op_data *icv)
+{
+	uint32_t clen, hlen, l2len, pdlen, pdofs, plen, tlen;
+	struct rte_mbuf *ml;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	char *ph, *pt;
+	uint64_t *iv;
+
+	/* calculate extra header space required */
+	hlen = sa->hdr_len + sa->iv_len + sizeof(*esph);
+
+	/* size of ipsec protected data */
+	l2len = mb->l2_len;
+	plen = mb->pkt_len - mb->l2_len;
+
+	/* number of bytes to encrypt */
+	clen = plen + sizeof(*espt);
+	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
+	/* pad length + esp tail */
+	pdlen = clen - plen;
+	tlen = pdlen + sa->icv_len;
+
+	/* do append and prepend */
+	ml = rte_pktmbuf_lastseg(mb);
+	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
+		return -ENOSPC;
+
+	/* prepend header */
+	ph = rte_pktmbuf_prepend(mb, hlen - l2len);
+	if (ph == NULL)
+		return -ENOSPC;
+
+	/* append tail */
+	pdofs = ml->data_len;
+	ml->data_len += tlen;
+	mb->pkt_len += tlen;
+	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
+
+	/* update pkt l2/l3 len */
+	mb->l2_len = sa->hdr_l3_off;
+	mb->l3_len = sa->hdr_len - sa->hdr_l3_off;
+
+	/* copy tunnel pkt header */
+	rte_memcpy(ph, sa->hdr, sa->hdr_len);
+
+	/* update original and new ip header fields */
+	update_tun_l3hdr(sa, ph + sa->hdr_l3_off, mb->pkt_len, sa->hdr_l3_off,
+			sqn_low16(sqc));
+
+	/* update spi, seqn and iv */
+	esph = (struct esp_hdr *)(ph + sa->hdr_len);
+	iv = (uint64_t *)(esph + 1);
+	rte_memcpy(iv, ivp, sa->iv_len);
+
+	esph->spi = sa->spi;
+	esph->seq = sqn_low32(sqc);
+
+	/* offset for ICV */
+	pdofs += pdlen + sa->sqh_len;
+
+	/* pad length */
+	pdlen -= sizeof(*espt);
+
+	/* copy padding data */
+	rte_memcpy(pt, esp_pad_bytes, pdlen);
+
+	/* update esp trailer */
+	espt = (struct esp_tail *)(pt + pdlen);
+	espt->pad_len = pdlen;
+	espt->next_proto = sa->proto;
+
+	icv->va = rte_pktmbuf_mtod_offset(ml, void *, pdofs);
+	icv->pa = rte_pktmbuf_iova_offset(ml, pdofs);
+
+	return clen;
+}
+
+/*
+ * for pure cryptodev (lookaside none) depending on SA settings,
+ * we might have to write some extra data to the packet.
+ */
+static inline void
+outb_pkt_xprepare(const struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const union sym_op_data *icv)
+{
+	uint32_t *psqh;
+	struct aead_gcm_aad *aad;
+
+	/* insert SQN.hi between ESP trailer and ICV */
+	if (sa->sqh_len != 0) {
+		psqh = (uint32_t *)(icv->va - sa->sqh_len);
+		psqh[0] = sqn_hi32(sqc);
+	}
+
+	/*
+	 * fill IV and AAD fields, if any (aad fields are placed after icv),
+	 * right now we support only one AEAD algorithm: AES-GCM .
+	 */
+	if (sa->aad_len != 0) {
+		aad = (struct aead_gcm_aad *)(icv->va + sa->icv_len);
+		aead_gcm_aad_fill(aad, sa->spi, sqc, IS_ESN(sa));
+	}
+}
+
+static uint16_t
+outb_tun_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	struct rte_crypto_op *cop[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv);
+
+		/* success, setup crypto op */
+		if (rc >= 0) {
+			mb[k] = mb[i];
+			outb_pkt_xprepare(sa, sqc, &icv);
+			esp_outb_cop_prepare(cop[k], sa, iv, &icv, 0, rc);
+			k++;
+		/* failure, put packet into the death-row */
+		} else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	/* update cops */
+	lksd_none_cop_prepare(ss, mb, cop, k);
+
+	 /* copy not prepared mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static inline int32_t
+esp_outb_trs_pkt_prepare(struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const uint64_t ivp[IPSEC_MAX_IV_QWORD], struct rte_mbuf *mb,
+	uint32_t l2len, uint32_t l3len, union sym_op_data *icv)
+{
+	uint8_t np;
+	uint32_t clen, hlen, pdlen, pdofs, plen, tlen, uhlen;
+	struct rte_mbuf *ml;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	char *ph, *pt;
+	uint64_t *iv;
+
+	uhlen = l2len + l3len;
+	plen = mb->pkt_len - uhlen;
+
+	/* calculate extra header space required */
+	hlen = sa->iv_len + sizeof(*esph);
+
+	/* number of bytes to encrypt */
+	clen = plen + sizeof(*espt);
+	clen = RTE_ALIGN_CEIL(clen, sa->pad_align);
+
+	/* pad length + esp tail */
+	pdlen = clen - plen;
+	tlen = pdlen + sa->icv_len;
+
+	/* do append and insert */
+	ml = rte_pktmbuf_lastseg(mb);
+	if (tlen + sa->sqh_len + sa->aad_len > rte_pktmbuf_tailroom(ml))
+		return -ENOSPC;
+
+	/* prepend space for ESP header */
+	ph = rte_pktmbuf_prepend(mb, hlen);
+	if (ph == NULL)
+		return -ENOSPC;
+
+	/* append tail */
+	pdofs = ml->data_len;
+	ml->data_len += tlen;
+	mb->pkt_len += tlen;
+	pt = rte_pktmbuf_mtod_offset(ml, typeof(pt), pdofs);
+
+	/* shift L2/L3 headers */
+	insert_esph(ph, ph + hlen, uhlen);
+
+	/* update ip header fields */
+	np = update_trs_l3hdr(sa, ph + l2len, mb->pkt_len, l2len, l3len,
+			IPPROTO_ESP);
+
+	/* update spi, seqn and iv */
+	esph = (struct esp_hdr *)(ph + uhlen);
+	iv = (uint64_t *)(esph + 1);
+	rte_memcpy(iv, ivp, sa->iv_len);
+
+	esph->spi = sa->spi;
+	esph->seq = sqn_low32(sqc);
+
+	/* offset for ICV */
+	pdofs += pdlen + sa->sqh_len;
+
+	/* pad length */
+	pdlen -= sizeof(*espt);
+
+	/* copy padding data */
+	rte_memcpy(pt, esp_pad_bytes, pdlen);
+
+	/* update esp trailer */
+	espt = (struct esp_tail *)(pt + pdlen);
+	espt->pad_len = pdlen;
+	espt->next_proto = np;
+
+	icv->va = rte_pktmbuf_mtod_offset(ml, void *, pdofs);
+	icv->pa = rte_pktmbuf_iova_offset(ml, pdofs);
+
+	return clen;
+}
+
+static uint16_t
+outb_trs_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	struct rte_crypto_op *cop[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n, l2, l3;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		l2 = mb[i]->l2_len;
+		l3 = mb[i]->l3_len;
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_trs_pkt_prepare(sa, sqc, iv, mb[i],
+				l2, l3, &icv);
+
+		/* success, setup crypto op */
+		if (rc >= 0) {
+			mb[k] = mb[i];
+			outb_pkt_xprepare(sa, sqc, &icv);
+			esp_outb_cop_prepare(cop[k], sa, iv, &icv, l2 + l3, rc);
+			k++;
+		/* failure, put packet into the death-row */
+		} else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	/* update cops */
+	lksd_none_cop_prepare(ss, mb, cop, k);
+
+	/* copy not prepared mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static inline int32_t
+esp_inb_tun_cop_prepare(struct rte_crypto_op *cop,
+	const struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
+	const union sym_op_data *icv, uint32_t pofs, uint32_t plen)
+{
+	struct rte_crypto_sym_op *sop;
+	struct aead_gcm_iv *gcm;
+	uint64_t *ivc, *ivp;
+	uint32_t clen;
+
+	clen = plen - sa->ctp.cipher.length;
+	if ((int32_t)clen < 0 || (clen & (sa->pad_align - 1)) != 0)
+		return -EINVAL;
+
+	/* fill sym op fields */
+	sop = cop->sym;
+
+	/* AEAD (AES_GCM) case */
+	if (sa->aad_len != 0) {
+		sop->aead.data.offset = pofs + sa->ctp.cipher.offset;
+		sop->aead.data.length = clen;
+		sop->aead.digest.data = icv->va;
+		sop->aead.digest.phys_addr = icv->pa;
+		sop->aead.aad.data = icv->va + sa->icv_len;
+		sop->aead.aad.phys_addr = icv->pa + sa->icv_len;
+
+		/* fill AAD IV (located inside crypto op) */
+		gcm = rte_crypto_op_ctod_offset(cop, struct aead_gcm_iv *,
+			sa->iv_ofs);
+		ivp = rte_pktmbuf_mtod_offset(mb, uint64_t *,
+			pofs + sizeof(struct esp_hdr));
+		aead_gcm_iv_fill(gcm, ivp[0], sa->salt);
+	/* CRYPT+AUTH case */
+	} else {
+		sop->cipher.data.offset = pofs + sa->ctp.cipher.offset;
+		sop->cipher.data.length = clen;
+		sop->auth.data.offset = pofs + sa->ctp.auth.offset;
+		sop->auth.data.length = plen - sa->ctp.auth.length;
+		sop->auth.digest.data = icv->va;
+		sop->auth.digest.phys_addr = icv->pa;
+
+		/* copy iv from the input packet to the cop */
+		ivc = rte_crypto_op_ctod_offset(cop, uint64_t *, sa->iv_ofs);
+		ivp = rte_pktmbuf_mtod_offset(mb, uint64_t *,
+			pofs + sizeof(struct esp_hdr));
+		rte_memcpy(ivc, ivp, sa->iv_len);
+	}
+	return 0;
+}
+
+/*
+ * for pure cryptodev (lookaside none) depending on SA settings,
+ * we might have to write some extra data to the packet.
+ */
+static inline void
+inb_pkt_xprepare(const struct rte_ipsec_sa *sa, rte_be64_t sqc,
+	const union sym_op_data *icv)
+{
+	struct aead_gcm_aad *aad;
+
+	/* insert SQN.hi between ESP trailer and ICV */
+	if (sa->sqh_len != 0)
+		insert_sqh(sqn_hi32(sqc), icv->va, sa->icv_len);
+
+	/*
+	 * fill AAD fields, if any (aad fields are placed after icv),
+	 * right now we support only one AEAD algorithm: AES-GCM.
+	 */
+	if (sa->aad_len != 0) {
+		aad = (struct aead_gcm_aad *)(icv->va + sa->icv_len);
+		aead_gcm_aad_fill(aad, sa->spi, sqc, IS_ESN(sa));
+	}
+}
+
+static inline int32_t
+esp_inb_tun_pkt_prepare(const struct rte_ipsec_sa *sa,
+	const struct replay_sqn *rsn, struct rte_mbuf *mb,
+	uint32_t hlen, union sym_op_data *icv)
+{
+	int32_t rc;
+	uint64_t sqn;
+	uint32_t icv_ofs, plen;
+	struct rte_mbuf *ml;
+	struct esp_hdr *esph;
+
+	esph = rte_pktmbuf_mtod_offset(mb, struct esp_hdr *, hlen);
+
+	/*
+	 * retrieve and reconstruct SQN, then check it, then
+	 * convert it back into network byte order.
+	 */
+	sqn = rte_be_to_cpu_32(esph->seq);
+	if (IS_ESN(sa))
+		sqn = reconstruct_esn(rsn->sqn, sqn, sa->replay.win_sz);
+
+	rc = esn_inb_check_sqn(rsn, sa, sqn);
+	if (rc != 0)
+		return rc;
+
+	sqn = rte_cpu_to_be_64(sqn);
+
+	/* start packet manipulation */
+	plen = mb->pkt_len;
+	plen = plen - hlen;
+
+	ml = rte_pktmbuf_lastseg(mb);
+	icv_ofs = ml->data_len - sa->icv_len + sa->sqh_len;
+
+	/* we have to allocate space for AAD somewhere,
+	 * right now - just use free trailing space at the last segment.
+	 * Would probably be more convenient to reserve space for AAD
+	 * inside rte_crypto_op itself
+	 * (again for IV space is already reserved inside cop).
+	 */
+	if (sa->aad_len + sa->sqh_len > rte_pktmbuf_tailroom(ml))
+		return -ENOSPC;
+
+	icv->va = rte_pktmbuf_mtod_offset(ml, void *, icv_ofs);
+	icv->pa = rte_pktmbuf_iova_offset(ml, icv_ofs);
+
+	inb_pkt_xprepare(sa, sqn, icv);
+	return plen;
+}
+
+static uint16_t
+inb_pkt_prepare(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	struct rte_crypto_op *cop[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, hl;
+	struct rte_ipsec_sa *sa;
+	struct replay_sqn *rsn;
+	union sym_op_data icv;
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+	rsn = sa->sqn.inb;
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+
+		hl = mb[i]->l2_len + mb[i]->l3_len;
+		rc = esp_inb_tun_pkt_prepare(sa, rsn, mb[i], hl, &icv);
+		if (rc >= 0)
+			rc = esp_inb_tun_cop_prepare(cop[k], sa, mb[i], &icv,
+				hl, rc);
+
+		if (rc == 0)
+			mb[k++] = mb[i];
+		else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	/* update cops */
+	lksd_none_cop_prepare(ss, mb, cop, k);
+
+	/* copy not prepared mbufs beyond good ones */
+	if (k != num && k != 0)
+		mbuf_bulk_copy(mb + k, dr, num - k);
+
+	return k;
+}
+
+static inline void
+lksd_proto_cop_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
+{
+	uint32_t i;
+	struct rte_crypto_sym_op *sop;
+
+	for (i = 0; i != num; i++) {
+		sop = cop[i]->sym;
+		cop[i]->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+		cop[i]->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+		cop[i]->sess_type = RTE_CRYPTO_OP_SECURITY_SESSION;
+		sop->m_src = mb[i];
+		__rte_security_attach_session(sop, ss->security.ses);
+	}
+}
+
+static uint16_t
+lksd_proto_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], struct rte_crypto_op *cop[], uint16_t num)
+{
+	lksd_proto_cop_prepare(ss, mb, cop, num);
+	return num;
+}
+
+static inline int
+esp_inb_tun_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
+	uint32_t *sqn)
+{
+	uint32_t hlen, icv_len, tlen;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	struct rte_mbuf *ml;
+	char *pd;
+
+	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
+		return -EBADMSG;
+
+	icv_len = sa->icv_len;
+
+	ml = rte_pktmbuf_lastseg(mb);
+	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
+		ml->data_len - icv_len - sizeof(*espt));
+
+	/*
+	 * check padding and next proto.
+	 * return an error if something is wrong.
+	 */
+	pd = (char *)espt - espt->pad_len;
+	if (espt->next_proto != sa->proto ||
+			memcmp(pd, esp_pad_bytes, espt->pad_len))
+		return -EINVAL;
+
+	/* cut off ICV, ESP tail and padding bytes */
+	tlen = icv_len + sizeof(*espt) + espt->pad_len;
+	ml->data_len -= tlen;
+	mb->pkt_len -= tlen;
+
+	/* cut off L2/L3 headers, ESP header and IV */
+	hlen = mb->l2_len + mb->l3_len;
+	esph = rte_pktmbuf_mtod_offset(mb, struct esp_hdr *, hlen);
+	rte_pktmbuf_adj(mb, hlen + sa->ctp.cipher.offset);
+
+	/* retrieve SQN for later check */
+	*sqn = rte_be_to_cpu_32(esph->seq);
+
+	/* reset mbuf metadata: L2/L3 len, packet type */
+	mb->packet_type = RTE_PTYPE_UNKNOWN;
+	mb->l2_len = 0;
+	mb->l3_len = 0;
+
+	/* clear the PKT_RX_SEC_OFFLOAD flag if set */
+	mb->ol_flags &= ~(mb->ol_flags & PKT_RX_SEC_OFFLOAD);
+	return 0;
+}
+
+static inline int
+esp_inb_trs_single_pkt_process(struct rte_ipsec_sa *sa, struct rte_mbuf *mb,
+	uint32_t *sqn)
+{
+	uint32_t hlen, icv_len, l2len, l3len, tlen;
+	struct esp_hdr *esph;
+	struct esp_tail *espt;
+	struct rte_mbuf *ml;
+	char *np, *op, *pd;
+
+	if (mb->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED)
+		return -EBADMSG;
+
+	icv_len = sa->icv_len;
+
+	ml = rte_pktmbuf_lastseg(mb);
+	espt = rte_pktmbuf_mtod_offset(ml, struct esp_tail *,
+		ml->data_len - icv_len - sizeof(*espt));
+
+	/* check padding, return an error if something is wrong. */
+	pd = (char *)espt - espt->pad_len;
+	if (memcmp(pd, esp_pad_bytes, espt->pad_len))
+		return -EINVAL;
+
+	/* cut off ICV, ESP tail and padding bytes */
+	tlen = icv_len + sizeof(*espt) + espt->pad_len;
+	ml->data_len -= tlen;
+	mb->pkt_len -= tlen;
+
+	/* retrieve SQN for later check */
+	l2len = mb->l2_len;
+	l3len = mb->l3_len;
+	hlen = l2len + l3len;
+	op = rte_pktmbuf_mtod(mb, char *);
+	esph = (struct esp_hdr *)(op + hlen);
+	*sqn = rte_be_to_cpu_32(esph->seq);
+
+	/* cut off ESP header and IV, update L3 header */
+	np = rte_pktmbuf_adj(mb, sa->ctp.cipher.offset);
+	remove_esph(np, op, hlen);
+	update_trs_l3hdr(sa, np + l2len, mb->pkt_len, l2len, l3len,
+			espt->next_proto);
+
+	/* reset mbuf packet type */
+	mb->packet_type &= (RTE_PTYPE_L2_MASK | RTE_PTYPE_L3_MASK);
+
+	/* clear the PKT_RX_SEC_OFFLOAD flag if set */
+	mb->ol_flags &= ~(mb->ol_flags & PKT_RX_SEC_OFFLOAD);
+	return 0;
+}
+
+static inline uint16_t
+esp_inb_rsn_update(struct rte_ipsec_sa *sa, const uint32_t sqn[],
+	struct rte_mbuf *mb[], struct rte_mbuf *dr[], uint16_t num)
+{
+	uint32_t i, k;
+	struct replay_sqn *rsn;
+
+	rsn = sa->sqn.inb;
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		if (esn_inb_update_sqn(rsn, sa, sqn[i]) == 0)
+			mb[k++] = mb[i];
+		else
+			dr[i - k] = mb[i];
+	}
+
+	return k;
+}
+
+static uint16_t
+inb_tun_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k;
+	struct rte_ipsec_sa *sa;
+	uint32_t sqn[num];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	/* process packets, extract seq numbers */
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		/* good packet */
+		if (esp_inb_tun_single_pkt_process(sa, mb[i], sqn + k) == 0)
+			mb[k++] = mb[i];
+		/* bad packet, will be dropped from further processing */
+		else
+			dr[i - k] = mb[i];
+	}
+
+	/* update seq # and replay window */
+	k = esp_inb_rsn_update(sa, sqn, mb, dr + i - k, k);
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+static uint16_t
+inb_trs_pkt_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k;
+	uint32_t sqn[num];
+	struct rte_ipsec_sa *sa;
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	/* process packets, extract seq numbers */
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		/* good packet */
+		if (esp_inb_trs_single_pkt_process(sa, mb[i], sqn + k) == 0)
+			mb[k++] = mb[i];
+		/* bad packet, exclude it from further processing */
+		else
+			dr[i - k] = mb[i];
+	}
+
+	/* update seq # and replay window */
+	k = esp_inb_rsn_update(sa, sqn, mb, dr + i - k, k);
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+/*
+ * process outbound packets for SA with ESN support,
+ * for algorithms that require SQN.hibits to be implicitly included
+ * into digest computation.
+ * In that case we have to move ICV bytes back to their proper place.
+ */
+static uint16_t
+outb_sqh_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k, icv_len, *icv;
+	struct rte_mbuf *ml;
+	struct rte_ipsec_sa *sa;
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
+	k = 0;
+	icv_len = sa->icv_len;
+
+	for (i = 0; i != num; i++) {
+		if ((mb[i]->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED) == 0) {
+			ml = rte_pktmbuf_lastseg(mb[i]);
+			icv = rte_pktmbuf_mtod_offset(ml, void *,
+				ml->data_len - icv_len);
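+			/* move ICV back over the ESN high-order
+			 * bytes used only for digest computation
+			 */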
+			remove_sqh(icv, icv_len);
+			mb[k++] = mb[i];
+		} else
+			dr[i - k] = mb[i];
+	}
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+/*
+ * simplest pkt process routine:
+ * all actual processing is already done by HW/PMD,
+ * just check mbuf ol_flags.
+ * used for:
+ * - inbound for RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL
+ * - inbound/outbound for RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL
+ * - outbound for RTE_SECURITY_ACTION_TYPE_NONE when ESN is disabled
+ */
+static uint16_t
+pkt_flag_process(const struct rte_ipsec_session *ss, struct rte_mbuf *mb[],
+	uint16_t num)
+{
+	uint32_t i, k;
+	struct rte_mbuf *dr[num];
+
+	RTE_SET_USED(ss);
+
+	k = 0;
+	for (i = 0; i != num; i++) {
+		if ((mb[i]->ol_flags & PKT_RX_SEC_OFFLOAD_FAILED) == 0)
+			mb[k++] = mb[i];
+		else
+			dr[i - k] = mb[i];
+	}
+
+	/* handle unprocessed mbufs */
+	if (k != num) {
+		rte_errno = EBADMSG;
+		if (k != 0)
+			mbuf_bulk_copy(mb + k, dr, num - k);
+	}
+
+	return k;
+}
+
+/*
+ * prepare packets for inline ipsec processing:
+ * set ol_flags and attach metadata.
+ */
+static inline void
+inline_outb_mbuf_prepare(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], uint16_t num)
+{
+	uint32_t i, ol_flags;
+
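+	/* attach metadata only when the device requires it (NEED_MDATA) */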
+	ol_flags = ss->security.ol_flags & RTE_SECURITY_TX_OLOAD_NEED_MDATA;
+	for (i = 0; i != num; i++) {
+
+		mb[i]->ol_flags |= PKT_TX_SEC_OFFLOAD;
+		if (ol_flags != 0)
+			rte_security_set_pkt_metadata(ss->security.ctx,
+				ss->security.ses, mb[i], NULL);
+	}
+}
+
+static uint16_t
+inline_outb_tun_pkt_process(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
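+	/* reserve SQNs for the burst, 'n' can be reduced at SQN overflow */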
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_tun_pkt_prepare(sa, sqc, iv, mb[i], &icv);
+
+		/* success, update mbuf fields */
+		if (rc >= 0)
+			mb[k++] = mb[i];
+		/* failure, put packet into the death-row */
+		else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	inline_outb_mbuf_prepare(ss, mb, k);
+
+	/* copy bad mbufs beyond the good ones; mbufs past 'n' stay in place */
+	if (k != n && k != 0)
+		mbuf_bulk_copy(mb + k, dr, n - k);
+
+	return k;
+}
+
+static uint16_t
+inline_outb_trs_pkt_process(const struct rte_ipsec_session *ss,
+	struct rte_mbuf *mb[], uint16_t num)
+{
+	int32_t rc;
+	uint32_t i, k, n, l2, l3;
+	uint64_t sqn;
+	rte_be64_t sqc;
+	struct rte_ipsec_sa *sa;
+	union sym_op_data icv;
+	uint64_t iv[IPSEC_MAX_IV_QWORD];
+	struct rte_mbuf *dr[num];
+
+	sa = ss->sa;
+
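+	/* reserve SQNs for the burst, 'n' can be reduced at SQN overflow */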
+	n = num;
+	sqn = esn_outb_update_sqn(sa, &n);
+	if (n != num)
+		rte_errno = EOVERFLOW;
+
+	k = 0;
+	for (i = 0; i != n; i++) {
+
+		l2 = mb[i]->l2_len;
+		l3 = mb[i]->l3_len;
+
+		sqc = rte_cpu_to_be_64(sqn + i);
+		gen_iv(iv, sqc);
+
+		/* try to update the packet itself */
+		rc = esp_outb_trs_pkt_prepare(sa, sqc, iv, mb[i],
+				l2, l3, &icv);
+
+		/* success, update mbuf fields */
+		if (rc >= 0)
+			mb[k++] = mb[i];
+		/* failure, put packet into the death-row */
+		else {
+			dr[i - k] = mb[i];
+			rte_errno = -rc;
+		}
+	}
+
+	inline_outb_mbuf_prepare(ss, mb, k);
+
+	/* copy bad mbufs beyond the good ones; mbufs past 'n' stay in place */
+	if (k != n && k != 0)
+		mbuf_bulk_copy(mb + k, dr, n - k);
+
+	return k;
+}
+
+/*
+ * outbound for RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL:
+ * actual processing is done by HW/PMD, just set flags and metadata.
+ */
+static uint16_t
+outb_inline_proto_process(const struct rte_ipsec_session *ss,
+		struct rte_mbuf *mb[], uint16_t num)
+{
+	inline_outb_mbuf_prepare(ss, mb, num);
+	return num;
+}
+
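+/*
+ * select packet prepare/process functions for
+ * RTE_SECURITY_ACTION_TYPE_NONE, based on SA direction and mode.
+ */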
+static int
+lksd_none_pkt_func_select(const struct rte_ipsec_sa *sa,
+		struct rte_ipsec_sa_pkt_func *pf)
+{
+	int32_t rc;
+
+	static const uint64_t msk = RTE_IPSEC_SATP_DIR_MASK |
+			RTE_IPSEC_SATP_MODE_MASK;
+
+	rc = 0;
+	switch (sa->type & msk) {
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->prepare = inb_pkt_prepare;
+		pf->process = inb_tun_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->prepare = inb_pkt_prepare;
+		pf->process = inb_trs_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->prepare = outb_tun_prepare;
+		pf->process = (sa->sqh_len != 0) ?
+			outb_sqh_process : pkt_flag_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->prepare = outb_trs_prepare;
+		pf->process = (sa->sqh_len != 0) ?
+			outb_sqh_process : pkt_flag_process;
+		break;
+	default:
+		rc = -ENOTSUP;
+	}
+
+	return rc;
+}
+
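+/*
+ * select packet process functions for
+ * RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO:
+ * crypto is done by HW, so no prepare step is needed;
+ * ESP encap/decap and SQN check/update are still done in SW.
+ */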
+static int
+inline_crypto_pkt_func_select(const struct rte_ipsec_sa *sa,
+		struct rte_ipsec_sa_pkt_func *pf)
+{
+	int32_t rc;
+
+	static const uint64_t msk = RTE_IPSEC_SATP_DIR_MASK |
+			RTE_IPSEC_SATP_MODE_MASK;
+
+	rc = 0;
+	switch (sa->type & msk) {
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->process = inb_tun_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_IB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->process = inb_trs_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV4):
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TUNLV6):
+		pf->process = inline_outb_tun_pkt_process;
+		break;
+	case (RTE_IPSEC_SATP_DIR_OB | RTE_IPSEC_SATP_MODE_TRANS):
+		pf->process = inline_outb_trs_pkt_process;
+		break;
+	default:
+		rc = -ENOTSUP;
+	}
+
+	return rc;
+}
+
 int
 ipsec_sa_pkt_func_select(const struct rte_ipsec_session *ss,
 	const struct rte_ipsec_sa *sa, struct rte_ipsec_sa_pkt_func *pf)
 {
 	int32_t rc;
 
-	RTE_SET_USED(sa);
-
 	rc = 0;
 	pf[0] = (struct rte_ipsec_sa_pkt_func) { 0 };
 
 	switch (ss->type) {
+	case RTE_SECURITY_ACTION_TYPE_NONE:
+		rc = lksd_none_pkt_func_select(sa, pf);
+		break;
+	case RTE_SECURITY_ACTION_TYPE_INLINE_CRYPTO:
+		rc = inline_crypto_pkt_func_select(sa, pf);
+		break;
+	case RTE_SECURITY_ACTION_TYPE_INLINE_PROTOCOL:
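+		/*
+		 * HW/PMD does the whole ESP processing:
+		 * inbound just checks ol_flags, outbound only sets
+		 * offload flags and attaches metadata.
+		 */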
+		if ((sa->type & RTE_IPSEC_SATP_DIR_MASK) ==
+				RTE_IPSEC_SATP_DIR_IB)
+			pf->process = pkt_flag_process;
+		else
+			pf->process = outb_inline_proto_process;
+		break;
+	case RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL:
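+		/*
+		 * full protocol offload via lookaside crypto device:
+		 * prepare sets up crypto ops for the security session,
+		 * process just checks ol_flags.
+		 */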
+		pf->prepare = lksd_proto_prepare;
+		pf->process = pkt_flag_process;
+		break;
 	default:
 		rc = -ENOTSUP;
 	}