[dpdk-dev,04/16] crypto/cpt/base: add hardware enq/deq API for CPT

Message ID 1528476325-15585-5-git-send-email-anoob.joseph@caviumnetworks.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series Adding Cavium's crypto device(CPT) driver |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Anoob Joseph June 8, 2018, 4:45 p.m. UTC
  From: Ankur Dwivedi <ankur.dwivedi@cavium.com>

Adds hardware enqueue/dequeue API of instructions to a queue pair
for Cavium CPT device.

Signed-off-by: Ankur Dwivedi <ankur.dwivedi@cavium.com>
Signed-off-by: Murthy NSSR <Nidadavolu.Murthy@cavium.com>
Signed-off-by: Nithin Dabilpuram <nithin.dabilpuram@cavium.com>
Signed-off-by: Ragothaman Jayaraman <Ragothaman.Jayaraman@cavium.com>
Signed-off-by: Srisivasubramanian Srinivasan <Srisivasubramanian.Srinivasan@cavium.com>
---
 drivers/crypto/cpt/base/cpt.h             | 102 +++++++
 drivers/crypto/cpt/base/cpt_device.c      |   4 +-
 drivers/crypto/cpt/base/cpt_request_mgr.c | 424 ++++++++++++++++++++++++++++++
 drivers/crypto/cpt/base/cpt_request_mgr.h |  75 ++++++
 4 files changed, 603 insertions(+), 2 deletions(-)
 create mode 100644 drivers/crypto/cpt/base/cpt.h
 create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.c
 create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h
  

Comments

Jerin Jacob June 14, 2018, 3:20 a.m. UTC | #1
-----Original Message-----
> Date: Fri,  8 Jun 2018 22:15:13 +0530
> From: Anoob Joseph <anoob.joseph@caviumnetworks.com>
> To: Akhil Goyal <akhil.goyal@nxp.com>, Pablo de Lara
>  <pablo.de.lara.guarch@intel.com>, Thomas Monjalon <thomas@monjalon.net>
> Cc: Ankur Dwivedi <ankur.dwivedi@cavium.com>, Jerin Jacob
>  <jerin.jacob@caviumnetworks.com>, Murthy NSSR
>  <Nidadavolu.Murthy@cavium.com>, Narayana Prasad
>  <narayanaprasad.athreya@caviumnetworks.com>, Nithin Dabilpuram
>  <nithin.dabilpuram@cavium.com>, Ragothaman Jayaraman
>  <Ragothaman.Jayaraman@cavium.com>, Srisivasubramanian Srinivasan
>  <Srisivasubramanian.Srinivasan@cavium.com>, dev@dpdk.org
> Subject: [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT
> X-Mailer: git-send-email 2.7.4
> 
> From: Ankur Dwivedi <ankur.dwivedi@cavium.com>
> 
> Adds hardware enqueue/dequeue API of instructions to a queue pair
> for Cavium CPT device.
> 
> Signed-off-by: Ankur Dwivedi <ankur.dwivedi@cavium.com>
> Signed-off-by: Murthy NSSR <Nidadavolu.Murthy@cavium.com>
> Signed-off-by: Nithin Dabilpuram <nithin.dabilpuram@cavium.com>
> Signed-off-by: Ragothaman Jayaraman <Ragothaman.Jayaraman@cavium.com>
> Signed-off-by: Srisivasubramanian Srinivasan <Srisivasubramanian.Srinivasan@cavium.com>
> ---
>  drivers/crypto/cpt/base/cpt.h             | 102 +++++++
>  drivers/crypto/cpt/base/cpt_device.c      |   4 +-
>  drivers/crypto/cpt/base/cpt_request_mgr.c | 424 ++++++++++++++++++++++++++++++
>  drivers/crypto/cpt/base/cpt_request_mgr.h |  75 ++++++
>  4 files changed, 603 insertions(+), 2 deletions(-)
>  create mode 100644 drivers/crypto/cpt/base/cpt.h
>  create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.c
>  create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h
> 
> diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h
> new file mode 100644
> index 0000000..11407ae
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt.h
> @@ -0,0 +1,102 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#ifndef __BASE_CPT_H__
> +#define __BASE_CPT_H__
> +
> +/* Linux Includes */
> +#include <endian.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <stdio.h>
> +#include <stdbool.h>
> +#include <errno.h>
> +#include <sys/cdefs.h>
> +#include <unistd.h>
> +#include <assert.h>

Please keep these includes in alphabetical order.

> +
> +/* DPDK includes */
> +#include <rte_byteorder.h>
> +#include <rte_common.h>
> +#include <rte_errno.h>
> +#include <rte_memory.h>
> +#include <rte_prefetch.h>
> +
> +#include "../cpt_pmd_logs.h"
> +#include "mcode_defines.h"
> +
> +/** @cond __INTERNAL_DOCUMENTATION__ */
> +
> +/* Declarations */
> +typedef struct cpt_instance cpt_instance_t;
> +
> +/*
> + * Generic Defines
> + */
> +
> +/* Buffer pointer */
> +typedef struct buf_ptr {
> +	void *vaddr;
> +	phys_addr_t dma_addr;
> +	uint32_t size;
> +	uint32_t resv;
> +} buf_ptr_t;
> +
> +/* IOV Pointer */
> +typedef struct{
> +	int buf_cnt;
> +	buf_ptr_t bufs[0];
> +} iov_ptr_t;
> +
> +typedef struct app_data {
> +	uint64_t pktout;
> +	void *marker;
> +} app_data_t;
> +
> +/* Instance operations */
> +
> +/* Enqueue an SE/AE request */
> +int cpt_enqueue_req(cpt_instance_t *inst, void *req, uint8_t flags,
> +	      void *event, uint64_t event_flags);
> +
> +/* Dequeue completed SE requests as burst */
> +int32_t cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
> +			  void *resp[], uint8_t cc[]);
> +
> +/* Marks event as done in event driven mode */
> +int32_t cpt_event_mark_done(void *marker, uint8_t *op_error);
> +
> +/* Checks queue full condition */
> +uint16_t cpt_queue_full(cpt_instance_t *instance);
> +
> +/* Misc */
> +uint32_t cpt_get_instance_count(void);
> +
> +#define ENQ_FLAG_SYNC		0x01
> +#define ENQ_FLAG_EVENT		0x02
> +#define ENQ_FLAG_NODOORBELL	0x04
> +#define ENQ_FLAG_ONLY_DOORBELL	0x08
> +
> +
> +#define OCTTX_EVENT_TAG(__flags) (__flags & 0xffffffff)
> +#define OCTTX_EVENT_GRP(__flags) ((__flags >> 32) & 0xffff)
> +#define OCTTX_EVENT_TT(__flags) ((__flags >> 48) & 0xff)
> +
> +#define OCTTX_EVENT_FLAGS(__tag, __grp, __tt)    \
> +	(((uint64_t)__tag & 0xffffffff) |        \
> +	 (((uint64_t)__grp & 0xffff) << 32) |    \
> +	 (((uint64_t)__tt & 0xff) << 48))
> +
> +
> +/* cpt instance */
> +struct cpt_instance {
> +	/* 0th cache line */
> +	uint32_t queue_id;
> +	uint64_t rsvd;
> +};
> +

#ifndef __hot
> +#define __hot __attribute__((hot))
#endif

> +/** @endcond */
> +
> +#endif /* __BASE_CPT_H__ */
> diff --git a/drivers/crypto/cpt/base/cpt_device.c b/drivers/crypto/cpt/base/cpt_device.c
> index b7cd5b5..a50e5b8 100644
> --- a/drivers/crypto/cpt/base/cpt_device.c
> +++ b/drivers/crypto/cpt/base/cpt_device.c
> @@ -193,7 +193,7 @@ int cptvf_get_resource(struct cpt_vf *dev,
>  	uint64_t *next_ptr;
>  	uint64_t pg_sz = sysconf(_SC_PAGESIZE);
>  
> -	PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf->dev_name);
> +	PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf->dev_name);
>  
>  	cpt_instance = &cptvf->instance;
>  
> @@ -323,7 +323,7 @@ int cptvf_put_resource(cpt_instance_t *instance)
>  		return -EINVAL;
>  	}
>  
> -	PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf->dev_name);
> +	PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf->dev_name);
>  
>  	rz = (struct rte_memzone *)instance->rsvd;
>  	rte_memzone_free(rz);
> diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c b/drivers/crypto/cpt/base/cpt_request_mgr.c
> new file mode 100644
> index 0000000..8b9b1ff
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt_request_mgr.c
> @@ -0,0 +1,424 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#include "cpt_request_mgr.h"
> +#include "cpt_debug.h"
> +#include <rte_atomic.h>
> +
> +#define MOD_INC(i, l)   ((i) == (l - 1) ? (i) = 0 : (i)++)
> +
> +#define __hot __attribute__((hot))

same as above

> +
> +static inline uint64_t cpu_cycles(void)
> +{
> +	return rte_get_timer_cycles();
> +}
> +
> +static inline uint64_t cpu_cycles_freq(void)
> +{
> +	return rte_get_timer_hz();
> +}
> +
> +static inline void *
> +get_cpt_inst(struct command_queue *cqueue, void *req)
> +{
> +	(void)req;
> +	PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req);
> +	return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE];
> +}
> +
> +static inline void
> +mark_cpt_inst(struct cpt_vf *cptvf,
> +	      struct command_queue *queue,
> +	      uint32_t ring_door_bell)
> +{
> +#ifdef CMD_DEBUG
> +	/* DEBUG */
> +	{
> +		uint32_t i = queue->idx * CPT_INST_SIZE;
> +		cpt_inst_s_t *cmd = (void *)&queue->qhead[i];
> +		uint64_t *p = (void *)&queue->qhead[i];
> +
> +		PRINT("\nQUEUE parameters:");
> +		PRINT("Queue index           = %u\n",
> +		      queue->idx);
> +		PRINT("Queue HEAD            = %p\n",
> +		      queue->qhead);
> +		PRINT("Command Entry         = %p\n",
> +		      cmd);
> +
> +		PRINT("\nCPT_INST_S format:");
> +		PRINT("cmd->s.doneint = %x\n", cmd->s.doneint);
> +		PRINT("cmd->s.res_addr  = %lx\n", cmd->s.res_addr);
> +		PRINT("cmd->s.grp       = %x\n", cmd->s.grp);
> +		PRINT("cmd->s.tag       = %x\n", cmd->s.tag);
> +		PRINT("cmd->s.tt        = %x\n", cmd->s.tt);
> +		PRINT("cmd->s.wq_ptr    = %lx\n", cmd->s.wq_ptr);
> +		PRINT("cmd->s.ei0       = %lx\n", cmd->s.ei0);
> +		PRINT("cmd->s.ei1       = %lx\n", cmd->s.ei1);
> +		PRINT("cmd->s.ei2       = %lx\n", cmd->s.ei2);
> +		PRINT("cmd->s.ei3       = %lx\n", cmd->s.ei3);
> +
> +		PRINT("\nCommand dump from queue HEAD:");
> +		for (i = 0; i < CPT_INST_SIZE / 8; i++)
> +			PRINT("%lx\n", p[i]);
> +	}
> +#endif
> +	if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) {
> +		uint32_t cchunk = queue->cchunk;
> +		MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS);
> +		queue->qhead = queue->chead[cchunk].head;
> +		queue->idx = 0;
> +		queue->cchunk = cchunk;
> +	}
> +
> +	if (ring_door_bell) {
> +		/* Memory barrier to flush pending writes */
> +		rte_smp_wmb();
> +		cptvf_write_vq_doorbell(cptvf, ring_door_bell);
> +	}
> +}
> +
> +static inline uint8_t
> +check_nb_command_id(cpt_request_info_t *user_req, struct cpt_vf *cptvf)
> +{
> +	uint8_t ret = ERR_REQ_PENDING;
> +	volatile cpt_res_s_t *cptres;
> +
> +	cptres = (volatile cpt_res_s_t *)user_req->completion_addr;
> +
> +	if (unlikely(cptres->s.compcode == CPT_COMP_E_NOTDONE)) {
> +		/*
> +		 * Wait for some time for this command to get completed
> +		 * before timing out
> +		 */
> +		if (cpu_cycles() < user_req->time_out)
> +			return ret;
> +		/*
> +		 * TODO: See if alternate caddr can be used to not loop
> +		 * longer than needed.
> +		 */
> +		if ((cptres->s.compcode == CPT_COMP_E_NOTDONE) &&
> +		    (user_req->extra_time < TIME_IN_RESET_COUNT)) {
> +			user_req->extra_time++;
> +			return ret;
> +		}
> +
> +		if (cptres->s.compcode != CPT_COMP_E_NOTDONE)
> +			goto complete;
> +
> +		ret = ERR_REQ_TIMEOUT;
> +		PMD_DRV_LOG_RAW(ERR, "Request %p timedout\n", user_req);
> +		cptvf_poll_misc(cptvf);
> +		dump_cpt_request_sglist(&user_req->dbg_inst,
> +					"Response Packet Gather in", 1, 1);
> +		goto exit;
> +	}
> +
> +complete:
> +	if (likely(cptres->s.compcode == CPT_COMP_E_GOOD)) {
> +		ret = 0; /* success */
> +		PMD_RX_LOG(DEBUG, "MC status %.8x\n",
> +			   *((volatile uint32_t *)user_req->alternate_caddr));
> +		PMD_RX_LOG(DEBUG, "HW status %.8x\n",
> +			   *((volatile uint32_t *)user_req->completion_addr));
> +	} else if ((cptres->s.compcode == CPT_COMP_E_SWERR) ||
> +		   (cptres->s.compcode == CPT_COMP_E_FAULT)) {
> +		ret = (uint8_t)*user_req->alternate_caddr;
> +		if (!ret)
> +			ret = ERR_BAD_ALT_CCODE;
> +		PMD_RX_LOG(DEBUG, "Request %p : failed with %s : err code :"
> +			   "%x\n", user_req,
> +			   (cptres->s.compcode == CPT_COMP_E_FAULT) ?
> +			   "DMA Fault" : "Software error", ret);
> +	} else {
> +		PMD_DRV_LOG_RAW(ERR, "Request %p : unexpected completion code"
> +			   " %d\n",
> +			   user_req, cptres->s.compcode);
> +		ret = (uint8_t)*user_req->alternate_caddr;
> +	}
> +
> +exit:
> +	dump_cpt_request_sglist(&user_req->dbg_inst,
> +				"Response Packet Scatter Out", 1, 0);
> +	return ret;
> +}
> +
> +
> +/*
> + * cpt_enqueue_req()
> + *
> + * SE & AE request enqueue function
> + */
> +int32_t __hot
> +cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags,
> +		void *event, uint64_t event_flags)
> +{
> +	struct pending_queue *pqueue;
> +	struct cpt_vf *cptvf;
> +	cpt_inst_s_t *cpt_ist_p = NULL;
> +	cpt_request_info_t *user_req = (cpt_request_info_t *)req;
> +	struct command_queue *cqueue;
> +	int32_t ret = 0;
> +
> +#ifdef CPTVF_STRICT_PARAM_CHECK
> +	if (unlikely(!instance)) {
> +		PMD_DRV_LOG_RAW(ERR, "Invalid inputs (instance: %p, req: %p)\n",
> +			   instance, req);
> +		return -EINVAL;
> +	}
> +#endif
> +
> +	cptvf = (struct cpt_vf *)instance;
> +	pqueue = &cptvf->pqueue;
> +
> +	if (unlikely(!req)) {
> +		/* ring only pending doorbells */
> +		if ((flags & ENQ_FLAG_ONLY_DOORBELL) && pqueue->p_doorbell) {
> +			/* Memory barrier to flush pending writes */
> +			rte_smp_wmb();

Cross-check whether rte_wmb() is intended here instead of rte_smp_wmb(),
as this is a barrier between the device and the CPU.

> +			cptvf_write_vq_doorbell(cptvf, pqueue->p_doorbell);
> +			pqueue->p_doorbell = 0;
> +		}
> +		return 0;
> +	}
> +
> +#if defined(ATOMIC_THROTTLING_COUNTER)

If this config is useful for the end user, then expose it in the config file
and explain the details in the documentation.

> +	/* Ask the application to try again later */
> +	if (unlikely(cpt_pmd_pcount_load(&pqueue->pending_count) >=
> +		     DEFAULT_CMD_QLEN)) {
> +		return -EAGAIN;
> +	}
> +#else
> +	if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN))
> +		return -EAGAIN;
> +#endif
> +	cqueue = &cptvf->cqueue;
> +	cpt_ist_p = get_cpt_inst(cqueue, req);
> +	rte_prefetch_non_temporal(cpt_ist_p);
> +
> +	/* EI0, EI1, EI2, EI3 are already prepared */
> +	/* HW W0 */
> +	cpt_ist_p->u[0] = 0;
> +	/* HW W1 */
> +	cpt_ist_p->s.res_addr = user_req->comp_baddr;
> +	/* HW W2 */
> +	cpt_ist_p->u[2] = 0;
> +	/* HW W3 */
> +	cpt_ist_p->s.wq_ptr = 0;
> +
> +	/* MC EI0 */
> +	cpt_ist_p->s.ei0 = user_req->ist.ei0;
> +	/* MC EI1 */
> +	cpt_ist_p->s.ei1 = user_req->ist.ei1;
> +	/* MC EI2 */
> +	cpt_ist_p->s.ei2 = user_req->ist.ei2;
> +	/* MC EI3 */
> +	cpt_ist_p->s.ei3 = user_req->ist.ei3;
> +
> +	PMD_TX_LOG(DEBUG, "req: %p op: %p dma_mode 0x%x se_req %u\n",
> +		   req,
> +		   user_req->op,
> +		   user_req->dma_mode,
> +		   user_req->se_req);
> +
> +#ifdef CPT_DEBUG
> +	{
> +		vq_cmd_word0_t vq_cmd_w0;
> +		vq_cmd_word3_t vq_cmd_w3;
> +
> +		vq_cmd_w3.u64 = cpt_ist_p->s.ei3;
> +		vq_cmd_w0.u64 = be64toh(cpt_ist_p->s.ei0);
> +		user_req->dbg_inst = *cpt_ist_p;
> +
> +		if (vq_cmd_w3.s.cptr) {
> +			PMD_TX_LOG(DEBUG, "Context Handle: 0x%016lx\n",
> +				   (uint64_t)vq_cmd_w3.s.cptr);
> +			/* Dump max context i.e 448 bytes */
> +			cpt_dump_buffer("CONTEXT",
> +					os_iova2va((uint64_t)vq_cmd_w3.s.cptr),
> +					448);
> +		}
> +
> +		dump_cpt_request_info(user_req, cpt_ist_p);
> +		dump_cpt_request_sglist(cpt_ist_p, "Request (src)", 1, 1);
> +		dump_cpt_request_sglist(cpt_ist_p, "Request (dst)", 0, 0);
> +		cpt_dump_buffer("VQ command word0", &cpt_ist_p->u[4],
> +				sizeof(vq_cmd_w0));
> +		cpt_dump_buffer("VQ command word1", &cpt_ist_p->u[5],
> +				sizeof(uint64_t));
> +		cpt_dump_buffer("VQ command word2", &cpt_ist_p->u[6],
> +				sizeof(uint64_t));
> +		cpt_dump_buffer("VQ command word3", &cpt_ist_p->u[7],
> +				sizeof(vq_cmd_w3));
> +	}
> +#endif
> +
> +	if (likely(!(flags & ENQ_FLAG_SYNC))) {
> +		void *op = user_req->op;
> +
> +		if (unlikely(flags & ENQ_FLAG_EVENT)) {
> +			app_data_t *app_data = op;
> +
> +			/* Event based completion */
> +			cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags);
> +			cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags);
> +			cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags);
> +			cpt_ist_p->s.wq_ptr = (uint64_t)event;
> +
> +#if defined(ATOMIC_THROTTLING_COUNTER)
> +			app_data->marker = user_req;
> +			__atomic_fetch_add(&pqueue->pending_count,
> +					   1, __ATOMIC_RELAXED);
> +#else
> +			rid_t *rid_e;
> +			/*
> +			 * Mark it as in progress in pending queue, software
> +			 * will mark it when completion is received
> +			 */
> +			rid_e = &pqueue->rid_queue[pqueue->enq_tail];
> +			rid_e->rid = (uint64_t)user_req;
> +			/* rid_e->op = op; */
> +			MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
> +			app_data->marker = rid_e;
> +#endif
> +
> +			cpt_dump_buffer("CPT Instruction with wqe", cpt_ist_p,
> +					sizeof(*cpt_ist_p));
> +
> +			mark_cpt_inst(cptvf, cqueue, 1);
> +
> +		} else {
> +			uint32_t doorbell = 0;
> +
> +			if (likely(flags & ENQ_FLAG_NODOORBELL))
> +				pqueue->p_doorbell++;
> +			else
> +				doorbell = ++pqueue->p_doorbell;
> +
> +			/* Fill time_out cycles */
> +			user_req->time_out = cpu_cycles() +
> +				DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
> +			user_req->extra_time = 0;
> +
> +			cpt_dump_buffer("CPT Instruction", cpt_ist_p,
> +					sizeof(*cpt_ist_p));
> +
> +			/* Default mode of software queue */
> +			mark_cpt_inst(cptvf, cqueue, doorbell);
> +
> +			pqueue->p_doorbell -= doorbell;
> +			pqueue->rid_queue[pqueue->enq_tail].rid =
> +				(uint64_t)user_req;
> +			/* pqueue->rid_queue[pqueue->enq_tail].op = op; */
> +			/* We will use soft queue length here to limit
> +			 * requests
> +			 */
> +			MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
> +			pqueue->pending_count += 1;
> +		}
> +
> +		PMD_TX_LOG(DEBUG, "Submitted NB cmd with request: %p op: %p\n",
> +			   user_req, op);
> +	} else {
> +		/*
> +		 * Synchronous operation,
> +		 * hold until completion / timeout
> +		 */
> +		/* Fill time_out cycles */
> +		user_req->time_out = cpu_cycles() +
> +			DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
> +		user_req->extra_time = 0;
> +
> +		cpt_dump_buffer("CPT Instruction", cpt_ist_p,
> +				sizeof(*cpt_ist_p));
> +
> +		/* Default mode of software queue */
> +		mark_cpt_inst(cptvf, cqueue, 1);
> +
> +		do {
> +			/* TODO: should we pause */
> +			ret = check_nb_command_id(user_req, cptvf);
> +			cptvf_poll_misc(cptvf);
> +#if 0

???

> +			PMD_TX_LOG(DEBUG, "Doorbell count for cptvf %s: %u\n",
> +				   cptvf->dev_name,
> +				   cptvf_read_vq_doorbell(cptvf));
> +#endif
> +		} while (ret == ERR_REQ_PENDING);
> +
  
De Lara Guarch, Pablo June 19, 2018, 2:36 p.m. UTC | #2
> -----Original Message-----
> From: Anoob Joseph [mailto:anoob.joseph@caviumnetworks.com]
> Sent: Friday, June 8, 2018 5:45 PM
> To: Akhil Goyal <akhil.goyal@nxp.com>; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>; Thomas Monjalon <thomas@monjalon.net>
> Cc: Ankur Dwivedi <ankur.dwivedi@cavium.com>; Jerin Jacob
> <jerin.jacob@caviumnetworks.com>; Murthy NSSR
> <Nidadavolu.Murthy@cavium.com>; Narayana Prasad
> <narayanaprasad.athreya@caviumnetworks.com>; Nithin Dabilpuram
> <nithin.dabilpuram@cavium.com>; Ragothaman Jayaraman
> <Ragothaman.Jayaraman@cavium.com>; Srisivasubramanian Srinivasan
> <Srisivasubramanian.Srinivasan@cavium.com>; dev@dpdk.org
> Subject: [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT

No need to use "for CPT" here, as the "crypto/cpt/base" prefix already states that.
The same applies to the other patches.

> 
> From: Ankur Dwivedi <ankur.dwivedi@cavium.com>
> 
> Adds hardware enqueue/dequeue API of instructions to a queue pair for Cavium
> CPT device.
> 
> Signed-off-by: Ankur Dwivedi <ankur.dwivedi@cavium.com>
> Signed-off-by: Murthy NSSR <Nidadavolu.Murthy@cavium.com>
> Signed-off-by: Nithin Dabilpuram <nithin.dabilpuram@cavium.com>
> Signed-off-by: Ragothaman Jayaraman
> <Ragothaman.Jayaraman@cavium.com>
> Signed-off-by: Srisivasubramanian Srinivasan
> <Srisivasubramanian.Srinivasan@cavium.com>
> ---
>  drivers/crypto/cpt/base/cpt.h             | 102 +++++++
>  drivers/crypto/cpt/base/cpt_device.c      |   4 +-
>  drivers/crypto/cpt/base/cpt_request_mgr.c | 424
> ++++++++++++++++++++++++++++++
> drivers/crypto/cpt/base/cpt_request_mgr.h |  75 ++++++
>  4 files changed, 603 insertions(+), 2 deletions(-)  create mode 100644
> drivers/crypto/cpt/base/cpt.h  create mode 100644
> drivers/crypto/cpt/base/cpt_request_mgr.c
>  create mode 100644 drivers/crypto/cpt/base/cpt_request_mgr.h
> 
> diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h new
> file mode 100644 index 0000000..11407ae
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt.h

...
> +/* cpt instance */
> +struct cpt_instance {
> +	/* 0th cache line */

Is this comment useful for only 12 bytes of data?


...

> diff --git a/drivers/crypto/cpt/base/cpt_device.c
> b/drivers/crypto/cpt/base/cpt_device.c
> index b7cd5b5..a50e5b8 100644
> --- a/drivers/crypto/cpt/base/cpt_device.c
> +++ b/drivers/crypto/cpt/base/cpt_device.c
> @@ -193,7 +193,7 @@ int cptvf_get_resource(struct cpt_vf *dev,
>  	uint64_t *next_ptr;
>  	uint64_t pg_sz = sysconf(_SC_PAGESIZE);
> 
> -	PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf-
> >dev_name);
> +	PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf-
> >dev_name);

This should have been "cpt resource" from the beginning, in the patch that introduced this line.
Same below.

> 
>  	cpt_instance = &cptvf->instance;
> 
> @@ -323,7 +323,7 @@ int cptvf_put_resource(cpt_instance_t *instance)
>  		return -EINVAL;
>  	}
> 
> -	PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf-
> >dev_name);
> +	PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf-
> >dev_name);
> 
>  	rz = (struct rte_memzone *)instance->rsvd;
>  	rte_memzone_free(rz);
> diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c
> b/drivers/crypto/cpt/base/cpt_request_mgr.c
> new file mode 100644
> index 0000000..8b9b1ff
> --- /dev/null
> +++ b/drivers/crypto/cpt/base/cpt_request_mgr.c
> @@ -0,0 +1,424 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2017 Cavium, Inc
> + */
> +
> +#include "cpt_request_mgr.h"
> +#include "cpt_debug.h"
> +#include <rte_atomic.h>

The same comment about the includes applies here.
Separate the external includes from the DPDK-internal includes with blank lines.

> +
> +#define MOD_INC(i, l)   ((i) == (l - 1) ? (i) = 0 : (i)++)
> +
> +#define __hot __attribute__((hot))
> +
> +static inline uint64_t cpu_cycles(void) {
> +	return rte_get_timer_cycles();
> +}
> +
> +static inline uint64_t cpu_cycles_freq(void) {
> +	return rte_get_timer_hz();
> +}
> +
> +static inline void *
> +get_cpt_inst(struct command_queue *cqueue, void *req) {
> +	(void)req;
> +	PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req);
> +	return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE]; }
> +
> +static inline void
> +mark_cpt_inst(struct cpt_vf *cptvf,
> +	      struct command_queue *queue,
> +	      uint32_t ring_door_bell)
> +{
> +#ifdef CMD_DEBUG

Try to avoid these compile-time checks, as Jerin suggested.

> +	/* DEBUG */
> +	{
  
De Lara Guarch, Pablo June 19, 2018, 3:08 p.m. UTC | #3
> -----Original Message-----
> From: Anoob Joseph [mailto:anoob.joseph@caviumnetworks.com]
> Sent: Friday, June 8, 2018 5:45 PM
> To: Akhil Goyal <akhil.goyal@nxp.com>; De Lara Guarch, Pablo
> <pablo.de.lara.guarch@intel.com>; Thomas Monjalon <thomas@monjalon.net>
> Cc: Ankur Dwivedi <ankur.dwivedi@cavium.com>; Jerin Jacob
> <jerin.jacob@caviumnetworks.com>; Murthy NSSR
> <Nidadavolu.Murthy@cavium.com>; Narayana Prasad
> <narayanaprasad.athreya@caviumnetworks.com>; Nithin Dabilpuram
> <nithin.dabilpuram@cavium.com>; Ragothaman Jayaraman
> <Ragothaman.Jayaraman@cavium.com>; Srisivasubramanian Srinivasan
> <Srisivasubramanian.Srinivasan@cavium.com>; dev@dpdk.org
> Subject: [PATCH 04/16] crypto/cpt/base: add hardware enq/deq API for CPT
> 
> From: Ankur Dwivedi <ankur.dwivedi@cavium.com>
> 
> Adds hardware enqueue/dequeue API of instructions to a queue pair for Cavium
> CPT device.
> 
> Signed-off-by: Ankur Dwivedi <ankur.dwivedi@cavium.com>
> Signed-off-by: Murthy NSSR <Nidadavolu.Murthy@cavium.com>
> Signed-off-by: Nithin Dabilpuram <nithin.dabilpuram@cavium.com>
> Signed-off-by: Ragothaman Jayaraman
> <Ragothaman.Jayaraman@cavium.com>
> Signed-off-by: Srisivasubramanian Srinivasan
> <Srisivasubramanian.Srinivasan@cavium.com>

...

> +
> +			/* Event based completion */
> +			cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags);
> +			cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags);
> +			cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags);
> +			cpt_ist_p->s.wq_ptr = (uint64_t)event;

I am seeing the following errors when building with 32-bit gcc.

drivers/crypto/cpt/base/cpt_request_mgr.c:268:26: error:
cast from pointer to integer of different size [-Werror=pointer-to-int-cast]
    cpt_ist_p->s.wq_ptr = (uint64_t)event;
                          ^
drivers/crypto/cpt/base/cpt_request_mgr.c:281:17: error:
cast from pointer to integer of different size [-Werror=pointer-to-int-cast]
    rid_e->rid = (uint64_t)user_req;
                 ^
drivers/crypto/cpt/base/cpt_request_mgr.c:313:5: error:
cast from pointer to integer of different size [-Werror=pointer-to-int-cast]
     (uint64_t)user_req;
     ^
drivers/crypto/cpt/base/cpt_request_mgr.c: In function 'cpt_dequeue_burst':
drivers/crypto/cpt/base/cpt_request_mgr.c:375:14:
error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
   user_req = (cpt_request_info_t *)(rid_e->rid);
              ^
drivers/crypto/cpt/base/cpt_request_mgr.c:378:30: error:
cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
    rte_prefetch_non_temporal((void *)rid_e[1].rid);
                              ^
  

Patch

diff --git a/drivers/crypto/cpt/base/cpt.h b/drivers/crypto/cpt/base/cpt.h
new file mode 100644
index 0000000..11407ae
--- /dev/null
+++ b/drivers/crypto/cpt/base/cpt.h
@@ -0,0 +1,102 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef __BASE_CPT_H__
+#define __BASE_CPT_H__
+
+/* Linux Includes */
+#include <endian.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/cdefs.h>
+#include <unistd.h>
+#include <assert.h>
+
+/* DPDK includes */
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_memory.h>
+#include <rte_prefetch.h>
+
+#include "../cpt_pmd_logs.h"
+#include "mcode_defines.h"
+
+/** @cond __INTERNAL_DOCUMENTATION__ */
+
+/* Declarations */
+typedef struct cpt_instance cpt_instance_t;
+
+/*
+ * Generic Defines
+ */
+
+/* Buffer pointer */
+typedef struct buf_ptr {
+	void *vaddr;
+	phys_addr_t dma_addr;
+	uint32_t size;
+	uint32_t resv;
+} buf_ptr_t;
+
+/* IOV Pointer */
+typedef struct{
+	int buf_cnt;
+	buf_ptr_t bufs[0];
+} iov_ptr_t;
+
+typedef struct app_data {
+	uint64_t pktout;
+	void *marker;
+} app_data_t;
+
+/* Instance operations */
+
+/* Enqueue an SE/AE request */
+int cpt_enqueue_req(cpt_instance_t *inst, void *req, uint8_t flags,
+	      void *event, uint64_t event_flags);
+
+/* Dequeue completed SE requests as burst */
+int32_t cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
+			  void *resp[], uint8_t cc[]);
+
+/* Marks event as done in event driven mode */
+int32_t cpt_event_mark_done(void *marker, uint8_t *op_error);
+
+/* Checks queue full condition */
+uint16_t cpt_queue_full(cpt_instance_t *instance);
+
+/* Misc */
+uint32_t cpt_get_instance_count(void);
+
+#define ENQ_FLAG_SYNC		0x01
+#define ENQ_FLAG_EVENT		0x02
+#define ENQ_FLAG_NODOORBELL	0x04
+#define ENQ_FLAG_ONLY_DOORBELL	0x08
+
+
+#define OCTTX_EVENT_TAG(__flags) (__flags & 0xffffffff)
+#define OCTTX_EVENT_GRP(__flags) ((__flags >> 32) & 0xffff)
+#define OCTTX_EVENT_TT(__flags) ((__flags >> 48) & 0xff)
+
+#define OCTTX_EVENT_FLAGS(__tag, __grp, __tt)    \
+	(((uint64_t)__tag & 0xffffffff) |        \
+	 (((uint64_t)__grp & 0xffff) << 32) |    \
+	 (((uint64_t)__tt & 0xff) << 48))
+
+
+/* cpt instance */
+struct cpt_instance {
+	/* 0th cache line */
+	uint32_t queue_id;
+	uint64_t rsvd;
+};
+
+#define __hot __attribute__((hot))
+/** @endcond */
+
+#endif /* __BASE_CPT_H__ */
diff --git a/drivers/crypto/cpt/base/cpt_device.c b/drivers/crypto/cpt/base/cpt_device.c
index b7cd5b5..a50e5b8 100644
--- a/drivers/crypto/cpt/base/cpt_device.c
+++ b/drivers/crypto/cpt/base/cpt_device.c
@@ -193,7 +193,7 @@  int cptvf_get_resource(struct cpt_vf *dev,
 	uint64_t *next_ptr;
 	uint64_t pg_sz = sysconf(_SC_PAGESIZE);
 
-	PMD_DRV_LOG(DEBUG, "Initializing csp resource %s\n", cptvf->dev_name);
+	PMD_DRV_LOG(DEBUG, "Initializing cpt resource %s\n", cptvf->dev_name);
 
 	cpt_instance = &cptvf->instance;
 
@@ -323,7 +323,7 @@  int cptvf_put_resource(cpt_instance_t *instance)
 		return -EINVAL;
 	}
 
-	PMD_DRV_LOG(DEBUG, "Releasing csp device %s\n", cptvf->dev_name);
+	PMD_DRV_LOG(DEBUG, "Releasing cpt device %s\n", cptvf->dev_name);
 
 	rz = (struct rte_memzone *)instance->rsvd;
 	rte_memzone_free(rz);
diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.c b/drivers/crypto/cpt/base/cpt_request_mgr.c
new file mode 100644
index 0000000..8b9b1ff
--- /dev/null
+++ b/drivers/crypto/cpt/base/cpt_request_mgr.c
@@ -0,0 +1,424 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#include "cpt_request_mgr.h"
+#include "cpt_debug.h"
+#include <rte_atomic.h>
+
+/*
+ * Advances ring index @i by one, wrapping to 0 after it reaches @l - 1.
+ * @i must be a side-effect free lvalue: it is evaluated more than once.
+ * @l is parenthesized so that an expression can be passed as the limit.
+ */
+#define MOD_INC(i, l)   ((i) == ((l) - 1) ? (i) = 0 : (i)++)
+
+#define __hot __attribute__((hot))
+
+/* Current value of the DPDK timer cycle counter */
+static inline uint64_t
+cpu_cycles(void)
+{
+	const uint64_t now = rte_get_timer_cycles();
+
+	return now;
+}
+
+/* Frequency of the DPDK timer, in cycles per second */
+static inline uint64_t
+cpu_cycles_freq(void)
+{
+	const uint64_t hz = rte_get_timer_hz();
+
+	return hz;
+}
+
+/*
+ * Returns the address of the instruction slot at the current index of the
+ * command queue's active chunk. @req is used for debug logging only.
+ */
+static inline void *
+get_cpt_inst(struct command_queue *cqueue, void *req)
+{
+	(void)req;
+	PMD_TX_LOG(DEBUG, "CPT queue idx %u, req %p\n", cqueue->idx, req);
+
+	return cqueue->qhead + cqueue->idx * CPT_INST_SIZE;
+}
+
+/*
+ * Commits the instruction slot that was just filled.
+ *
+ * Advances the command queue index; when the current chunk is exhausted
+ * the queue moves on to the next chunk (wrapping after the last one) and
+ * the index restarts at 0. If @ring_door_bell is non-zero, that value is
+ * written to the VQ doorbell after a write barrier.
+ *
+ * With CMD_DEBUG defined, the queue state and the instruction are dumped
+ * before the index is advanced.
+ */
+static inline void
+mark_cpt_inst(struct cpt_vf *cptvf,
+	      struct command_queue *queue,
+	      uint32_t ring_door_bell)
+{
+#ifdef CMD_DEBUG
+	/* DEBUG */
+	{
+		/* Offset of the current instruction inside the chunk */
+		uint32_t i = queue->idx * CPT_INST_SIZE;
+		cpt_inst_s_t *cmd = (void *)&queue->qhead[i];
+		uint64_t *p = (void *)&queue->qhead[i];
+
+		PRINT("\nQUEUE parameters:");
+		PRINT("Queue index           = %u\n",
+		      queue->idx);
+		PRINT("Queue HEAD            = %p\n",
+		      queue->qhead);
+		PRINT("Command Entry         = %p\n",
+		      cmd);
+
+		PRINT("\nCPT_INST_S format:");
+		PRINT("cmd->s.doneint = %x\n", cmd->s.doneint);
+		PRINT("cmd->s.res_addr  = %lx\n", cmd->s.res_addr);
+		PRINT("cmd->s.grp       = %x\n", cmd->s.grp);
+		PRINT("cmd->s.tag       = %x\n", cmd->s.tag);
+		PRINT("cmd->s.tt        = %x\n", cmd->s.tt);
+		PRINT("cmd->s.wq_ptr    = %lx\n", cmd->s.wq_ptr);
+		PRINT("cmd->s.ei0       = %lx\n", cmd->s.ei0);
+		PRINT("cmd->s.ei1       = %lx\n", cmd->s.ei1);
+		PRINT("cmd->s.ei2       = %lx\n", cmd->s.ei2);
+		PRINT("cmd->s.ei3       = %lx\n", cmd->s.ei3);
+
+		/* i is reused here as the 64-bit word index of the dump */
+		PRINT("\nCommand dump from queue HEAD:");
+		for (i = 0; i < CPT_INST_SIZE / 8; i++)
+			PRINT("%lx\n", p[i]);
+	}
+#endif
+	/* Current chunk exhausted: switch to the next chunk, wrapping */
+	if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) {
+		uint32_t cchunk = queue->cchunk;
+		MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS);
+		queue->qhead = queue->chead[cchunk].head;
+		queue->idx = 0;
+		queue->cchunk = cchunk;
+	}
+
+	if (ring_door_bell) {
+		/* Memory barrier to flush pending writes */
+		rte_smp_wmb();
+		cptvf_write_vq_doorbell(cptvf, ring_door_bell);
+	}
+}
+
+/*
+ * Polls the completion word of a previously submitted request.
+ *
+ * Returns:
+ *   ERR_REQ_PENDING - the hardware has not completed the request and the
+ *                     timeout (plus TIME_IN_RESET_COUNT extra polls) has
+ *                     not expired yet
+ *   ERR_REQ_TIMEOUT - the request is still not done after the timeout
+ *   0               - completed with CPT_COMP_E_GOOD
+ *   other non-zero  - completed with an error; the value is read from the
+ *                     alternate completion address, or ERR_BAD_ALT_CCODE
+ *                     when that value is 0
+ */
+static inline uint8_t
+check_nb_command_id(cpt_request_info_t *user_req, struct cpt_vf *cptvf)
+{
+	uint8_t ret = ERR_REQ_PENDING;
+	volatile cpt_res_s_t *cptres;
+
+	cptres = (volatile cpt_res_s_t *)user_req->completion_addr;
+
+	if (unlikely(cptres->s.compcode == CPT_COMP_E_NOTDONE)) {
+		/*
+		 * Wait for some time for this command to get completed
+		 * before timing out
+		 */
+		if (cpu_cycles() < user_req->time_out)
+			return ret;
+		/*
+		 * TODO: See if alternate caddr can be used to not loop
+		 * longer than needed.
+		 */
+		if ((cptres->s.compcode == CPT_COMP_E_NOTDONE) &&
+		    (user_req->extra_time < TIME_IN_RESET_COUNT)) {
+			user_req->extra_time++;
+			return ret;
+		}
+
+		/* Completion may have landed while we were checking */
+		if (cptres->s.compcode != CPT_COMP_E_NOTDONE)
+			goto complete;
+
+		ret = ERR_REQ_TIMEOUT;
+		PMD_DRV_LOG_RAW(ERR, "Request %p timedout\n", user_req);
+		cptvf_poll_misc(cptvf);
+		dump_cpt_request_sglist(&user_req->dbg_inst,
+					"Response Packet Gather in", 1, 1);
+		goto exit;
+	}
+
+complete:
+	if (likely(cptres->s.compcode == CPT_COMP_E_GOOD)) {
+		ret = 0; /* success */
+		PMD_RX_LOG(DEBUG, "MC status %.8x\n",
+			   *((volatile uint32_t *)user_req->alternate_caddr));
+		PMD_RX_LOG(DEBUG, "HW status %.8x\n",
+			   *((volatile uint32_t *)user_req->completion_addr));
+	} else if ((cptres->s.compcode == CPT_COMP_E_SWERR) ||
+		   (cptres->s.compcode == CPT_COMP_E_FAULT)) {
+		ret = (uint8_t)*user_req->alternate_caddr;
+		if (!ret)
+			ret = ERR_BAD_ALT_CCODE;
+		PMD_RX_LOG(DEBUG, "Request %p : failed with %s : err code :"
+			   "%x\n", user_req,
+			   (cptres->s.compcode == CPT_COMP_E_FAULT) ?
+			   "DMA Fault" : "Software error", ret);
+	} else {
+		PMD_DRV_LOG_RAW(ERR, "Request %p : unexpected completion code"
+			   " %d\n",
+			   user_req, cptres->s.compcode);
+		ret = (uint8_t)*user_req->alternate_caddr;
+		/*
+		 * An unexpected completion code is an error: never let a
+		 * zero alternate code turn it into a success for the caller.
+		 */
+		if (!ret)
+			ret = ERR_BAD_ALT_CCODE;
+	}
+
+exit:
+	dump_cpt_request_sglist(&user_req->dbg_inst,
+				"Response Packet Scatter Out", 1, 0);
+	return ret;
+}
+
+
+/*
+ * cpt_enqueue_req()
+ *
+ * SE & AE request enqueue function
+ *
+ * Copies the prepared request (@req, a cpt_request_info_t) into the next
+ * free slot of the command queue and submits it according to @flags:
+ *   - ENQ_FLAG_SYNC: ring the doorbell and poll until the request
+ *     completes or times out;
+ *   - ENQ_FLAG_EVENT: fill tag/group/tag type from @event_flags and the
+ *     work-queue pointer from @event, then ring the doorbell;
+ *   - otherwise: record the request in the pending queue; the doorbell is
+ *     rung now, or deferred when ENQ_FLAG_NODOORBELL is set.
+ * A NULL @req together with ENQ_FLAG_ONLY_DOORBELL rings the doorbells
+ * deferred so far.
+ *
+ * Returns:
+ *   -EINVAL  @instance is NULL (only with CPTVF_STRICT_PARAM_CHECK)
+ *   -EAGAIN  the pending count reached DEFAULT_CMD_QLEN
+ *   0        request queued, or @req was NULL
+ *   in synchronous mode, the result of check_nb_command_id()
+ */
+int32_t __hot
+cpt_enqueue_req(cpt_instance_t *instance, void *req, uint8_t flags,
+		void *event, uint64_t event_flags)
+{
+	struct pending_queue *pqueue;
+	struct cpt_vf *cptvf;
+	cpt_inst_s_t *cpt_ist_p = NULL;
+	cpt_request_info_t *user_req = (cpt_request_info_t *)req;
+	struct command_queue *cqueue;
+	int32_t ret = 0;
+
+#ifdef CPTVF_STRICT_PARAM_CHECK
+	if (unlikely(!instance)) {
+		PMD_DRV_LOG_RAW(ERR, "Invalid inputs (instance: %p, req: %p)\n",
+			   instance, req);
+		return -EINVAL;
+	}
+#endif
+
+	cptvf = (struct cpt_vf *)instance;
+	pqueue = &cptvf->pqueue;
+
+	if (unlikely(!req)) {
+		/* ring only pending doorbells */
+		if ((flags & ENQ_FLAG_ONLY_DOORBELL) && pqueue->p_doorbell) {
+			/* Memory barrier to flush pending writes */
+			rte_smp_wmb();
+			cptvf_write_vq_doorbell(cptvf, pqueue->p_doorbell);
+			pqueue->p_doorbell = 0;
+		}
+		return 0;
+	}
+
+#if defined(ATOMIC_THROTTLING_COUNTER)
+	/* Ask the application to try again later */
+	if (unlikely(cpt_pmd_pcount_load(&pqueue->pending_count) >=
+		     DEFAULT_CMD_QLEN)) {
+		return -EAGAIN;
+	}
+#else
+	/* Ask the application to try again later */
+	if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN))
+		return -EAGAIN;
+#endif
+	cqueue = &cptvf->cqueue;
+	cpt_ist_p = get_cpt_inst(cqueue, req);
+	rte_prefetch_non_temporal(cpt_ist_p);
+
+	/* EI0, EI1, EI2, EI3 are already prepared */
+	/* HW W0 */
+	cpt_ist_p->u[0] = 0;
+	/* HW W1 */
+	cpt_ist_p->s.res_addr = user_req->comp_baddr;
+	/* HW W2 */
+	cpt_ist_p->u[2] = 0;
+	/* HW W3 */
+	cpt_ist_p->s.wq_ptr = 0;
+
+	/* MC EI0 */
+	cpt_ist_p->s.ei0 = user_req->ist.ei0;
+	/* MC EI1 */
+	cpt_ist_p->s.ei1 = user_req->ist.ei1;
+	/* MC EI2 */
+	cpt_ist_p->s.ei2 = user_req->ist.ei2;
+	/* MC EI3 */
+	cpt_ist_p->s.ei3 = user_req->ist.ei3;
+
+	PMD_TX_LOG(DEBUG, "req: %p op: %p dma_mode 0x%x se_req %u\n",
+		   req,
+		   user_req->op,
+		   user_req->dma_mode,
+		   user_req->se_req);
+
+#ifdef CPT_DEBUG
+	{
+		vq_cmd_word0_t vq_cmd_w0;
+		vq_cmd_word3_t vq_cmd_w3;
+
+		vq_cmd_w3.u64 = cpt_ist_p->s.ei3;
+		vq_cmd_w0.u64 = be64toh(cpt_ist_p->s.ei0);
+		/* Keep a copy of the instruction for the completion dumps */
+		user_req->dbg_inst = *cpt_ist_p;
+
+		if (vq_cmd_w3.s.cptr) {
+			PMD_TX_LOG(DEBUG, "Context Handle: 0x%016lx\n",
+				   (uint64_t)vq_cmd_w3.s.cptr);
+			/* Dump max context i.e 448 bytes */
+			cpt_dump_buffer("CONTEXT",
+					os_iova2va((uint64_t)vq_cmd_w3.s.cptr),
+					448);
+		}
+
+		dump_cpt_request_info(user_req, cpt_ist_p);
+		dump_cpt_request_sglist(cpt_ist_p, "Request (src)", 1, 1);
+		dump_cpt_request_sglist(cpt_ist_p, "Request (dst)", 0, 0);
+		cpt_dump_buffer("VQ command word0", &cpt_ist_p->u[4],
+				sizeof(vq_cmd_w0));
+		cpt_dump_buffer("VQ command word1", &cpt_ist_p->u[5],
+				sizeof(uint64_t));
+		cpt_dump_buffer("VQ command word2", &cpt_ist_p->u[6],
+				sizeof(uint64_t));
+		cpt_dump_buffer("VQ command word3", &cpt_ist_p->u[7],
+				sizeof(vq_cmd_w3));
+	}
+#endif
+
+	if (likely(!(flags & ENQ_FLAG_SYNC))) {
+		void *op = user_req->op;
+
+		if (unlikely(flags & ENQ_FLAG_EVENT)) {
+			app_data_t *app_data = op;
+
+			/* Event based completion */
+			cpt_ist_p->s.tag = OCTTX_EVENT_TAG(event_flags);
+			cpt_ist_p->s.grp = OCTTX_EVENT_GRP(event_flags);
+			cpt_ist_p->s.tt = OCTTX_EVENT_TT(event_flags);
+			cpt_ist_p->s.wq_ptr = (uint64_t)event;
+
+#if defined(ATOMIC_THROTTLING_COUNTER)
+			app_data->marker = user_req;
+			__atomic_fetch_add(&pqueue->pending_count,
+					   1, __ATOMIC_RELAXED);
+#else
+			rid_t *rid_e;
+			/*
+			 * Mark it as in progress in pending queue, software
+			 * will mark it when completion is received
+			 *
+			 * NOTE(review): unlike the non-event path below,
+			 * pending_count is not incremented here - confirm
+			 * this is intended.
+			 */
+			rid_e = &pqueue->rid_queue[pqueue->enq_tail];
+			rid_e->rid = (uint64_t)user_req;
+			/* rid_e->op = op; */
+			MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
+			app_data->marker = rid_e;
+#endif
+
+			cpt_dump_buffer("CPT Instruction with wqe", cpt_ist_p,
+					sizeof(*cpt_ist_p));
+
+			/* Event mode always rings the doorbell for one entry */
+			mark_cpt_inst(cptvf, cqueue, 1);
+
+		} else {
+			uint32_t doorbell = 0;
+
+			/*
+			 * Either accumulate the doorbell for later, or ring
+			 * now for this entry plus all deferred ones
+			 */
+			if (likely(flags & ENQ_FLAG_NODOORBELL))
+				pqueue->p_doorbell++;
+			else
+				doorbell = ++pqueue->p_doorbell;
+
+			/* Fill time_out cycles */
+			user_req->time_out = cpu_cycles() +
+				DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
+			user_req->extra_time = 0;
+
+			cpt_dump_buffer("CPT Instruction", cpt_ist_p,
+					sizeof(*cpt_ist_p));
+
+			/* Default mode of software queue */
+			mark_cpt_inst(cptvf, cqueue, doorbell);
+
+			/* Doorbells rung above are no longer outstanding */
+			pqueue->p_doorbell -= doorbell;
+			pqueue->rid_queue[pqueue->enq_tail].rid =
+				(uint64_t)user_req;
+			/* pqueue->rid_queue[pqueue->enq_tail].op = op; */
+			/* We will use soft queue length here to limit
+			 * requests
+			 */
+			MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
+			pqueue->pending_count += 1;
+		}
+
+		PMD_TX_LOG(DEBUG, "Submitted NB cmd with request: %p op: %p\n",
+			   user_req, op);
+	} else {
+		/*
+		 * Synchronous operation,
+		 * hold until completion / timeout
+		 */
+		/* Fill time_out cycles */
+		user_req->time_out = cpu_cycles() +
+			DEFAULT_COMMAND_TIMEOUT * cpu_cycles_freq();
+		user_req->extra_time = 0;
+
+		cpt_dump_buffer("CPT Instruction", cpt_ist_p,
+				sizeof(*cpt_ist_p));
+
+		/* Default mode of software queue */
+		mark_cpt_inst(cptvf, cqueue, 1);
+
+		do {
+			/* TODO: should we pause */
+			ret = check_nb_command_id(user_req, cptvf);
+			cptvf_poll_misc(cptvf);
+#if 0
+			PMD_TX_LOG(DEBUG, "Doorbell count for cptvf %s: %u\n",
+				   cptvf->dev_name,
+				   cptvf_read_vq_doorbell(cptvf));
+#endif
+		} while (ret == ERR_REQ_PENDING);
+
+		PMD_TX_LOG(DEBUG, "Completed blocking cmd req: 0x%016llx, rc "
+			   "0x%x\n", (unsigned long long)user_req, ret);
+	}
+
+	return ret;
+}
+
+
+/*
+ * cpt_dequeue_burst()
+ *
+ * Collects up to @cnt completed requests from the head of the pending
+ * queue, in submission order, and stops at the first request that is
+ * still pending. For every completed request the op handle is stored in
+ * @resp[] and the completion code in @cc[].
+ *
+ * Returns the number of entries written to @resp[] and @cc[].
+ */
+int32_t __hot
+cpt_dequeue_burst(cpt_instance_t *instance, uint16_t cnt,
+		  void *resp[], uint8_t cc[])
+{
+	struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+	struct pending_queue *pqueue = &cptvf->pqueue;
+	cpt_request_info_t *user_req;
+	rid_t *rid_e;
+	int i, count, pcount;
+	uint8_t ret;
+
+	/* Never look at more entries than are actually pending */
+	pcount = pqueue->pending_count;
+	count = (cnt > pcount) ? pcount : cnt;
+
+	for (i = 0; i < count; i++) {
+		rid_e = &pqueue->rid_queue[pqueue->deq_head];
+		user_req = (cpt_request_info_t *)(rid_e->rid);
+
+		if (likely((i+1) < count)) {
+			/*
+			 * Prefetch the next request. The index has to wrap
+			 * like deq_head does: rid_e[1] would read past the
+			 * end of rid_queue when deq_head is the last slot.
+			 */
+			uint32_t next = pqueue->deq_head;
+
+			MOD_INC(next, DEFAULT_CMD_QLEN);
+			rte_prefetch_non_temporal(
+				(void *)pqueue->rid_queue[next].rid);
+		}
+
+		ret = check_nb_command_id(user_req, cptvf);
+
+		if (unlikely(ret == ERR_REQ_PENDING)) {
+			/* Stop checking for completions */
+			break;
+		}
+
+		/* Return completion code and op handle */
+		cc[i] = (uint8_t)ret;
+		resp[i] = user_req->op;
+		PMD_RX_LOG(DEBUG, "Request %p Op %p completed with code %d",
+			   user_req, user_req->op, ret);
+
+		MOD_INC(pqueue->deq_head, DEFAULT_CMD_QLEN);
+		pqueue->pending_count -= 1;
+	}
+
+	return i;
+}
+
+/*
+ * cpt_queue_full()
+ *
+ * Despite its name, returns the number of free slots in the pending
+ * queue: DEFAULT_CMD_QLEN minus the current pending count. A return value
+ * of 0 means the queue is full and the application should retry later.
+ */
+uint16_t __hot
+cpt_queue_full(cpt_instance_t *instance)
+{
+	struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+	struct pending_queue *pqueue = &cptvf->pqueue;
+	uint64_t pending;
+
+#if defined(ATOMIC_THROTTLING_COUNTER)
+	pending = cpt_pmd_pcount_load(&pqueue->pending_count);
+#else
+	pending = pqueue->pending_count;
+#endif
+	/*
+	 * Compare before subtracting: the result is unsigned, so testing
+	 * the difference for "<= 0" could never catch an underflow.
+	 */
+	if (pending >= DEFAULT_CMD_QLEN)
+		return 0;
+
+	return (uint16_t)(DEFAULT_CMD_QLEN - pending);
+}
diff --git a/drivers/crypto/cpt/base/cpt_request_mgr.h b/drivers/crypto/cpt/base/cpt_request_mgr.h
new file mode 100644
index 0000000..dfa4046
--- /dev/null
+++ b/drivers/crypto/cpt/base/cpt_request_mgr.h
@@ -0,0 +1,75 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017 Cavium, Inc
+ */
+
+#ifndef __REQUEST_MANGER_H
+#define __REQUEST_MANGER_H
+
+#include "cpt8xxx_device.h"
+
+/* Extra completion polls granted to a request after its timeout expired */
+#define TIME_IN_RESET_COUNT  5
+#define COMPLETION_CODE_SIZE 8
+#define COMPLETION_CODE_INIT 0
+
+#define SG_LIST_HDR_SIZE  (8u)
+/* Size in bytes of one sg_comp_t */
+#define SG_ENTRY_SIZE sizeof(sg_comp_t)
+
+/* NOTE(review): presumably the values of cpt_request_info.se_req - confirm */
+#define AE_CORE_REQ 0
+#define SE_CORE_REQ 1
+
+#define CTRL_DMA_MODE_SGIO	2	/* DMA Mode but SGIO is already setup */
+
+/*
+ * Reads the system register @reg with an "mrs" instruction and evaluates
+ * to its value as a uint64_t. Implemented as a GNU statement expression.
+ */
+#define MRS(reg) \
+	({ \
+	 uint64_t val; \
+	 __asm volatile("mrs %0, " #reg : "=r" (val)); \
+	 val; \
+	 })
+
+/*
+ * NOTE(review): defined outside this file; presumably fills @ipad and
+ * @opad from @key (@keylen bytes) for the given @hash_type - confirm
+ * against the definition, including the meaning of the return value.
+ */
+int calculate_pad(uint8_t *ipad, uint8_t *opad, auth_type_t hash_type,
+	      uint8_t *key, uint32_t keylen);
+
+/*
+ * 16-bit opcode, accessible either as one word (flags) or as its two
+ * bytes (s.major, s.minor).
+ */
+typedef union opcode_info {
+	uint16_t flags;
+	struct {
+		uint8_t major;
+		uint8_t minor;
+	} s;
+} opcode_info_t;
+
+/*
+ * One scatter-gather list component: four 16-bit lengths (also
+ * accessible as a single 64-bit word) followed by four 64-bit pointers.
+ * NOTE(review): presumably len[i] is the length of the buffer at ptr[i] -
+ * confirm against the code that fills the list.
+ */
+typedef struct sglist_comp {
+	union {
+		uint64_t len;
+		struct {
+			uint16_t len[4];
+		} s;
+	} u;
+	uint64_t ptr[4];
+} sg_comp_t;
+
+/*
+ * Per-request bookkeeping handed to cpt_enqueue_req() and tracked in the
+ * pending queue until the request completes.
+ */
+struct cpt_request_info {
+	/* fast path fields */
+	uint64_t dma_mode	: 2;	/**< DMA mode */
+	uint64_t se_req		: 1;	/**< To SE core */
+	/** Copied into the instruction's res_addr field on enqueue */
+	uint64_t comp_baddr	: 61;
+	/** Completion word polled for the hardware completion code */
+	volatile uint64_t *completion_addr;
+	/** Alternate completion word; read as the error code on failure */
+	volatile uint64_t *alternate_caddr;
+	void *op; /**< Reference to operation */
+	/** Prepared instruction words, copied to EI0..EI3 on enqueue */
+	struct {
+		uint64_t ei0;
+		uint64_t ei1;
+		uint64_t ei2;
+		uint64_t ei3;
+	} ist;
+
+	/* slow path fields */
+	/** Timer-cycle deadline, set when the request is enqueued */
+	uint64_t time_out;
+	/** Polls already done past the deadline (see TIME_IN_RESET_COUNT) */
+	uint8_t extra_time;
+#ifdef CPT_DEBUG
+	/** Copy of the submitted instruction, kept for debug dumps */
+	cpt_inst_s_t dbg_inst;
+#endif
+
+};
+
+typedef struct cpt_request_info cpt_request_info_t;
+#endif