[v2,1/1] compress/octeontx: support scatter gather mode

Message ID 20230203022100.3364599-1-mchalla@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Akhil Goyal
Series [v2,1/1] compress/octeontx: support scatter gather mode

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/github-robot: build success github build: passed
ci/iol-broadcom-Performance fail Performance Testing issues
ci/iol-intel-Functional success Functional Testing PASS
ci/loongarch-compilation success Compilation OK
ci/iol-intel-Performance success Performance Testing PASS
ci/loongarch-unit-testing success Unit Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Mahipal Challa Feb. 3, 2023, 2:21 a.m. UTC
Scatter-gather mode support is added so that larger data can be compressed or
decompressed in a single compression or decompression operation.

Signed-off-by: Mahipal Challa <mchalla@marvell.com>

---
v2:
 - Checkpatch warning is resolved.
 - PMD debug logs are used for debug prints.
 - Documentation is updated.
---
 doc/guides/compressdevs/features/octeontx.ini |   3 +
 drivers/compress/octeontx/otx_zip.h           | 143 +++++++++++++++---
 drivers/compress/octeontx/otx_zip_pmd.c       |  84 +++++++---
 3 files changed, 194 insertions(+), 36 deletions(-)
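
As a usage illustration (not part of the patch): with this change an application can hand chained, multi-segment mbufs straight to the octeontx compress PMD instead of linearizing them first. Below is a minimal sketch of the application-side call, assuming a configured compressdev dev_id with at least one queue pair, a stateless DEFLATE private xform, and pre-created op/mbuf mempools; the helper name enqueue_sgl_compress() and the parameters priv_xform, op_pool, mbuf_pool, seg_cnt and seg_len are illustrative only. Per the patch, the hardware gather/scatter path is taken when an mbuf is chained and the data length exceeds ZIP_MAXSEG_SIZE (59460 bytes).

#include <errno.h>
#include <rte_comp.h>
#include <rte_compressdev.h>
#include <rte_mbuf.h>

/*
 * Sketch only: enqueue one stateless compression op whose source and
 * destination are chained (multi-segment) mbufs, exercising the
 * OOP SGL In SGL Out path added by this patch. Segment fill and error-path
 * cleanup are omitted for brevity.
 */
static int
enqueue_sgl_compress(uint8_t dev_id, void *priv_xform,
		     struct rte_mempool *op_pool, struct rte_mempool *mbuf_pool,
		     uint16_t seg_cnt, uint16_t seg_len)
{
	struct rte_mbuf *src = rte_pktmbuf_alloc(mbuf_pool);
	struct rte_mbuf *dst = rte_pktmbuf_alloc(mbuf_pool);
	struct rte_comp_op *op = rte_comp_op_alloc(op_pool);
	uint16_t i;

	if (src == NULL || dst == NULL || op == NULL)
		return -ENOMEM;

	/* First segment of each chain */
	rte_pktmbuf_append(src, seg_len);
	rte_pktmbuf_append(dst, seg_len);

	/* Chain further segments so nb_segs > 1 on both input and output */
	for (i = 1; i < seg_cnt; i++) {
		struct rte_mbuf *s = rte_pktmbuf_alloc(mbuf_pool);
		struct rte_mbuf *d = rte_pktmbuf_alloc(mbuf_pool);

		if (s == NULL || d == NULL)
			return -ENOMEM;
		rte_pktmbuf_append(s, seg_len);
		rte_pktmbuf_append(d, seg_len);
		rte_pktmbuf_chain(src, s);
		rte_pktmbuf_chain(dst, d);
	}

	op->op_type = RTE_COMP_OP_STATELESS;
	op->flush_flag = RTE_COMP_FLUSH_FINAL;
	op->private_xform = priv_xform;
	op->m_src = src;
	op->m_dst = dst;
	op->src.offset = 0;
	op->src.length = rte_pktmbuf_pkt_len(src);
	op->dst.offset = 0;

	/* Queue pair 0; the completion is collected later with
	 * rte_compressdev_dequeue_burst().
	 */
	return rte_compressdev_enqueue_burst(dev_id, 0, &op, 1) == 1 ? 0 : -EIO;
}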
  

Comments

Akhil Goyal Feb. 5, 2023, 5:25 p.m. UTC | #1
> Scatter-gather mode support is added so that larger data can be compressed
> or decompressed in a single compression or decompression operation.
> 
> Signed-off-by: Mahipal Challa <mchalla@marvell.com>
> 
> ---
> v2:
>  - Checkpatch warning is resolved.
>  - PMD debug logs are used for debug prints.
>  - Documentation is updated.
> ---
Please use in-reply-to while sending subsequent versions of patches.
Applied to dpdk-next-crypto

Thanks.
  

Patch

diff --git a/doc/guides/compressdevs/features/octeontx.ini b/doc/guides/compressdevs/features/octeontx.ini
index cc8b025682..8a9b2906d8 100644
--- a/doc/guides/compressdevs/features/octeontx.ini
+++ b/doc/guides/compressdevs/features/octeontx.ini
@@ -8,3 +8,6 @@  HW Accelerated = Y
 Deflate        = Y
 Fixed          = Y
 Dynamic        = Y
+OOP SGL In SGL Out  = Y
+OOP SGL In LB  Out  = Y
+OOP LB  In SGL Out  = Y
diff --git a/drivers/compress/octeontx/otx_zip.h b/drivers/compress/octeontx/otx_zip.h
index ee14deb03d..7391360925 100644
--- a/drivers/compress/octeontx/otx_zip.h
+++ b/drivers/compress/octeontx/otx_zip.h
@@ -55,10 +55,13 @@  extern int octtx_zip_logtype_driver;
 				ZIP_MAX_NCBP_SIZE)/* ~8072ull */
 
 #define ZIP_BUF_SIZE	256
+#define ZIP_SGBUF_SIZE	(5 * 1024)
 #define ZIP_BURST_SIZE	64
 
 #define ZIP_MAXSEG_SIZE      59460
 #define ZIP_EXTRABUF_SIZE    4096
+#define ZIP_MAX_SEGS         300
+#define ZIP_MAX_DATA_SIZE    (16*1024*1024)
 
 #define ZIP_SGPTR_ALIGN	16
 #define ZIP_CMDQ_ALIGN	128
@@ -102,6 +105,12 @@  struct zipvf_qp;
 typedef int (*comp_func_t)(struct rte_comp_op *op, struct zipvf_qp *qp,
 			   struct zip_stream *zstrm, int num);
 
+/* Scatter gather list */
+struct zipvf_sginfo {
+	union zip_zptr_addr_s  sg_addr;
+	union zip_zptr_ctl_s   sg_ctl;
+} __rte_aligned(16);
+
 /**
  * ZIP private stream structure
  */
@@ -144,6 +153,11 @@  struct zipvf_qp {
 	/* Unique Queue Pair Name */
 	struct zip_vf *vf;
 	/* pointer to device, queue belongs to */
+	struct zipvf_sginfo *g_info;
+	struct zipvf_sginfo *s_info;
+	/* SGL pointers */
+	uint64_t num_sgbuf;
+	uint64_t enqed;
 } __rte_cache_aligned;
 
 /**
@@ -161,50 +175,134 @@  struct zip_vf {
 	uint32_t  max_nb_queue_pairs;
 	/* pointer to device qps */
 	struct rte_mempool *zip_mp;
+	struct rte_mempool *sg_mp;
 	/* pointer to pools */
 } __rte_cache_aligned;
 
 
-static inline void
-zipvf_prepare_in_buf(union zip_inst_s *inst, struct rte_comp_op *op)
+static inline int
+zipvf_prepare_sgl(struct rte_mbuf *buf, int64_t offset, struct zipvf_sginfo *sg_list,
+		  uint32_t data_len, const uint16_t max_segs, struct zipvf_qp *qp)
+{
+	struct zipvf_sginfo *sginfo = (struct zipvf_sginfo *)sg_list;
+	uint32_t tot_buf_len, sgidx;
+	int ret = -EINVAL;
+
+	for (sgidx = tot_buf_len = 0; buf && sgidx < max_segs; buf = buf->next) {
+		if (offset >= rte_pktmbuf_data_len(buf)) {
+			offset -= rte_pktmbuf_data_len(buf);
+			continue;
+		}
+
+		sginfo[sgidx].sg_ctl.s.length = (uint16_t)(rte_pktmbuf_data_len(buf) - offset);
+		sginfo[sgidx].sg_addr.s.addr = rte_pktmbuf_iova_offset(buf, offset);
+
+		offset = 0;
+		tot_buf_len += sginfo[sgidx].sg_ctl.s.length;
+
+		if (tot_buf_len >= data_len) {
+			sginfo[sgidx].sg_ctl.s.length -= tot_buf_len - data_len;
+			ret = 0;
+			break;
+		}
+
+		ZIP_PMD_LOG(DEBUG, "ZIP SGL buf[%d], len = %d, iova = 0x%"PRIx64"\n",
+			    sgidx, sginfo[sgidx].sg_ctl.s.length, sginfo[sgidx].sg_addr.s.addr);
+		++sgidx;
+	}
+
+	if (unlikely(ret != 0)) {
+		if (sgidx == max_segs)
+			ZIP_PMD_ERR("Exceeded max segments in ZIP SGL (%u)", max_segs);
+		else
+			ZIP_PMD_ERR("Mbuf chain is too short");
+	}
+	qp->num_sgbuf = ++sgidx;
+
+	ZIP_PMD_LOG(DEBUG, "Tot_buf_len:%d max_segs:%"PRIx64"\n", tot_buf_len,
+		    qp->num_sgbuf);
+	return ret;
+}
+
+static inline int
+zipvf_prepare_in_buf(union zip_inst_s *inst, struct zipvf_qp *qp, struct rte_comp_op *op)
 {
 	uint32_t offset, inlen;
 	struct rte_mbuf *m_src;
+	int ret = 0;
 
 	inlen = op->src.length;
 	offset = op->src.offset;
 	m_src = op->m_src;
 
+	/* Gather input */
+	if (op->m_src->next != NULL && inlen > ZIP_MAXSEG_SIZE) {
+		inst->s.dg = 1;
+
+		ret = zipvf_prepare_sgl(m_src, offset, qp->g_info, inlen,
+					op->m_src->nb_segs, qp);
+
+		inst->s.inp_ptr_addr.s.addr = rte_mem_virt2iova(qp->g_info);
+		inst->s.inp_ptr_ctl.s.length = qp->num_sgbuf;
+		inst->s.inp_ptr_ctl.s.fw = 0;
+
+		ZIP_PMD_LOG(DEBUG, "Gather(input): len(nb_segs):%d, iova: 0x%"PRIx64"\n",
+			    inst->s.inp_ptr_ctl.s.length, inst->s.inp_ptr_addr.s.addr);
+		return ret;
+	}
+
 	/* Prepare direct input data pointer */
 	inst->s.dg = 0;
-	inst->s.inp_ptr_addr.s.addr =
-			rte_pktmbuf_iova_offset(m_src, offset);
+	inst->s.inp_ptr_addr.s.addr = rte_pktmbuf_iova_offset(m_src, offset);
 	inst->s.inp_ptr_ctl.s.length = inlen;
+
+	ZIP_PMD_LOG(DEBUG, "Direct input - inlen:%d\n", inlen);
+	return ret;
 }
 
-static inline void
-zipvf_prepare_out_buf(union zip_inst_s *inst, struct rte_comp_op *op)
+static inline int
+zipvf_prepare_out_buf(union zip_inst_s *inst, struct zipvf_qp *qp, struct rte_comp_op *op)
 {
-	uint32_t offset;
+	uint32_t offset, outlen;
 	struct rte_mbuf *m_dst;
+	int ret = 0;
 
 	offset = op->dst.offset;
 	m_dst = op->m_dst;
+	outlen = rte_pktmbuf_pkt_len(m_dst) - op->dst.offset;
 
-	/* Prepare direct input data pointer */
+	/* Scatter output */
+	if (op->m_dst->next != NULL && outlen > ZIP_MAXSEG_SIZE) {
+		inst->s.ds = 1;
+		inst->s.totaloutputlength = outlen;
+
+		ret = zipvf_prepare_sgl(m_dst, offset, qp->s_info, inst->s.totaloutputlength,
+					m_dst->nb_segs, qp);
+
+		inst->s.out_ptr_addr.s.addr = rte_mem_virt2iova(qp->s_info);
+		inst->s.out_ptr_ctl.s.length = qp->num_sgbuf;
+
+		ZIP_PMD_LOG(DEBUG, "Scatter(output): nb_segs:%d, iova:0x%"PRIx64"\n",
+			    inst->s.out_ptr_ctl.s.length, inst->s.out_ptr_addr.s.addr);
+		return ret;
+	}
+
+	/* Prepare direct output data pointer */
 	inst->s.ds = 0;
-	inst->s.out_ptr_addr.s.addr =
-			rte_pktmbuf_iova_offset(m_dst, offset);
-	inst->s.totaloutputlength = rte_pktmbuf_pkt_len(m_dst) -
-			op->dst.offset;
+	inst->s.out_ptr_addr.s.addr = rte_pktmbuf_iova_offset(m_dst, offset);
+	inst->s.totaloutputlength = rte_pktmbuf_pkt_len(m_dst) - op->dst.offset;
 	if (inst->s.totaloutputlength == ZIP_MAXSEG_SIZE)
 		inst->s.totaloutputlength += ZIP_EXTRABUF_SIZE; /* DSTOP */
 
 	inst->s.out_ptr_ctl.s.length = inst->s.totaloutputlength;
+
+	ZIP_PMD_LOG(DEBUG, "Direct output - outlen:%d\n", inst->s.totaloutputlength);
+	return ret;
 }
 
-static inline void
-zipvf_prepare_cmd_stateless(struct rte_comp_op *op, union zip_inst_s *inst)
+static inline int
+zipvf_prepare_cmd_stateless(struct rte_comp_op *op, struct zipvf_qp *qp,
+			    union zip_inst_s *inst)
 {
 	/* set flush flag to always 1*/
 	inst->s.ef = 1;
@@ -217,9 +315,18 @@  zipvf_prepare_cmd_stateless(struct rte_comp_op *op, union zip_inst_s *inst)
 	/* Set input checksum */
 	inst->s.adlercrc32 = op->input_chksum;
 
-	/* Prepare gather buffers */
-	zipvf_prepare_in_buf(inst, op);
-	zipvf_prepare_out_buf(inst, op);
+	/* Prepare input/output buffers */
+	if (zipvf_prepare_in_buf(inst, qp, op)) {
+		ZIP_PMD_ERR("Can't fill input SGL");
+		return -EINVAL;
+	}
+
+	if (zipvf_prepare_out_buf(inst, qp, op)) {
+		ZIP_PMD_ERR("Can't fill output SGL");
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 #ifdef ZIP_DBG
@@ -229,6 +336,7 @@  zip_dump_instruction(void *inst)
 	union zip_inst_s *cmd83 = (union zip_inst_s *)inst;
 
 	printf("####### START ########\n");
+	printf("ZIP Instr:0x%"PRIx64"\n", cmd83);
 	printf("doneint:%d totaloutputlength:%d\n", cmd83->s.doneint,
 		cmd83->s.totaloutputlength);
 	printf("exnum:%d iv:%d exbits:%d hmif:%d halg:%d\n", cmd83->s.exn,
@@ -248,6 +356,7 @@  zip_dump_instruction(void *inst)
 	printf("inp_ptr.len:%d\n", cmd83->s.inp_ptr_ctl.s.length);
 	printf("out_ptr.addr:0x%"PRIx64"\n", cmd83->s.out_ptr_addr.s.addr);
 	printf("out_ptr.len:%d\n", cmd83->s.out_ptr_ctl.s.length);
+	printf("result_ptr.addr:0x%"PRIx64"\n", cmd83->s.res_ptr_addr.s.addr);
 	printf("result_ptr.len:%d\n", cmd83->s.res_ptr_ctl.s.length);
 	printf("####### END ########\n");
 }
diff --git a/drivers/compress/octeontx/otx_zip_pmd.c b/drivers/compress/octeontx/otx_zip_pmd.c
index 2d856b19bb..fd20139da6 100644
--- a/drivers/compress/octeontx/otx_zip_pmd.c
+++ b/drivers/compress/octeontx/otx_zip_pmd.c
@@ -16,7 +16,10 @@  static const struct rte_compressdev_capabilities
 	{	.algo = RTE_COMP_ALGO_DEFLATE,
 		/* Deflate */
 		.comp_feature_flags =	RTE_COMP_FF_HUFFMAN_FIXED |
-					RTE_COMP_FF_HUFFMAN_DYNAMIC,
+					RTE_COMP_FF_HUFFMAN_DYNAMIC |
+					RTE_COMP_FF_OOP_SGL_IN_SGL_OUT |
+					RTE_COMP_FF_OOP_SGL_IN_LB_OUT |
+					RTE_COMP_FF_OOP_LB_IN_SGL_OUT,
 		/* Non sharable Priv XFORM and Stateless */
 		.window_size = {
 				.min = 1,
@@ -46,15 +49,27 @@  zip_process_op(struct rte_comp_op *op,
 	union zip_inst_s *inst = zstrm->inst[num];
 	volatile union zip_zres_s *zresult = NULL;
 
-	if ((op->m_src->nb_segs > 1) || (op->m_dst->nb_segs > 1) ||
-			(op->src.offset > rte_pktmbuf_pkt_len(op->m_src)) ||
-			(op->dst.offset > rte_pktmbuf_pkt_len(op->m_dst))) {
+	if (op->m_src->nb_segs > 1)
+		if (rte_mempool_get(qp->vf->sg_mp, (void *)&qp->g_info) < 0) {
+			ZIP_PMD_ERR("Can't allocate object from SG pool");
+			return (-ENOMEM);
+		}
+
+	if (op->m_dst->nb_segs > 1)
+		if (rte_mempool_get(qp->vf->sg_mp, (void *)&qp->s_info) < 0) {
+			rte_mempool_put(qp->vf->sg_mp, qp->g_info);
+			ZIP_PMD_ERR("Can't allocate object from SG pool");
+			return (-ENOMEM);
+		}
+
+	if (zipvf_prepare_cmd_stateless(op, qp, inst)) {
 		op->status = RTE_COMP_OP_STATUS_INVALID_ARGS;
-		ZIP_PMD_ERR("Segmented packet is not supported\n");
-		return 0;
-	}
+		rte_mempool_put(qp->vf->sg_mp, qp->g_info);
+		rte_mempool_put(qp->vf->sg_mp, qp->s_info);
 
-	zipvf_prepare_cmd_stateless(op, inst);
+		ZIP_PMD_ERR("Can't fill SGL buffers");
+		return -EINVAL;
+	}
 
 	zresult = (union zip_zres_s *)zstrm->bufs[RES_BUF + num];
 	zresult->s.compcode = 0;
@@ -174,10 +189,12 @@  static int
 zip_pmd_config(struct rte_compressdev *dev,
 		struct rte_compressdev_config *config)
 {
-	int nb_streams;
 	char res_pool[RTE_MEMZONE_NAMESIZE];
-	struct zip_vf *vf;
+	char sg_pool[RTE_MEMZONE_NAMESIZE];
 	struct rte_mempool *zip_buf_mp;
+	struct rte_mempool *zip_sg_mp;
+	struct zip_vf *vf;
+	int nb_streams;
 
 	if (!config || !dev)
 		return -EIO;
@@ -194,6 +211,9 @@  zip_pmd_config(struct rte_compressdev *dev,
 	snprintf(res_pool, RTE_MEMZONE_NAMESIZE, "octtx_zip_res_pool%u",
 		 dev->data->dev_id);
 
+	snprintf(sg_pool, RTE_MEMZONE_NAMESIZE, "octtx_zip_sg_pool%u",
+		 dev->data->dev_id);
+
 	/** TBD Should we use the per core object cache for stream resources */
 	zip_buf_mp = rte_mempool_create(
 			res_pool,
@@ -215,7 +235,30 @@  zip_pmd_config(struct rte_compressdev *dev,
 		return -1;
 	}
 
+	/* Scatter gather buffer pool */
+	zip_sg_mp = rte_mempool_create(
+			sg_pool,
+			(2 * nb_streams * ZIP_BURST_SIZE * ZIP_MAX_SEGS),
+			ZIP_SGBUF_SIZE,
+			0,
+			0,
+			NULL,
+			NULL,
+			NULL,
+			NULL,
+			SOCKET_ID_ANY,
+			MEMPOOL_F_NO_SPREAD);
+
+	if (zip_sg_mp == NULL) {
+		ZIP_PMD_ERR("Failed to create SG buf mempool octtx_zip_sg_pool%u",
+			    dev->data->dev_id);
+
+		rte_mempool_free(vf->zip_mp);
+		return -1;
+	}
+
 	vf->zip_mp = zip_buf_mp;
+	vf->sg_mp = zip_sg_mp;
 
 	return 0;
 }
@@ -243,6 +286,7 @@  zip_pmd_close(struct rte_compressdev *dev)
 
 	struct zip_vf *vf = (struct zip_vf *)dev->data->dev_private;
 	rte_mempool_free(vf->zip_mp);
+	rte_mempool_free(vf->sg_mp);
 
 	return 0;
 }
@@ -482,9 +526,9 @@  zip_pmd_enqueue_burst(void *queue_pair,
 		}
 	}
 
-#ifdef ZIP_DBG
-	ZIP_PMD_INFO("ops_enqd[nb_ops:%d]:%d\n", nb_ops, enqd);
-#endif
+	qp->enqed = enqd;
+	ZIP_PMD_LOG(DEBUG, "ops_enqd[nb_ops:%d]:%d\n", nb_ops, enqd);
+
 	return enqd;
 }
 
@@ -527,9 +571,7 @@  zip_pmd_dequeue_burst(void *queue_pair,
 				op->status = RTE_COMP_OP_STATUS_ERROR;
 		}
 
-	#ifdef ZIP_DBG
-		ZIP_PMD_INFO("written %d\n", zresult->s.totalbyteswritten);
-	#endif
+		ZIP_PMD_LOG(DEBUG, "written %d\n", zresult->s.totalbyteswritten);
 
 		/* Update op stats */
 		switch (op->status) {
@@ -548,11 +590,15 @@  zip_pmd_dequeue_burst(void *queue_pair,
 		/* zstream is reset irrespective of result */
 		reset_stream(zstrm->inst[i]);
 		zresult->s.compcode = ZIP_COMP_E_NOTDONE;
+
+		if (op->m_src->nb_segs > 1)
+			rte_mempool_put(qp->vf->sg_mp, qp->g_info);
+
+		if (op->m_dst->nb_segs > 1)
+			rte_mempool_put(qp->vf->sg_mp, qp->s_info);
 	}
 
-#ifdef ZIP_DBG
-	ZIP_PMD_INFO("ops_deqd[nb_ops:%d]: %d\n", nb_ops, nb_dequeued);
-#endif
+	ZIP_PMD_LOG(DEBUG, "ops_deqd[nb_ops:%d]: %d\n", nb_ops, nb_dequeued);
 	return nb_dequeued;
 }