[v2,5/7] raw/dpaa2_qdma: support scatter gather in enqueue

Message ID 1602755228-25535-6-git-send-email-g.singh@nxp.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Series raw/dpaa2_qdma: driver enhancement

Checks

ci/checkpatch: warning (coding style issues)

Commit Message

Gagandeep Singh Oct. 15, 2020, 9:47 a.m. UTC
From: Jun Yang <jun.yang@nxp.com>

This patch adds scatter-gather (SG) support for jobs on qdma queues.
It also supports gathering multiple enqueue jobs into SG enqueue job(s).

Signed-off-by: Jun Yang <jun.yang@nxp.com>
---
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h     |  18 +-
 drivers/raw/dpaa2_qdma/dpaa2_qdma.c         | 398 ++++++++++++++++----
 drivers/raw/dpaa2_qdma/dpaa2_qdma.h         |  75 +++-
 drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h |   2 +
 4 files changed, 412 insertions(+), 81 deletions(-)
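
The checks added in dpaa2_qdma_queue_setup() below mean the new SG format is only usable on an exclusive physical queue using the long FD format. As a minimal configuration sketch (not part of the patch; the rte_qdma_queue_config fields are taken from the driver headers, and the queue-setup entry point may differ by DPDK version):

#include <string.h>

#include <rte_lcore.h>
#include <rte_pmd_dpaa2_qdma.h>

/* Sketch: the flag combination this patch requires for SG mode. */
static void
qdma_sg_vq_config(struct rte_qdma_queue_config *q_config)
{
	memset(q_config, 0, sizeof(*q_config));
	q_config->lcore_id = rte_lcore_id();
	/* SG format is rejected unless both other flags are also set. */
	q_config->flags = RTE_QDMA_VQ_EXCLUSIVE_PQ |
			  RTE_QDMA_VQ_FD_LONG_FORMAT |
			  RTE_QDMA_VQ_FD_SG_FORMAT;
}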

Patch

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 2dd53c63b..ac24f0145 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -210,12 +210,28 @@  struct dpaa2_dpcon_dev {
 };
 
 /* Refer to Table 7-3 in SEC BG */
+#define QBMAN_FLE_WORD4_FMT_SBF 0x0 /* Single buffer frame */
+#define QBMAN_FLE_WORD4_FMT_SGE 0x2 /* Scatter gather frame */
+
+struct qbman_fle_word4 {
+	uint32_t bpid:14; /* Frame buffer pool ID */
+	uint32_t ivp:1; /* Invalid Pool ID. */
+	uint32_t bmt:1; /* Bypass Memory Translation */
+	uint32_t offset:12; /* Frame offset */
+	uint32_t fmt:2; /* Frame Format */
+	uint32_t sl:1; /* Short Length */
+	uint32_t f:1; /* Final bit */
+};
+
 struct qbman_fle {
 	uint32_t addr_lo;
 	uint32_t addr_hi;
 	uint32_t length;
 	/* FMT must be 00, MSB is final bit  */
-	uint32_t fin_bpid_offset;
+	union {
+		uint32_t fin_bpid_offset;
+		struct qbman_fle_word4 word4;
+	};
 	uint32_t frc;
 	uint32_t reserved[3]; /* Not used currently */
 };
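
The word4 view added above overlays the existing fin_bpid_offset word, so the driver can set individual fields (notably fmt) without shifting and masking. A standalone sketch of the overlay (not from the patch; field widths are copied from the struct above, and exact bit placement is compiler and endianness dependent, which is why the raw fin_bpid_offset view stays in the union):

#include <stdint.h>
#include <stdio.h>

/* Local mirror of the qbman_fle word4 overlay. */
struct fle_word4 {
	uint32_t bpid:14;	/* frame buffer pool ID */
	uint32_t ivp:1;		/* invalid pool ID */
	uint32_t bmt:1;		/* bypass memory translation */
	uint32_t offset:12;	/* frame offset */
	uint32_t fmt:2;		/* frame format */
	uint32_t sl:1;		/* short length */
	uint32_t f:1;		/* final bit */
};

union fle_word4_view {
	uint32_t fin_bpid_offset;
	struct fle_word4 word4;
};

int main(void)
{
	union fle_word4_view v = { .fin_bpid_offset = 0 };

	v.word4.fmt = 0x2;	/* QBMAN_FLE_WORD4_FMT_SGE */
	v.word4.f = 1;		/* final frame list entry */
	printf("fin_bpid_offset = 0x%08x\n", (unsigned)v.fin_bpid_offset);
	return 0;
}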
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
index 94dc7886a..7b755cea7 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.c
@@ -116,17 +116,21 @@  qdma_populate_fd_ddr(phys_addr_t src, phys_addr_t dest,
 
 static void
 dpaa2_qdma_populate_fle(struct qbman_fle *fle,
+			uint64_t fle_iova,
 			struct rte_qdma_rbp *rbp,
 			uint64_t src, uint64_t dest,
-			size_t len, uint32_t flags)
+			size_t len, uint32_t flags, uint32_t fmt)
 {
 	struct qdma_sdd *sdd;
+	uint64_t sdd_iova;
 
-	sdd = (struct qdma_sdd *)((uint8_t *)(fle) +
-		(DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle)));
+	sdd = (struct qdma_sdd *)
+			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET +
+			QDMA_FLE_SDD_OFFSET);
+	sdd_iova = fle_iova - QDMA_FLE_FLE_OFFSET + QDMA_FLE_SDD_OFFSET;
 
 	/* first frame list to source descriptor */
-	DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(sdd));
+	DPAA2_SET_FLE_ADDR(fle, sdd_iova);
 	DPAA2_SET_FLE_LEN(fle, (2 * (sizeof(struct qdma_sdd))));
 
 	/* source and destination descriptor */
@@ -164,20 +168,26 @@  dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 	/* source frame list to source buffer */
 	if (flags & RTE_QDMA_JOB_SRC_PHY) {
 		DPAA2_SET_FLE_ADDR(fle, src);
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(src));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	fle++;
 	/* destination frame list to destination buffer */
 	if (flags & RTE_QDMA_JOB_DEST_PHY) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 		DPAA2_SET_FLE_BMT(fle);
+#endif
 		DPAA2_SET_FLE_ADDR(fle, dest);
 	} else {
 		DPAA2_SET_FLE_ADDR(fle, DPAA2_VADDR_TO_IOVA(dest));
 	}
+	fle->word4.fmt = fmt;
 	DPAA2_SET_FLE_LEN(fle, len);
 
 	/* Final bit: 1, for last frame list */
@@ -187,44 +197,169 @@  dpaa2_qdma_populate_fle(struct qbman_fle *fle,
 static inline int dpdmai_dev_set_fd_us(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
 	size_t iova;
-	int ret = 0;
+	int ret = 0, loop;
+
+	for (loop = 0; loop < nb_jobs; loop++) {
+		if (job[loop]->src & QDMA_RBP_UPPER_ADDRESS_MASK)
+			iova = (size_t)job[loop]->dest;
+		else
+			iova = (size_t)job[loop]->src;
+
+		/* Set the metadata */
+		job[loop]->vq_id = qdma_vq->vq_id;
+		ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
+		*ppjob = job[loop];
+
+		if ((rbp->drbp == 1) || (rbp->srbp == 1))
+			ret = qdma_populate_fd_pci((phys_addr_t)job[loop]->src,
+						(phys_addr_t)job[loop]->dest,
+						job[loop]->len, &fd[loop], rbp);
+		else
+			ret = qdma_populate_fd_ddr((phys_addr_t)job[loop]->src,
+						(phys_addr_t)job[loop]->dest,
+						job[loop]->len, &fd[loop]);
+	}
 
-	if (job->src & QDMA_RBP_UPPER_ADDRESS_MASK)
-		iova = (size_t)job->dest;
-	else
-		iova = (size_t)job->src;
+	return ret;
+}
 
-	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
-	ppjob = (struct rte_qdma_job **)DPAA2_IOVA_TO_VADDR(iova) - 1;
-	*ppjob = job;
+static uint32_t qdma_populate_sg_entry(
+		struct rte_qdma_job **jobs,
+		struct qdma_sg_entry *src_sge,
+		struct qdma_sg_entry *dst_sge,
+		uint16_t nb_jobs)
+{
+	uint16_t i;
+	uint32_t total_len = 0;
+	uint64_t iova;
+
+	for (i = 0; i < nb_jobs; i++) {
+		/* source SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_SRC_PHY)) {
+			src_sge->addr_lo = (uint32_t)jobs[i]->src;
+			src_sge->addr_hi = (jobs[i]->src >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->src);
+			src_sge->addr_lo = (uint32_t)iova;
+			src_sge->addr_hi = iova >> 32;
+		}
+		src_sge->data_len.data_len_sl0 = jobs[i]->len;
+		src_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		src_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		src_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		src_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		/* destination SG */
+		if (likely(jobs[i]->flags & RTE_QDMA_JOB_DEST_PHY)) {
+			dst_sge->addr_lo = (uint32_t)jobs[i]->dest;
+			dst_sge->addr_hi = (jobs[i]->dest >> 32);
+		} else {
+			iova = DPAA2_VADDR_TO_IOVA(jobs[i]->dest);
+			dst_sge->addr_lo = (uint32_t)iova;
+			dst_sge->addr_hi = iova >> 32;
+		}
+		dst_sge->data_len.data_len_sl0 = jobs[i]->len;
+		dst_sge->ctrl.sl = QDMA_SG_SL_LONG;
+		dst_sge->ctrl.fmt = QDMA_SG_FMT_SDB;
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_ENABLE;
+#else
+		dst_sge->ctrl.bmt = QDMA_SG_BMT_DISABLE;
+#endif
+		total_len += jobs[i]->len;
 
-	if ((rbp->drbp == 1) || (rbp->srbp == 1))
-		ret = qdma_populate_fd_pci((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd, rbp);
-	else
-		ret = qdma_populate_fd_ddr((phys_addr_t) job->src,
-					   (phys_addr_t) job->dest,
-					   job->len, fd);
-	return ret;
+		if (i == (nb_jobs - 1)) {
+			src_sge->ctrl.f = QDMA_SG_F;
+			dst_sge->ctrl.f = QDMA_SG_F;
+		} else {
+			src_sge->ctrl.f = 0;
+			dst_sge->ctrl.f = 0;
+		}
+		src_sge++;
+		dst_sge++;
+	}
+
+	return total_len;
 }
-static inline int dpdmai_dev_set_fd_lf(
+
+static inline int dpdmai_dev_set_multi_fd_lf(
 		struct qdma_virt_queue *qdma_vq,
 		struct qbman_fd *fd,
-		struct rte_qdma_job *job)
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
 {
 	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
 	struct rte_qdma_job **ppjob;
-	void *elem;
+	uint16_t i;
+	int ret;
+	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
+	void *elem[RTE_QDMA_BURST_NB_MAX];
 	struct qbman_fle *fle;
 	uint64_t elem_iova, fle_iova;
-	int ret = 0;
+
+	ret = rte_mempool_get_bulk(qdma_dev->fle_pool, elem, nb_jobs);
+	if (ret) {
+		DPAA2_QDMA_DP_DEBUG("Memory alloc failed for FLE");
+		return ret;
+	}
+
+	for (i = 0; i < nb_jobs; i++) {
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+		elem_iova = rte_mempool_virt2iova(elem[i]);
+#else
+		elem_iova = DPAA2_VADDR_TO_IOVA(elem[i]);
+#endif
+
+		*((uint16_t *)
+		((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOB_NB_OFFSET)) = 1;
+
+		ppjob = (struct rte_qdma_job **)
+			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_JOBS_OFFSET);
+		*ppjob = job[i];
+
+		job[i]->vq_id = qdma_vq->vq_id;
+
+		fle = (struct qbman_fle *)
+			((uintptr_t)(uint64_t)elem[i] + QDMA_FLE_FLE_OFFSET);
+		fle_iova = elem_iova + QDMA_FLE_FLE_OFFSET;
+
+		DPAA2_SET_FD_ADDR(&fd[i], fle_iova);
+		DPAA2_SET_FD_COMPOUND_FMT(&fd[i]);
+		DPAA2_SET_FD_FRC(&fd[i], QDMA_SER_CTX);
+
+		memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+		dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+				job[i]->src, job[i]->dest, job[i]->len,
+				job[i]->flags, QBMAN_FLE_WORD4_FMT_SBF);
+	}
+
+	return 0;
+}
+
+static inline int dpdmai_dev_set_sg_fd_lf(
+		struct qdma_virt_queue *qdma_vq,
+		struct qbman_fd *fd,
+		struct rte_qdma_job **job,
+		uint16_t nb_jobs)
+{
+	struct rte_qdma_rbp *rbp = &qdma_vq->rbp;
+	struct rte_qdma_job **ppjob;
+	void *elem;
+	struct qbman_fle *fle;
+	uint64_t elem_iova, fle_iova, src, dst;
+	int ret = 0, i;
+	struct qdma_sg_entry *src_sge, *dst_sge;
+	uint32_t len, fmt, flags;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -244,10 +379,15 @@  static inline int dpdmai_dev_set_fd_lf(
 #endif
 
 	/* Set the metadata */
-	job->vq_id = qdma_vq->vq_id;
+	/* Save job context. */
+	*((uint16_t *)
+	((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_NB_OFFSET)) = nb_jobs;
 	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
-	*ppjob = job;
+		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOBS_OFFSET);
+	for (i = 0; i < nb_jobs; i++)
+		ppjob[i] = job[i];
+
+	ppjob[0]->vq_id = qdma_vq->vq_id;
 
 	fle = (struct qbman_fle *)
 		((uintptr_t)(uint64_t)elem + QDMA_FLE_FLE_OFFSET);
@@ -258,9 +398,29 @@  static inline int dpdmai_dev_set_fd_lf(
 	DPAA2_SET_FD_FRC(fd, QDMA_SER_CTX);
 
 	/* Populate FLE */
-	memset(fle, 0, QDMA_FLE_POOL_SIZE);
-	dpaa2_qdma_populate_fle(fle, rbp, job->src, job->dest,
-				job->len, job->flags);
+	if (likely(nb_jobs > 1)) {
+		src_sge = (struct qdma_sg_entry *)
+			((uintptr_t)(uint64_t)elem + QDMA_FLE_SG_ENTRY_OFFSET);
+		dst_sge = src_sge + DPAA2_QDMA_MAX_SG_NB;
+		src = elem_iova + QDMA_FLE_SG_ENTRY_OFFSET;
+		dst = src +
+			DPAA2_QDMA_MAX_SG_NB * sizeof(struct qdma_sg_entry);
+		len = qdma_populate_sg_entry(job, src_sge, dst_sge, nb_jobs);
+		fmt = QBMAN_FLE_WORD4_FMT_SGE;
+		flags = RTE_QDMA_JOB_SRC_PHY | RTE_QDMA_JOB_DEST_PHY;
+	} else {
+		src = job[0]->src;
+		dst = job[0]->dest;
+		len = job[0]->len;
+		fmt = QBMAN_FLE_WORD4_FMT_SBF;
+		flags = job[0]->flags;
+	}
+
+	memset(fle, 0, DPAA2_QDMA_MAX_FLE * sizeof(struct qbman_fle) +
+			DPAA2_QDMA_MAX_SDD * sizeof(struct qdma_sdd));
+
+	dpaa2_qdma_populate_fle(fle, fle_iova, rbp,
+					src, dst, len, flags, fmt);
 
 	return 0;
 }
@@ -268,7 +428,7 @@  static inline int dpdmai_dev_set_fd_lf(
 static inline uint16_t dpdmai_dev_get_job_us(
 				struct qdma_virt_queue *qdma_vq __rte_unused,
 				const struct qbman_fd *fd,
-				struct rte_qdma_job **job)
+				struct rte_qdma_job **job, uint16_t *nb_jobs)
 {
 	uint16_t vqid;
 	size_t iova;
@@ -286,6 +446,7 @@  static inline uint16_t dpdmai_dev_get_job_us(
 	(*job)->status = (fd->simple_pci.acc_err << 8) |
 					(fd->simple_pci.error);
 	vqid = (*job)->vq_id;
+	*nb_jobs = 1;
 
 	return vqid;
 }
@@ -293,12 +454,12 @@  static inline uint16_t dpdmai_dev_get_job_us(
 static inline uint16_t dpdmai_dev_get_job_lf(
 						struct qdma_virt_queue *qdma_vq,
 						const struct qbman_fd *fd,
-						struct rte_qdma_job **job)
+						struct rte_qdma_job **job,
+						uint16_t *nb_jobs)
 {
-	void *elem;
 	struct qbman_fle *fle;
-	struct rte_qdma_job **ppjob;
-	uint16_t vqid;
+	struct rte_qdma_job **ppjob = NULL;
+	uint16_t i, status;
 	struct qdma_device *qdma_dev = QDMA_DEV_OF_VQ(qdma_vq);
 
 	/*
@@ -307,20 +468,24 @@  static inline uint16_t dpdmai_dev_get_job_lf(
 	 */
 	fle = (struct qbman_fle *)
 			DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd));
-	elem = (void *)((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET);
+	*nb_jobs = *((uint16_t *)((uintptr_t)(uint64_t)fle -
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOB_NB_OFFSET));
+	status = (DPAA2_GET_FD_ERR(fd) << 8) | (DPAA2_GET_FD_FRC(fd) & 0xFF);
 
-	ppjob = (struct rte_qdma_job **)
-		((uintptr_t)(uint64_t)elem + QDMA_FLE_JOB_OFFSET);
+	ppjob = (struct rte_qdma_job **)((uintptr_t)(uint64_t)fle -
+				QDMA_FLE_FLE_OFFSET + QDMA_FLE_JOBS_OFFSET);
 
-	*job = (struct rte_qdma_job *)*ppjob;
-	(*job)->status = (DPAA2_GET_FD_ERR(fd) << 8) |
-			 (DPAA2_GET_FD_FRC(fd) & 0xFF);
-	vqid = (*job)->vq_id;
+	for (i = 0; i < (*nb_jobs); i++) {
+		job[i] = ppjob[i];
+		job[i]->status = status;
+	}
 
 	/* Free FLE to the pool */
-	rte_mempool_put(qdma_dev->fle_pool, elem);
+	rte_mempool_put(qdma_dev->fle_pool,
+			(void *)
+			((uintptr_t)(uint64_t)fle - QDMA_FLE_FLE_OFFSET));
 
-	return vqid;
+	return job[0]->vq_id;
 }
 
 /* Function to receive a QDMA job for a given device and queue*/
@@ -344,9 +509,16 @@  dpdmai_dev_dequeue_multijob_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
+	uint16_t vqid, num_rx_ret;
 	int ret, pull_size;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there is enough space to get jobs. */
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
 		if (ret) {
@@ -440,12 +612,13 @@  dpdmai_dev_dequeue_multijob_prefetch(
 		}
 		fd = qbman_result_DQ_fd(dq_storage);
 
-		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+		vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx],
+								&num_rx_ret);
 		if (vq_id)
 			vq_id[num_rx] = vqid;
 
 		dq_storage++;
-		num_rx++;
+		num_rx += num_rx_ret;
 	} while (pending);
 
 	if (check_swp_active_dqs(DPAA2_PER_LCORE_DPIO->index)) {
@@ -490,8 +663,17 @@  dpdmai_dev_dequeue_multijob_no_prefetch(
 	uint8_t status, pending;
 	uint8_t num_rx = 0;
 	const struct qbman_fd *fd;
-	uint16_t vqid;
-	int ret, next_pull = nb_jobs, num_pulled = 0;
+	uint16_t vqid, num_rx_ret;
+	int ret, next_pull, num_pulled = 0;
+
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there is enough space to get jobs. */
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+		nb_jobs = 1;
+	}
+
+	next_pull = nb_jobs;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -560,12 +742,13 @@  dpdmai_dev_dequeue_multijob_no_prefetch(
 			}
 			fd = qbman_result_DQ_fd(dq_storage);
 
-			vqid = qdma_vq->get_job(qdma_vq, fd, &job[num_rx]);
+			vqid = qdma_vq->get_job(qdma_vq, fd,
+						&job[num_rx], &num_rx_ret);
 			if (vq_id)
 				vq_id[num_rx] = vqid;
 
 			dq_storage++;
-			num_rx++;
+			num_rx += num_rx_ret;
 			num_pulled++;
 
 		} while (pending);
@@ -592,6 +775,7 @@  dpdmai_dev_enqueue_multi(
 	int ret;
 	uint32_t num_to_send = 0;
 	uint16_t num_tx = 0;
+	uint32_t enqueue_loop, retry_count, loop;
 
 	if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
 		ret = dpaa2_affine_qbman_swp();
@@ -612,45 +796,87 @@  dpdmai_dev_enqueue_multi(
 	qbman_eq_desc_set_no_orp(&eqdesc, 0);
 	qbman_eq_desc_set_response(&eqdesc, 0, 0);
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		uint16_t fd_nb;
+		uint16_t sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		uint16_t job_idx = 0;
+		uint16_t fd_sg_nb[8];
+		uint16_t nb_jobs_ret = 0;
+
+		if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
+		else
+			fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;
+
+		memset(&fd[0], 0, sizeof(struct qbman_fd) * fd_nb);
+
+		for (loop = 0; loop < fd_nb; loop++) {
+			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], &job[job_idx],
+					      sg_entry_nb);
+			if (unlikely(ret < 0))
+				return 0;
+			fd_sg_nb[loop] = sg_entry_nb;
+			nb_jobs -= sg_entry_nb;
+			job_idx += sg_entry_nb;
+			sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
+						DPAA2_QDMA_MAX_SG_NB : nb_jobs;
+		}
+
+		/* Enqueue the packet to the QBMAN */
+		enqueue_loop = 0; retry_count = 0;
+
+		while (enqueue_loop < fd_nb) {
+			ret = qbman_swp_enqueue_multiple(swp,
+					&eqdesc, &fd[enqueue_loop],
+					NULL, fd_nb - enqueue_loop);
+			if (unlikely(ret < 0)) {
+				retry_count++;
+				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
+					return nb_jobs_ret;
+			} else {
+				for (loop = 0; loop < (uint32_t)ret; loop++)
+					nb_jobs_ret +=
+						fd_sg_nb[enqueue_loop + loop];
+				enqueue_loop += ret;
+				retry_count = 0;
+			}
+		}
+
+		return nb_jobs_ret;
+	}
+
 	memset(fd, 0, nb_jobs * sizeof(struct qbman_fd));
 
 	while (nb_jobs > 0) {
-		uint32_t loop;
-
 		num_to_send = (nb_jobs > dpaa2_eqcr_size) ?
 			dpaa2_eqcr_size : nb_jobs;
 
-		for (loop = 0; loop < num_to_send; loop++) {
-			ret = qdma_vq->set_fd(qdma_vq, &fd[loop], job[num_tx]);
-			if (ret < 0) {
-				/* Set nb_jobs to loop, so outer while loop
-				 * breaks out.
-				 */
-				nb_jobs = loop;
-				break;
-			}
-
-			num_tx++;
-		}
+		ret = qdma_vq->set_fd(qdma_vq, &fd[num_tx],
+						&job[num_tx], num_to_send);
+		if (unlikely(ret < 0))
+			break;
 
 		/* Enqueue the packet to the QBMAN */
-		uint32_t enqueue_loop = 0, retry_count = 0;
+		enqueue_loop = 0; retry_count = 0;
+		loop = num_to_send;
 
 		while (enqueue_loop < loop) {
 			ret = qbman_swp_enqueue_multiple(swp,
 						&eqdesc,
-						&fd[enqueue_loop],
+						&fd[num_tx + enqueue_loop],
 						NULL,
 						loop - enqueue_loop);
 			if (unlikely(ret < 0)) {
 				retry_count++;
 				if (retry_count > DPAA2_MAX_TX_RETRY_COUNT)
-					return num_tx - (loop - enqueue_loop);
+					return num_tx;
 			} else {
 				enqueue_loop += ret;
 				retry_count = 0;
 			}
 		}
+		num_tx += num_to_send;
 		nb_jobs -= loop;
 	}
 	return num_tx;
@@ -977,6 +1203,21 @@  dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		if (!(q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports physical queue!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+		if (!(q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT)) {
+			DPAA2_QDMA_ERR(
+				"qDMA SG format only supports long FD format!");
+			rte_spinlock_unlock(&qdma_dev->lock);
+			return -ENODEV;
+		}
+	}
+
 	if (q_config->flags & RTE_QDMA_VQ_EXCLUSIVE_PQ) {
 		/* Allocate HW queue for a VQ */
 		qdma_dev->vqs[i].hw_queue = alloc_hw_queue(q_config->lcore_id);
@@ -1007,12 +1248,16 @@  dpaa2_qdma_queue_setup(struct rte_rawdev *rawdev,
 		return -ENODEV;
 	}
 
+	qdma_dev->vqs[i].flags = q_config->flags;
 	qdma_dev->vqs[i].in_use = 1;
 	qdma_dev->vqs[i].lcore_id = q_config->lcore_id;
 	memset(&qdma_dev->vqs[i].rbp, 0, sizeof(struct rte_qdma_rbp));
 
 	if (q_config->flags & RTE_QDMA_VQ_FD_LONG_FORMAT) {
-		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_lf;
+		if (q_config->flags & RTE_QDMA_VQ_FD_SG_FORMAT)
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_sg_fd_lf;
+		else
+			qdma_dev->vqs[i].set_fd = dpdmai_dev_set_multi_fd_lf;
 		qdma_dev->vqs[i].get_job = dpdmai_dev_get_job_lf;
 	} else {
 		qdma_dev->vqs[i].set_fd = dpdmai_dev_set_fd_us;
@@ -1087,6 +1332,12 @@  dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	int ret = 0, i;
 	unsigned int ring_count;
 
+	if (qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) {
+		/** Make sure there is enough space to get jobs. */
+		if (unlikely(nb_jobs < DPAA2_QDMA_MAX_SG_NB))
+			return -EINVAL;
+	}
+
 	/* Return error in case of wrong lcore_id */
 	if (rte_lcore_id() != (unsigned int)(qdma_vq->lcore_id)) {
 		DPAA2_QDMA_WARN("QDMA dequeue for vqid %d on wrong core",
@@ -1098,7 +1349,8 @@  dpaa2_qdma_dequeue(struct rte_rawdev *rawdev,
 	if (qdma_vq->num_enqueues == qdma_vq->num_dequeues)
 		return 0;
 
-	if (qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
+	if (!(qdma_vq->flags & RTE_QDMA_VQ_FD_SG_FORMAT) &&
+		qdma_vq->num_enqueues < (qdma_vq->num_dequeues + nb_jobs))
 		nb_jobs = (qdma_vq->num_enqueues - qdma_vq->num_dequeues);
 
 	if (qdma_vq->exclusive_hw_queue) {
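
In the SG enqueue path above, dpdmai_dev_enqueue_multi() packs a burst into frame descriptors of at most DPAA2_QDMA_MAX_SG_NB jobs each and credits nb_jobs_ret per FD actually accepted by QBMAN. A standalone sketch of just the splitting arithmetic (DPAA2_QDMA_MAX_SG_NB is 64 per dpaa2_qdma.h; the burst size is a made-up example):

#include <stdio.h>

#define DPAA2_QDMA_MAX_SG_NB 64	/* from dpaa2_qdma.h */

int main(void)
{
	unsigned int nb_jobs = 150;	/* example burst size */
	unsigned int fd_nb, loop, job_idx = 0;
	unsigned int sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
				DPAA2_QDMA_MAX_SG_NB : nb_jobs;

	/* Same rounding as the patch: one FD per group of up to
	 * DPAA2_QDMA_MAX_SG_NB jobs.
	 */
	if (nb_jobs % DPAA2_QDMA_MAX_SG_NB)
		fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB + 1;
	else
		fd_nb = nb_jobs / DPAA2_QDMA_MAX_SG_NB;

	for (loop = 0; loop < fd_nb; loop++) {
		printf("FD %u: jobs %u..%u (%u SG entries)\n", loop,
			job_idx, job_idx + sg_entry_nb - 1, sg_entry_nb);
		nb_jobs -= sg_entry_nb;
		job_idx += sg_entry_nb;
		sg_entry_nb = nb_jobs > DPAA2_QDMA_MAX_SG_NB ?
				DPAA2_QDMA_MAX_SG_NB : nb_jobs;
	}
	return 0;
}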
diff --git a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
index ff7743fb5..43a01d56f 100644
--- a/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/dpaa2_qdma.h
@@ -11,16 +11,37 @@  struct rte_qdma_job;
 #define DPAA2_QDMA_MAX_FLE 3
 #define DPAA2_QDMA_MAX_SDD 2
 
+#define DPAA2_QDMA_MAX_SG_NB 64
+
 #define DPAA2_DPDMAI_MAX_QUEUES	8
 
-/** FLE pool size: 3 Frame list + 2 source/destination descriptor */
-#define QDMA_FLE_POOL_SIZE (sizeof(struct rte_qdma_job *) + \
+/** FLE pool size: job number (uint64_t) +
+ * 3 frame list entries + 2 source/destination descriptors +
+ * 64 (src + dst) SG entries + 64 job pointers.
+ */
+
+#define QDMA_FLE_POOL_SIZE (sizeof(uint64_t) + \
 		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
-		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2 + \
+		sizeof(struct rte_qdma_job *) * DPAA2_QDMA_MAX_SG_NB)
+
+#define QDMA_FLE_JOB_NB_OFFSET 0
 
-#define QDMA_FLE_JOB_OFFSET 0
 #define QDMA_FLE_FLE_OFFSET \
-		(QDMA_FLE_JOB_OFFSET + sizeof(struct rte_qdma_job *))
+		(QDMA_FLE_JOB_NB_OFFSET + sizeof(uint64_t))
+
+#define QDMA_FLE_SDD_OFFSET \
+		(QDMA_FLE_FLE_OFFSET + \
+		sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE)
+
+#define QDMA_FLE_SG_ENTRY_OFFSET \
+		(QDMA_FLE_SDD_OFFSET + \
+		sizeof(struct qdma_sdd) * DPAA2_QDMA_MAX_SDD)
+
+#define QDMA_FLE_JOBS_OFFSET \
+		(QDMA_FLE_SG_ENTRY_OFFSET + \
+		sizeof(struct qdma_sg_entry) * DPAA2_QDMA_MAX_SG_NB * 2)
 
 /** FLE pool cache size */
 #define QDMA_FLE_CACHE_SIZE(_num) (_num/(RTE_MAX_LCORE * 2))
@@ -90,10 +111,12 @@  struct qdma_virt_queue;
 
 typedef uint16_t (qdma_get_job_t)(struct qdma_virt_queue *qdma_vq,
 					const struct qbman_fd *fd,
-					struct rte_qdma_job **job);
+					struct rte_qdma_job **job,
+					uint16_t *nb_jobs);
 typedef int (qdma_set_fd_t)(struct qdma_virt_queue *qdma_vq,
 					struct qbman_fd *fd,
-					struct rte_qdma_job *job);
+					struct rte_qdma_job **job,
+					uint16_t nb_jobs);
 
 typedef int (qdma_dequeue_multijob_t)(
 				struct qdma_virt_queue *qdma_vq,
@@ -126,6 +149,7 @@  struct qdma_virt_queue {
 	uint64_t num_dequeues;
 
 	uint16_t vq_id;
+	uint32_t flags;
 
 	qdma_set_fd_t *set_fd;
 	qdma_get_job_t *get_job;
@@ -191,6 +215,43 @@  struct qdma_sdd {
 	};
 } __rte_packed;
 
+#define QDMA_SG_FMT_SDB	0x0 /* single data buffer */
+#define QDMA_SG_FMT_FDS	0x1 /* frame data section */
+#define QDMA_SG_FMT_SGTE	0x2 /* SGT extension */
+#define QDMA_SG_SL_SHORT	0x1 /* short length */
+#define QDMA_SG_SL_LONG	0x0 /* long length */
+#define QDMA_SG_F	0x1 /* last sg entry */
+#define QDMA_SG_BMT_ENABLE 0x1
+#define QDMA_SG_BMT_DISABLE 0x0
+
+struct qdma_sg_entry {
+	uint32_t addr_lo;		/* address 0:31 */
+	uint32_t addr_hi:17;	/* address 32:48 */
+	uint32_t rsv:15;
+	union {
+		uint32_t data_len_sl0;	/* SL=0, the long format */
+		struct {
+			uint32_t len:17;	/* SL=1, the short format */
+			uint32_t reserve:3;
+			uint32_t sf:1;
+			uint32_t sr:1;
+			uint32_t size:10;	/* buff size */
+		} data_len_sl1;
+	} data_len;					/* AVAIL_LENGTH */
+	union {
+		uint32_t ctrl_fields;
+		struct {
+			uint32_t bpid:14;
+			uint32_t ivp:1;
+			uint32_t bmt:1;
+			uint32_t offset:12;
+			uint32_t fmt:2;
+			uint32_t sl:1;
+			uint32_t f:1;
+		} ctrl;
+	};
+} __attribute__((__packed__));
+
 /** Represents a DPDMAI raw device */
 struct dpaa2_dpdmai_dev {
 	/** Pointer to Next device instance */
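
The QDMA_FLE_*_OFFSET macros above lay out one flat pool element: the job count, then the FLEs, the SDDs, the two SG tables (source and destination) and finally the saved job pointers. A standalone sketch of the layout arithmetic; the qbman_fle (32 bytes) and qdma_sg_entry (16 bytes) sizes follow from the structs in this series, while the 16-byte qdma_sdd size is only an assumption for illustration:

#include <stdint.h>
#include <stdio.h>

/* Stand-in sizes; SIZEOF_SDD is assumed, the rest match this series. */
#define SIZEOF_FLE	32	/* 8 x uint32_t, see qbman_fle */
#define SIZEOF_SDD	16	/* assumption for illustration */
#define SIZEOF_SG	16	/* 4 x uint32_t, see qdma_sg_entry */
#define MAX_FLE		3
#define MAX_SDD		2
#define MAX_SG_NB	64

int main(void)
{
	size_t fle_off = sizeof(uint64_t);			/* QDMA_FLE_FLE_OFFSET */
	size_t sdd_off = fle_off + SIZEOF_FLE * MAX_FLE;	/* QDMA_FLE_SDD_OFFSET */
	size_t sg_off = sdd_off + SIZEOF_SDD * MAX_SDD;		/* QDMA_FLE_SG_ENTRY_OFFSET */
	size_t jobs_off = sg_off + SIZEOF_SG * MAX_SG_NB * 2;	/* QDMA_FLE_JOBS_OFFSET */
	size_t total = jobs_off + sizeof(void *) * MAX_SG_NB;	/* QDMA_FLE_POOL_SIZE */

	printf("fle@%zu sdd@%zu sg@%zu jobs@%zu total=%zu\n",
		fle_off, sdd_off, sg_off, jobs_off, total);
	return 0;
}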
diff --git a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
index ff4fc1d6c..cfec303c8 100644
--- a/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
+++ b/drivers/raw/dpaa2_qdma/rte_pmd_dpaa2_qdma.h
@@ -52,6 +52,8 @@  enum {
 
 #define RTE_QDMA_VQ_FD_LONG_FORMAT		(1ULL << 1)
 
+#define RTE_QDMA_VQ_FD_SG_FORMAT		(1ULL << 2)
+
 /** States if the source addresses is physical. */
 #define RTE_QDMA_JOB_SRC_PHY		(1ULL)
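
Finally, a standalone sketch (not from the patch) of building a scatter-gather table the way qdma_populate_sg_entry() does: long-format lengths (sl = 0), single-data-buffer entries, and the final bit set only on the last entry. The struct mirrors qdma_sg_entry from dpaa2_qdma.h with the short-length union collapsed to the long-format word; addresses and lengths are dummy values:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Local mirror of struct qdma_sg_entry (long-format view only). */
struct sg_entry {
	uint32_t addr_lo;	/* address 0:31 */
	uint32_t addr_hi:17;	/* address 32:48 */
	uint32_t rsv:15;
	uint32_t data_len_sl0;	/* SL = 0, the long format */
	struct {
		uint32_t bpid:14;
		uint32_t ivp:1;
		uint32_t bmt:1;
		uint32_t offset:12;
		uint32_t fmt:2;
		uint32_t sl:1;
		uint32_t f:1;
	} ctrl;
} __attribute__((__packed__));

int main(void)
{
	struct sg_entry sge[3];
	uint64_t iova = 0x100000000ULL;	/* dummy IOVA */
	uint32_t len = 4096, total = 0;
	int i, nb = 3;

	memset(sge, 0, sizeof(sge));
	for (i = 0; i < nb; i++) {
		sge[i].addr_lo = (uint32_t)iova;
		sge[i].addr_hi = iova >> 32;
		sge[i].data_len_sl0 = len;	/* QDMA_SG_SL_LONG */
		sge[i].ctrl.fmt = 0x0;		/* QDMA_SG_FMT_SDB */
		sge[i].ctrl.f = (i == nb - 1);	/* QDMA_SG_F on last entry */
		total += len;
		iova += len;
	}
	printf("%d entries, %u bytes total, last f=%u\n",
		nb, (unsigned)total, (unsigned)sge[nb - 1].ctrl.f);
	return 0;
}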