@@ -227,3 +227,8 @@ ethernet devices connected to event device to override this applications can
use `force_rx_bp=1` device arguments.
Using unique mempool per each ethernet device is recommended when they are
connected to event device.
+
+DMA adapter new mode support
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The cnxk DMA driver does not support the DMA adapter configured in new
+(``RTE_EVENT_DMA_ADAPTER_OP_NEW``) mode; applications should use forward
+(``RTE_EVENT_DMA_ADAPTER_OP_FORWARD``) mode instead.
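For clarity, a minimal sketch of what this limitation means for applications, using the generic event DMA adapter API; the adapter/device/vchan IDs and the response event below are illustrative and not taken from this patch:

```c
#include <rte_eventdev.h>
#include <rte_event_dma_adapter.h>

/* Illustrative IDs, not taken from this patch. */
#define DMA_ADAPTER_ID 0
#define EVENT_DEV_ID   0
#define DMA_DEV_ID     0
#define DMA_VCHAN      0

static int
setup_dma_adapter(struct rte_event_port_conf *port_conf)
{
	/* Response event used when binding the vchan to an event queue;
	 * whether it is required depends on the capabilities reported by
	 * rte_event_dma_adapter_caps_get().
	 */
	struct rte_event resp_ev = {
		.queue_id = 0,
		.sched_type = RTE_SCHED_TYPE_ATOMIC,
		.priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
	};
	int ret;

	/* OP_NEW mode is not supported by this driver, so create the
	 * adapter in forward mode.
	 */
	ret = rte_event_dma_adapter_create(DMA_ADAPTER_ID, EVENT_DEV_ID,
					   port_conf,
					   RTE_EVENT_DMA_ADAPTER_OP_FORWARD);
	if (ret < 0)
		return ret;

	return rte_event_dma_adapter_vchan_add(DMA_ADAPTER_ID, DMA_DEV_ID,
					       DMA_VCHAN, &resp_ev);
}
```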
@@ -142,6 +142,10 @@ New Features
to support TLS v1.2, TLS v1.3 and DTLS v1.2.
* Added PMD API to allow raw submission of instructions to CPT.
+* **Updated Marvell cnxk DMA driver.**
+
+ * Added support for DMA event enqueue and dequeue.
+ * Added support for dual workslot DMA event enqueue.
Removed Items
-------------
new file mode 100644
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2023 Marvell.
+ */
+
+#ifndef _CNXK_DMA_EVENT_DP_H_
+#define _CNXK_DMA_EVENT_DP_H_
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_eventdev.h>
+
+__rte_internal
+uint16_t cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
+
+__rte_internal
+uint16_t cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
+
+__rte_internal
+uint16_t cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events);
+
+__rte_internal
+uintptr_t cnxk_dma_adapter_dequeue(uintptr_t get_work1);
+#endif /* _CNXK_DMA_EVENT_DP_H_ */
@@ -589,10 +589,11 @@ cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_de
dmadev->fp_obj->copy_sg = cn10k_dmadev_copy_sg;
}
+ dpivf->mcs_lock = NULL;
rdpi = &dpivf->rdpi;
rdpi->pci_dev = pci_dev;
- rc = roc_dpi_dev_init(rdpi, 0);
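+	/* Program the DPI queue with the offset of 'wqecs' inside the
+	 * completion structure; completion status for SSO (WQP mode)
+	 * requests is written at this offset.
+	 */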
+ rc = roc_dpi_dev_init(rdpi, offsetof(struct cnxk_dpi_compl_s, wqecs));
if (rc < 0)
goto err_out_free;
@@ -14,11 +14,14 @@
#include <rte_eal.h>
#include <rte_lcore.h>
#include <rte_mbuf_pool_ops.h>
+#include <rte_mcslock.h>
#include <rte_mempool.h>
#include <rte_pci.h>
#include <roc_api.h>
+#include "cnxk_dma_event_dp.h"
+
#define CNXK_DPI_MAX_POINTER 15
#define CNXK_DPI_STRM_INC(s, var) ((s).var = ((s).var + 1) & (s).max_cnt)
#define CNXK_DPI_STRM_DEC(s, var) ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : \
@@ -40,6 +43,11 @@
*/
#define CNXK_DPI_REQ_CDATA 0xFF
+/* Set completion data to 0xDEADBEEF when a request is submitted via SSO.
+ * This helps differentiate whether dequeue is called after a cnxk enqueue.
+ */
+#define CNXK_DPI_REQ_SSO_CDATA 0xDEADBEEF
+
union cnxk_dpi_instr_cmd {
uint64_t u;
struct cn9k_dpi_instr_cmd {
@@ -85,7 +93,10 @@ union cnxk_dpi_instr_cmd {
struct cnxk_dpi_compl_s {
uint64_t cdata;
- void *cb_data;
+ void *op; /* rte_event_dma_adapter_op used for this request. */
+ uint16_t dev_id; /* dmadev ID the request was submitted on. */
+ uint16_t vchan; /* Virtual channel the request was submitted on. */
+ uint32_t wqecs; /* WQE completion status updated by hardware. */
};
struct cnxk_dpi_cdesc_data_s {
@@ -95,6 +106,11 @@ struct cnxk_dpi_cdesc_data_s {
uint16_t tail;
};
+struct cnxk_dma_adapter_info {
+ bool enabled; /* Set if vchan queue is added to dma adapter. */
+ struct rte_mempool *req_mp; /* DMA inflight request mempool. */
+};
+
struct cnxk_dpi_conf {
union cnxk_dpi_instr_cmd cmd;
struct cnxk_dpi_cdesc_data_s c_desc;
@@ -103,6 +119,7 @@ struct cnxk_dpi_conf {
uint16_t desc_idx;
struct rte_dma_stats stats;
uint64_t completed_offset;
+ struct cnxk_dma_adapter_info adapter_info;
};
struct cnxk_dpi_vf_s {
@@ -112,6 +129,7 @@ struct cnxk_dpi_vf_s {
uint16_t chunk_size_m1;
struct rte_mempool *chunk_pool;
struct cnxk_dpi_conf conf[CNXK_DPI_MAX_VCHANS_PER_QUEUE];
+ RTE_ATOMIC(rte_mcslock_t *) mcs_lock;
/* Slow path */
struct roc_dpi rdpi;
uint32_t aura;
@@ -5,6 +5,10 @@
#include <rte_vect.h>
#include "cnxk_dmadev.h"
+#include <rte_event_dma_adapter.h>
+
+#include <cn10k_eventdev.h>
+#include <cnxk_eventdev.h>
static __plt_always_inline void
__dpi_cpy_scalar(uint64_t *src, uint64_t *dst, uint8_t n)
@@ -434,3 +438,289 @@ cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
return dpi_conf->desc_idx++;
}
+
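+/* Convert the response event word into the SSO work word format: keep the
+ * flow/sub-event bits as the tag, force the event type to
+ * RTE_EVENT_TYPE_DMADEV and move sched_type/queue_id into the tag
+ * type/group fields.
+ */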
+static inline uint64_t
+cnxk_dma_adapter_format_event(uint64_t event)
+{
+ uint64_t w0;
+ w0 = (event & 0xFFC000000000) >> 6 |
+ (event & 0xFFFFFFF) | RTE_EVENT_TYPE_DMADEV << 28;
+
+ return w0;
+}
+
+uint16_t
+cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+ const struct rte_dma_sge *src, *dst;
+ struct rte_event_dma_adapter_op *op;
+ struct cnxk_dpi_compl_s *comp_ptr;
+ struct cnxk_dpi_conf *dpi_conf;
+ struct cnxk_dpi_vf_s *dpivf;
+ struct rte_event *rsp_info;
+ struct cn10k_sso_hws *work;
+ uint16_t nb_src, nb_dst;
+ rte_mcslock_t mcs_lock_me;
+ uint64_t hdr[4];
+ uint16_t count;
+ int rc;
+
+ work = (struct cn10k_sso_hws *)ws;
+
+ for (count = 0; count < nb_events; count++) {
+ op = ev[count].event_ptr;
+ rsp_info = (struct rte_event *)((uint8_t *)op +
+ sizeof(struct rte_event_dma_adapter_op));
+ dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+ dpi_conf = &dpivf->conf[op->vchan];
+
+ if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+ return count;
+
+ comp_ptr->op = op;
+ comp_ptr->dev_id = op->dma_dev_id;
+ comp_ptr->vchan = op->vchan;
+ comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+ nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+ nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+
+ hdr[0] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 54);
+ hdr[0] |= (nb_dst << 6) | nb_src;
+ hdr[1] = ((uint64_t)comp_ptr);
+ hdr[2] = cnxk_dma_adapter_format_event(rsp_info->event);
+
+ src = &op->src_seg[0];
+ dst = &op->dst_seg[0];
+
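+ /* When ordering must be preserved, wait until this workslot reaches
+ * the head of its flow before submitting the DMA instruction.
+ */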
+ if (CNXK_TAG_IS_HEAD(work->gw_rdata) ||
+ ((CNXK_TT_FROM_TAG(work->gw_rdata) == SSO_TT_ORDERED) &&
+ (rsp_info->sched_type & DPI_HDR_TT_MASK) ==
+ RTE_SCHED_TYPE_ORDERED))
+ roc_sso_hws_head_wait(work->base);
+
+ rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+ rc = __dpi_queue_write_sg(dpivf, hdr, src, dst, nb_src, nb_dst);
+ if (unlikely(rc)) {
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+ rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+ return count;
+ }
+
+ if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+ rte_wmb();
+ plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+ dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+ dpi_conf->stats.submitted += dpi_conf->pending + 1;
+ dpi_conf->pnum_words = 0;
+ dpi_conf->pending = 0;
+ } else {
+ dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+ dpi_conf->pending++;
+ }
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+ }
+
+ return count;
+}
+
+uint16_t
+cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+ const struct rte_dma_sge *fptr, *lptr;
+ struct rte_event_dma_adapter_op *op;
+ struct cnxk_dpi_compl_s *comp_ptr;
+ struct cn9k_sso_hws_dual *work;
+ struct cnxk_dpi_conf *dpi_conf;
+ struct cnxk_dpi_vf_s *dpivf;
+ struct rte_event *rsp_info;
+ uint16_t nb_src, nb_dst;
+ rte_mcslock_t mcs_lock_me;
+ uint64_t hdr[4];
+ uint16_t count;
+ int rc;
+
+ work = (struct cn9k_sso_hws_dual *)ws;
+
+ for (count = 0; count < nb_events; count++) {
+ op = ev[count].event_ptr;
+ rsp_info = (struct rte_event *)((uint8_t *)op +
+ sizeof(struct rte_event_dma_adapter_op));
+ dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+ dpi_conf = &dpivf->conf[op->vchan];
+
+ if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+ return count;
+
+ comp_ptr->op = op;
+ comp_ptr->dev_id = op->dma_dev_id;
+ comp_ptr->vchan = op->vchan;
+ comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+ hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
+ hdr[2] = (uint64_t)comp_ptr;
+
+ nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+ nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+ /*
+ * For inbound case, src pointers are last pointers.
+ * For all other cases, src pointers are first pointers.
+ */
+ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+ fptr = &op->dst_seg[0];
+ lptr = &op->src_seg[0];
+ RTE_SWAP(nb_src, nb_dst);
+ } else {
+ fptr = &op->src_seg[0];
+ lptr = &op->dst_seg[0];
+ }
+
+ hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
+ hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+
+ if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+ roc_sso_hws_head_wait(work->base[!work->vws]);
+
+ rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+ rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);
+ if (unlikely(rc)) {
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+ rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+ return count;
+ }
+
+ if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+ rte_wmb();
+ plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+ dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+ dpi_conf->stats.submitted += dpi_conf->pending + 1;
+ dpi_conf->pnum_words = 0;
+ dpi_conf->pending = 0;
+ } else {
+ dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+ dpi_conf->pending++;
+ }
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+ }
+
+ return count;
+}
+
+uint16_t
+cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
+{
+ const struct rte_dma_sge *fptr, *lptr;
+ struct rte_event_dma_adapter_op *op;
+ struct cnxk_dpi_compl_s *comp_ptr;
+ struct cnxk_dpi_conf *dpi_conf;
+ struct cnxk_dpi_vf_s *dpivf;
+ struct rte_event *rsp_info;
+ struct cn9k_sso_hws *work;
+ uint16_t nb_src, nb_dst;
+ rte_mcslock_t mcs_lock_me;
+ uint64_t hdr[4];
+ uint16_t count;
+ int rc;
+
+ work = (struct cn9k_sso_hws *)ws;
+
+ for (count = 0; count < nb_events; count++) {
+ op = ev[count].event_ptr;
+ rsp_info = (struct rte_event *)((uint8_t *)op +
+ sizeof(struct rte_event_dma_adapter_op));
+ dpivf = rte_dma_fp_objs[op->dma_dev_id].dev_private;
+ dpi_conf = &dpivf->conf[op->vchan];
+
+ if (unlikely(rte_mempool_get(dpi_conf->adapter_info.req_mp, (void **)&comp_ptr)))
+ return count;
+
+ comp_ptr->op = op;
+ comp_ptr->dev_id = op->dma_dev_id;
+ comp_ptr->vchan = op->vchan;
+ comp_ptr->cdata = CNXK_DPI_REQ_SSO_CDATA;
+
+ hdr[1] = dpi_conf->cmd.u | ((uint64_t)DPI_HDR_PT_WQP << 36);
+ hdr[2] = (uint64_t)comp_ptr;
+
+ nb_src = op->nb_src & CNXK_DPI_MAX_POINTER;
+ nb_dst = op->nb_dst & CNXK_DPI_MAX_POINTER;
+ /*
+ * For inbound case, src pointers are last pointers.
+ * For all other cases, src pointers are first pointers.
+ */
+ if (((dpi_conf->cmd.u >> 48) & DPI_HDR_XTYPE_MASK) == DPI_XTYPE_INBOUND) {
+ fptr = &op->dst_seg[0];
+ lptr = &op->src_seg[0];
+ RTE_SWAP(nb_src, nb_dst);
+ } else {
+ fptr = &op->src_seg[0];
+ lptr = &op->dst_seg[0];
+ }
+
+ hdr[0] = ((uint64_t)nb_dst << 54) | (uint64_t)nb_src << 48;
+ hdr[0] |= cnxk_dma_adapter_format_event(rsp_info->event);
+
+ if ((rsp_info->sched_type & DPI_HDR_TT_MASK) == RTE_SCHED_TYPE_ORDERED)
+ roc_sso_hws_head_wait(work->base);
+
+ rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+ rc = __dpi_queue_write_sg(dpivf, hdr, fptr, lptr, nb_src, nb_dst);
+ if (unlikely(rc)) {
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+ rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+ return count;
+ }
+
+ if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
+ rte_wmb();
+ plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+ dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+ dpi_conf->stats.submitted += dpi_conf->pending + 1;
+ dpi_conf->pnum_words = 0;
+ dpi_conf->pending = 0;
+ } else {
+ dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
+ dpi_conf->pending++;
+ }
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+ }
+
+ return count;
+}
+
+uintptr_t
+cnxk_dma_adapter_dequeue(uintptr_t get_work1)
+{
+ struct rte_event_dma_adapter_op *op;
+ struct cnxk_dpi_compl_s *comp_ptr;
+ struct cnxk_dpi_conf *dpi_conf;
+ struct cnxk_dpi_vf_s *dpivf;
+ rte_mcslock_t mcs_lock_me;
+ RTE_ATOMIC(uint8_t) *wqecs;
+
+ comp_ptr = (struct cnxk_dpi_compl_s *)get_work1;
+
+ /* Dequeue can be called without a prior cnxk enqueue in the DMA
+ * adapter case. The DMA op is then not embedded in a completion
+ * pointer and get_work1 is the op itself, so return it as is.
+ */
+ if (comp_ptr->cdata != CNXK_DPI_REQ_SSO_CDATA)
+ return (uintptr_t)comp_ptr;
+
+ dpivf = rte_dma_fp_objs[comp_ptr->dev_id].dev_private;
+ dpi_conf = &dpivf->conf[comp_ptr->vchan];
+
+ rte_mcslock_lock(&dpivf->mcs_lock, &mcs_lock_me);
+ wqecs = (uint8_t __rte_atomic *)&comp_ptr->wqecs;
+ if (rte_atomic_load_explicit(wqecs, rte_memory_order_relaxed) != 0)
+ dpi_conf->stats.errors++;
+
+ /* Take into account errors also. This is similar to
+ * cnxk_dmadev_completed_status().
+ */
+ dpi_conf->stats.completed++;
+ rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
+
+ op = (struct rte_event_dma_adapter_op *)comp_ptr->op;
+
+ rte_mempool_put(dpi_conf->adapter_info.req_mp, comp_ptr);
+
+ return (uintptr_t)op;
+}
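For context, a hedged sketch of how the SSO get-work path is expected to consume cnxk_dma_adapter_dequeue(); the helper name and parameter layout are illustrative assumptions, only cnxk_dma_adapter_dequeue() and the bit layout produced by cnxk_dma_adapter_format_event() come from this patch:

```c
#include <stdint.h>
#include <rte_eventdev.h>

#include "cnxk_dma_event_dp.h"

/* Hypothetical fragment of SSO get-work post-processing: u64[0] is the
 * work word returned by hardware, u64[1] the get_work1 pointer.
 */
static inline void
sso_post_process_dma(uint64_t u64[2])
{
	/* The event type sits in bits 31:28 of the work word, as written by
	 * cnxk_dma_adapter_format_event().
	 */
	if (((u64[0] >> 28) & 0xF) == RTE_EVENT_TYPE_DMADEV)
		u64[1] = cnxk_dma_adapter_dequeue(u64[1]);
}
```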
@@ -8,6 +8,13 @@ foreach flag: error_cflags
endif
endforeach
-deps += ['bus_pci', 'common_cnxk', 'dmadev']
+driver_sdk_headers = files(
+ 'cnxk_dma_event_dp.h',
+)
+
+deps += ['bus_pci', 'common_cnxk', 'dmadev', 'eventdev']
+
+includes += include_directories('../../event/cnxk')
+
sources = files('cnxk_dmadev.c', 'cnxk_dmadev_fp.c')
require_iova_in_mbuf = false
new file mode 100644
@@ -0,0 +1,10 @@
+INTERNAL {
+ global:
+
+ cn10k_dma_adapter_enqueue;
+ cn9k_dma_adapter_enqueue;
+ cn9k_dma_adapter_dual_enqueue;
+ cnxk_dma_adapter_dequeue;
+
+ local: *;
+};
@@ -460,6 +460,7 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
}
}
event_dev->ca_enqueue = cn9k_sso_hws_ca_enq;
+ event_dev->dma_enqueue = cn9k_dma_adapter_enqueue;
if (dev->tx_offloads & NIX_TX_MULTI_SEG_F)
CN9K_SET_EVDEV_ENQ_OP(dev, event_dev->txa_enqueue,
@@ -475,6 +476,7 @@ cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
event_dev->enqueue_forward_burst =
cn9k_sso_hws_dual_enq_fwd_burst;
event_dev->ca_enqueue = cn9k_sso_hws_dual_ca_enq;
+ event_dev->dma_enqueue = cn9k_dma_adapter_dual_enqueue;
event_dev->profile_switch = cn9k_sso_hws_dual_profile_switch;
if (dev->rx_offloads & NIX_RX_MULTI_SEG_F) {
@@ -11,6 +11,7 @@
#include "cnxk_ethdev.h"
#include "cnxk_eventdev.h"
#include "cnxk_worker.h"
+#include "cnxk_dma_event_dp.h"
#include "cn9k_cryptodev_ops.h"
#include "cn9k_ethdev.h"
@@ -316,7 +316,7 @@ foreach flag: extra_flags
endforeach
headers = files('rte_pmd_cnxk_eventdev.h')
-deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk']
+deps += ['bus_pci', 'common_cnxk', 'net_cnxk', 'crypto_cnxk', 'dma_cnxk']
require_iova_in_mbuf = false