@@ -363,6 +363,10 @@ Please note that enabling debugging options may affect system performance.
Enable compilation of the extra run-time consistency checks.
+- ``RTE_PMD_NET_SFC_NIC_DMA_MAP`` (undefined by default)
+
+  Enable support for regioned NIC DMA mapping, which is required on the
+  SN1022 SoC only.
+
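+  The macro defaults to 0 in ``sfc_tweak.h`` when left undefined, so the
+  support may be enabled by defining it to a non-zero value at build time
+  (for instance, via ``CFLAGS``), which is equivalent to::
+
+      #define RTE_PMD_NET_SFC_NIC_DMA_MAP 1
+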
Per-Device Parameters
~~~~~~~~~~~~~~~~~~~~~
@@ -206,6 +206,7 @@ New Features
* Added flow API transfer proxy support
* Added SN1000 virtual functions (VF) support
* Added support for flow counters without service cores
+ * Added support for regioned DMA mapping required on SN1022 SoC
* **Added power monitor API in vhost library.**
@@ -100,4 +100,6 @@ sources = files(
'sfc_service.c',
'sfc_repr_proxy.c',
'sfc_repr.c',
+ 'sfc_nic_dma.c',
+ 'sfc_nic_dma_dp.c',
)
@@ -26,6 +26,7 @@
#include "sfc_tweak.h"
#include "sfc_sw_stats.h"
#include "sfc_switch.h"
+#include "sfc_nic_dma.h"
bool
sfc_repr_supported(const struct sfc_adapter *sa)
@@ -53,10 +54,12 @@ sfc_repr_available(const struct sfc_adapter_shared *sas)
}
int
-sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
- size_t len, int socket_id, efsys_mem_t *esmp)
+sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id,
+ efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id,
+ efsys_mem_t *esmp)
{
const struct rte_memzone *mz;
+ int rc;
sfc_log_init(sa, "name=%s id=%u len=%zu socket_id=%d",
name, id, len, socket_id);
@@ -69,13 +72,17 @@ sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
rte_strerror(rte_errno));
return ENOMEM;
}
-
- esmp->esm_addr = mz->iova;
- if (esmp->esm_addr == RTE_BAD_IOVA) {
+ if (mz->iova == RTE_BAD_IOVA) {
(void)rte_memzone_free(mz);
return EFAULT;
}
+ rc = sfc_nic_dma_mz_map(sa, mz, addr_type, &esmp->esm_addr);
+ if (rc != 0) {
+ (void)rte_memzone_free(mz);
+ return rc;
+ }
+
esmp->esm_mz = mz;
esmp->esm_base = mz->addr;
@@ -457,6 +464,13 @@ sfc_try_start(struct sfc_adapter *sa)
if (rc != 0)
goto fail_nic_init;
+ sfc_log_init(sa, "reconfigure NIC DMA");
+ rc = efx_nic_dma_reconfigure(sa->nic);
+ if (rc != 0) {
+ sfc_err(sa, "cannot reconfigure NIC DMA: %s", rte_strerror(rc));
+ goto fail_nic_dma_reconfigure;
+ }
+
encp = efx_nic_cfg_get(sa->nic);
/*
@@ -525,6 +539,7 @@ sfc_try_start(struct sfc_adapter *sa)
fail_intr_start:
fail_tunnel_reconfigure:
+fail_nic_dma_reconfigure:
efx_nic_fini(sa->nic);
fail_nic_init:
@@ -939,6 +954,10 @@ sfc_attach(struct sfc_adapter *sa)
sa->txq_min_entries = encp->enc_txq_min_ndescs;
SFC_ASSERT(rte_is_power_of_2(sa->txq_min_entries));
+ rc = sfc_nic_dma_attach(sa);
+ if (rc != 0)
+ goto fail_nic_dma_attach;
+
rc = sfc_intr_attach(sa);
if (rc != 0)
goto fail_intr_attach;
@@ -1030,6 +1049,9 @@ sfc_attach(struct sfc_adapter *sa)
sfc_intr_detach(sa);
fail_intr_attach:
+ sfc_nic_dma_detach(sa);
+
+fail_nic_dma_attach:
efx_nic_fini(sa->nic);
fail_estimate_rsrc_limits:
@@ -1076,6 +1098,7 @@ sfc_detach(struct sfc_adapter *sa)
sfc_port_detach(sa);
sfc_ev_detach(sa);
sfc_intr_detach(sa);
+ sfc_nic_dma_detach(sa);
efx_tunnel_fini(sa->nic);
sfc_sriov_detach(sa);
@@ -26,6 +26,7 @@
#include "sfc_debug.h"
#include "sfc_log.h"
+#include "sfc_tweak.h"
#include "sfc_filter.h"
#include "sfc_flow_tunnel.h"
#include "sfc_sriov.h"
@@ -35,6 +36,7 @@
#include "sfc_repr_proxy.h"
#include "sfc_service.h"
#include "sfc_ethdev_state.h"
+#include "sfc_nic_dma_dp.h"
#ifdef __cplusplus
extern "C" {
@@ -145,6 +147,10 @@ struct sfc_adapter_shared {
bool counters_rxq_allocated;
unsigned int nb_repr_rxq;
unsigned int nb_repr_txq;
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ struct sfc_nic_dma_info nic_dma_info;
+#endif
};
/* Adapter process private data */
@@ -392,8 +398,9 @@ sfc_get_system_msecs(void)
return rte_get_timer_cycles() * MS_PER_S / rte_get_timer_hz();
}
-int sfc_dma_alloc(const struct sfc_adapter *sa, const char *name, uint16_t id,
- size_t len, int socket_id, efsys_mem_t *esmp);
+int sfc_dma_alloc(struct sfc_adapter *sa, const char *name, uint16_t id,
+ efx_nic_dma_addr_type_t addr_type, size_t len, int socket_id,
+ efsys_mem_t *esmp);
void sfc_dma_free(const struct sfc_adapter *sa, efsys_mem_t *esmp);
uint32_t sfc_register_logtype(const struct rte_pci_addr *pci_addr,
@@ -17,6 +17,7 @@
#include "sfc_log.h"
#include "sfc_stats.h"
+#include "sfc_nic_dma_dp.h"
#ifdef __cplusplus
extern "C" {
@@ -13,6 +13,7 @@
#include <rte_mempool.h>
#include <ethdev_driver.h>
+#include "sfc_tweak.h"
#include "sfc_dp.h"
#ifdef __cplusplus
@@ -95,6 +96,11 @@ struct sfc_dp_rx_qcreate_info {
/** Mask to extract user bits from Rx prefix mark field */
uint32_t user_mark_mask;
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ /** NIC's DMA mapping information */
+ const struct sfc_nic_dma_info *nic_dma_info;
+#endif
};
/**
@@ -14,7 +14,9 @@
#include "sfc_dp.h"
#include "sfc_debug.h"
+#include "sfc_tweak.h"
#include "sfc_tso.h"
+#include "sfc_nic_dma_dp.h"
#ifdef __cplusplus
extern "C" {
@@ -80,6 +82,11 @@ struct sfc_dp_tx_qcreate_info {
uint32_t tso_max_payload_len;
/** Maximum number of frames to be generated per TSOv3 transaction */
uint32_t tso_max_nb_outgoing_frames;
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ /** NIC's DMA mapping information */
+ const struct sfc_nic_dma_info *nic_dma_info;
+#endif
};
/**
@@ -27,6 +27,7 @@
#include "sfc_dp_rx.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"
+#include "sfc_nic_dma_dp.h"
#define sfc_ef100_rx_err(_rxq, ...) \
@@ -89,6 +90,9 @@ struct sfc_ef100_rxq {
struct rte_mempool *refill_mb_pool;
efx_qword_t *rxq_hw_ring;
volatile void *doorbell;
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ const struct sfc_nic_dma_info *nic_dma_info;
+#endif
/* Datapath receive queue anchor */
struct sfc_dp_rxq dp;
@@ -131,6 +135,32 @@ sfc_ef100_rx_qpush(struct sfc_ef100_rxq *rxq, unsigned int added)
added);
}
+static int
+sfc_ef100_rx_map(const struct sfc_ef100_rxq *rxq, rte_iova_t iova, size_t len,
+ rte_iova_t *dma_addr)
+{
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ *dma_addr = sfc_nic_dma_map(rxq->nic_dma_info, iova, len);
+ if (likely(*dma_addr != RTE_BAD_IOVA))
+ return 0;
+
+ sfc_ef100_rx_err(rxq, "failed to map DMA address on Rx");
+ return EFAULT;
+#else
+ RTE_SET_USED(rxq);
+ RTE_SET_USED(len);
+
+ *dma_addr = iova;
+
+ /*
+	 * A constant return value here allows the compiler to throw
+	 * away the unused error handling in the caller. Otherwise it
+	 * would be more logical to return the mapping result.
+ */
+ return 0;
+#endif
+}
+
static void
sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
{
@@ -150,7 +180,6 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
SFC_ASSERT(bulks > 0);
do {
- unsigned int id;
unsigned int i;
if (unlikely(rte_mempool_get_bulk(rxq->refill_mb_pool, objs,
@@ -170,17 +199,24 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
break;
}
- for (i = 0, id = added & ptr_mask;
- i < RTE_DIM(objs);
- ++i, ++id) {
+ for (i = 0; i < RTE_DIM(objs); ++i) {
struct rte_mbuf *m = objs[i];
struct sfc_ef100_rx_sw_desc *rxd;
- rte_iova_t phys_addr;
+ rte_iova_t dma_addr;
+ int rc;
__rte_mbuf_raw_sanity_check(m);
- SFC_ASSERT((id & ~ptr_mask) == 0);
- rxd = &rxq->sw_ring[id];
+ rc = sfc_ef100_rx_map(rxq,
+ rte_mbuf_data_iova_default(m),
+ rte_pktmbuf_data_len(m), &dma_addr);
+ if (unlikely(rc != 0)) {
+ /* Just skip buffer and try to continue */
+ rte_mempool_put(rxq->refill_mb_pool, m);
+ continue;
+ }
+
+ rxd = &rxq->sw_ring[added & ptr_mask];
rxd->mbuf = m;
/*
@@ -189,12 +225,10 @@ sfc_ef100_rx_qrefill(struct sfc_ef100_rxq *rxq)
* structure members.
*/
- phys_addr = rte_mbuf_data_iova_default(m);
- EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[id],
- ESF_GZ_RX_BUF_ADDR, phys_addr);
+ EFX_POPULATE_QWORD_1(rxq->rxq_hw_ring[added & ptr_mask],
+ ESF_GZ_RX_BUF_ADDR, dma_addr);
+ added++;
}
-
- added += RTE_DIM(objs);
} while (--bulks > 0);
SFC_ASSERT(rxq->added != added);
@@ -794,6 +828,10 @@ sfc_ef100_rx_qcreate(uint16_t port_id, uint16_t queue_id,
info->fcw_offset +
ER_GZ_EVQ_INT_PRIME_OFST;
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ rxq->nic_dma_info = info->nic_dma_info;
+#endif
+
sfc_ef100_rx_debug(rxq, "RxQ doorbell is %p", rxq->doorbell);
*dp_rxqp = &rxq->dp;
@@ -24,6 +24,7 @@
#include "sfc_tweak.h"
#include "sfc_kvargs.h"
#include "sfc_ef100.h"
+#include "sfc_nic_dma_dp.h"
#define sfc_ef100_tx_err(_txq, ...) \
@@ -85,6 +86,10 @@ struct sfc_ef100_txq {
uint32_t tso_max_payload_len;
uint32_t tso_max_nb_outgoing_frames;
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ const struct sfc_nic_dma_info *nic_dma_info;
+#endif
+
/* Datapath transmit queue anchor */
struct sfc_dp_txq dp;
};
@@ -342,8 +347,35 @@ sfc_ef100_tx_qdesc_cso_inner_l3(uint64_t tx_tunnel)
return inner_l3;
}
-static void
-sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
+static int
+sfc_ef100_tx_map(const struct sfc_ef100_txq *txq, rte_iova_t iova, size_t len,
+ rte_iova_t *dma_addr)
+{
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ *dma_addr = sfc_nic_dma_map(txq->nic_dma_info, iova, len);
+ if (likely(*dma_addr != RTE_BAD_IOVA))
+ return 0;
+
+ sfc_ef100_tx_err(txq, "failed to map DMA address on Tx");
+ return EFAULT;
+#else
+ RTE_SET_USED(txq);
+ RTE_SET_USED(len);
+
+ *dma_addr = iova;
+
+ /*
+	 * A constant return value here allows the compiler to throw
+	 * away the unused error handling in the caller. Otherwise it
+	 * would be more logical to return the mapping result.
+ */
+ return 0;
+#endif
+}
+
+static int
+sfc_ef100_tx_qdesc_send_create(const struct sfc_ef100_txq *txq,
+ const struct rte_mbuf *m, efx_oword_t *tx_desc)
{
bool outer_l3;
bool outer_l4;
@@ -351,6 +383,8 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
uint8_t partial_en;
uint16_t part_cksum_w;
uint16_t l4_offset_w;
+ rte_iova_t dma_addr;
+ int rc;
if ((m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) == 0) {
outer_l3 = (m->ol_flags & RTE_MBUF_F_TX_IP_CKSUM);
@@ -384,8 +418,13 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
m->l2_len + m->l3_len) >> 1;
}
+ rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova_default(m),
+ rte_pktmbuf_data_len(m), &dma_addr);
+ if (unlikely(rc != 0))
+ return rc;
+
EFX_POPULATE_OWORD_10(*tx_desc,
- ESF_GZ_TX_SEND_ADDR, rte_mbuf_data_iova(m),
+ ESF_GZ_TX_SEND_ADDR, dma_addr,
ESF_GZ_TX_SEND_LEN, rte_pktmbuf_data_len(m),
ESF_GZ_TX_SEND_NUM_SEGS, m->nb_segs,
ESF_GZ_TX_SEND_CSO_PARTIAL_START_W, l4_offset_w,
@@ -405,6 +444,8 @@ sfc_ef100_tx_qdesc_send_create(const struct rte_mbuf *m, efx_oword_t *tx_desc)
EFX_OR_OWORD(*tx_desc, tx_desc_extra_fields);
}
+
+ return 0;
}
static void
@@ -554,11 +595,11 @@ sfc_ef100_tx_pkt_descs_max(const struct rte_mbuf *m)
return m->nb_segs + extra_descs;
}
-static struct rte_mbuf *
+static int
sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
- struct rte_mbuf *m, unsigned int *added)
+ struct rte_mbuf **m, unsigned int *added)
{
- struct rte_mbuf *m_seg = m;
+ struct rte_mbuf *m_seg = *m;
unsigned int nb_hdr_descs;
unsigned int nb_pld_descs;
unsigned int seg_split = 0;
@@ -570,17 +611,19 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
size_t tcph_off;
size_t header_len;
size_t remaining_hdr_len;
+ rte_iova_t dma_addr;
+ int rc;
- if (m->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
- outer_iph_off = m->outer_l2_len;
- outer_udph_off = outer_iph_off + m->outer_l3_len;
+ if (m_seg->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) {
+ outer_iph_off = m_seg->outer_l2_len;
+ outer_udph_off = outer_iph_off + m_seg->outer_l3_len;
} else {
outer_iph_off = 0;
outer_udph_off = 0;
}
- iph_off = outer_udph_off + m->l2_len;
- tcph_off = iph_off + m->l3_len;
- header_len = tcph_off + m->l4_len;
+ iph_off = outer_udph_off + m_seg->l2_len;
+ tcph_off = iph_off + m_seg->l3_len;
+ header_len = tcph_off + m_seg->l4_len;
/*
* Remember ID of the TX_TSO descriptor to be filled in.
@@ -592,11 +635,15 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
remaining_hdr_len = header_len;
do {
+ rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova(m_seg),
+ rte_pktmbuf_data_len(m_seg), &dma_addr);
+ if (unlikely(rc != 0))
+ return rc;
+
id = (*added)++ & txq->ptr_mask;
if (rte_pktmbuf_data_len(m_seg) <= remaining_hdr_len) {
/* The segment is fully header segment */
- sfc_ef100_tx_qdesc_seg_create(
- rte_mbuf_data_iova(m_seg),
+ sfc_ef100_tx_qdesc_seg_create(dma_addr,
rte_pktmbuf_data_len(m_seg),
&txq->txq_hw_ring[id]);
remaining_hdr_len -= rte_pktmbuf_data_len(m_seg);
@@ -605,15 +652,13 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
* The segment must be split into header and
* payload segments
*/
- sfc_ef100_tx_qdesc_seg_create(
- rte_mbuf_data_iova(m_seg),
- remaining_hdr_len,
- &txq->txq_hw_ring[id]);
- SFC_ASSERT(txq->sw_ring[id].mbuf == NULL);
+ sfc_ef100_tx_qdesc_seg_create(dma_addr,
+ remaining_hdr_len, &txq->txq_hw_ring[id]);
+ txq->sw_ring[id].mbuf = NULL;
id = (*added)++ & txq->ptr_mask;
sfc_ef100_tx_qdesc_seg_create(
- rte_mbuf_data_iova(m_seg) + remaining_hdr_len,
+ dma_addr + remaining_hdr_len,
rte_pktmbuf_data_len(m_seg) - remaining_hdr_len,
&txq->txq_hw_ring[id]);
remaining_hdr_len = 0;
@@ -628,15 +673,16 @@ sfc_ef100_xmit_tso_pkt(struct sfc_ef100_txq * const txq,
* pointer counts it twice and we should correct it.
*/
nb_hdr_descs = ((id - tso_desc_id) & txq->ptr_mask) - seg_split;
- nb_pld_descs = m->nb_segs - nb_hdr_descs + seg_split;
+ nb_pld_descs = (*m)->nb_segs - nb_hdr_descs + seg_split;
- sfc_ef100_tx_qdesc_tso_create(m, nb_hdr_descs, nb_pld_descs, header_len,
- rte_pktmbuf_pkt_len(m) - header_len,
+ sfc_ef100_tx_qdesc_tso_create(*m, nb_hdr_descs, nb_pld_descs, header_len,
+ rte_pktmbuf_pkt_len(*m) - header_len,
outer_iph_off, outer_udph_off,
iph_off, tcph_off,
&txq->txq_hw_ring[tso_desc_id]);
- return m_seg;
+ *m = m_seg;
+ return 0;
}
static uint16_t
@@ -648,6 +694,8 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
bool reap_done;
struct rte_mbuf **pktp;
struct rte_mbuf **pktp_end;
+ rte_iova_t dma_addr;
+ int rc;
if (unlikely(txq->flags &
(SFC_EF100_TXQ_NOT_RUNNING | SFC_EF100_TXQ_EXCEPTION)))
@@ -694,14 +742,15 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
id = added++ & txq->ptr_mask;
sfc_ef100_tx_qdesc_prefix_create(m_seg,
&txq->txq_hw_ring[id]);
+ txq->sw_ring[id].mbuf = NULL;
}
if (m_seg->ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
- m_seg = sfc_ef100_xmit_tso_pkt(txq, m_seg, &added);
+ rc = sfc_ef100_xmit_tso_pkt(txq, &m_seg, &added);
} else {
id = added++ & txq->ptr_mask;
- sfc_ef100_tx_qdesc_send_create(m_seg,
- &txq->txq_hw_ring[id]);
+ rc = sfc_ef100_tx_qdesc_send_create(txq, m_seg,
+ &txq->txq_hw_ring[id]);
/*
* rte_pktmbuf_free() is commonly used in DPDK for
@@ -722,22 +771,29 @@ sfc_ef100_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
m_seg = m_seg->next;
}
- while (m_seg != NULL) {
+ while (likely(rc == 0) && m_seg != NULL) {
RTE_BUILD_BUG_ON(SFC_MBUF_SEG_LEN_MAX >
SFC_EF100_TX_SEG_DESC_LEN_MAX);
id = added++ & txq->ptr_mask;
- sfc_ef100_tx_qdesc_seg_create(rte_mbuf_data_iova(m_seg),
+ rc = sfc_ef100_tx_map(txq, rte_mbuf_data_iova(m_seg),
+ rte_pktmbuf_data_len(m_seg),
+ &dma_addr);
+ sfc_ef100_tx_qdesc_seg_create(dma_addr,
rte_pktmbuf_data_len(m_seg),
&txq->txq_hw_ring[id]);
txq->sw_ring[id].mbuf = m_seg;
m_seg = m_seg->next;
}
- dma_desc_space -= (added - pkt_start);
+ if (likely(rc == 0)) {
+ dma_desc_space -= (added - pkt_start);
- sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1,
- rte_pktmbuf_pkt_len(*pktp));
+ sfc_pkts_bytes_add(&txq->dp.dpq.stats, 1,
+ rte_pktmbuf_pkt_len(*pktp));
+ } else {
+ added = pkt_start;
+ }
}
if (likely(added != txq->added)) {
@@ -837,6 +893,10 @@ sfc_ef100_tx_qcreate(uint16_t port_id, uint16_t queue_id,
txq->tso_max_payload_len = info->tso_max_payload_len;
txq->tso_max_nb_outgoing_frames = info->tso_max_nb_outgoing_frames;
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ txq->nic_dma_info = info->nic_dma_info;
+#endif
+
sfc_ef100_tx_debug(txq, "TxQ doorbell is %p", txq->doorbell);
*dp_txqp = &txq->dp;
@@ -911,6 +911,7 @@ sfc_ev_qinit(struct sfc_adapter *sa,
/* Allocate DMA space */
rc = sfc_dma_alloc(sa, sfc_evq_type2str(type), type_index,
+ EFX_NIC_DMA_ADDR_EVENT_RING,
efx_evq_size(sa->nic, evq->entries, sa->evq_flags),
socket_id, &evq->mem);
if (rc != 0)
@@ -19,9 +19,10 @@ static int
sfc_mcdi_dma_alloc(void *cookie, const char *name, size_t len,
efsys_mem_t *esmp)
{
- const struct sfc_adapter *sa = cookie;
+ struct sfc_adapter *sa = cookie;
- return sfc_dma_alloc(sa, name, 0, len, sa->socket_id, esmp);
+ return sfc_dma_alloc(sa, name, 0, EFX_NIC_DMA_ADDR_MCDI_BUF, len,
+ sa->socket_id, esmp);
}
static sfc_efx_mcdi_dma_free_cb sfc_mcdi_dma_free;
new file mode 100644
@@ -0,0 +1,374 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#include <rte_mempool.h>
+#include <rte_memzone.h>
+
+#include "efx.h"
+
+#include "sfc_log.h"
+#include "sfc_tweak.h"
+#include "sfc.h"
+#include "sfc_nic_dma.h"
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+
+/*
+ * Register mappings for all IOVA mempools at the time of creation so
+ * that every mbuf has a mapping.
+ */
+
+struct sfc_nic_dma_register_mempool_data {
+ struct sfc_adapter *sa;
+ int rc;
+};
+
+static void
+sfc_nic_dma_register_mempool_chunk(struct rte_mempool *mp __rte_unused,
+ void *opaque,
+ struct rte_mempool_memhdr *memhdr,
+ unsigned mem_idx __rte_unused)
+{
+ struct sfc_nic_dma_register_mempool_data *register_data = opaque;
+ struct sfc_adapter *sa = register_data->sa;
+ struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+ efsys_dma_addr_t nic_base;
+ efsys_dma_addr_t trgt_base;
+ size_t map_len;
+ int rc;
+
+ if (memhdr->iova == RTE_BAD_IOVA)
+ return;
+
+ /*
+ * Check if the memory chunk is mapped already. In that case, there's
+ * nothing left to do.
+ */
+ nic_base = sfc_nic_dma_map(&sas->nic_dma_info, memhdr->iova,
+ memhdr->len);
+ if (nic_base != RTE_BAD_IOVA)
+ return;
+
+ rc = efx_nic_dma_config_add(sa->nic, memhdr->iova, memhdr->len,
+ &nic_base, &trgt_base, &map_len);
+ if (rc != 0) {
+ sfc_err(sa,
+ "cannot handle memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+ memhdr->addr, (uint64_t)memhdr->iova, memhdr->len,
+ rte_strerror(rc));
+ register_data->rc = rc;
+ return;
+ }
+
+ sfc_info(sa,
+ "registered memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 " -> NIC_BASE=%" PRIx64 " TRGT_BASE=%" PRIx64 " MAP_LEN=%" PRIx64,
+ memhdr->addr, (uint64_t)memhdr->iova, memhdr->len,
+ (uint64_t)nic_base, (uint64_t)trgt_base, (uint64_t)map_len);
+
+ rc = sfc_nic_dma_add_region(&sas->nic_dma_info, nic_base, trgt_base,
+ map_len);
+ if (rc != 0) {
+ sfc_err(sa, "failed to add regioned NIC DMA mapping: %s",
+ rte_strerror(rc));
+ register_data->rc = rc;
+ }
+}
+
+static int
+sfc_nic_dma_register_mempool(struct sfc_adapter *sa, struct rte_mempool *mp)
+{
+ struct sfc_nic_dma_register_mempool_data register_data = {
+ .sa = sa,
+ };
+ uint32_t iters;
+ int result = 0;
+ int rc;
+
+ SFC_ASSERT(sfc_adapter_is_locked(sa));
+
+ if (mp->flags & RTE_MEMPOOL_F_NON_IO)
+ return 0;
+
+	iters = rte_mempool_mem_iter(mp, sfc_nic_dma_register_mempool_chunk,
+				     &register_data);
+ if (iters != mp->nb_mem_chunks) {
+		sfc_err(sa,
+			"failed to iterate over memory chunks, some mbufs may be unusable");
+ result = EFAULT;
+ /*
+		 * Return an error, but try to continue: when invoked from
+		 * an asynchronous mempool event callback, the error cannot
+		 * be handled properly anyway.
+ */
+ }
+
+ if (register_data.rc != 0) {
+ sfc_err(sa,
+ "failed to map some memory chunks (%s), some mbufs may be unusable",
+ rte_strerror(register_data.rc));
+ result = register_data.rc;
+ /* Try to continue */
+ }
+
+ /*
+	 * There is no point in applying mapping changes triggered by
+	 * mempool registration before the adapter is started. The
+	 * configuration will be propagated on start, and mbuf mapping
+	 * is required in the started state only.
+ */
+ if (sa->state == SFC_ETHDEV_STARTED) {
+ /*
+ * It's safe to reconfigure the DMA mapping even if no changes
+ * have been made during memory chunks iteration. In that case,
+ * this operation will not change anything either.
+ */
+ rc = efx_nic_dma_reconfigure(sa->nic);
+ if (rc != 0) {
+ sfc_err(sa, "cannot reconfigure NIC DMA: %s",
+ rte_strerror(rc));
+ result = rc;
+ }
+ }
+
+ return result;
+}
+
+static int
+sfc_nic_dma_attach_flat(struct sfc_adapter *sa)
+{
+ struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+
+ /*
+ * Add a trivially-mapped region that covers the whole address
+ * space to ensure compatibility.
+ */
+ return sfc_nic_dma_add_region(&sas->nic_dma_info, 0, 0, UINT64_MAX);
+}
+
+static void
+sfc_nic_dma_detach_flat(struct sfc_adapter *sa)
+{
+ struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+
+ sas->nic_dma_info.nb_regions = 0;
+}
+
+static void
+sfc_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
+ void *user_data)
+{
+ struct sfc_adapter *sa = user_data;
+
+ if (event != RTE_MEMPOOL_EVENT_READY)
+ return;
+
+ sfc_adapter_lock(sa);
+
+ (void)sfc_nic_dma_register_mempool(sa, mp);
+
+ sfc_adapter_unlock(sa);
+}
+
+struct sfc_mempool_walk_data {
+ struct sfc_adapter *sa;
+ int rc;
+};
+
+static void
+sfc_mempool_walk_cb(struct rte_mempool *mp, void *arg)
+{
+ struct sfc_mempool_walk_data *walk_data = arg;
+ int rc;
+
+ rc = sfc_nic_dma_register_mempool(walk_data->sa, mp);
+ if (rc != 0)
+ walk_data->rc = rc;
+}
+
+static int
+sfc_nic_dma_attach_regioned(struct sfc_adapter *sa)
+{
+ struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+ struct sfc_mempool_walk_data walk_data = {
+ .sa = sa,
+ };
+ int rc;
+
+ rc = rte_mempool_event_callback_register(sfc_mempool_event_cb, sa);
+ if (rc != 0) {
+ sfc_err(sa, "failed to register mempool event callback");
+ rc = EFAULT;
+ goto fail_mempool_event_callback_register;
+ }
+
+ rte_mempool_walk(sfc_mempool_walk_cb, &walk_data);
+ if (walk_data.rc != 0) {
+ rc = walk_data.rc;
+ goto fail_mempool_walk;
+ }
+
+ return 0;
+
+fail_mempool_walk:
+ rte_mempool_event_callback_unregister(sfc_mempool_event_cb, sa);
+ sas->nic_dma_info.nb_regions = 0;
+
+fail_mempool_event_callback_register:
+ return rc;
+}
+
+static void
+sfc_nic_dma_detach_regioned(struct sfc_adapter *sa)
+{
+ struct sfc_adapter_shared *sas = sfc_sa2shared(sa);
+
+ rte_mempool_event_callback_unregister(sfc_mempool_event_cb, sa);
+ sas->nic_dma_info.nb_regions = 0;
+}
+
+#else
+
+static int
+sfc_nic_dma_attach_flat(struct sfc_adapter *sa __rte_unused)
+{
+ /* Nothing to do */
+ return 0;
+}
+
+static void
+sfc_nic_dma_detach_flat(struct sfc_adapter *sa __rte_unused)
+{
+ /* Nothing to do */
+}
+
+static int
+sfc_nic_dma_attach_regioned(struct sfc_adapter *sa)
+{
+ sfc_err(sa, "regioned NIC DMA is not supported");
+ return ENOTSUP;
+}
+
+static void
+sfc_nic_dma_detach_regioned(struct sfc_adapter *sa __rte_unused)
+{
+ /* Nothing to do */
+}
+
+#endif
+
+int
+sfc_nic_dma_attach(struct sfc_adapter *sa)
+{
+ const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
+ int rc;
+
+ sfc_log_init(sa, "dma_mapping_type=%u", encp->enc_dma_mapping);
+
+ switch (encp->enc_dma_mapping) {
+ case EFX_NIC_DMA_MAPPING_FLAT:
+ rc = sfc_nic_dma_attach_flat(sa);
+ break;
+ case EFX_NIC_DMA_MAPPING_REGIONED:
+ rc = sfc_nic_dma_attach_regioned(sa);
+ break;
+ default:
+ rc = ENOTSUP;
+ break;
+ }
+
+ sfc_log_init(sa, "done: %s", rte_strerror(rc));
+ return rc;
+}
+
+void
+sfc_nic_dma_detach(struct sfc_adapter *sa)
+{
+ const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
+
+ sfc_log_init(sa, "dma_mapping_type=%u", encp->enc_dma_mapping);
+
+ switch (encp->enc_dma_mapping) {
+ case EFX_NIC_DMA_MAPPING_FLAT:
+ sfc_nic_dma_detach_flat(sa);
+ break;
+ case EFX_NIC_DMA_MAPPING_REGIONED:
+ sfc_nic_dma_detach_regioned(sa);
+ break;
+ default:
+ break;
+ }
+
+ sfc_log_init(sa, "done");
+}
+
+int
+sfc_nic_dma_mz_map(struct sfc_adapter *sa, const struct rte_memzone *mz,
+ efx_nic_dma_addr_type_t addr_type,
+ efsys_dma_addr_t *dma_addr)
+{
+ efsys_dma_addr_t nic_base;
+ efsys_dma_addr_t trgt_base;
+ size_t map_len;
+ int rc;
+
+ /*
+	 * Check if the memzone can already be mapped without changing
+	 * the DMA configuration. libefx is used instead of the driver
+	 * cache since it can take the buffer type into account and make
+	 * a better decision for buffers that are mapped by the FW
+	 * itself.
+ * For generality's sake, not all region processing code is hidden
+ * behind the RTE_PMD_NET_SFC_NIC_DMA_MAP define.
+ */
+ rc = efx_nic_dma_map(sa->nic, addr_type, mz->iova, mz->len, dma_addr);
+ if (rc == 0)
+ return 0;
+
+ if (rc != ENOENT) {
+ sfc_err(sa,
+ "failed to map memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+ mz->addr, (uint64_t)mz->iova, mz->len,
+ rte_strerror(rc));
+ return rc;
+ }
+
+ rc = efx_nic_dma_config_add(sa->nic, mz->iova, mz->len,
+ &nic_base, &trgt_base, &map_len);
+ if (rc != 0) {
+ sfc_err(sa,
+ "cannot handle memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+ mz->addr, (uint64_t)mz->iova, mz->len,
+ rte_strerror(rc));
+ return EFAULT;
+ }
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ rc = sfc_nic_dma_add_region(&sfc_sa2shared(sa)->nic_dma_info,
+ nic_base, trgt_base, map_len);
+ if (rc != 0) {
+ sfc_err(sa,
+ "failed to add DMA region VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+ mz->addr, (uint64_t)mz->iova, mz->len,
+ rte_strerror(rc));
+ return rc;
+ }
+#endif
+
+ rc = efx_nic_dma_reconfigure(sa->nic);
+ if (rc != 0) {
+ sfc_err(sa, "failed to reconfigure DMA");
+ return rc;
+ }
+
+ rc = efx_nic_dma_map(sa->nic, addr_type, mz->iova, mz->len, dma_addr);
+ if (rc != 0) {
+ sfc_err(sa,
+ "failed to map memory buffer VA=%p IOVA=%" PRIx64 " length=0x%" PRIx64 ": %s",
+ mz->addr, (uint64_t)mz->iova, mz->len,
+ rte_strerror(rc));
+ return rc;
+ }
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#ifndef _SFC_NIC_DMA_H
+#define _SFC_NIC_DMA_H
+
+#include <rte_memzone.h>
+
+#include "efx.h"
+
+#include "sfc.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int sfc_nic_dma_attach(struct sfc_adapter *sa);
+void sfc_nic_dma_detach(struct sfc_adapter *sa);
+
+int sfc_nic_dma_mz_map(struct sfc_adapter *sa, const struct rte_memzone *mz,
+ efx_nic_dma_addr_type_t addr_type,
+ efsys_dma_addr_t *dma_addr);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SFC_NIC_DMA_H */
new file mode 100644
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#include <rte_common.h>
+
+#include "sfc_tweak.h"
+#include "sfc_nic_dma.h"
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+
+int
+sfc_nic_dma_add_region(struct sfc_nic_dma_info *nic_dma_info,
+ rte_iova_t nic_base, rte_iova_t trgt_base,
+ size_t map_len)
+{
+ struct sfc_nic_dma_region *region;
+
+ if (nic_dma_info->nb_regions == SFC_NIC_DMA_REGIONS_MAX)
+ return ENOMEM;
+
+ region = &nic_dma_info->regions[nic_dma_info->nb_regions];
+ region->nic_base = nic_base;
+ region->trgt_base = trgt_base;
+ region->trgt_end = trgt_base + map_len;
+
+ nic_dma_info->nb_regions++;
+ return 0;
+}
+
+rte_iova_t
+sfc_nic_dma_map(const struct sfc_nic_dma_info *nic_dma_info,
+ rte_iova_t trgt_addr, size_t len)
+{
+ unsigned int i;
+
+ for (i = 0; i < nic_dma_info->nb_regions; i++) {
+ const struct sfc_nic_dma_region *region;
+
+ region = &nic_dma_info->regions[i];
+ if (region->trgt_base <= trgt_addr &&
+ trgt_addr + len <= region->trgt_end) {
+ return region->nic_base +
+ (trgt_addr - region->trgt_base);
+ }
+ }
+
+ return RTE_BAD_IOVA;
+}
+
+#endif /* RTE_PMD_NET_SFC_NIC_DMA_MAP */
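
A minimal usage sketch of the two helpers above (the base addresses are
hypothetical; in the driver, the nic_base/trgt_base pair comes from
efx_nic_dma_config_add(), and RTE_PMD_NET_SFC_NIC_DMA_MAP must be
non-zero for the helpers to be compiled in):

    #include "sfc_nic_dma_dp.h"

    static void
    sfc_nic_dma_map_sketch(void)
    {
        struct sfc_nic_dma_info info = { .nb_regions = 0 };
        rte_iova_t nic_addr;

        /* NIC offset 0x0 maps a 256 MiB window of target IOVA space */
        (void)sfc_nic_dma_add_region(&info, 0x0 /* nic_base */,
                                     0x80000000 /* trgt_base */,
                                     0x10000000 /* map_len */);

        /* Translate a target (IOVA) address into the NIC's view */
        nic_addr = sfc_nic_dma_map(&info, 0x80001000, 64);
        /* nic_addr == 0x1000 here; RTE_BAD_IOVA is returned if the
         * buffer is not fully covered by a single registered region
         */
        RTE_SET_USED(nic_addr);
    }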
new file mode 100644
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright(c) 2021 Xilinx, Inc.
+ */
+
+#ifndef _SFC_NIC_DMA_DP_H
+#define _SFC_NIC_DMA_DP_H
+
+#include <rte_common.h>
+
+#include "sfc_tweak.h"
+
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SFC_NIC_DMA_REGIONS_MAX 2
+
+struct sfc_nic_dma_region {
+ rte_iova_t nic_base;
+ rte_iova_t trgt_base;
+ rte_iova_t trgt_end;
+};
+
+/** Driver cache for NIC DMA regions */
+struct sfc_nic_dma_info {
+ struct sfc_nic_dma_region regions[SFC_NIC_DMA_REGIONS_MAX];
+ unsigned int nb_regions;
+};
+
+int sfc_nic_dma_add_region(struct sfc_nic_dma_info *nic_dma_info,
+ rte_iova_t nic_base,
+ rte_iova_t trgt_base, size_t map_len);
+
+rte_iova_t sfc_nic_dma_map(const struct sfc_nic_dma_info *nic_dma_info,
+ rte_iova_t trgt_addr, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_PMD_NET_SFC_NIC_DMA_MAP */
+
+#endif /* _SFC_NIC_DMA_DP_H */
@@ -440,7 +440,8 @@ sfc_port_attach(struct sfc_adapter *sa)
mac_nstats = efx_nic_cfg_get(sa->nic)->enc_mac_stats_nstats;
mac_stats_size = RTE_ALIGN(mac_nstats * sizeof(uint64_t), EFX_BUF_SIZE);
- rc = sfc_dma_alloc(sa, "mac_stats", 0, mac_stats_size,
+ rc = sfc_dma_alloc(sa, "mac_stats", 0, EFX_NIC_DMA_ADDR_MAC_STATS_BUF,
+ mac_stats_size,
sa->socket_id, &port->mac_stats_dma_mem);
if (rc != 0)
goto fail_mac_stats_dma_alloc;
@@ -1218,7 +1218,7 @@ sfc_rx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
rxq->buf_size = buf_size;
- rc = sfc_dma_alloc(sa, "rxq", sw_index,
+ rc = sfc_dma_alloc(sa, "rxq", sw_index, EFX_NIC_DMA_ADDR_RX_RING,
efx_rxq_size(sa->nic, rxq_info->entries),
socket_id, &rxq->mem);
if (rc != 0)
@@ -1248,6 +1248,10 @@ sfc_rx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
info.vi_window_shift = encp->enc_vi_window_shift;
info.fcw_offset = sa->fcw_offset;
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ info.nic_dma_info = &sas->nic_dma_info;
+#endif
+
rc = sa->priv.dp_rx->qcreate(sa->eth_dev->data->port_id, sw_index,
&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
socket_id, &info, &rxq_info->dp);
@@ -51,4 +51,13 @@
*/
#define SFC_SW_STATS_ATOMIC 0
+#ifndef RTE_PMD_NET_SFC_NIC_DMA_MAP
+/**
+ * Toggle regioned NIC DMA mapping support.
+ *
+ * Required on the SN1022 SoC only.
+ */
+#define RTE_PMD_NET_SFC_NIC_DMA_MAP 0
+#endif
+
#endif /* _SFC_TWEAK_H_ */
@@ -194,7 +194,7 @@ sfc_tx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
SFC_TX_DEFAULT_FREE_THRESH;
txq_info->offloads = offloads;
- rc = sfc_dma_alloc(sa, "txq", sw_index,
+ rc = sfc_dma_alloc(sa, "txq", sw_index, EFX_NIC_DMA_ADDR_TX_RING,
efx_txq_size(sa->nic, txq_info->entries),
socket_id, &txq->mem);
if (rc != 0)
@@ -226,6 +226,10 @@ sfc_tx_qinit(struct sfc_adapter *sa, sfc_sw_index_t sw_index,
info.tso_max_payload_len = encp->enc_tx_tso_max_payload_length;
info.tso_max_nb_outgoing_frames = encp->enc_tx_tso_max_nframes;
+#if RTE_PMD_NET_SFC_NIC_DMA_MAP
+ info.nic_dma_info = &sas->nic_dma_info;
+#endif
+
rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
&RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
socket_id, &info, &txq_info->dp);