@@ -52,7 +52,8 @@ The public API headers are grouped by topics:
[dpaa2_mempool] (@ref rte_dpaa2_mempool.h),
[dpaa2_cmdif] (@ref rte_pmd_dpaa2_cmdif.h),
[dpaa2_qdma] (@ref rte_pmd_dpaa2_qdma.h),
- [crypto_scheduler] (@ref rte_cryptodev_scheduler.h)
+ [crypto_scheduler] (@ref rte_cryptodev_scheduler.h),
+ [dlb] (@ref rte_pmd_dlb.h)
- **memory**:
[memseg] (@ref rte_memory.h),
@@ -7,6 +7,7 @@ USE_MDFILE_AS_MAINPAGE = @TOPDIR@/doc/api/doxy-api-index.md
INPUT = @TOPDIR@/doc/api/doxy-api-index.md \
@TOPDIR@/drivers/bus/vdev \
@TOPDIR@/drivers/crypto/scheduler \
+ @TOPDIR@/drivers/event/dlb \
@TOPDIR@/drivers/mempool/dpaa2 \
@TOPDIR@/drivers/net/ark \
@TOPDIR@/drivers/net/bnxt \
@@ -72,6 +72,25 @@ static struct rte_event_dev_info evdev_dlb_default_info = {
struct process_local_port_data
dlb_port[DLB_MAX_NUM_PORTS][NUM_DLB_PORT_TYPES];
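+/* Delayed-pop variants of the enqueue functions, installed on the event
+ * device when a port selects DELAYED_POP mode. Forward-declared because
+ * dlb_hw_create_ldb_port() references them before their definitions.
+ */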
+static inline uint16_t
+dlb_event_enqueue_delayed(void *event_port,
+ const struct rte_event events[]);
+
+static inline uint16_t
+dlb_event_enqueue_burst_delayed(void *event_port,
+ const struct rte_event events[],
+ uint16_t num);
+
+static inline uint16_t
+dlb_event_enqueue_new_burst_delayed(void *event_port,
+ const struct rte_event events[],
+ uint16_t num);
+
+static inline uint16_t
+dlb_event_enqueue_forward_burst_delayed(void *event_port,
+ const struct rte_event events[],
+ uint16_t num);
+
static int
dlb_hw_query_resources(struct dlb_eventdev *dlb)
{
@@ -1003,6 +1022,33 @@ dlb_hw_create_ldb_port(struct dlb_eventdev *dlb,
qm_port->dequeue_depth = dequeue_depth;
+ /* When using the reserved token scheme, token_pop_thresh is
+ * initially 2 * dequeue_depth. Once the tokens are reserved,
+ * the enqueue code re-assigns it to dequeue_depth.
+ */
+ qm_port->token_pop_thresh = cq_depth;
+
+ /* When the deferred scheduling vdev arg is selected, use deferred pop
+ * for all single-entry CQs.
+ */
+ if (cfg.cq_depth == 1 || (cfg.cq_depth == 2 && use_rsvd_token_scheme)) {
+ if (dlb->defer_sched)
+ qm_port->token_pop_mode = DEFERRED_POP;
+ }
+
+ /* The default enqueue functions do not include delayed-pop support for
+ * performance reasons, so override them with the delayed-pop variants
+ * when this port is configured for delayed pop.
+ */
+ if (qm_port->token_pop_mode == DELAYED_POP) {
+ dlb->event_dev->enqueue = dlb_event_enqueue_delayed;
+ dlb->event_dev->enqueue_burst =
+ dlb_event_enqueue_burst_delayed;
+ dlb->event_dev->enqueue_new_burst =
+ dlb_event_enqueue_new_burst_delayed;
+ dlb->event_dev->enqueue_forward_burst =
+ dlb_event_enqueue_forward_burst_delayed;
+ }
+
qm_port->owed_tokens = 0;
qm_port->issued_releases = 0;
@@ -1163,6 +1209,8 @@ dlb_hw_create_dir_port(struct dlb_eventdev *dlb,
qm_port->dequeue_depth = dequeue_depth;
+ /* Directed ports use auto-pop mode by default. */
+ qm_port->token_pop_mode = AUTO_POP;
qm_port->owed_tokens = 0;
qm_port->issued_releases = 0;
@@ -2572,6 +2620,30 @@ dlb_event_build_hcws(struct dlb_port *qm_port,
}
}
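+/* Write a CQ token pop QE into slot 'idx' of the port's QE cache line,
+ * popping all currently owed tokens less any reserved-token deficit.
+ */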
+static inline void
+dlb_construct_token_pop_qe(struct dlb_port *qm_port, int idx)
+{
+ struct dlb_cq_pop_qe *qe = (void *)qm_port->qe4;
+ int num = qm_port->owed_tokens;
+
+ if (qm_port->use_rsvd_token_scheme) {
+ /* Apply the owed tokens to any reserved-token deficit first, and
+ * return early if no (unreserved) tokens remain to pop.
+ */
+ if (num <= qm_port->cq_rsvd_token_deficit) {
+ qm_port->cq_rsvd_token_deficit -= num;
+ qm_port->owed_tokens = 0;
+ return;
+ }
+ num -= qm_port->cq_rsvd_token_deficit;
+ qm_port->cq_rsvd_token_deficit = 0;
+ }
+
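+ /* The QE token count is encoded as one less than the number to pop. */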
+ qe[idx].cmd_byte = DLB_POP_CMD_BYTE;
+ qe[idx].tokens = num - 1;
+ qm_port->owed_tokens = 0;
+}
+
static __rte_always_inline void
dlb_pp_write(struct dlb_enqueue_qe *qe4,
struct process_local_port_data *port_data)
@@ -2638,7 +2710,8 @@ dlb_consume_qe_immediate(struct dlb_port *qm_port, int num)
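+/* Common enqueue path. Each caller passes a constant 'use_delayed', so the
+ * compiler can drop the delayed-pop branch from the default enqueue paths.
+ */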
static inline uint16_t
__dlb_event_enqueue_burst(void *event_port,
const struct rte_event events[],
- uint16_t num)
+ uint16_t num,
+ bool use_delayed)
{
struct dlb_eventdev_port *ev_port = event_port;
struct dlb_port *qm_port = &ev_port->qm_port;
@@ -2666,6 +2739,35 @@ __dlb_event_enqueue_burst(void *event_port,
for (; j < DLB_NUM_QES_PER_CACHE_LINE && (i + j) < num; j++) {
const struct rte_event *ev = &events[i + j];
+ int16_t thresh = qm_port->token_pop_thresh;
+
+ if (use_delayed &&
+ qm_port->token_pop_mode == DELAYED_POP &&
+ (ev->op == RTE_EVENT_OP_FORWARD ||
+ ev->op == RTE_EVENT_OP_RELEASE) &&
+ qm_port->issued_releases >= thresh - 1) {
+ /* Insert the token pop QE and break out. This
+ * may result in a partial HCW, but that is
+ * simpler than supporting arbitrary QE
+ * insertion.
+ */
+ dlb_construct_token_pop_qe(qm_port, j);
+
+ /* Reset the releases for the next QE batch */
+ qm_port->issued_releases -= thresh;
+
+ /* When using delayed token pop mode, the
+ * initial token threshold is the full CQ
+ * depth. After the first token pop, we need to
+ * reset it to the dequeue_depth.
+ */
+ qm_port->token_pop_thresh =
+ qm_port->dequeue_depth;
+
+ pop_offs = 1;
+ j++;
+ break;
+ }
if (dlb_event_enqueue_prep(ev_port, qm_port, ev,
port_data, &sched_types[j],
@@ -2701,14 +2803,29 @@ dlb_event_enqueue_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
{
- return __dlb_event_enqueue_burst(event_port, events, num);
+ return __dlb_event_enqueue_burst(event_port, events, num, false);
+}
+
+static inline uint16_t
+dlb_event_enqueue_burst_delayed(void *event_port,
+ const struct rte_event events[],
+ uint16_t num)
+{
+ return __dlb_event_enqueue_burst(event_port, events, num, true);
}
static inline uint16_t
dlb_event_enqueue(void *event_port,
const struct rte_event events[])
{
- return __dlb_event_enqueue_burst(event_port, events, 1);
+ return __dlb_event_enqueue_burst(event_port, events, 1, false);
+}
+
+static inline uint16_t
+dlb_event_enqueue_delayed(void *event_port,
+ const struct rte_event events[])
+{
+ return __dlb_event_enqueue_burst(event_port, events, 1, true);
}
static uint16_t
@@ -2716,7 +2833,15 @@ dlb_event_enqueue_new_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
{
- return __dlb_event_enqueue_burst(event_port, events, num);
+ return __dlb_event_enqueue_burst(event_port, events, num, false);
+}
+
+static uint16_t
+dlb_event_enqueue_new_burst_delayed(void *event_port,
+ const struct rte_event events[],
+ uint16_t num)
+{
+ return __dlb_event_enqueue_burst(event_port, events, num, true);
}
static uint16_t
@@ -2724,7 +2849,15 @@ dlb_event_enqueue_forward_burst(void *event_port,
const struct rte_event events[],
uint16_t num)
{
- return __dlb_event_enqueue_burst(event_port, events, num);
+ return __dlb_event_enqueue_burst(event_port, events, num, false);
+}
+
+static uint16_t
+dlb_event_enqueue_forward_burst_delayed(void *event_port,
+ const struct rte_event events[],
+ uint16_t num)
+{
+ return __dlb_event_enqueue_burst(event_port, events, num, true);
}
static __rte_always_inline int
@@ -3124,7 +3257,8 @@ dlb_hw_dequeue(struct dlb_eventdev *dlb,
qm_port->owed_tokens += num;
- dlb_consume_qe_immediate(qm_port, num);
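+ /* Tokens are popped immediately only in auto-pop mode; the delayed and
+ * deferred modes pop them later via the owed_tokens count.
+ */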
+ if (num && qm_port->token_pop_mode == AUTO_POP)
+ dlb_consume_qe_immediate(qm_port, num);
ev_port->outstanding_releases += num;
@@ -3249,7 +3383,8 @@ dlb_hw_dequeue_sparse(struct dlb_eventdev *dlb,
qm_port->owed_tokens += num;
- dlb_consume_qe_immediate(qm_port, num);
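+ /* Tokens are popped immediately only in auto-pop mode; the delayed and
+ * deferred modes pop them later via the owed_tokens count.
+ */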
+ if (num && qm_port->token_pop_mode == AUTO_POP)
+ dlb_consume_qe_immediate(qm_port, num);
ev_port->outstanding_releases += num;
@@ -3293,6 +3428,28 @@ dlb_event_release(struct dlb_eventdev *dlb, uint8_t port_id, int n)
qm_port->qe4[3].cmd_byte = 0;
for (; j < DLB_NUM_QES_PER_CACHE_LINE && (i + j) < n; j++) {
+ int16_t thresh = qm_port->token_pop_thresh;
+
+ if (qm_port->token_pop_mode == DELAYED_POP &&
+ qm_port->issued_releases >= thresh - 1) {
+ /* Insert the token pop QE */
+ dlb_construct_token_pop_qe(qm_port, j);
+
+ /* Reset the releases for the next QE batch */
+ qm_port->issued_releases -= thresh;
+
+ /* When using delayed token pop mode, the
+ * initial token threshold is the full CQ
+ * depth. After the first token pop, we need to
+ * reset it to the dequeue_depth.
+ */
+ qm_port->token_pop_thresh =
+ qm_port->dequeue_depth;
+
+ pop_offs = 1;
+ j++;
+ break;
+ }
qm_port->qe4[j].cmd_byte = DLB_COMP_CMD_BYTE;
qm_port->issued_releases++;
@@ -3325,6 +3482,7 @@ dlb_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
uint64_t wait)
{
struct dlb_eventdev_port *ev_port = event_port;
+ struct dlb_port *qm_port = &ev_port->qm_port;
struct dlb_eventdev *dlb = ev_port->dlb;
uint16_t cnt;
int ret;
@@ -3344,6 +3502,10 @@ dlb_event_dequeue_burst(void *event_port, struct rte_event *ev, uint16_t num,
DLB_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
}
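+ /* In deferred-pop mode, pop the tokens owed from the previous dequeue
+ * before polling the CQ again.
+ */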
+ if (qm_port->token_pop_mode == DEFERRED_POP &&
+ qm_port->owed_tokens)
+ dlb_consume_qe_immediate(qm_port, qm_port->owed_tokens);
+
cnt = dlb_hw_dequeue(dlb, ev_port, ev, num, wait);
DLB_INC_STAT(ev_port->stats.traffic.total_polls, 1);
@@ -3362,6 +3524,7 @@ dlb_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
uint16_t num, uint64_t wait)
{
struct dlb_eventdev_port *ev_port = event_port;
+ struct dlb_port *qm_port = &ev_port->qm_port;
struct dlb_eventdev *dlb = ev_port->dlb;
uint16_t cnt;
int ret;
@@ -3381,6 +3544,10 @@ dlb_event_dequeue_burst_sparse(void *event_port, struct rte_event *ev,
DLB_INC_STAT(ev_port->stats.tx_implicit_rel, out_rels);
}
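+ /* In deferred-pop mode, pop the tokens owed from the previous dequeue
+ * before polling the CQ again.
+ */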
+ if (qm_port->token_pop_mode == DEFERRED_POP &&
+ qm_port->owed_tokens)
+ dlb_consume_qe_immediate(qm_port, qm_port->owed_tokens);
+
cnt = dlb_hw_dequeue_sparse(dlb, ev_port, ev, num, wait);
DLB_INC_STAT(ev_port->stats.traffic.total_polls, 1);
@@ -3687,7 +3854,7 @@ dlb_primary_eventdev_probe(struct rte_eventdev *dev,
struct dlb_devargs *dlb_args)
{
struct dlb_eventdev *dlb;
- int err;
+ int err, i;
dlb = dev->data->dev_private;
@@ -3736,6 +3903,10 @@ dlb_primary_eventdev_probe(struct rte_eventdev *dev,
return err;
}
+ /* Initialize each port's token pop mode */
+ for (i = 0; i < DLB_MAX_NUM_PORTS; i++)
+ dlb->ev_ports[i].qm_port.token_pop_mode = AUTO_POP;
+
rte_spinlock_init(&dlb->qm_instance.resource_lock);
dlb_iface_low_level_io_init(dlb);
@@ -16,6 +16,7 @@
#include "dlb_user.h"
#include "dlb_log.h"
+#include "rte_pmd_dlb.h"
#ifndef RTE_LIBRTE_PMD_DLB_QUELL_STATS
#define DLB_INC_STAT(_stat, _incr_val) ((_stat) += _incr_val)
@@ -262,6 +263,7 @@ struct dlb_port {
bool gen_bit;
uint16_t dir_credits;
uint32_t dequeue_depth;
+ enum dlb_token_pop_mode token_pop_mode;
int pp_mmio_base;
uint16_t cached_ldb_credits;
uint16_t ldb_pushcount_at_credit_expiry;
@@ -273,6 +275,7 @@ struct dlb_port {
uint8_t cq_rsvd_token_deficit;
uint16_t owed_tokens;
int16_t issued_releases;
+ int16_t token_pop_thresh;
int cq_depth;
uint16_t cq_idx;
uint16_t cq_idx_unmasked;
@@ -12,9 +12,10 @@ sources = files('dlb.c',
'dlb_xstats.c',
'pf/dlb_main.c',
'pf/dlb_pf.c',
- 'pf/base/dlb_resource.c'
+ 'pf/base/dlb_resource.c',
+ 'rte_pmd_dlb.c',
)
-headers = files()
+headers = files('rte_pmd_dlb.h')
deps += ['mbuf', 'mempool', 'ring', 'pci', 'bus_pci']
new file mode 100644
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Intel Corporation
+ */
+
+#include "rte_eventdev.h"
+#include "rte_eventdev_pmd.h"
+#include "rte_pmd_dlb.h"
+#include "dlb_priv.h"
+#include "dlb_inline_fns.h"
+
+int
+rte_pmd_dlb_set_token_pop_mode(uint8_t dev_id,
+ uint8_t port_id,
+ enum dlb_token_pop_mode mode)
+{
+ struct dlb_eventdev *dlb;
+ struct rte_eventdev *dev;
+
+ RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+ dev = &rte_eventdevs[dev_id];
+
+ dlb = dlb_pmd_priv(dev);
+
+ if (mode >= NUM_TOKEN_POP_MODES)
+ return -EINVAL;
+
+ /* The event device must be configured, but not yet started */
+ if (!dlb->configured || dlb->run_state != DLB_RUN_STATE_STOPPED)
+ return -EINVAL;
+
+ /* The token pop mode must be set before configuring the port */
+ if (port_id >= dlb->num_ports || dlb->ev_ports[port_id].setup_done)
+ return -EINVAL;
+
+ dlb->ev_ports[port_id].qm_port.token_pop_mode = mode;
+
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019-2020 Intel Corporation
+ */
+
+/*!
+ * @file rte_pmd_dlb.h
+ *
+ * @brief DLB PMD-specific functions
+ *
+ */
+
+#ifndef _RTE_PMD_DLB_H_
+#define _RTE_PMD_DLB_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Selects the token pop mode for a DLB port.
+ */
+enum dlb_token_pop_mode {
+ /* Pop the CQ tokens immediately after dequeueing. */
+ AUTO_POP,
+ /* Pop CQ tokens after (dequeue_depth - 1) events are released.
+ * Supported on load-balanced ports only.
+ */
+ DELAYED_POP,
+ /* Pop the CQ tokens during the next dequeue operation. */
+ DEFERRED_POP,
+
+ /* NUM_TOKEN_POP_MODES must be last */
+ NUM_TOKEN_POP_MODES
+};
+
+/*!
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Configure the token pop mode for a DLB port. By default, all ports use
+ * AUTO_POP. This function must be called before calling rte_event_port_setup()
+ * for the port, but after calling rte_event_dev_configure().
+ *
+ * @note
+ * The defer_sched vdev arg, which configures all load-balanced ports with
+ * dequeue_depth == 1 for DEFERRED_POP mode, takes precedence over this
+ * function.
+ *
+ * @param dev_id
+ * The identifier of the event device.
+ * @param port_id
+ * The identifier of the event port.
+ * @param mode
+ * The token pop mode.
+ *
+ * @return
+ * - 0: Success
+ * - EINVAL: Invalid dev_id, port_id, or mode
+ * - EINVAL: The DLB is not configured, is already running, or the port is
+ *   already set up
+ */
+
+__rte_experimental
+int
+rte_pmd_dlb_set_token_pop_mode(uint8_t dev_id,
+ uint8_t port_id,
+ enum dlb_token_pop_mode mode);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PMD_DLB_H_ */
@@ -1,3 +1,9 @@
DPDK_21 {
local: *;
};
+
+EXPERIMENTAL {
+ global:
+
+ rte_pmd_dlb_set_token_pop_mode;
+};