To minimize cache misses, add the flags and the burst size used in
forwarding to struct fwd_stream, and move the condition tests in the
forwarding paths to the per-stream flags.
Signed-off-by: Xueming Li <xuemingl@nvidia.com>
---
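Illustration only, not part of the patch: a minimal sketch of the hot-path
access pattern this change enables, using simplified stand-in types
(struct fwd_stream_sketch, rx_burst_stub) rather than the real testpmd
definitions. Burst size and feature flags are read from the single
per-stream structure that is already resident in cache, instead of from
several scattered globals.

#include <stdint.h>

/* simplified stand-in for the real struct fwd_stream */
struct fwd_stream_sketch {
	uint16_t nb_pkt_per_burst;         /* copy of the global burst size */
	unsigned int record_burst_stats:1; /* copy of the global flag */
	unsigned int retry_enabled:1;      /* copy of the global flag */
	uint64_t rx_packets;
};

/* stand-in for an RX burst call; returns the number of packets received */
static uint16_t rx_burst_stub(uint16_t max_pkts)
{
	return max_pkts;
}

static void
fwd_hot_path(struct fwd_stream_sketch *fs)
{
	/* burst size and feature flags come from one cache-resident struct */
	uint16_t nb_rx = rx_burst_stub(fs->nb_pkt_per_burst);

	if (fs->record_burst_stats) /* per-stream bit instead of a global test */
		fs->rx_packets += nb_rx;
}

int main(void)
{
	struct fwd_stream_sketch fs = {
		.nb_pkt_per_burst = 32,
		.record_burst_stats = 1,
		.retry_enabled = 0,
	};

	fwd_hot_path(&fs);
	return fs.rx_packets == 32 ? 0 : 1;
}
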
app/test-pmd/config.c | 18 ++++++++++++++----
app/test-pmd/flowgen.c | 6 +++---
app/test-pmd/noisy_vnf.c | 2 +-
app/test-pmd/testpmd.h | 21 ++++++++++++---------
app/test-pmd/txonly.c | 8 ++++----
5 files changed, 34 insertions(+), 21 deletions(-)
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -3050,6 +3050,16 @@ fwd_topology_tx_port_get(portid_t rxp)
}
}
+static void
+fwd_stream_set_common(struct fwd_stream *fs)
+{
+ fs->nb_pkt_per_burst = nb_pkt_per_burst;
+ fs->record_burst_stats = !!record_burst_stats;
+ fs->record_core_cycles = !!record_core_cycles;
+ fs->retry_enabled = !!retry_enabled;
+ fs->rxq_share = !!rxq_share;
+}
+
static void
simple_fwd_config_setup(void)
{
@@ -3079,7 +3089,7 @@ simple_fwd_config_setup(void)
fwd_ports_ids[fwd_topology_tx_port_get(i)];
fwd_streams[i]->tx_queue = 0;
fwd_streams[i]->peer_addr = fwd_streams[i]->tx_port;
- fwd_streams[i]->retry_enabled = retry_enabled;
+ fwd_stream_set_common(fwd_streams[i]);
}
}
@@ -3140,7 +3150,7 @@ rss_fwd_config_setup(void)
fs->tx_port = fwd_ports_ids[txp];
fs->tx_queue = rxq;
fs->peer_addr = fs->tx_port;
- fs->retry_enabled = retry_enabled;
+ fwd_stream_set_common(fs);
rxp++;
if (rxp < nb_fwd_ports)
continue;
@@ -3255,7 +3265,7 @@ dcb_fwd_config_setup(void)
fs->tx_port = fwd_ports_ids[txp];
fs->tx_queue = txq + j % nb_tx_queue;
fs->peer_addr = fs->tx_port;
- fs->retry_enabled = retry_enabled;
+ fwd_stream_set_common(fs);
}
fwd_lcores[lc_id]->stream_nb +=
rxp_dcb_info.tc_queue.tc_rxq[i][tc].nb_queue;
@@ -3326,7 +3336,7 @@ icmp_echo_config_setup(void)
fs->tx_port = fs->rx_port;
fs->tx_queue = rxq;
fs->peer_addr = fs->tx_port;
- fs->retry_enabled = retry_enabled;
+ fwd_stream_set_common(fs);
if (verbose_level > 0)
printf(" stream=%d port=%d rxq=%d txq=%d\n",
sm_id, fs->rx_port, fs->rx_queue,
--- a/app/test-pmd/flowgen.c
+++ b/app/test-pmd/flowgen.c
@@ -97,12 +97,12 @@ flow_gen_stream(struct fwd_stream *fs, uint16_t nb_rx,
if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
ol_flags |= PKT_TX_MACSEC;
- for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+ for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
if (!nb_pkt || !nb_clones) {
nb_clones = nb_pkt_flowgen_clones;
/* Logic limitation */
- if (nb_clones > nb_pkt_per_burst)
- nb_clones = nb_pkt_per_burst;
+ if (nb_clones > fs->nb_pkt_per_burst)
+ nb_clones = fs->nb_pkt_per_burst;
pkt = rte_mbuf_raw_alloc(mbp);
if (!pkt)
--- a/app/test-pmd/noisy_vnf.c
+++ b/app/test-pmd/noisy_vnf.c
@@ -153,7 +153,7 @@ pkt_burst_noisy_vnf(struct fwd_stream *fs)
uint64_t now;
nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
- pkts_burst, nb_pkt_per_burst);
+ pkts_burst, fs->nb_pkt_per_burst);
inc_rx_burst_stats(fs, nb_rx);
if (unlikely(nb_rx == 0))
goto flush;
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -128,12 +128,17 @@ struct fwd_stream {
queueid_t tx_queue; /**< TX queue to send forwarded packets */
streamid_t peer_addr; /**< index of peer ethernet address of packets */
- unsigned int retry_enabled;
+ uint16_t nb_pkt_per_burst;
+ unsigned int record_burst_stats:1;
+ unsigned int record_core_cycles:1;
+ unsigned int retry_enabled:1;
+ unsigned int rxq_share:1;
/* "read-write" results */
uint64_t rx_packets; /**< received packets */
uint64_t tx_packets; /**< received packets transmitted */
uint64_t fwd_dropped; /**< received packets not forwarded */
+ uint64_t core_cycles; /**< used for RX and TX processing */
uint64_t rx_bad_ip_csum ; /**< received packets has bad ip checksum */
uint64_t rx_bad_l4_csum ; /**< received packets has bad l4 checksum */
uint64_t rx_bad_outer_l4_csum;
@@ -141,7 +146,6 @@ struct fwd_stream {
uint64_t rx_bad_outer_ip_csum;
/**< received packets having bad outer ip checksum */
unsigned int gro_times; /**< GRO operation times */
- uint64_t core_cycles; /**< used for RX and TX processing */
struct pkt_burst_stats rx_burst_stats;
struct pkt_burst_stats tx_burst_stats;
struct fwd_lcore *lcore; /**< Lcore being scheduled. */
@@ -750,28 +754,27 @@ port_pci_reg_write(struct rte_port *port, uint32_t reg_off, uint32_t reg_v)
static inline void
get_start_cycles(uint64_t *start_tsc)
{
- if (record_core_cycles)
- *start_tsc = rte_rdtsc();
+ *start_tsc = rte_rdtsc();
}
static inline void
get_end_cycles(struct fwd_stream *fs, uint64_t start_tsc)
{
- if (record_core_cycles)
+ if (unlikely(fs->record_core_cycles))
fs->core_cycles += rte_rdtsc() - start_tsc;
}
static inline void
inc_rx_burst_stats(struct fwd_stream *fs, uint16_t nb_rx)
{
- if (record_burst_stats)
+ if (unlikely(fs->record_burst_stats))
fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
}
static inline void
inc_tx_burst_stats(struct fwd_stream *fs, uint16_t nb_tx)
{
- if (record_burst_stats)
+ if (unlikely(fs->record_burst_stats))
fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
}
@@ -1032,13 +1035,13 @@ int update_jumbo_frame_offload(portid_t portid);
static void \
pkt_burst_fwd(struct fwd_stream *fs) \
{ \
- struct rte_mbuf *pkts_burst[nb_pkt_per_burst]; \
+ struct rte_mbuf *pkts_burst[fs->nb_pkt_per_burst]; \
uint16_t nb_rx; \
uint64_t start_tsc = 0; \
\
get_start_cycles(&start_tsc); \
nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue, \
- pkts_burst, nb_pkt_per_burst); \
+ pkts_burst, fs->nb_pkt_per_burst); \
inc_rx_burst_stats(fs, nb_rx); \
if (unlikely(nb_rx == 0)) \
return; \
--- a/app/test-pmd/txonly.c
+++ b/app/test-pmd/txonly.c
@@ -367,8 +367,8 @@ pkt_burst_transmit(struct fwd_stream *fs)
eth_hdr.ether_type = rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4);
if (rte_mempool_get_bulk(mbp, (void **)pkts_burst,
- nb_pkt_per_burst) == 0) {
- for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+ fs->nb_pkt_per_burst) == 0) {
+ for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
if (unlikely(!pkt_burst_prepare(pkts_burst[nb_pkt], mbp,
&eth_hdr, vlan_tci,
vlan_tci_outer,
@@ -376,12 +376,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
nb_pkt, fs))) {
rte_mempool_put_bulk(mbp,
(void **)&pkts_burst[nb_pkt],
- nb_pkt_per_burst - nb_pkt);
+ fs->nb_pkt_per_burst - nb_pkt);
break;
}
}
} else {
- for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
+ for (nb_pkt = 0; nb_pkt < fs->nb_pkt_per_burst; nb_pkt++) {
pkt = rte_mbuf_raw_alloc(mbp);
if (pkt == NULL)
break;