@@ -39,8 +39,7 @@
#define NB_SOCKETS 8
-/* Configure how many packets ahead to prefetch, when reading packets */
-#define PREFETCH_OFFSET 3
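+/* Default number of packets ahead to prefetch, when reading packets */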
+#define DEFAULT_PREFETCH_OFFSET 4
/* Used to mark destination port as 'invalid'. */
#define BAD_PORT ((uint16_t)-1)
@@ -119,6 +118,9 @@ extern uint32_t max_pkt_len;
extern uint32_t nb_pkt_per_burst;
extern uint32_t mb_mempool_cache_size;
+/* Number of packets to prefetch ahead in the packet processing loops. */
+extern uint16_t prefetch_offset;
+
/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
@@ -72,14 +72,14 @@ l3fwd_acl_prepare_acl_parameter(struct rte_mbuf **pkts_in, struct acl_search_t *
acl->num_ipv6 = 0;
/* Prefetch first packets */
- for (i = 0; i < PREFETCH_OFFSET && i < nb_rx; i++) {
+ for (i = 0; i < prefetch_offset && i < nb_rx; i++) {
rte_prefetch0(rte_pktmbuf_mtod(
pkts_in[i], void *));
}
- for (i = 0; i < (nb_rx - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (nb_rx - prefetch_offset); i++) {
rte_prefetch0(rte_pktmbuf_mtod(pkts_in[
- i + PREFETCH_OFFSET], void *));
+ i + prefetch_offset], void *));
l3fwd_acl_prepare_one_packet(pkts_in, acl, i);
}
@@ -132,16 +132,16 @@ l3fwd_em_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
int32_t j;
/* Prefetch first packets */
- for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ for (j = 0; j < prefetch_offset && j < nb_rx; j++)
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
/*
* Prefetch and forward already prefetched
* packets.
*/
- for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ for (j = 0; j < (nb_rx - prefetch_offset); j++) {
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
- j + PREFETCH_OFFSET], void *));
+ j + prefetch_offset], void *));
l3fwd_em_simple_forward(pkts_burst[j], portid, qconf);
}
@@ -161,16 +161,16 @@ l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
int32_t j;
/* Prefetch first packets */
- for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ for (j = 0; j < prefetch_offset && j < nb_rx; j++)
rte_prefetch0(rte_pktmbuf_mtod(events[j]->mbuf, void *));
/*
* Prefetch and forward already prefetched
* packets.
*/
- for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ for (j = 0; j < (nb_rx - prefetch_offset); j++) {
rte_prefetch0(rte_pktmbuf_mtod(events[
- j + PREFETCH_OFFSET]->mbuf, void *));
+ j + prefetch_offset]->mbuf, void *));
l3fwd_em_simple_process(events[j]->mbuf, qconf);
}
@@ -188,15 +188,15 @@ l3fwd_em_no_opt_process_event_vector(struct rte_event_vector *vec,
int32_t i;
/* Prefetch first packets */
- for (i = 0; i < PREFETCH_OFFSET && i < vec->nb_elem; i++)
+ for (i = 0; i < prefetch_offset && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
/*
* Prefetch and forward already prefetched packets.
*/
- for (i = 0; i < (vec->nb_elem - PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (vec->nb_elem - prefetch_offset); i++) {
rte_prefetch0(
- rte_pktmbuf_mtod(mbufs[i + PREFETCH_OFFSET], void *));
+ rte_pktmbuf_mtod(mbufs[i + prefetch_offset], void *));
dst_ports[i] = l3fwd_em_simple_process(mbufs[i], qconf);
}
@@ -190,7 +190,7 @@ l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
*/
int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
- for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
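+	/* Prefetch first packets */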
+ for (j = 0; j < prefetch_offset && j < nb_rx; j++) {
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
struct rte_ether_hdr *) + 1);
}
@@ -207,7 +207,7 @@ l3fwd_em_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
l3_type = pkt_type & RTE_PTYPE_L3_MASK;
tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
+ for (i = 0, pos = j + prefetch_offset;
i < EM_HASH_LOOKUP_COUNT && pos < nb_rx; i++, pos++) {
rte_prefetch0(rte_pktmbuf_mtod(
pkts_burst[pos],
@@ -277,6 +277,9 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
for (j = 0; j < nb_rx; j++)
pkts_burst[j] = ev[j]->mbuf;
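+	/* Prefetch first packets */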
+ for (i = 0; i < prefetch_offset && i < nb_rx; i++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], struct rte_ether_hdr *) + 1);
+
for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
uint32_t pkt_type = RTE_PTYPE_L3_MASK |
@@ -289,7 +292,7 @@ l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
l3_type = pkt_type & RTE_PTYPE_L3_MASK;
tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
- for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
+ for (i = 0, pos = j + prefetch_offset;
i < EM_HASH_LOOKUP_COUNT && pos < nb_rx; i++, pos++) {
rte_prefetch0(rte_pktmbuf_mtod(
pkts_burst[pos],
@@ -81,20 +81,19 @@ l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
int32_t i, j;
uint16_t dst_port[SENDM_PORT_OVERHEAD(MAX_PKT_BURST)];
- if (nb_rx > 0) {
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[0],
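+	/* Prefetch first packets */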
+ for (i = 0; i < prefetch_offset && i < nb_rx; i++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
struct rte_ether_hdr *) + 1);
- }
- for (i = 1, j = 0; j < nb_rx; i++, j++) {
- if (i < nb_rx) {
- rte_prefetch0(rte_pktmbuf_mtod(
- pkts_burst[i],
- struct rte_ether_hdr *) + 1);
- }
+ for (j = 0; j < nb_rx - prefetch_offset; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j + prefetch_offset],
+ struct rte_ether_hdr *) + 1);
dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
}
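+
+	/* Look up destinations for the remaining packets, already prefetched above. */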
+ for (; j < nb_rx; j++)
+ dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+
send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
}
@@ -106,20 +105,26 @@ static inline void
l3fwd_em_process_events(int nb_rx, struct rte_event **events,
struct lcore_conf *qconf)
{
+ struct rte_mbuf *mbuf;
+ uint16_t port;
int32_t i, j;
- rte_prefetch0(rte_pktmbuf_mtod(events[0]->mbuf,
- struct rte_ether_hdr *) + 1);
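+	/* Prefetch first packets */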
+ for (i = 0; i < prefetch_offset && i < nb_rx; i++)
+ rte_prefetch0(rte_pktmbuf_mtod(events[i]->mbuf, struct rte_ether_hdr *) + 1);
- for (i = 1, j = 0; j < nb_rx; i++, j++) {
- struct rte_mbuf *mbuf = events[j]->mbuf;
- uint16_t port;
+ for (j = 0; j < nb_rx - prefetch_offset; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(events[j + prefetch_offset]->mbuf,
+ struct rte_ether_hdr *) + 1);
+ mbuf = events[j]->mbuf;
+ port = mbuf->port;
+ mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
+ process_packet(mbuf, &mbuf->port);
+ if (mbuf->port == BAD_PORT)
+ mbuf->port = port;
+ }
- if (i < nb_rx) {
- rte_prefetch0(rte_pktmbuf_mtod(
- events[i]->mbuf,
- struct rte_ether_hdr *) + 1);
- }
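+
+	/* Forward the remaining events; their mbufs were prefetched above. */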
+ for (; j < nb_rx; j++) {
+ mbuf = events[j]->mbuf;
port = mbuf->port;
mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
process_packet(mbuf, &mbuf->port);
@@ -136,17 +141,22 @@ l3fwd_em_process_event_vector(struct rte_event_vector *vec,
struct rte_mbuf **mbufs = vec->mbufs;
int32_t i, j;
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[0], struct rte_ether_hdr *) + 1);
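+	/* Prefetch first packets */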
+ for (i = 0; i < prefetch_offset && i < vec->nb_elem; i++)
+ rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], struct rte_ether_hdr *) + 1);
- for (i = 0, j = 1; i < vec->nb_elem; i++, j++) {
- if (j < vec->nb_elem)
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[j],
- struct rte_ether_hdr *) +
- 1);
+ for (i = 0; i < vec->nb_elem - prefetch_offset; i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(mbufs[i + prefetch_offset],
+ struct rte_ether_hdr *) + 1);
dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
attr_valid ? vec->port :
mbufs[i]->port);
}
+
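+	/* Look up destinations for the remaining, already prefetched mbufs. */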
+ for (; i < vec->nb_elem; i++)
+ dst_ports[i] = em_get_dst_port(qconf, mbufs[i],
+ attr_valid ? vec->port :
+ mbufs[i]->port);
+
j = RTE_ALIGN_FLOOR(vec->nb_elem, FWDSTEP);
for (i = 0; i != j; i += FWDSTEP)
@@ -24,9 +24,6 @@
#include "l3fwd_event.h"
#include "l3fwd_route.h"
-/* Configure how many packets ahead to prefetch for fib. */
-#define FIB_PREFETCH_OFFSET 4
-
/* A non-existent portid is needed to denote a default hop for fib. */
#define FIB_DEFAULT_HOP 999
@@ -130,14 +127,14 @@ fib_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
int32_t i;
/* Prefetch first packets. */
- for (i = 0; i < FIB_PREFETCH_OFFSET && i < nb_rx; i++)
+ for (i = 0; i < prefetch_offset && i < nb_rx; i++)
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *));
/* Parse packet info and prefetch. */
- for (i = 0; i < (nb_rx - FIB_PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (nb_rx - prefetch_offset); i++) {
/* Prefetch packet. */
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
- i + FIB_PREFETCH_OFFSET], void *));
+ i + prefetch_offset], void *));
fib_parse_packet(pkts_burst[i],
&ipv4_arr[ipv4_cnt], &ipv4_cnt,
&ipv6_arr[ipv6_cnt], &ipv6_cnt,
@@ -302,11 +299,11 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
ipv6_arr_assem = 0;
/* Prefetch first packets. */
- for (i = 0; i < FIB_PREFETCH_OFFSET && i < nb_deq; i++)
+ for (i = 0; i < prefetch_offset && i < nb_deq; i++)
rte_prefetch0(rte_pktmbuf_mtod(events[i].mbuf, void *));
/* Parse packet info and prefetch. */
- for (i = 0; i < (nb_deq - FIB_PREFETCH_OFFSET); i++) {
+ for (i = 0; i < (nb_deq - prefetch_offset); i++) {
if (flags & L3FWD_EVENT_TX_ENQ) {
events[i].queue_id = tx_q_id;
events[i].op = RTE_EVENT_OP_FORWARD;
@@ -318,7 +315,7 @@ fib_event_loop(struct l3fwd_event_resources *evt_rsrc,
/* Prefetch packet. */
rte_prefetch0(rte_pktmbuf_mtod(events[
- i + FIB_PREFETCH_OFFSET].mbuf,
+ i + prefetch_offset].mbuf,
void *));
fib_parse_packet(events[i].mbuf,
@@ -455,12 +452,12 @@ fib_process_event_vector(struct rte_event_vector *vec, uint8_t *type_arr,
ipv6_arr_assem = 0;
/* Prefetch first packets. */
- for (i = 0; i < FIB_PREFETCH_OFFSET && i < vec->nb_elem; i++)
+ for (i = 0; i < prefetch_offset && i < vec->nb_elem; i++)
rte_prefetch0(rte_pktmbuf_mtod(mbufs[i], void *));
/* Parse packet info and prefetch. */
- for (i = 0; i < (vec->nb_elem - FIB_PREFETCH_OFFSET); i++) {
- rte_prefetch0(rte_pktmbuf_mtod(mbufs[i + FIB_PREFETCH_OFFSET],
+ for (i = 0; i < (vec->nb_elem - prefetch_offset); i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(mbufs[i + prefetch_offset],
void *));
fib_parse_packet(mbufs[i], &ipv4_arr[ipv4_cnt], &ipv4_cnt,
&ipv6_arr[ipv6_cnt], &ipv6_cnt, &type_arr[i]);
@@ -82,13 +82,13 @@ l3fwd_lpm_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
int32_t j;
/* Prefetch first packets */
- for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ for (j = 0; j < prefetch_offset && j < nb_rx; j++)
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
/* Prefetch and forward already prefetched packets. */
- for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ for (j = 0; j < (nb_rx - prefetch_offset); j++) {
rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
- j + PREFETCH_OFFSET], void *));
+ j + prefetch_offset], void *));
l3fwd_lpm_simple_forward(pkts_burst[j], portid, qconf);
}
@@ -85,23 +85,20 @@ l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
uint16_t portid, uint16_t *dst_port,
struct lcore_conf *qconf, const uint8_t do_step3)
{
- int32_t i = 0, j = 0;
+ int32_t i = 0, j = 0, pos = 0;
int32x4_t dip;
uint32_t ipv4_flag;
const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
const int32_t m = nb_rx % FWDSTEP;
if (k) {
- for (i = 0; i < FWDSTEP; i++) {
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
- void *));
- }
- for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
- for (i = 0; i < FWDSTEP; i++) {
- rte_prefetch0(rte_pktmbuf_mtod(
- pkts_burst[j + i + FWDSTEP],
- void *));
- }
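+		/* Prefetch first packets */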
+ for (i = 0; i < prefetch_offset && i < k; i++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i], void *));
+
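+		/* Prefetch ahead while classifying each group of FWDSTEP packets. */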
+ for (j = 0; j != k; j += FWDSTEP) {
+ for (i = 0, pos = j + prefetch_offset;
+ i < FWDSTEP && pos < k; i++, pos++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[pos], void *));
processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
processx4_step2(qconf, dip, ipv4_flag, portid,
@@ -109,35 +106,9 @@ l3fwd_lpm_process_packets(int nb_rx, struct rte_mbuf **pkts_burst,
if (do_step3)
processx4_step3(&pkts_burst[j], &dst_port[j]);
}
-
- processx4_step1(&pkts_burst[j], &dip, &ipv4_flag);
- processx4_step2(qconf, dip, ipv4_flag, portid, &pkts_burst[j],
- &dst_port[j]);
- if (do_step3)
- processx4_step3(&pkts_burst[j], &dst_port[j]);
-
- j += FWDSTEP;
}
if (m) {
- /* Prefetch last up to 3 packets one by one */
- switch (m) {
- case 3:
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- void *));
- j++;
- /* fallthrough */
- case 2:
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- void *));
- j++;
- /* fallthrough */
- case 1:
- rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
- void *));
- j++;
- }
- j -= m;
/* Classify last up to 3 packets one by one */
switch (m) {
case 3:
@@ -59,6 +59,7 @@ uint16_t nb_rxd = RX_DESC_DEFAULT;
uint16_t nb_txd = TX_DESC_DEFAULT;
uint32_t nb_pkt_per_burst = DEFAULT_PKT_BURST;
uint32_t mb_mempool_cache_size = MEMPOOL_CACHE_SIZE;
+uint16_t prefetch_offset = DEFAULT_PREFETCH_OFFSET;
/**< Ports set in promiscuous mode off by default. */
static int promiscuous_on;
@@ -769,6 +770,7 @@ static const char short_options[] =
#define CMD_LINE_OPT_ALG "alg"
#define CMD_LINE_OPT_PKT_BURST "burst"
#define CMD_LINE_OPT_MB_CACHE_SIZE "mbcache"
+#define CMD_LINE_OPT_PREFETCH_OFFSET "prefetch-offset"
enum {
/* long options mapped to a short option */
@@ -800,6 +802,7 @@ enum {
CMD_LINE_OPT_VECTOR_TMO_NS_NUM,
CMD_LINE_OPT_PKT_BURST_NUM,
CMD_LINE_OPT_MB_CACHE_SIZE_NUM,
+	CMD_LINE_OPT_PREFETCH_OFFSET_NUM,
};
static const struct option lgopts[] = {
@@ -828,6 +831,7 @@ static const struct option lgopts[] = {
{CMD_LINE_OPT_ALG, 1, 0, CMD_LINE_OPT_ALG_NUM},
{CMD_LINE_OPT_PKT_BURST, 1, 0, CMD_LINE_OPT_PKT_BURST_NUM},
{CMD_LINE_OPT_MB_CACHE_SIZE, 1, 0, CMD_LINE_OPT_MB_CACHE_SIZE_NUM},
+	{CMD_LINE_OPT_PREFETCH_OFFSET, 1, 0, CMD_LINE_OPT_PREFETCH_OFFSET_NUM},
{NULL, 0, 0, 0}
};
@@ -1017,6 +1021,9 @@ parse_args(int argc, char **argv)
case CMD_LINE_OPT_ALG_NUM:
l3fwd_set_alg(optarg);
break;
+		case CMD_LINE_OPT_PREFETCH_OFFSET_NUM:
+ prefetch_offset = strtol(optarg, NULL, 10);
+ break;
default:
print_usage(prgname);
return -1;
@@ -1054,6 +1061,13 @@ parse_args(int argc, char **argv)
}
#endif
+	if (prefetch_offset > nb_pkt_per_burst) {
+		fprintf(stderr, "Prefetch offset (%u) cannot exceed burst size (%u); "
+			"clamping to burst size.\n",
+			prefetch_offset, nb_pkt_per_burst);
+		prefetch_offset = nb_pkt_per_burst;
+	}
+
/*
* Nothing is selected, pick longest-prefix match
* as default match.