diff mbox

[dpdk-dev,RFC,1/2] testpmd: add portfwd engine

Message ID 1461192195-104070-2-git-send-email-zhihong.wang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show

Commit Message

Zhihong Wang April 20, 2016, 10:43 p.m. UTC
This patch implements a general-purpose forwarding engine in testpmd, named
"portfwd", to enable performance analysis and tuning of poll mode drivers
in vSwitching scenarios.

Features of portfwd:

   1) Build up traffic from simple rx/tx to complex scenarios easily

   2) Rich performance statistics for all ports

   3) Core affinity manipulation

   4) Commands for run time configuration

To enable flexible traffic flow setup, each port has 2 ways to forward
packets in portfwd:

   1) Forward based on dst ip

      For ip based forwarding, portfwd scans each packet to get the dst ip
      for dst port mapping.

      A simple suffix mapping method is used for dst ip based forwarding:
      the macro IPV4_ROUTE_MASK selects the low-order bits of the dst ip
      that are used for hashing.

      It is recommended to avoid conflicts by choosing a proper
      IPV4_ROUTE_MASK and/or giving each port a different ip suffix;
      otherwise performance may suffer.

   2) Forward to a fixed port

      For fixed port forwarding, portfwd still scans each packet on purpose
      to simulate the impact of packet analysis behavior in real scenarios.

After dst ports are identified, packets are enqueued to a buffer which is
sent as a burst when full. Packet buffers are built at each src port, so
there is no contention at the enqueue stage.

There is a timeout interval to drain all buffers, which can be configured
or disabled.

A spinlock is used at each dst port & queue to resolve conflicts.

Note that portfwd has fair performance, but it is not meant for getting the
"maximum" numbers:

   1) It buffers packets for burst send efficiency analysis, which increases
   latency

   2) It touches the packet header and collects performance statistics, which
   adds overhead

These "extra" overheads are actually what happens in real applications.

Modifications are:
   1) Add the portfwd engine in portfwd.c
   2) Add related data structures
   3) Add support functions


Signed-off-by: Zhihong Wang <zhihong.wang@intel.com>
---
 app/test-pmd/Makefile  |   1 +
 app/test-pmd/config.c  | 408 ++++++++++++++++++++++++++++++++++++++++++++++-
 app/test-pmd/portfwd.c | 418 +++++++++++++++++++++++++++++++++++++++++++++++++
 app/test-pmd/testpmd.c |  19 +++
 app/test-pmd/testpmd.h |  62 ++++++++
 5 files changed, 900 insertions(+), 8 deletions(-)
 create mode 100644 app/test-pmd/portfwd.c
diff mbox

Patch

diff --git a/app/test-pmd/Makefile b/app/test-pmd/Makefile
index 72426f3..0352feb 100644
--- a/app/test-pmd/Makefile
+++ b/app/test-pmd/Makefile
@@ -49,6 +49,7 @@  SRCS-y += parameters.c
 SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline.c
 SRCS-y += config.c
 SRCS-y += iofwd.c
+SRCS-y += portfwd.c
 SRCS-y += macfwd.c
 SRCS-y += macfwd-retry.c
 SRCS-y += macswap.c
diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
index b1bbec6..9754229 100644
--- a/app/test-pmd/config.c
+++ b/app/test-pmd/config.c
@@ -92,6 +92,8 @@ 
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_string_fns.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
 
 #include "testpmd.h"
 
@@ -150,6 +152,11 @@  print_ethaddr(const char *name, struct ether_addr *eth_addr)
 void
 nic_stats_display(portid_t port_id)
 {
+	static uint64_t cnt_rx[RTE_MAX_ETHPORTS];
+	static uint64_t cnt_tx[RTE_MAX_ETHPORTS];
+	static uint64_t cycle[RTE_MAX_ETHPORTS];
+	uint64_t crx, ctx, c;
+
 	struct rte_eth_stats stats;
 	struct rte_port *port = &ports[port_id];
 	uint8_t i;
@@ -209,6 +216,20 @@  nic_stats_display(portid_t port_id)
 		}
 	}
 
+	c = cycle[port_id];
+	cycle[port_id] = rte_rdtsc();
+	if (c > 0)
+		c = cycle[port_id] - c;
+
+	crx = stats.ipackets - cnt_rx[port_id];
+	ctx = stats.opackets - cnt_tx[port_id];
+	cnt_rx[port_id] = stats.ipackets;
+	cnt_tx[port_id] = stats.opackets;
+	printf("  Throughput (since last show):\n");
+	printf("  RX PPS: %12"PRIu64"\n  TX PPS: %12"PRIu64"\n",
+			c > 0 ? crx * rte_get_tsc_hz() / c : 0,
+			c > 0 ? ctx * rte_get_tsc_hz() / c : 0);
+
 	printf("  %s############################%s\n",
 	       nic_stats_border, nic_stats_border);
 }
@@ -1087,6 +1108,178 @@  setup_fwd_config_of_each_lcore(struct fwd_config *cfg)
 }
 
 static void
+copy_fwd_stream(struct fwd_stream *src, struct fwd_stream *dst)
+{
+	rte_memcpy(dst, src, sizeof(struct fwd_stream));
+}
+
+/*
+ * Move forwarding stream "idx" to the forwarding lcore running on CPU
+ * "core". Only acts when the portfwd engine is selected and forwarding
+ * is stopped.
+ *
+ * The stream table is rebuilt so that the streams of each lcore stay
+ * contiguous: every lcore gets a fresh stream_idx, the lcore that owned
+ * the stream loses one entry and the target lcore gains it as its last
+ * stream.
+ *
+ * Returns 0 when the stream was moved or nothing had to be done (other
+ * engine active, forwarding still running, stream already on that core)
+ * and -1 when the core or the stream cannot be found. The application
+ * exits if the new table cannot be allocated.
+ */
+int
+set_fwd_stream_affinity(unsigned int idx, unsigned int core)
+{
+	struct fwd_stream **fwd_streams_tmp;
+	struct fwd_stream *fs;
+	unsigned int nb_active = cur_fwd_config.nb_fwd_streams;
+	unsigned int nb_alloc;
+	unsigned int lc_id_dst = 0;
+	unsigned int lc_id_src = 0;
+	unsigned int fs_id = 0;
+	unsigned int i, j, ci, cj;
+
+	if (cur_fwd_eng != &port_fwd_engine)
+		return 0;
+	if (test_done == 0) {
+		printf("please stop forwarding first\n");
+		return 0;
+	}
+
+	/* forwarding lcore that runs on the requested CPU */
+	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
+		if (fwd_lcores_cpuids[i] == core) {
+			lc_id_dst = i;
+			break;
+		}
+	}
+	if (i >= cur_fwd_config.nb_fwd_lcores)
+		return -1;
+
+	/* position of the stream inside the stream table */
+	for (i = 0; i < nb_active; i++) {
+		if (fwd_streams[i]->idx == idx) {
+			fs_id = i;
+			break;
+		}
+	}
+	if (i >= nb_active)
+		return -1;
+
+	/* forwarding lcore that currently owns the stream */
+	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
+		for (j = 0; j < fwd_lcores[i]->stream_nb; j++) {
+			fs = fwd_streams[fwd_lcores[i]->stream_idx + j];
+			if (idx == fs->idx) {
+				lc_id_src = i;
+				break;
+			}
+		}
+		if (j < fwd_lcores[i]->stream_nb)
+			break;
+	}
+	if (i >= cur_fwd_config.nb_fwd_lcores)
+		return -1;
+	if (lc_id_src == lc_id_dst)
+		return 0;
+
+	/*
+	 * The old table is released below with nb_fwd_streams entries and
+	 * other code (e.g. add_port_addr()) keeps indexing fwd_streams with
+	 * that count, so the replacement must not be shorter than the table
+	 * it replaces.
+	 */
+	nb_alloc = nb_fwd_streams > nb_active ? nb_fwd_streams : nb_active;
+	fwd_streams_tmp = rte_zmalloc("testpmd: fwd_streams_tmp",
+		sizeof(struct fwd_stream *) * nb_alloc,
+		RTE_CACHE_LINE_SIZE);
+	if (fwd_streams_tmp == NULL)
+		rte_exit(EXIT_FAILURE,
+				"rte_zmalloc(%u (struct fwd_stream *)) "
+				"failed\n", nb_alloc);
+
+	for (i = 0; i < nb_alloc; i++) {
+		fwd_streams_tmp[i] =
+			rte_zmalloc("testpmd: struct fwd_stream",
+				sizeof(struct fwd_stream),
+				RTE_CACHE_LINE_SIZE);
+		if (fwd_streams_tmp[i] == NULL)
+			rte_exit(EXIT_FAILURE,
+					"rte_zmalloc(struct fwd_stream)"
+					" failed\n");
+	}
+
+	/*
+	 * Walk the old table in order (ci) and fill the new one (cj). The
+	 * moved stream is skipped while copying its old lcore and appended
+	 * after the streams of its new lcore.
+	 */
+	ci = 0;
+	cj = 0;
+	for (i = 0; i < cur_fwd_config.nb_fwd_lcores; i++) {
+		unsigned int nb = fwd_lcores[i]->stream_nb;
+
+		fwd_lcores[i]->stream_idx = cj;
+		for (j = 0; j < nb; j++, ci++) {
+			if (i == lc_id_src && ci == fs_id)
+				continue;
+			copy_fwd_stream(fwd_streams[ci],
+					fwd_streams_tmp[cj]);
+			cj++;
+		}
+		if (i == lc_id_src) {
+			fwd_lcores[i]->stream_nb -= 1;
+		} else if (i == lc_id_dst) {
+			copy_fwd_stream(fwd_streams[fs_id],
+					fwd_streams_tmp[cj]);
+			cj++;
+			fwd_lcores[i]->stream_nb += 1;
+		}
+	}
+
+	/* streams that are not part of the current config keep their slot */
+	for (i = nb_active; i < nb_fwd_streams; i++) {
+		if (fwd_streams[i] != NULL)
+			copy_fwd_stream(fwd_streams[i], fwd_streams_tmp[i]);
+	}
+
+	/* rte_free() ignores NULL, so no per-entry check is needed */
+	for (i = 0; i < nb_fwd_streams; i++)
+		rte_free(fwd_streams[i]);
+	rte_free(fwd_streams);
+	fwd_streams = fwd_streams_tmp;
+
+	return 0;
+}
+
+/*
+ * Build the forwarding configuration of the portfwd engine.
+ *
+ * Runs only once per engine selection (guarded by fwd_config_init). One
+ * stream is created per (forwarding port, rx queue) pair; the number of
+ * forwarding lcores is capped to the number of streams. Every stream gets
+ * its table position as idx; tx_port/tx_queue are set to 0 here. The
+ * performance counters are cleared at the end.
+ */
+static void
+port_fwd_config_setup(void)
+{
+	portid_t   rxp;
+	queueid_t  rxq;
+	/*
+	 * Stream index: uses the stream id type so that it can always reach
+	 * cur_fwd_config.nb_fwd_streams.
+	 * NOTE(review): lcoreid_t looks narrower than streamid_t (see the
+	 * casts below) - confirm against the typedefs in testpmd.h.
+	 */
+	streamid_t sm_id;
+
+	if (fwd_config_init == 1)
+		return;
+
+	fwd_config_init = 1;
+	cur_fwd_config.nb_fwd_lcores = (lcoreid_t) nb_fwd_lcores;
+	cur_fwd_config.nb_fwd_ports = nb_fwd_ports;
+	cur_fwd_config.nb_fwd_streams =
+		(streamid_t) (nb_rxq * nb_fwd_ports);
+	if (cur_fwd_config.nb_fwd_lcores > cur_fwd_config.nb_fwd_streams)
+		cur_fwd_config.nb_fwd_lcores =
+			(lcoreid_t)cur_fwd_config.nb_fwd_streams;
+	init_fwd_streams();
+	setup_fwd_config_of_each_lcore(&cur_fwd_config);
+
+	/* forwarding port list becomes the identity mapping */
+	for (rxp = 0; rxp < nb_ports; rxp++)
+		fwd_ports_ids[rxp] = rxp;
+
+	rxp = 0;
+	rxq = 0;
+	for (sm_id = 0; sm_id < cur_fwd_config.nb_fwd_streams; sm_id++) {
+		struct fwd_stream *fs = fwd_streams[sm_id];
+
+		fs->idx = sm_id;
+		fs->rx_port = fwd_ports_ids[rxp];
+		fs->rx_queue = rxq;
+		fs->tx_port = 0;
+		fs->tx_queue = 0;
+		fs->peer_addr = fs->tx_port;
+
+		/* next rx queue of this port, or first queue of next port */
+		rxq = (queueid_t) (rxq + 1);
+		if (rxq < nb_rxq)
+			continue;
+		rxq = 0;
+		if (numa_support && (nb_fwd_ports <= (nb_ports >> 1)))
+			rxp = (portid_t) (rxp +
+					((nb_ports >> 1) / nb_fwd_ports));
+		else
+			rxp = (portid_t) (rxp + 1);
+	}
+	clear_perf_stats();
+}
+
+static void
 simple_fwd_config_setup(void)
 {
 	portid_t i;
@@ -1371,14 +1564,17 @@  fwd_config_setup(void)
 		icmp_echo_config_setup();
 		return;
 	}
-	if ((nb_rxq > 1) && (nb_txq > 1)){
-		if (dcb_config)
-			dcb_fwd_config_setup();
-		else
-			rss_fwd_config_setup();
+	if (cur_fwd_eng == &port_fwd_engine)
+		port_fwd_config_setup();
+	else {
+		if ((nb_rxq > 1) && (nb_txq > 1)) {
+			if (dcb_config)
+				dcb_fwd_config_setup();
+			else
+				rss_fwd_config_setup();
+		} else
+			simple_fwd_config_setup();
 	}
-	else
-		simple_fwd_config_setup();
 }
 
 static void
@@ -1406,8 +1602,9 @@  pkt_fwd_config_display(struct fwd_config *cfg)
 		       fwd_lcores[lc_id]->stream_nb);
 		for (sm_id = 0; sm_id < fwd_lcores[lc_id]->stream_nb; sm_id++) {
 			fs = fwd_streams[fwd_lcores[lc_id]->stream_idx + sm_id];
-			printf("\n  RX P=%d/Q=%d (socket %u) -> TX "
+			printf("\n  %2u: RX P=%d/Q=%d (socket %u) -> TX "
 			       "P=%d/Q=%d (socket %u) ",
+				   fs->idx,
 			       fs->rx_port, fs->rx_queue,
 			       ports[fs->rx_port].socket_id,
 			       fs->tx_port, fs->tx_queue,
@@ -1688,12 +1885,17 @@  set_pkt_forwarding_mode(const char *fwd_mode_name)
 	struct fwd_engine *fwd_eng;
 	unsigned i;
 
+	if (test_done == 0) {
+		printf("Please stop forwarding first\n");
+		return;
+	}
 	i = 0;
 	while ((fwd_eng = fwd_engines[i]) != NULL) {
 		if (! strcmp(fwd_eng->fwd_mode_name, fwd_mode_name)) {
 			printf("Set %s packet forwarding mode\n",
 			       fwd_mode_name);
 			cur_fwd_eng = fwd_eng;
+			fwd_config_init = 0;
 			return;
 		}
 		i++;
@@ -2479,3 +2681,193 @@  port_dcb_info_display(uint8_t port_id)
 		printf("\t%4d", dcb_info.tc_queue.tc_txq[0][i].nb_queue);
 	printf("\n");
 }
+
+/*
+ * Route every packet received on port srcp to the fixed port dstp.
+ * Both ports must be below nb_fwd_ports; each invalid port is reported
+ * and the route table is left untouched if either one is invalid.
+ * A warning is printed when portfwd is not the current engine.
+ */
+void set_fixed_route(uint16_t srcp, uint16_t dstp)
+{
+	int src_ok = srcp < nb_fwd_ports;
+	int dst_ok = dstp < nb_fwd_ports;
+
+	if (cur_fwd_eng != &port_fwd_engine)
+		printf("warning: current forward engine is not portfwd!\n");
+	if (!src_ok)
+		printf("error: invalid srcport!\n");
+	if (!dst_ok)
+		printf("error: invalid dstport!\n");
+	if (src_ok && dst_ok)
+		route_table[srcp] = dstp;
+}
+
+/*
+ * Switch port srcp to dst-ip based routing by storing PORT_ROUTE_IP in
+ * its route table entry. An out-of-range port is reported and ignored.
+ * A warning is printed when portfwd is not the current engine.
+ */
+void set_ip_route(uint16_t srcp)
+{
+	if (cur_fwd_eng != &port_fwd_engine)
+		printf("warning: current forward engine is not portfwd!\n");
+	if (srcp < nb_fwd_ports)
+		route_table[srcp] = PORT_ROUTE_IP;
+	else
+		printf("error: invalid srcport!\n");
+}
+
+/*
+ * Print the route table: one line per forwarding port showing either the
+ * fixed destination port or "ip" for dst-ip based routing. Warnings are
+ * printed first when portfwd is not the current engine or when the ipv4
+ * suffix route is not available.
+ */
+void show_route(void)
+{
+	portid_t port;
+
+	if (cur_fwd_eng != &port_fwd_engine)
+		printf("warning: current forward engine is not portfwd!\n");
+	if (ipv4_route_available != 1) {
+		printf("warning: ipv4 route not available!\n");
+		printf("         try increase IPV4_ROUTE_MASK\n");
+		printf("         and/or set different ip ends\n");
+	}
+	printf("route table:\n");
+	printf(" srcport  dstport\n");
+	for (port = 0; port < nb_fwd_ports; ++port) {
+		if (route_table[port] != PORT_ROUTE_IP) {
+			printf("%8d %8d\n", port, route_table[port]);
+			continue;
+		}
+		printf("%8d       ip\n", port);
+	}
+}
+
+/*
+ * Initialize the spinlock of every (port, tx queue) slot of txq_lock.
+ */
+void
+init_txq_lock(void)
+{
+	uint16_t port, queue;
+
+	for (port = 0; port < RTE_MAX_ETHPORTS; ++port) {
+		for (queue = 0; queue < MAX_TX_QUEUE; ++queue) {
+			struct queue_lock *ql = &txq_lock[port][queue];
+
+			rte_spinlock_init(&ql->spinlock);
+		}
+	}
+}
+
+/*
+ * Rebuild the ip-suffix lookup table ipv4_route from ipv4_table.
+ *
+ * Each forwarding port claims the slot selected by the masked low bits
+ * of its address. If two ports select the same slot the build stops,
+ * ipv4_route_available is cleared and a warning is printed. On success
+ * ipv4_route_available is set and every unclaimed slot points to port 0.
+ */
+void build_ipv4_route(void)
+{
+	uint16_t port, slot;
+
+	printf("--------------------------------\n");
+	printf("building ipv4 route table...\n");
+	printf("ipv4 route mask: 0x%x\n", IPV4_ROUTE_MASK);
+
+	/* MAX_IP_DIFF marks a slot that no port has claimed yet */
+	for (slot = 0; slot < MAX_IP_DIFF; ++slot)
+		ipv4_route[slot] = MAX_IP_DIFF;
+
+	for (port = 0; port < nb_fwd_ports; ++port) {
+		slot = ipv4_table[port] & IPV4_ROUTE_MASK;
+		if (ipv4_route[slot] >= MAX_IP_DIFF) {
+			ipv4_route[slot] = port;
+			printf("route: suffix: %d -> port: %d\n", slot, port);
+			continue;
+		}
+		/* slot already taken by another port */
+		ipv4_route_available = 0;
+		printf("warning: ipv4 route failed\n");
+		printf("         try increase IPV4_ROUTE_MASK\n");
+		printf("         and/or set different ip ends\n");
+		printf("--------------------------------\n");
+		return;
+	}
+
+	ipv4_route_available = 1;
+	for (slot = 0; slot < MAX_IP_DIFF; ++slot) {
+		if (ipv4_route[slot] == MAX_IP_DIFF)
+			ipv4_route[slot] = 0;
+	}
+	printf("ipv4 route available\n");
+	printf("--------------------------------\n");
+}
+
+/*
+ * Set up the default portfwd state:
+ *  - rx_port of every existing stream is set to the stream's index,
+ *  - forwarding port i gets the address 192.168.1.(1 + i),
+ *  - the tx buffers of every (port, rx queue) pair are emptied and the
+ *    pair is assigned tx queue (port * nb_rxq + rxq) % nb_txq,
+ *  - every port is routed by dst ip,
+ *  - all drain timestamps are reset,
+ * and then rebuilds the ip-suffix route table.
+ *
+ * NOTE(review): fs_buf is indexed by rx queue up to nb_rxq without a
+ * check against MAX_RX_QUEUE - confirm nb_rxq is bounded by the caller.
+ */
+void add_port_addr(void)
+{
+	unsigned long i, j, k;
+
+	printf("--------------------------------\n");
+	printf("adding port addr...\n");
+	printf("port number: %d, rxq number: %d, txq number: %d\n",
+			nb_fwd_ports, nb_rxq, nb_txq);
+	for (i = 0; i < nb_fwd_streams; ++i)
+		fwd_streams[i]->rx_port = i;
+	for (i = 0; i < nb_fwd_ports; ++i) {
+		/*
+		 * assume less than 256 ports; unsigned constants are used
+		 * because 192 << 24 does not fit in a signed int
+		 */
+		ipv4_table[i] = (192U << 24) + (168U << 16) + (1U << 8) +
+				(uint32_t)(1 + i);
+		printf("port %lu: ipv4: %u.%u.%u.%u\n",
+				i,
+				(ipv4_table[i] & 0xFF000000) >> 24,
+				(ipv4_table[i] & 0x00FF0000) >> 16,
+				(ipv4_table[i] & 0x0000FF00) >> 8,
+				(ipv4_table[i] & 0x000000FF));
+		for (j = 0; j < nb_rxq; ++j) {
+			for (k = 0; k < nb_fwd_ports; ++k)
+				fs_buf[i][j].tx_mbufs[k].len = 0;
+			fs_buf[i][j].dst_queue = (i * nb_rxq + j) % nb_txq;
+			printf("port %lu: rxq: %lu -> txq: %d\n",
+					i, j, fs_buf[i][j].dst_queue);
+		}
+		route_table[i] = PORT_ROUTE_IP;
+	}
+	printf("--------------------------------\n");
+	for (i = 0; i < RTE_MAX_ETHPORTS; ++i)
+		for (j = 0; j < MAX_RX_QUEUE; ++j) {
+			drainer[i][j].cycle_last = 0;
+			drainer[i][j].cycle_now = 0;
+		}
+
+	build_ipv4_route();
+}
+
+/*
+ * Assign the address num0.num1.num2.num3 to port srcp, print it and
+ * rebuild the ip-suffix route table. The request is rejected with an
+ * error message when srcp is not a forwarding port or any address byte
+ * exceeds 255. A warning is printed when portfwd is not the current
+ * engine.
+ */
+void set_ip(uint32_t srcp, uint32_t num0, uint32_t num1,
+		uint32_t num2, uint32_t num3)
+{
+	uint32_t addr;
+
+	if (cur_fwd_eng != &port_fwd_engine)
+		printf("warning: current forward engine is not portfwd!\n");
+	if (srcp >= nb_fwd_ports) {
+		printf("error: invalid srcport!\n");
+		return;
+	}
+	/* a bit above the low byte in any field means that field is > 255 */
+	if ((num0 | num1 | num2 | num3) > 255) {
+		printf("error: invalid ip address!\n");
+		return;
+	}
+
+	addr = (num0 << 24) + (num1 << 16) + (num2 << 8) + num3;
+	ipv4_table[srcp] = addr;
+	printf("port %u: ipv4: %d.%d.%d.%d\n",
+			srcp,
+			(addr & 0xFF000000) >> 24,
+			(addr & 0x00FF0000) >> 16,
+			(addr & 0x0000FF00) >> 8,
+			(addr & 0x000000FF));
+
+	build_ipv4_route();
+}
+
+/*
+ * Set the buffer drain timeout. drain_ns is converted from nanoseconds
+ * to TSC cycles and stored in drain_cycle; a result of 0 cycles means
+ * draining is disabled. A warning is printed when portfwd is not the
+ * current engine.
+ */
+void set_drain_interval_ns(unsigned long drain_ns)
+{
+	if (cur_fwd_eng != &port_fwd_engine)
+		printf("warning: current forward engine is not portfwd!\n");
+
+	drain_cycle = rte_get_tsc_hz() / 1E9 * drain_ns;
+	if (drain_cycle == 0) {
+		printf("portfwd drain disabled\n");
+		return;
+	}
+	printf("portfwd drain interval: %lu ns, %lu cycles\n",
+			drain_ns, drain_cycle);
+}
+
+/*
+ * Print one line per port: port id, MAC address, the ipv4 address kept
+ * in ipv4_table and the driver name.
+ */
+void print_port_info(void)
+{
+	portid_t pid;
+	struct rte_port *port;
+	uint32_t ip;
+
+	printf("port  mac                ip               name\n");
+	FOREACH_PORT(pid, ports) {
+		port = &ports[pid];
+		ip = ipv4_table[pid];
+		printf("%4u"
+				"  %02x:%02x:%02x:%02x:%02x:%02x",
+				pid,
+				(unsigned int)port->eth_addr.addr_bytes[0],
+				(unsigned int)port->eth_addr.addr_bytes[1],
+				(unsigned int)port->eth_addr.addr_bytes[2],
+				(unsigned int)port->eth_addr.addr_bytes[3],
+				(unsigned int)port->eth_addr.addr_bytes[4],
+				(unsigned int)port->eth_addr.addr_bytes[5]);
+		printf("  %3d.%3d.%3d.%3d",
+				(ip & 0xFF000000) >> 24,
+				(ip & 0x00FF0000) >> 16,
+				(ip & 0x0000FF00) >> 8,
+				(ip & 0x000000FF));
+		printf("  %s\n", port->dev_info.driver_name);
+	}
+}
diff --git a/app/test-pmd/portfwd.c b/app/test-pmd/portfwd.c
new file mode 100644
index 0000000..52b0b95
--- /dev/null
+++ b/app/test-pmd/portfwd.c
@@ -0,0 +1,418 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <sys/stat.h>
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+#include <rte_ip.h>
+#include "testpmd.h"
+
+/* Set to 0 to compile out the cycle measurements around rx/tx bursts. */
+#define PORTFWD_PERF_STATS 1
+
+/*
+ * Burst statistics of one queue. Every array is indexed by the number of
+ * packets a burst call handled (0..DEF_PKT_BURST).
+ */
+struct op_stat {
+	uint64_t run[DEF_PKT_BURST+1];       /* number of bursts of that size */
+	uint64_t cycle[DEF_PKT_BURST+1];     /* summed cycles of those bursts */
+	uint64_t cycle_min[DEF_PKT_BURST+1]; /* fastest burst, in cycles */
+	uint64_t cycle_max[DEF_PKT_BURST+1]; /* slowest burst, in cycles */
+} __rte_cache_aligned;
+
+/* Statistics of one port: one op_stat per rx and per tx queue. */
+struct port_perf_stat {
+	struct op_stat rx_stat[MAX_RX_QUEUE];
+	struct op_stat tx_stat[MAX_TX_QUEUE];
+	uint64_t pkt_loss[MAX_TX_QUEUE]; /* packets a tx burst did not send */
+};
+
+struct port_perf_stat port_stat[RTE_MAX_ETHPORTS];
+/* Also used as the initial (largest) value of cycle_min on reset. */
+uint64_t print_stat_cycle = 1e10;
+uint64_t last_stat_cycle;
+
+/*
+ * Reset the burst statistics of every rx and tx queue in use on all
+ * forwarding ports: counters go back to 0, the minimum cycle count goes
+ * back to print_stat_cycle, and the per tx queue loss counters are
+ * cleared.
+ */
+inline void __attribute__((always_inline))
+clear_perf_stats(void)
+{
+	uint16_t port, queue, k;
+	struct port_perf_stat *ps;
+	struct op_stat *ops;
+
+	for (port = 0; port < nb_fwd_ports; ++port) {
+		ps = &port_stat[port];
+		for (queue = 0; queue < nb_rxq; ++queue) {
+			ops = &ps->rx_stat[queue];
+			memset(ops->run, 0, sizeof(ops->run));
+			memset(ops->cycle, 0, sizeof(ops->cycle));
+			memset(ops->cycle_max, 0, sizeof(ops->cycle_max));
+			for (k = 0; k <= DEF_PKT_BURST; ++k)
+				ops->cycle_min[k] = print_stat_cycle;
+		}
+		for (queue = 0; queue < nb_txq; ++queue) {
+			ops = &ps->tx_stat[queue];
+			memset(ops->run, 0, sizeof(ops->run));
+			memset(ops->cycle, 0, sizeof(ops->cycle));
+			memset(ops->cycle_max, 0, sizeof(ops->cycle_max));
+			for (k = 0; k <= DEF_PKT_BURST; ++k)
+				ops->cycle_min[k] = print_stat_cycle;
+			ps->pkt_loss[queue] = 0;
+		}
+	}
+}
+
+/*
+ * Print the burst statistics gathered since the previous call and reset
+ * them.
+ *
+ * For every forwarding port an rx table and a tx table are printed. A
+ * row is one burst size (0..DEF_PKT_BURST); each queue contributes four
+ * columns: number of bursts (run) and min/avg/max cycles per burst. The
+ * per tx queue packet loss counters follow. When PORTFWD_PERF_STATS is 0
+ * only a warning is printed.
+ */
+inline void __attribute__((always_inline))
+print_perf_stats(void)
+{
+#if PORTFWD_PERF_STATS
+	uint16_t i, j, k;
+	struct op_stat *ops;
+
+	printf("\ncycle stat (since last show)\n");
+	printf("--------------------------------\n");
+	for (i = 0; i < nb_fwd_ports; ++i) {
+		printf("port %3d, burst %3d,\n", i, DEF_PKT_BURST);
+		printf("          rx,");
+		/* one column group per rx queue, matching the rows below */
+		for (j = 0; j < nb_rxq; ++j)
+			printf("         run,         min,"
+				"         avg,         max,");
+		printf("\n");
+		for (k = 0; k <= DEF_PKT_BURST; ++k) {
+			printf("%12d,", k);
+			for (j = 0; j < nb_rxq; ++j) {
+				ops = &port_stat[i].rx_stat[j];
+				/* min/avg are only meaningful if a burst ran */
+				printf("%12"PRIu64",%12"PRIu64","
+					"%12"PRIu64",%12"PRIu64",",
+					ops->run[k],
+					ops->run[k] > 0 ?
+					ops->cycle_min[k] : 0,
+					ops->run[k] > 0 ?
+					ops->cycle[k] /
+					ops->run[k] : 0,
+					ops->cycle_max[k]);
+			}
+			printf("\n");
+		}
+		printf("          tx,");
+		for (j = 0; j < nb_txq; ++j)
+			printf("         run,         min,"
+				"         avg,         max,");
+		printf("\n");
+		for (k = 0; k <= DEF_PKT_BURST; ++k) {
+			printf("%12d,", k);
+			for (j = 0; j < nb_txq; ++j) {
+				ops = &port_stat[i].tx_stat[j];
+				printf("%12"PRIu64",%12"PRIu64","
+					"%12"PRIu64",%12"PRIu64",",
+					ops->run[k],
+					ops->run[k] > 0 ?
+					ops->cycle_min[k] : 0,
+					ops->run[k] > 0 ?
+					ops->cycle[k] /
+					ops->run[k] : 0,
+					ops->cycle_max[k]);
+			}
+			printf("\n");
+		}
+	}
+	printf("\ntx pktloss (since last show)\n");
+	printf("--------------------------------\n");
+	printf("         ");
+	for (i = 0; i < nb_txq; ++i)
+		printf("     txq %3d,", i);
+	printf("\n");
+	for (i = 0; i < nb_fwd_ports; ++i) {
+		printf("port %3d,", i);
+		for (j = 0; j < nb_txq; ++j)
+			printf("%12"PRIu64",", port_stat[i].pkt_loss[j]);
+		printf("\n");
+	}
+	printf("\n");
+	clear_perf_stats();
+#else
+	printf("warning: portfwd perf stats not enabled!\n");
+#endif
+}
+
+/*
+ * Pick the destination port of a packet received on port portid.
+ *
+ * A fixed route is returned as is. For dst-ip routing the destination
+ * address is read from the ipv4 header: with a valid suffix table the
+ * masked low bits select the candidate port, otherwise all forwarding
+ * ports are searched. If no port owns the address the packet goes back
+ * to portid.
+ */
+static inline uint32_t __attribute__((always_inline))
+get_ipv4_dst_port(void *ipv4_hdr,  uint16_t portid)
+{
+	uint32_t fixed;
+	uint32_t dst_ip;
+	uint32_t port;
+
+	fixed = route_table[portid];
+	if (fixed != PORT_ROUTE_IP)
+		return fixed;
+
+	dst_ip = rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr);
+	if (likely(ipv4_route_available == 1)) {
+		port = ipv4_route[dst_ip & IPV4_ROUTE_MASK];
+		return ipv4_table[port] == dst_ip ? port : portid;
+	}
+
+	/* no usable suffix table: linear search over the port addresses */
+	for (port = 0; port < nb_fwd_ports; ++port) {
+		if (ipv4_table[port] == dst_ip)
+			return port;
+	}
+	return portid;
+}
+
+/*
+ * Account one rx or tx burst on (port, queue).
+ *
+ * pkt is the number of packets the burst handled and selects the
+ * statistics bucket, t is the burst duration in cycles. pktloss is added
+ * to the queue's loss counter for tx bursts only. is_tx selects the tx
+ * (non-zero) or rx (zero) statistics.
+ */
+static inline void __attribute__((always_inline))
+log_stat(uint16_t port, uint16_t queue, uint64_t pkt,
+		uint64_t pktloss, uint64_t t, uint16_t is_tx)
+{
+	struct port_perf_stat *ps = &port_stat[port];
+	struct op_stat *ops;
+
+	if (is_tx) {
+		ps->pkt_loss[queue] += pktloss;
+		ops = &ps->tx_stat[queue];
+	} else {
+		ops = &ps->rx_stat[queue];
+	}
+
+	ops->run[pkt]++;
+	ops->cycle[pkt] += t;
+	if (t < ops->cycle_min[pkt])
+		ops->cycle_min[pkt] = t;
+	if (t > ops->cycle_max[pkt])
+		ops->cycle_max[pkt] = t;
+}
+
+/*
+ * Queue mbuf m, received on (src_port, src_queue), for transmission on
+ * dst_port.
+ *
+ * The packet is appended to the source queue's buffer for dst_port. Once
+ * the buffer holds DEF_PKT_BURST packets it is sent in one burst on the
+ * buffer's tx queue while holding that queue's spinlock; packets the
+ * driver did not accept are freed and the buffer is emptied.
+ *
+ * Returns the number of packets actually transmitted by this call (0 if
+ * the packet was only buffered).
+ */
+static inline uint32_t __attribute__((always_inline))
+send_single_packet(uint16_t dst_port, uint16_t src_port,
+		uint16_t src_queue, struct rte_mbuf *m)
+{
+	struct fwd_stream_buffer *fsb = &fs_buf[src_port][src_queue];
+	struct rte_mbuf **burst = fsb->tx_mbufs[dst_port].m_table;
+	uint32_t count = fsb->tx_mbufs[dst_port].len;
+	uint16_t txq;
+	uint32_t sent;
+	uint32_t i;
+#if PORTFWD_PERF_STATS
+	uint64_t t_start;
+#endif
+
+	burst[count] = m;
+	++count;
+	if (count != DEF_PKT_BURST) {
+		/* buffer not full yet: just remember the new fill level */
+		fsb->tx_mbufs[dst_port].len = count;
+		return 0;
+	}
+
+	txq = fsb->dst_queue;
+	rte_spinlock_lock(&(txq_lock[dst_port][txq].spinlock));
+#if PORTFWD_PERF_STATS
+	t_start = rte_rdtsc();
+#endif
+	sent = rte_eth_tx_burst(dst_port, txq, burst, DEF_PKT_BURST);
+#if PORTFWD_PERF_STATS
+	log_stat(dst_port, txq, sent, DEF_PKT_BURST - sent,
+			rte_rdtsc() - t_start, 1);
+#endif
+	rte_spinlock_unlock(&(txq_lock[dst_port][txq].spinlock));
+
+	/* drop whatever the tx queue refused */
+	for (i = sent; i < DEF_PKT_BURST; ++i)
+		rte_pktmbuf_free(burst[i]);
+
+	fsb->tx_mbufs[dst_port].len = 0;
+	return sent;
+}
+
+/*
+ * Flush every non-empty tx buffer of (src_port, src_queue).
+ *
+ * For each destination port with buffered packets, the packets are sent
+ * in one burst on the buffer's tx queue under that queue's spinlock;
+ * packets the driver did not accept are freed and the buffer is emptied.
+ *
+ * Returns the total number of packets transmitted.
+ */
+static inline uint32_t __attribute__((always_inline))
+drain_packets(uint16_t src_port, uint16_t src_queue)
+{
+	struct fwd_stream_buffer *fsb = &fs_buf[src_port][src_queue];
+	uint16_t txq = fsb->dst_queue;
+	struct mbuf_table *tbl;
+	uint16_t dst_port;
+	uint16_t pending;
+	uint16_t sent;
+	uint16_t i;
+	uint32_t total = 0;
+#if PORTFWD_PERF_STATS
+	uint64_t t_start;
+#endif
+
+	for (dst_port = 0; dst_port < nb_fwd_ports; ++dst_port) {
+		tbl = &fsb->tx_mbufs[dst_port];
+		pending = tbl->len;
+		if (pending == 0)
+			continue;
+
+		rte_spinlock_lock(&(txq_lock[dst_port][txq].spinlock));
+#if PORTFWD_PERF_STATS
+		t_start = rte_rdtsc();
+#endif
+		sent = rte_eth_tx_burst(dst_port, txq,
+				tbl->m_table, pending);
+#if PORTFWD_PERF_STATS
+		log_stat(dst_port, txq, sent, pending - sent,
+				rte_rdtsc() - t_start, 1);
+#endif
+		rte_spinlock_unlock(&(txq_lock[dst_port][txq].spinlock));
+
+		total += sent;
+		/* drop whatever the tx queue refused */
+		for (i = sent; i < pending; ++i)
+			rte_pktmbuf_free(tbl->m_table[i]);
+		tbl->len = 0;
+	}
+	return total;
+}
+
+/*
+ * Route and queue len packets received on (src_port, src_queue).
+ *
+ * Every packet is treated as ethernet + ipv4. While at least four
+ * packets remain they are handled in pairs and the following two mbufs
+ * are prefetched; the rest is handled one by one.
+ *
+ * Returns the number of packets transmitted as a result of the bursts
+ * triggered by these packets.
+ */
+static inline uint32_t __attribute__((always_inline))
+batch_send_ipv4_packets(uint16_t src_port, uint16_t src_queue,
+		struct rte_mbuf *m[], uint16_t len)
+{
+	struct ipv4_hdr *hdr_a, *hdr_b;
+	uint16_t port_a, port_b;
+	uint16_t pos = 0;
+	uint16_t sent = 0;
+
+	while (len - pos >= 4) {
+		rte_prefetch0((void *)m[pos + 2]);
+		rte_prefetch0((void *)m[pos + 3]);
+		hdr_a = rte_pktmbuf_mtod_offset(m[pos], struct ipv4_hdr *,
+				sizeof(struct ether_hdr));
+		hdr_b = rte_pktmbuf_mtod_offset(m[pos + 1], struct ipv4_hdr *,
+				sizeof(struct ether_hdr));
+		port_a = get_ipv4_dst_port(hdr_a, src_port);
+		port_b = get_ipv4_dst_port(hdr_b, src_port);
+		sent += send_single_packet(port_a, src_port,
+				src_queue, m[pos]);
+		sent += send_single_packet(port_b, src_port,
+				src_queue, m[pos + 1]);
+		pos += 2;
+	}
+	for (; pos < len; ++pos) {
+		hdr_a = rte_pktmbuf_mtod_offset(m[pos], struct ipv4_hdr *,
+				sizeof(struct ether_hdr));
+		port_a = get_ipv4_dst_port(hdr_a, src_port);
+		sent += send_single_packet(port_a, src_port,
+				src_queue, m[pos]);
+	}
+
+	return sent;
+}
+
+/*
+ * Forwarding callback of the portfwd engine for one stream.
+ *
+ * First flushes the stream's tx buffers if the drain timeout elapsed
+ * (when draining is enabled), then receives one burst from the stream's
+ * rx queue, routes the packets and updates the stream counters.
+ *
+ * fwd_dropped is computed as received minus transmitted packets, so
+ * packets that are still sitting in a tx buffer are counted as well.
+ */
+static void
+pkt_burst_ip_forward(struct fwd_stream *fs)
+{
+	struct rte_mbuf *pkts_burst[DEF_PKT_BURST];
+	uint32_t nb_rx;
+	uint32_t nb_tx;
+	uint16_t src_port = fs->rx_port;
+	uint16_t src_queue = fs->rx_queue;
+	/*
+	 * Never request more packets than pkts_burst (and the statistics
+	 * buckets, which are indexed by burst size) can hold, even if a
+	 * larger burst size was configured.
+	 */
+	uint16_t burst = nb_pkt_per_burst < DEF_PKT_BURST ?
+			nb_pkt_per_burst : DEF_PKT_BURST;
+#if PORTFWD_PERF_STATS
+	uint64_t t[2];
+#endif
+
+	/* send timeout packets */
+	if (drain_cycle > 0) {
+		drainer[src_port][src_queue].cycle_now = rte_rdtsc();
+		if (drainer[src_port][src_queue].cycle_now -
+				drainer[src_port][src_queue].cycle_last >
+				drain_cycle) {
+			nb_tx = drain_packets(src_port, src_queue);
+			fs->tx_packets += nb_tx;
+			drainer[src_port][src_queue].cycle_last =
+				drainer[src_port][src_queue].cycle_now;
+		}
+	}
+
+#if PORTFWD_PERF_STATS
+	t[0] = rte_rdtsc();
+#endif
+	nb_rx = rte_eth_rx_burst(src_port, src_queue,
+			pkts_burst, burst);
+#if PORTFWD_PERF_STATS
+	t[1] = rte_rdtsc();
+	log_stat(src_port, src_queue, nb_rx, 0, t[1] - t[0], 0);
+#endif
+	if (unlikely(nb_rx == 0))
+		return;
+	fs->rx_packets += nb_rx;
+	nb_tx = 0;
+	/* assume ipv4 packet */
+	nb_tx += batch_send_ipv4_packets(src_port, src_queue,
+			pkts_burst, nb_rx);
+	fs->tx_packets += nb_tx;
+	fs->fwd_dropped = fs->rx_packets - fs->tx_packets;
+}
+
+/*
+ * Engine descriptor of portfwd, selected with the forwarding mode name
+ * "port". No per-port begin/end hooks are used; every stream is served
+ * by pkt_burst_ip_forward().
+ */
+struct fwd_engine port_fwd_engine = {
+	.fwd_mode_name  = "port",
+	.port_fwd_begin = NULL,
+	.port_fwd_end   = NULL,
+	.packet_fwd     = pkt_burst_ip_forward,
+};
diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 26a174c..3316400 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -138,11 +138,26 @@  portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];      /**< Port ids configuration. */
 struct fwd_stream **fwd_streams; /**< For each RX queue of each port. */
 streamid_t nb_fwd_streams;       /**< Is equal to (nb_ports * nb_rxq). */
 
+/* Drain timeout in TSC cycles, 0 disables draining; main() replaces this
+ * initial value through set_drain_interval_ns(). */
+unsigned long drain_cycle = 10000;
+/* Drain timestamps per (rx port, rx queue). */
+struct drain_counter drainer[RTE_MAX_ETHPORTS][MAX_RX_QUEUE];
+
+/* ipv4 address assigned to each port, in host byte order. */
+uint32_t ipv4_table[RTE_MAX_ETHPORTS];
+
+/* Masked dst ip suffix -> port, built by build_ipv4_route(). */
+uint32_t ipv4_route[MAX_IP_DIFF];
+/* 1 when ipv4_route was built without suffix conflicts. */
+uint32_t ipv4_route_available;
+/* Per source port: fixed destination port, or PORT_ROUTE_IP. */
+uint32_t route_table[RTE_MAX_ETHPORTS];
+
+/* Tx buffers per (rx port, rx queue). */
+struct fwd_stream_buffer fs_buf[RTE_MAX_ETHPORTS][MAX_RX_QUEUE];
+/* One lock per (tx port, tx queue). */
+struct queue_lock txq_lock[RTE_MAX_ETHPORTS][MAX_TX_QUEUE];
+
+/* Set to 1 once port_fwd_config_setup() ran; cleared when a forwarding
+ * mode is selected. */
+volatile int fwd_config_init;
+
 /*
  * Forwarding engines.
  */
 struct fwd_engine * fwd_engines[] = {
 	&io_fwd_engine,
+	&port_fwd_engine,
 	&mac_fwd_engine,
 	&mac_retry_fwd_engine,
 	&mac_swap_engine,
@@ -1002,6 +1017,7 @@  start_packet_forwarding(int with_tx_first)
 	if(!no_flush_rx)
 		flush_fwd_rx_queues();
 
+	init_txq_lock();
 	fwd_config_setup();
 	rxtx_config_display();
 
@@ -2063,6 +2079,9 @@  main(int argc, char** argv)
 		       "but nb_txq=%d will prevent to fully test it.\n",
 		       nb_rxq, nb_txq);
 
+	add_port_addr();
+	set_drain_interval_ns(DRAIN_INTERVAL_NS);
+
 	init_config();
 	if (start_port(RTE_PORT_ALL) != 0)
 		rte_exit(EXIT_FAILURE, "Start ports failed\n");
diff --git a/app/test-pmd/testpmd.h b/app/test-pmd/testpmd.h
index 0f72ca1..7cbfc5f 100644
--- a/app/test-pmd/testpmd.h
+++ b/app/test-pmd/testpmd.h
@@ -34,6 +34,8 @@ 
 #ifndef _TESTPMD_H_
 #define _TESTPMD_H_
 
+#include <rte_spinlock.h>
+
 #define RTE_PORT_ALL            (~(portid_t)0x0)
 
 #define RTE_TEST_RX_DESC_MAX    2048
@@ -103,6 +105,8 @@  struct fwd_stream {
 	queueid_t  tx_queue;  /**< TX queue to send forwarded packets */
 	streamid_t peer_addr; /**< index of peer ethernet address of packets */
 
+	unsigned int idx;
+
 	/* "read-write" results */
 	unsigned int rx_packets;  /**< received packets */
 	unsigned int tx_packets;  /**< received packets transmitted */
@@ -221,6 +225,7 @@  struct fwd_engine {
 };
 
 extern struct fwd_engine io_fwd_engine;
+extern struct fwd_engine port_fwd_engine;
 extern struct fwd_engine mac_fwd_engine;
 extern struct fwd_engine mac_retry_fwd_engine;
 extern struct fwd_engine mac_swap_engine;
@@ -329,6 +334,48 @@  extern portid_t nb_fwd_ports; /**< Number of forwarding ports. */
 extern portid_t fwd_ports_ids[RTE_MAX_ETHPORTS];
 extern struct rte_port *ports;
 
+/* Queue dimensions of the per-queue portfwd tables. */
+#define MAX_RX_QUEUE 32
+#define MAX_TX_QUEUE 32
+/* route_table value meaning "route by destination ip". */
+#define PORT_ROUTE_IP 65536
+/* Mask applied to the dst ip to index ipv4_route. */
+#define IPV4_ROUTE_MASK	0x3F
+/* Number of ipv4_route slots. */
+#define MAX_IP_DIFF	((IPV4_ROUTE_MASK) + 1)
+/* Drain timeout passed to set_drain_interval_ns() at startup. */
+#define DRAIN_INTERVAL_NS 5000
+
+/* Packets buffered for one destination port. */
+struct mbuf_table {
+	uint16_t len; /* number of mbufs currently stored in m_table */
+	struct rte_mbuf *m_table[DEF_PKT_BURST];
+};
+
+/* Tx buffers of one rx queue: one table per destination port. */
+struct fwd_stream_buffer {
+	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+	uint16_t dst_queue; /* tx queue used on every destination port */
+} __rte_cache_aligned;
+
+/* TSC timestamps used to decide when buffers must be drained. */
+struct drain_counter {
+	unsigned long cycle_now;  /* timestamp of the current poll */
+	unsigned long cycle_last; /* timestamp of the last drain */
+} __rte_cache_aligned;
+
+/* Spinlock protecting one tx queue, padded to its own cache line. */
+struct queue_lock {
+	rte_spinlock_t spinlock;
+} __rte_cache_aligned;
+
+extern unsigned long drain_cycle;
+extern struct drain_counter drainer[RTE_MAX_ETHPORTS][MAX_RX_QUEUE];
+
+extern streamid_t nb_fwd_streams;
+
+extern uint32_t ipv4_table[RTE_MAX_ETHPORTS];
+
+extern uint32_t route_table[RTE_MAX_ETHPORTS];
+extern uint32_t ipv4_route[MAX_IP_DIFF];
+extern uint32_t ipv4_route_available;
+
+extern struct fwd_stream_buffer fs_buf[RTE_MAX_ETHPORTS][MAX_RX_QUEUE];
+extern struct queue_lock txq_lock[RTE_MAX_ETHPORTS][MAX_TX_QUEUE];
+
+extern volatile int fwd_config_init;
+
 extern struct rte_eth_rxmode rx_mode;
 extern uint64_t rss_hf;
 
@@ -575,6 +622,21 @@  void mcast_addr_add(uint8_t port_id, struct ether_addr *mc_addr);
 void mcast_addr_remove(uint8_t port_id, struct ether_addr *mc_addr);
 void port_dcb_info_display(uint8_t port_id);
 
+void set_fixed_route(uint16_t srcp, uint16_t dstp);
+void set_ip_route(uint16_t srcp);
+void show_route(void);
+void init_txq_lock(void);
+void set_ip(uint32_t srcp, uint32_t num0, uint32_t num1,
+		uint32_t num2, uint32_t num3);
+void add_port_addr(void);
+void build_ipv4_route(void);
+void set_drain_interval_ns(unsigned long drain_ns);
+void print_perf_stats(void);
+void clear_perf_stats(void);
+int set_fwd_stream_affinity(unsigned int idx, unsigned int core);
+void print_port_info(void);
+
+
 enum print_warning {
 	ENABLED_WARN = 0,
 	DISABLED_WARN