[dpdk-dev,v3,1/2] This patch adds support for mode 5 to the link bonding PMD

Message ID 1417022002-9213-2-git-send-email-danielx.t.mrzyglod@intel.com (mailing list archive)
State Superseded, archived

Commit Message

Daniel Mrzyglod Nov. 26, 2014, 5:13 p.m. UTC
  v3 changes:
Rebased the patch onto HEAD of origin/master.
Moved the unit tests to a separate patch (v3 2/2).

v2 changes:
Added unit tests.
Updated the obytes statistics in the virtual PMD driver.
Changed internals->slaves[i].last_obytes to hold proper values.
Rebased the code on Declan's patches.

v1 changes:
Added support for mode 5 (transmit load balancing) to the PMD driver.

This mode provides adaptive transmit load balancing. It dynamically changes
the transmitting slave according to the computed load. Slave statistics are
collected at 100 ms intervals, while the update callback that reorders the
slaves is scheduled every 10 ms.
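
For context, a minimal sketch of how an application could create a bonded
device running in this mode through the existing bonding API. The helper
name, device name and port ids below are hypothetical and not part of this
patch; only the mode constant is introduced here:

#include <rte_eth_bond.h>

/* Hypothetical helper: create a bonded device in mode 5 (TLB) on socket 0
 * and attach two slave ports to it. */
static int
setup_tlb_bond(uint8_t slave0_port_id, uint8_t slave1_port_id)
{
	int bonded_port_id;

	bonded_port_id = rte_eth_bond_create("eth_bond_tlb0",
			BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING, 0);
	if (bonded_port_id < 0)
		return -1;

	if (rte_eth_bond_slave_add(bonded_port_id, slave0_port_id) != 0 ||
			rte_eth_bond_slave_add(bonded_port_id, slave1_port_id) != 0)
		return -1;

	/* The bonded port is then configured and started like any other
	 * ethdev; the TLB update callback is armed in bond_ethdev_start(). */
	return bonded_port_id;
}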

Signed-off-by: Daniel Mrzyglod <danielx.t.mrzyglod@intel.com>
---
 lib/librte_pmd_bond/rte_eth_bond.h         |  11 ++
 lib/librte_pmd_bond/rte_eth_bond_args.c    |   1 +
 lib/librte_pmd_bond/rte_eth_bond_pmd.c     | 160 ++++++++++++++++++++++++++++-
 lib/librte_pmd_bond/rte_eth_bond_private.h |   2 +-
 4 files changed, 171 insertions(+), 3 deletions(-)
  

Patch

diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index 085500b..29b9a89 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -77,6 +77,17 @@  extern "C" {
  * In this mode all transmitted packets will be transmitted on all available
  * active slaves of the bonded. */
 #endif
+#define BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING	(5)
+/**< Adaptive TLB (Mode 5)
+ * Adaptive transmit load balancing: channel bonding that
+ * does not require any special switch support.  The
+ * outgoing traffic is distributed according to the
+ * current load (computed relative to the speed) on each
+ * slave.  Incoming traffic is received by the current
+ * slave.  If the receiving slave fails, another slave
+ * takes over the MAC address of the failed receiving
+ * slave.*/
+
 /* Balance Mode Transmit Policies */
 #define BALANCE_XMIT_POLICY_LAYER2		(0)
 /**< Layer 2 (Ethernet MAC) */
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index d8ce681..2675cf6 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -173,6 +173,7 @@  bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
 #ifdef RTE_MBUF_REFCNT
 	case BONDING_MODE_BROADCAST:
 #endif
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 		return 0;
 	default:
 		RTE_BOND_LOG(ERR, "Invalid slave mode value (%s) specified", value);
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index cf2fbab..7a5dae6 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -30,7 +30,7 @@ 
  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-
+#include <stdlib.h>
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -41,10 +41,15 @@ 
 #include <rte_kvargs.h>
 #include <rte_dev.h>
 #include <rte_alarm.h>
+#include <rte_cycles.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
 
+#define REORDER_PERIOD_MS 10
+/* Table for statistics in mode 5 TLB */
+static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];
+
 static uint16_t
 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -288,6 +293,144 @@  xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
 	return hash % slave_count;
 }
 
+struct bwg_slave {
+	uint64_t bwg_left_int;
+	uint64_t bwg_left_remainder;
+	uint8_t slave;
+};
+
+static int
+bandwidth_cmp(const void *a, const void *b)
+{
+	const struct bwg_slave *bwg_a = a;
+	const struct bwg_slave *bwg_b = b;
+	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
+	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
+			(int64_t)bwg_a->bwg_left_remainder;
+	if (diff > 0)
+		return 1;
+	else if (diff < 0)
+		return -1;
+	else if (diff2 > 0)
+		return 1;
+	else if (diff2 < 0)
+		return -1;
+	else
+		return 0;
+}
+
+static void
+bandwidth_left(int port_id, uint64_t load, uint8_t update_idx,
+		struct bwg_slave *bwg_slave)
+{
+	struct rte_eth_link link_status;
+
+	rte_eth_link_get(port_id, &link_status);
+	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
+	if (link_bwg == 0)
+		return;
+	link_bwg = (link_bwg * (update_idx+1) * REORDER_PERIOD_MS);
+	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
+	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
+}
+
+static void
+bond_ethdev_update_tlb_slave_cb(void *arg)
+{
+	struct bond_dev_private *internals = arg;
+	struct rte_eth_stats slave_stats;
+	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
+	uint8_t slave_count;
+	uint64_t tx_bytes;
+
+	uint8_t update_stats = 0;
+	uint8_t i, slave_id;
+
+	internals->slave_update_idx++;
+
+
+	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
+		update_stats = 1;
+
+	for (i = 0; i < internals->active_slave_count; i++) {
+		slave_id = internals->active_slaves[i];
+		rte_eth_stats_get(slave_id, &slave_stats);
+		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
+		bandwidth_left(slave_id, tx_bytes,
+				internals->slave_update_idx, &bwg_array[i]);
+		bwg_array[i].slave = slave_id;
+
+		if (update_stats)
+			tlb_last_obytets[slave_id] = slave_stats.obytes;
+	}
+
+	if (update_stats == 1)
+		internals->slave_update_idx = 0;
+
+	slave_count = i;
+	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
+	for (i = 0; i < slave_count; i++)
+		internals->active_slaves[i] = bwg_array[i].slave;
+
+	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
+			(struct bond_dev_private *)internals);
+}
+
+static uint16_t
+bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	struct rte_eth_dev *primary_port =
+			&rte_eth_devices[internals->primary_port];
+	uint16_t num_tx_total = 0;
+	uint8_t i, j;
+
+	uint8_t num_of_slaves = internals->active_slave_count;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+
+	struct ether_hdr *ether_hdr;
+	struct ether_addr primary_slave_addr;
+	struct ether_addr active_slave_addr;
+
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves,
+				sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+
+	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
+
+	if (nb_pkts > 3) {
+		for (i = 0; i < 3; i++)
+			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
+	}
+
+	for (i = 0; i < num_of_slaves; i++) {
+		ether_addr_copy(&internals->slaves[slaves[i]].persisted_mac_addr,
+				&active_slave_addr);
+
+		for (j = num_tx_total; j < nb_pkts; j++) {
+			if (j + 3 < nb_pkts)
+				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
+
+			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
+			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
+				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
+		}
+
+		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+				bufs + num_tx_total, nb_pkts - num_tx_total);
+
+		if (num_tx_total == nb_pkts)
+			break;
+	}
+
+	return num_tx_total;
+}
+
 static uint16_t
 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 		uint16_t nb_pkts)
@@ -500,6 +643,7 @@  mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
 		}
 		break;
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		for (i = 0; i < internals->slave_count; i++) {
 			if (internals->slaves[i].port_id ==
@@ -551,6 +695,10 @@  bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
 		break;
 #endif
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
+		break;
 	default:
 		return -1;
 	}
@@ -676,7 +824,7 @@  slave_add(struct bond_dev_private *internals,
 	}
 
 	slave_details->link_status_wait_to_complete = 0;
-
+	/* clean tlb_last_obytes when adding port for bonding device */
 	memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
 			sizeof(struct ether_addr));
 }
@@ -762,6 +910,9 @@  bond_ethdev_start(struct rte_eth_dev *eth_dev)
 	if (internals->user_defined_primary_port)
 		bond_ethdev_primary_set(internals, internals->primary_port);
 
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING)
+		bond_ethdev_update_tlb_slave_cb(internals);
+
 	return 0;
 }
 
@@ -770,6 +921,9 @@  bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+    }
 	internals->active_slave_count = 0;
 	internals->link_status_polling_enabled = 0;
 
@@ -1016,6 +1170,7 @@  bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
 		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		rte_eth_promiscuous_enable(internals->current_primary_port);
 
@@ -1042,6 +1197,7 @@  bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
 		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		rte_eth_promiscuous_disable(internals->current_primary_port);
 	}
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 6254c84..2a4e129 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -102,7 +102,6 @@  struct bond_slave_details {
 	uint8_t link_status_poll_enabled;
 	uint8_t link_status_wait_to_complete;
 	uint8_t last_link_status;
-
 	/**< Port Id of slave eth_dev */
 	struct ether_addr persisted_mac_addr;
 };
@@ -145,6 +144,7 @@  struct bond_dev_private {
 	/**< Arary of bonded slaves details */
 
 	struct rte_kvargs *kvlist;
+	uint8_t slave_update_idx;
 };
 
 extern struct eth_dev_ops default_dev_ops;