From patchwork Wed Sep 17 10:01:00 2014
X-Patchwork-Submitter: Daniel Mrzyglod
X-Patchwork-Id: 393
From: Daniel Mrzyglod
To: dev@dpdk.org
Date: Wed, 17 Sep 2014 11:01:00 +0100
Message-Id: <1410948060-31098-1-git-send-email-danielx.t.mrzyglod@intel.com>
Subject: [dpdk-dev] [PATCH] Add mode 5 (TLB) to link bonding PMD

This patch set adds support for mode 5 (adaptive transmit load
balancing) to the link bonding PMD.

This patch set depends on Declan Doherty's patch set:
http://dpdk.org/ml/archives/dev/2014-September/005069.html

Signed-off-by: Daniel Mrzyglod
---
 lib/librte_pmd_bond/rte_eth_bond.h         |  23 ++++
 lib/librte_pmd_bond/rte_eth_bond_args.c    |   1 +
 lib/librte_pmd_bond/rte_eth_bond_pmd.c     | 163 +++++++++++++++++++++++++++-
 lib/librte_pmd_bond/rte_eth_bond_private.h |   5 +-
 4 files changed, 189 insertions(+), 3 deletions(-)

diff --git a/lib/librte_pmd_bond/rte_eth_bond.h b/lib/librte_pmd_bond/rte_eth_bond.h
index bd59780..1bd76ce 100644
--- a/lib/librte_pmd_bond/rte_eth_bond.h
+++ b/lib/librte_pmd_bond/rte_eth_bond.h
@@ -75,6 +75,29 @@ extern "C" {
 /**< Broadcast (Mode 3).
  * In this mode all transmitted packets will be transmitted on all available
  * active slaves of the bonded. */
+#define BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING	(5)
+/**< Adaptive transmit load balancing (Mode 5).
+ * Channel bonding that does not require any special
+ * switch support. The outgoing traffic is distributed
+ * according to the current load (computed relative to
+ * the speed) on each slave. Incoming traffic is
+ * received by the current slave. If the receiving
+ * slave fails, another slave takes over the MAC
+ * address of the failed receiving slave. */
+#define BONDING_MODE_ADAPTIVE_LOAD_BALANCING	(6)
+/**< Adaptive load balancing (Mode 6).
+ * Includes balance-tlb plus receive load balancing
+ * (rlb) for IPv4 traffic, and does not require any
+ * special switch support. The receive load balancing
+ * is achieved by ARP negotiation. The bonding driver
+ * intercepts the ARP Replies sent by the local system
+ * on their way out and overwrites the source hardware
+ * address with the unique hardware address of one of
+ * the slaves in the bond, such that different peers
+ * use different hardware addresses for the server. */
+
 /* Balance Mode Transmit Policies */
 #define BALANCE_XMIT_POLICY_LAYER2		(0)
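For reference, an application would select the new mode through the existing
bonding API roughly as follows. This is a minimal sketch, not part of the
patch: the device name, socket id and port ids are illustrative, and
rte_eth_bond_create()/rte_eth_bond_slave_add() come from the bonding library
this patch extends.

    #include <rte_eth_bond.h>

    /* Create a bonded device in mode 5 (TLB) on NUMA socket 0 and
     * attach two already-configured slave ports. */
    static int
    setup_tlb_bond(void)
    {
        int bonded_port_id;

        bonded_port_id = rte_eth_bond_create("eth_bond_tlb0",
                BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING, 0);
        if (bonded_port_id < 0)
            return -1;

        if (rte_eth_bond_slave_add(bonded_port_id, 0) != 0 ||
                rte_eth_bond_slave_add(bonded_port_id, 1) != 0)
            return -1;

        return bonded_port_id;
    }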
diff --git a/lib/librte_pmd_bond/rte_eth_bond_args.c b/lib/librte_pmd_bond/rte_eth_bond_args.c
index 11d9816..bb1cfae 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_args.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_args.c
@@ -170,6 +170,7 @@ bond_ethdev_parse_slave_mode_kvarg(const char *key __rte_unused,
 	case BONDING_MODE_ACTIVE_BACKUP:
 	case BONDING_MODE_BALANCE:
 	case BONDING_MODE_BROADCAST:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 		return 0;
 	default:
 		return -1;
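The driver changes below keep the active slave list sorted by the bandwidth
each slave has left, using qsort(3) with a two-key comparator. A
self-contained illustration of that ordering (the struct and comparator
mirror the bwg_slave/bandwidth_cmp added to rte_eth_bond_pmd.c below, written
with explicit comparisons instead of subtraction; the numbers are made up):

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    struct bwg_slave {
        uint64_t bwg_left_int;
        uint64_t bwg_left_remainder;
        uint8_t slave;
    };

    /* Sort descending: the slave with the most bandwidth left first. */
    static int
    bandwidth_cmp(const void *a, const void *b)
    {
        const struct bwg_slave *bwg_a = a;
        const struct bwg_slave *bwg_b = b;

        if (bwg_b->bwg_left_int != bwg_a->bwg_left_int)
            return bwg_b->bwg_left_int > bwg_a->bwg_left_int ? 1 : -1;
        if (bwg_b->bwg_left_remainder != bwg_a->bwg_left_remainder)
            return bwg_b->bwg_left_remainder >
                    bwg_a->bwg_left_remainder ? 1 : -1;
        return 0;
    }

    int
    main(void)
    {
        /* Three slaves with different amounts of bandwidth left. */
        struct bwg_slave bwg[] = {
            { 0, 5000000000ULL, 0 },
            { 0, 10000000000ULL, 1 },
            { 0, 7500000000ULL, 2 },
        };

        qsort(bwg, 3, sizeof(bwg[0]), bandwidth_cmp);

        /* Prints "1 2 0": the least loaded slave is tried first. */
        for (int i = 0; i < 3; i++)
            printf("%d ", bwg[i].slave);
        printf("\n");
        return 0;
    }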
diff --git a/lib/librte_pmd_bond/rte_eth_bond_pmd.c b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
index 38cc1ae..9c4c174 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_pmd.c
+++ b/lib/librte_pmd_bond/rte_eth_bond_pmd.c
@@ -30,7 +30,7 @@
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-
+#include <stdlib.h>
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
@@ -40,10 +40,14 @@
 #include <rte_devargs.h>
 #include <rte_kvargs.h>
 #include <rte_dev.h>
+#include <rte_alarm.h>
+#include <rte_cycles.h>
 
 #include "rte_eth_bond.h"
 #include "rte_eth_bond_private.h"
 
+#define REORDER_PERIOD_MS 10
+
 static uint16_t
 bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -286,6 +290,141 @@ xmit_slave_hash(const struct rte_mbuf *buf, uint8_t slave_count, uint8_t policy)
 	return hash % slave_count;
 }
 
+struct bwg_slave {
+	uint64_t bwg_left_int;
+	uint64_t bwg_left_remainder;
+	uint8_t slave;
+};
+
+static int
+bandwidth_cmp(const void *a, const void *b)
+{
+	const struct bwg_slave *bwg_a = a;
+	const struct bwg_slave *bwg_b = b;
+	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
+	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
+			(int64_t)bwg_a->bwg_left_remainder;
+
+	if (diff > 0)
+		return 1;
+	else if (diff < 0)
+		return -1;
+	else if (diff2 > 0)
+		return 1;
+	else if (diff2 < 0)
+		return -1;
+	else
+		return 0;
+}
+
+static void
+bandwidth_left(int port_id, uint64_t load, uint8_t update_idx,
+		struct bwg_slave *bwg_slave)
+{
+	struct rte_eth_link link_status;
+
+	rte_eth_link_get(port_id, &link_status);
+	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
+	if (link_bwg == 0)
+		return;
+	link_bwg = link_bwg * (update_idx + 1) * REORDER_PERIOD_MS;
+	bwg_slave->bwg_left_int = (link_bwg - 1000 * load) / link_bwg;
+	bwg_slave->bwg_left_remainder = (link_bwg - 1000 * load) % link_bwg;
+}
+
+static void
+bond_ethdev_update_tlb_slave_cb(void *arg)
+{
+	struct bond_dev_private *internals = arg;
+	struct rte_eth_stats slave_stats;
+	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
+	uint8_t slave_count;
+	uint64_t tx_bytes;
+	uint8_t update_stats = 0;
+	int8_t i;
+
+	internals->slave_update_idx++;
+
+	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
+		update_stats = 1;
+
+	for (i = 0; i < internals->active_slave_count; i++) {
+		rte_eth_stats_get(internals->active_slaves[i], &slave_stats);
+		tx_bytes = slave_stats.obytes -
+				internals->presisted_slaves_conf[i].last_obytes;
+		bandwidth_left(internals->active_slaves[i], tx_bytes,
+				internals->slave_update_idx, &bwg_array[i]);
+		bwg_array[i].slave = internals->active_slaves[i];
+
+		if (update_stats)
+			internals->presisted_slaves_conf[i].last_obytes += tx_bytes;
+	}
+
+	if (update_stats == 1)
+		internals->slave_update_idx = 0;
+
+	slave_count = i;
+	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
+	for (i = 0; i < slave_count; i++)
+		internals->active_slaves[i] = bwg_array[i].slave;
+
+	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
+			(struct bond_dev_private *)internals);
+}
+
+static uint16_t
+bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
+	struct bond_dev_private *internals = bd_tx_q->dev_private;
+
+	struct rte_eth_dev *primary_port =
+			&rte_eth_devices[internals->primary_port];
+	uint16_t num_tx_total = 0;
+	uint8_t i, j;
+
+	uint8_t num_of_slaves = internals->active_slave_count;
+	uint8_t slaves[RTE_MAX_ETHPORTS];
+
+	struct ether_hdr *ether_hdr;
+	struct ether_addr primary_slave_addr;
+	struct ether_addr active_slave_addr;
+
+	if (num_of_slaves < 1)
+		return num_tx_total;
+
+	memcpy(slaves, internals->active_slaves,
+			sizeof(internals->active_slaves[0]) * num_of_slaves);
+
+	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);
+
+	if (nb_pkts > 3) {
+		for (i = 0; i < 3; i++)
+			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
+	}
+
+	for (i = 0; i < num_of_slaves; i++) {
+		ether_addr_copy(&internals->presisted_slaves_conf[slaves[i]].mac_addr,
+				&active_slave_addr);
+
+		for (j = num_tx_total; j < nb_pkts; j++) {
+			if (j + 3 < nb_pkts)
+				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));
+
+			ether_hdr = bufs[j]->pkt.data;
+			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
+				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
+		}
+
+		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
+				bufs + num_tx_total, nb_pkts - num_tx_total);
+
+		if (num_tx_total == nb_pkts)
+			break;
+	}
+
+	return num_tx_total;
+}
+
 static uint16_t
 bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 		uint16_t nb_pkts)
@@ -495,6 +634,7 @@ mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
 		}
 		break;
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		for (i = 0; i < internals->slave_count; i++) {
 			if (internals->slaves[i] == internals->current_primary_port) {
@@ -547,6 +687,10 @@ bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
 		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
 		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
 		break;
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
+		eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
+		eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
+		break;
 	default:
 		return -1;
 	}
@@ -742,7 +886,9 @@ bond_ethdev_start(struct rte_eth_dev *eth_dev)
 	if (internals->user_defined_primary_port)
 		bond_ethdev_primary_set(internals, internals->primary_port);
-
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		bond_ethdev_update_tlb_slave_cb(internals);
+	}
 	return 0;
 }
 
@@ -751,6 +897,10 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 {
 	struct bond_dev_private *internals = eth_dev->data->dev_private;
 
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+		rte_delay_ms(REORDER_PERIOD_MS + 1);
+	}
 	internals->active_slave_count = 0;
 
 	eth_dev->data->dev_link.link_status = 0;
@@ -760,6 +910,13 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 static void
 bond_ethdev_close(struct rte_eth_dev *dev __rte_unused)
 {
+	struct bond_dev_private *internals = dev->data->dev_private;
+
+	if (internals->mode == BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING) {
+		rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
+		rte_delay_ms(REORDER_PERIOD_MS + 1);
+	}
+	internals->active_slave_count = 0;
 }
 
 static int
@@ -938,6 +1095,7 @@ bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
 		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		rte_eth_promiscuous_enable(internals->current_primary_port);
 
@@ -962,6 +1120,7 @@ bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
 		break;
 	/* Promiscuous mode is propagated only to primary slave */
 	case BONDING_MODE_ACTIVE_BACKUP:
+	case BONDING_MODE_ADAPTIVE_TRANSMIT_LOAD_BALANCING:
 	default:
 		rte_eth_promiscuous_disable(internals->current_primary_port);
 	}
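A worked example of the bandwidth_left() arithmetic above, with illustrative
numbers: for a 10 Gb/s slave, link_speed is 10000 (Mb/s), so link_bwg =
10000 * 1000000 / 8 = 1.25e9 bytes/s; with update_idx = 0 this is then scaled
to 1.25e9 * 1 * REORDER_PERIOD_MS = 1.25e10. A slave that transmitted
load = 5,000,000 bytes since the last snapshot gives 1000 * load = 5.0e9,
hence bwg_left_int = (1.25e10 - 5.0e9) / 1.25e10 = 0 and bwg_left_remainder =
7.5e9. The integer part is zero for any non-idle slave, so in practice the
remainder decides the order: a slave that sent only 2,500,000 bytes ends up
with remainder 1.0e10 and is therefore placed ahead of the busier one by
bandwidth_cmp().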
diff --git a/lib/librte_pmd_bond/rte_eth_bond_private.h b/lib/librte_pmd_bond/rte_eth_bond_private.h
index 60f1e8d..9cc7915 100644
--- a/lib/librte_pmd_bond/rte_eth_bond_private.h
+++ b/lib/librte_pmd_bond/rte_eth_bond_private.h
@@ -88,6 +88,7 @@ struct slave_conf {
 	/**< Port Id of slave eth_dev */
 	struct ether_addr mac_addr;
 	/**< Slave eth_dev original MAC address */
+	uint64_t last_obytes;
 };
 
 /** Bonded slave devices structure */
 struct bond_ethdev_slave_ports {
@@ -120,9 +121,11 @@ struct bond_dev_private {
 	uint8_t active_slaves[RTE_MAX_ETHPORTS];	/**< Active slave list */
 	uint8_t slaves[RTE_MAX_ETHPORTS];		/**< Slave list */
-
+	uint8_t slaves_order[RTE_MAX_ETHPORTS];
+	uint8_t slave_update_idx;
 	/** Persisted configuration of slaves */
 	struct slave_conf presisted_slaves_conf[RTE_MAX_ETHPORTS];
+
 };
 
 extern struct eth_dev_ops default_dev_ops;
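The start/stop/close changes above follow the EAL self-rearming alarm
pattern. A minimal standalone sketch of that pattern, assuming only
rte_eal_alarm_set()/rte_eal_alarm_cancel() and rte_delay_ms(); the function
names and the context pointer are hypothetical:

    #include <rte_alarm.h>
    #include <rte_cycles.h>

    #define PERIOD_US (10 * 1000)   /* 10 ms, matching REORDER_PERIOD_MS */

    static void
    periodic_cb(void *arg)
    {
        /* ... per-period work on 'arg', e.g. re-ordering slaves ... */

        /* rte_eal_alarm_set() is one-shot, so the callback re-arms
         * itself, as bond_ethdev_update_tlb_slave_cb() does above. */
        rte_eal_alarm_set(PERIOD_US, periodic_cb, arg);
    }

    static void
    start_periodic(void *ctx)
    {
        /* First invocation arms the chain (cf. bond_ethdev_start). */
        periodic_cb(ctx);
    }

    static void
    stop_periodic(void *ctx)
    {
        /* Cancel any pending instance, then wait one period so an
         * in-flight callback cannot re-arm (cf. bond_ethdev_stop). */
        rte_eal_alarm_cancel(periodic_cb, ctx);
        rte_delay_ms(PERIOD_US / 1000 + 1);
    }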